# Overview

All of the functions below require `matplotlib.pyplot as plt`, and each one requires the relevant model class from `sklearn`. Most also require `pandas`, `numpy`, or both.

## Mine

### DBSCAN params visualization

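I created the following function to iterate through possible `eps` and `min_samples` parameters. First define a list of candidate values for each parameter, then build the *combination* of pairs with `list(itertools.product(...))` (a list is needed because the function takes `len()` of it and indexes into it), and finally call the function below with that combination object.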

In [2]:
def DBSCAN_test(data, combination, graph_number):
    """Plot DBSCAN clusterings of ``data`` for several parameter pairs.

    One subplot is drawn per ``(eps, min_samples)`` pair. Each subplot is
    titled with its pair, and every label returned by ``fit_predict``
    gets its own scatter series.

    Parameters
    ----------
    data : np.ndarray
        2-D array of samples. All columns are passed to DBSCAN; column 0
        is drawn on the x axis and column 1 on the y axis.
    combination : iterable of (eps, min_samples)
        Parameter pairs, e.g.
        ``list(itertools.product(eps_values, min_samples_values))`` or an
        equivalent two-column array.
    graph_number : sequence of two numbers
        ``[rows, cols]`` of the subplot grid. Values are truncated to int,
        so the result of ``math.sqrt`` can be passed directly. The grid
        needs at least as many cells as there are pairs; for the most
        readable layout use a square grid.

    Example
    -------
        data = np.array([[1, 3], [2, 4], [3, 5], [4, 6]])
        combination = list(itertools.product([0.1, 5], [1, 2, 20]))
        DBSCAN_test(data, combination, [2, 3])
            --> a 2x3 grid of subplots, each showing the input points
                coloured by the DBSCAN outcome for one pair
    """
    n_rows, n_cols = int(graph_number[0]), int(graph_number[1])

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so that
    # `axes.flat` is always available.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 12), squeeze=False)

    for i, combo in enumerate(combination):
        eps, min_samples = combo[0], combo[1]
        # Pairs stored in a float ndarray turn min_samples into e.g. 5.0;
        # hand DBSCAN a real int whenever the value is integral.
        if float(min_samples).is_integer():
            min_samples = int(min_samples)

        db = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean')
        labels = db.fit_predict(data)

        ax = axes.flat[i]
        for cluster in set(labels):
            members = labels == cluster
            ax.scatter(data[members, 0], data[members, 1],
                       s=50, marker='s', label=f"cluster {cluster}")
        ax.set_title(combo)

    # Lay out once everything (including titles) has been drawn.
    fig.tight_layout()

### Radar chart clustering

In [1]:
def radar_plot(data, cluster, 
               title = "Clustering", 
               save=False, 
               cluster_map=False):
    """Draw a radar (polar) chart of per-cluster feature means.

    Parameters
    ----------
    data : pd.DataFrame
        Feature table; every column becomes one spoke of the radar.
    cluster : array-like
        One cluster label per row of ``data``, in the same row order.
        Groups with a negative label (e.g. DBSCAN noise) are drawn in red
        and labelled "Outliers".
    title : str
        Plot title, 'Clustering' by default.
    save : str or False
        If a path is given the figure is written there with
        ``plt.savefig`` and not shown; otherwise it is shown.
    cluster_map : int or False
        How many of the groups (in ascending label order) to draw. Must
        be an integer > 0; when left falsy, or when larger than the
        number of groups, every group is drawn.
    """
    # Attach the labels by position rather than via pd.concat, which
    # aligns on index and would misplace labels whenever `data` does not
    # carry a default 0..n-1 index.
    data_cluster = data.copy()
    data_cluster['cluster'] = np.asarray(cluster).ravel()

    # Mean of every feature per cluster; the index holds the cluster labels
    clustered = data_cluster.groupby('cluster').mean()

    # The first entry is repeated at the end so each polygon closes
    categories = [*clustered.columns, clustered.columns[0]]
    radar = [[*row, row[0]] for row in clustered.to_numpy()]

    # One angle per category entry, spread over the full circle
    label_loc = np.linspace(start=0, stop=2 * np.pi,
                            num=len(categories))
    colors = ['royalblue', 'darkorange', 'forestgreen', 'sienna', 'mediumpurple']

    plt.figure(figsize=(8, 8))
    plt.subplot(polar=True)

    # Never try to draw more groups than exist
    n_drawn = min(int(cluster_map), len(radar)) if cluster_map else len(radar)

    for i in range(n_drawn):
        label = clustered.index[i]
        if label < 0:
            plt.plot(label_loc, radar[i], label="Outliers", color='red')
        else:
            # Legend shows the real cluster label; the palette is cycled
            # so more than five groups do not run off the end of it.
            plt.plot(label_loc, radar[i], label=f"Cluster {label}",
                     color=colors[i % len(colors)])

    plt.thetagrids(np.degrees(label_loc), labels=categories)

    plt.title(title)
    plt.legend()

    if save:
        plt.savefig(save)
    else:
        plt.show()

## Lighthouse

### Cluster Plots

The following function plots the outcome of a clustering model, drawing each cluster in its own colour.

In [3]:
# Scatter-plot the samples of each cluster found by a fitted model
def plot_clusters(data, data_fit, plt_cluster_centers = False):
    """Show a scatter plot of ``data`` with one series per cluster label.

    data is indexed as a 2-D array: column 0 is drawn on x, column 1 on y.
    data_fit holds one cluster label per row of data.
    plt_cluster_centers, when truthy, also marks the mean point of every
    cluster with a red star.
    """
    centre_xs, centre_ys = [], []

    for label in set(data_fit):
        # rows that belong to the current cluster
        members = data_fit == label
        xs, ys = data[members, 0], data[members, 1]

        # remember this cluster's mean position
        centre_xs.append(np.mean(xs))
        centre_ys.append(np.mean(ys))

        plt.scatter(xs, ys, s=50, marker='s', label=f"cluster {label}")

    # optional overlay of the per-cluster means
    if plt_cluster_centers:
        plt.scatter(centre_xs, centre_ys,
                    marker='*', c='red', s=250, label='centroids')

    plt.legend()
    plt.grid()
    plt.show()

### Kmeans Elbow Plot

In [5]:
# Elbow plot: fit KMeans for k = 1..max_clusters and chart the distortion
def plot_distortion(data, max_clusters = 10):
    """Fit one KMeans model per cluster count and plot inertia against k.

    data is passed straight to ``KMeans.fit``.
    max_clusters is the largest number of clusters tried (inclusive).
    """
    k_values = range(1, max_clusters + 1)

    # the fitted model's .inertia_ attribute is the distortion for that k
    distortions = [
        KMeans(n_clusters=k,
               init='k-means++',
               n_init=10,
               random_state=0).fit(data).inertia_
        for k in k_values
    ]

    plt.plot(k_values, distortions, marker='o')
    plt.xlabel('Number of clusters')
    plt.ylabel('Distortion')
    plt.show()

### Dendrogram


In [6]:
#Requires following import
import scipy.cluster.hierarchy as sch

In [7]:
# Draw the hierarchical-clustering dendrogram for a set of points
def plot_dendrogram(X, method='ward'):
    """Show a dendrogram of ``X`` on a 12x8 figure.

    X is passed to ``sch.linkage`` together with method ('ward' by
    default); the resulting linkage is then drawn with ``sch.dendrogram``.
    """
    # open the figure the dendrogram will be drawn on
    plt.subplots(figsize=(12, 8))

    linkage_matrix = sch.linkage(X, method=method)
    sch.dendrogram(linkage_matrix)

    plt.xlabel('Points')
    plt.ylabel("Euclidean distances")
    plt.title("Dendrogram")
    plt.show()