## K-means clustering with a custom distance

In [1]:
import pandas as pd
import customkmeans as kmeansclustering
import numpy as np
import warnings
# NOTE(review): this silences every warning for the whole session, including
# deprecation and numerical warnings — consider narrowing the filter to the
# specific category/module that is known to be noisy.
warnings.filterwarnings("ignore")

### Load data

In [78]:
# Read the raw points from disk.
df_data = pd.read_csv('sampledata.csv')

# Work on a small subset of the location columns.
# - random_state pins the draw so Restart & Run All reproduces the same rows.
# - min() guards against the ValueError that sample() raises when the file
#   holds fewer than 30 rows.
# - drop=True discards the old row labels instead of adding an 'index' column.
df_dataloc = (
    df_data[['id', 'longitude', 'latitude']]
    .sample(n=min(30, len(df_data)), random_state=42)
    .reset_index(drop=True)
)
df_dataloc

Unnamed: 0.1,Unnamed: 0,longitude,latitude,id
0,0,107.498860,-7.059288,1665377702169
1,1,107.499217,-7.059852,1665378883355
2,2,107.504312,-7.024727,1665371261082
3,3,107.506686,-7.031440,1665373304845
4,4,107.513834,-7.023707,1665374194751
...,...,...,...,...
406,406,107.650265,-6.972450,1665302258565
407,407,107.651817,-6.973245,1665367784163
408,408,107.652107,-6.983104,1665201080750
409,409,107.652652,-6.976278,1665368427832


### Set coordinate (longitude, latitude) pairs

In [91]:
# Store each row's coordinates as a single (longitude, latitude) tuple so the
# clustering step can consume them as one feature vector per point.
df_dataloc['locpair'] = list(
    df_dataloc[['longitude', 'latitude']].itertuples(index=False, name=None)
)
df_dataloc

Unnamed: 0,id,longitude,latitude,locpair
0,1665284061913,107.5843,-7.00701,"(107.5843003, -7.0070096)"
1,1665379249062,107.572807,-6.943279,"(107.5728074, -6.9432791)"
2,1665184324237,107.539736,-7.020594,"(107.5397355, -7.0205943)"
3,1665374018203,107.593823,-6.91052,"(107.5938232, -6.9105203)"
4,1665138468287,107.536441,-7.026125,"(107.5364412, -7.0261249)"
5,1665380323120,107.594645,-7.023936,"(107.5946448, -7.0239364)"
6,1665278909715,107.593983,-7.039326,"(107.5939834, -7.0393256)"
7,1665277369257,107.532395,-6.988611,"(107.5323952, -6.9886107)"
8,1665199697218,107.548599,-6.996415,"(107.5485993, -6.9964151)"
9,1665151571636,107.570864,-6.975183,"(107.5708636, -6.9751832)"


### K-means clustering

In [94]:
# Seed for picking the initial centroid; fixed so re-running the notebook
# produces the same starting point.
INIT_CENTROID_SEED = 42

# Feature matrix: one (longitude, latitude) row per point.
X_train = np.array(list(df_dataloc['locpair']))

# Use one randomly chosen data point as the initial centroid.
# The array is built exactly as before (from the 'locpair' column values) so
# the input handed to the custom KMeans.fit keeps the same form.
initcentroid = np.array(
    df_dataloc.sample(1, random_state=INIT_CENTROID_SEED)['locpair'].values
)

# Fit centroids to the dataset.
kmeans = kmeansclustering.KMeans(n_clusters=5)
kmeans.fit(X_train, initcentroid)

# View results. Judging by the displayed table, evaluate() yields, per point,
# the assigned centroid coordinates and the cluster index — TODO confirm
# against the customkmeans module.
class_centers, classification = kmeans.evaluate(X_train)
df_cluster = pd.DataFrame(
    list(zip(X_train, classification, class_centers)),
    columns=['locpair', 'cluster', 'centroid'],
)

# Split the coordinate pairs back into rounded scalar columns for plotting.
df_cluster[['longitude', 'latitude']] = pd.DataFrame(
    X_train, index=df_cluster.index
).round(4)
df_cluster

Unnamed: 0,locpair,cluster,centroid,longitude,latitude
0,"[107.5843003, -7.0070096]",3,"[107.61829821111111, -6.996434344444443]",107.5843,-7.007
1,"[107.5728074, -6.9432791]",4,"[107.58740146, -6.922639559999999]",107.5728,-6.9433
2,"[107.5397355, -7.0205943]",2,"[107.54416061428572, -7.019618271428571]",107.5397,-7.0206
3,"[107.5938232, -6.9105203]",4,"[107.58740146, -6.922639559999999]",107.5938,-6.9105
4,"[107.5364412, -7.0261249]",2,"[107.54416061428572, -7.019618271428571]",107.5364,-7.0261
5,"[107.5946448, -7.0239364]",3,"[107.61829821111111, -6.996434344444443]",107.5946,-7.0239
6,"[107.5939834, -7.0393256]",3,"[107.61829821111111, -6.996434344444443]",107.594,-7.0393
7,"[107.5323952, -6.9886107]",1,"[107.5318093, -6.9652343000000005]",107.5324,-6.9886
8,"[107.5485993, -6.9964151]",2,"[107.54416061428572, -7.019618271428571]",107.5486,-6.9964
9,"[107.5708636, -6.9751832]",0,"[107.57203252, -6.973460800000001]",107.5709,-6.9752


In [96]:
import pandas as pd
import folium
import branca.colormap as cm
import folium.plugins as plugins

# Columns needed to draw one marker per clustered point.
datatoplot = df_cluster[['longitude', 'latitude', 'cluster']]

# Centre the map on the mean coordinate of the plotted points.
plotmaps = folium.Map(location=[datatoplot.latitude.mean(), datatoplot.longitude.mean()], zoom_start=10, control_scale=True)

# Cluster index -> marker colour.
# NOTE(review): 'green' (2, 7) and 'darkred' (4, 14) each appear twice, so
# those clusters are indistinguishable by colour; 'darkpurple' looks like a
# folium.Icon palette name rather than a CSS colour — confirm it renders with
# BeautifyIcon before using more than 12 clusters.
colormap = {0:'red', 1:'blue', 2:'green', 3:'pink', 4:'darkred', 5:'orange', 6:'cadetblue', 7:'green', 8:'darkgreen',
    9:'lightgreen', 10:'darkblue', 11:'lightblue', 12:'darkpurple', 13:'lightgray', 14:'darkred', 15:'beige', 16:'gray', 17:'black'}
FALLBACK_COLOR = 'gray'  # used for missing or unmapped cluster ids

# Optional dark basemap:
#folium.TileLayer('cartodbdark_matter').add_to(plotmaps)

# itertuples keeps each column's own dtype; iterrows would upcast the integer
# cluster id to float because the row is mixed with float coordinates.
for point in datatoplot.itertuples(index=False):
    if pd.isna(point.cluster):
        # Missing assignment: draw a neutral marker without a number.
        cluster_id = None
        icon_color = FALLBACK_COLOR
        popup_text = 'cluster: n/a'
    else:
        cluster_id = int(point.cluster)
        # .get replaces the former bare `except:` so only the "unknown cluster"
        # case falls back to gray and real errors are no longer swallowed.
        icon_color = colormap.get(cluster_id, FALLBACK_COLOR)
        popup_text = f'cluster: {cluster_id}'

    folium.Marker(
        [point.latitude, point.longitude],
        # A plain string popup; passing a Series rendered its full repr.
        popup=popup_text,
        icon=plugins.BeautifyIcon(
            icon='arrow-down',
            icon_shape='marker',
            background_color=icon_color,
            border_color=icon_color,
            number=cluster_id,
        ),
    ).add_to(plotmaps)

plotmaps