In [727]:
import json
import pandas as pd
from pandas.io.json import json_normalize
import ast
import time
import matplotlib.pyplot as plt
from shapely.geometry import MultiPoint
from scipy import stats
from sklearn.cluster import AgglomerativeClustering
import folium
%autosave 60
%matplotlib inline

Autosaving every 60 seconds


In [974]:
#This JSON is an export of the versions table as a json. It includes all versions for our selected customers
with open("C:/Users/alexd/Downloads/cgr3.json") as data_file:
    data = json.load(data_file)

In [975]:
#Get our data into a dataframe
json_df = pd.DataFrame(data)

In [976]:
#Extract data from our json strings intothe column
json_df['object'] = json_df['object'].map(lambda x: dict(eval(x)))
json_df = json_df['object'].apply(pd.Series)

In [977]:
#Keep only the columns we need
json_df = json_df[['id','lat','lon','name','phone_number']]

In [978]:
#Get variables in the right type
json_df.lat = json_df.lat.astype('float') 
json_df.lon = json_df.lon.astype('float') 

In [979]:
#Drop null rows
json_df = json_df.dropna()

In [1112]:
#Drop all the rows with 0 values and trim outliers
json_df = json_df.loc[json_df['lon'] > 36] 
json_df = json_df.loc[json_df['lat'] < 0] 
#Filter out a test shop
json_df = json_df.loc[json_df['id'] != 19960] 
json_df = json_df.loc[json_df['id'] != 11417] 

In [981]:
#Drop duplicates
json_df = json_df.round({'lat': 5, 'lon': 5})
cols = ["id","lat","lon","name","phone_number"]
json_df = json_df[cols].loc[(json_df[cols].shift() != json_df[cols]).any(axis=1)]

In [982]:
#Drop values of the Nairobi warehouses and Nairobi Dam
json_df = json_df[(json_df.lat >= -1.28685) | (json_df.lat <= -1.2895) | (json_df.lon <= 36.7555) | (json_df.lon >= 36.76)]
json_df = json_df[(json_df.lat >= -1.3185) | (json_df.lat <= -1.3222) | (json_df.lon <= 36.898) | (json_df.lon >= 36.906)]
json_df = json_df[(json_df.lat >= -1.31879) | (json_df.lat <= -1.31975) | (json_df.lon <= 36.7967) | (json_df.lon >= 36.8035)]                                                                      

In [983]:
json_df.id = json_df.id.astype('int') 

In [984]:
# define the number of kilometers in one radian
kms_per_radian = 6371.0088

In [985]:
def get_centermost_point(cluster):
    centroid = (MultiPoint(cluster).centroid.x, MultiPoint(cluster).centroid.y)
    centermost_point = min(cluster, key=lambda point: great_circle(point, centroid).m)
    return tuple(centermost_point)

In [986]:
def dbscan_reduce(df, epsilon, x='lon', y='lat'):
    start_time = time.time()
    # represent points consistently as (lat, lon) and convert to radians to fit using haversine metric
    coords = df.as_matrix(columns=['lat', 'lon'])   
    db = DBSCAN(eps=epsilon, min_samples=1, algorithm='ball_tree', metric='haversine').fit(np.radians(coords))
    cluster_labels = db.labels_
    num_clusters = len(set(cluster_labels))
    print('Number of clusters: {:,}'.format(num_clusters))
    
    clusters = pd.Series([coords[cluster_labels==n] for n in range(num_clusters)])
    
    # find the point in each cluster that is closest to its centroid
    centermost_points = clusters.map(get_centermost_point)

    # unzip the list of centermost points (lat, lon) tuples into separate lat and lon lists
    lats, lons = zip(*centermost_points)
    rep_points = pd.DataFrame({x:lons, y:lats})
    rep_points.tail()
    
    # pull row from original data set where lat/lon match the lat/lon of each row of representative points
    rs = rep_points.apply(lambda row: df[(df[y]==row[y]) & (df[x]==row[x])].iloc[0], axis=1)
    
    clusterdf = pd.DataFrame(columns=['points'])
    clusterdf['points'] = pd.Series(clusters)
    clusterdf['points'] = clusterdf.points.str.len()
    
    rs['number_of_points'] = clusterdf['points']
    
    # all done, print outcome
    message = 'Clustered {:,} points down to {:,} points, for {:.2f}% compression in {:,.2f} seconds.'
    print(message.format(len(df), len(rs), 100*(1 - float(len(rs)) / len(df)), time.time()-start_time))    
    return rs

In [987]:
Uniqueids = json_df.id.unique()

#create a data frame dictionary to store your data frames
DataFrameDict = {elem : pd.DataFrame for elem in Uniqueids}

for key in DataFrameDict.keys():
    DataFrameDict[key] = json_df[:][json_df.id == key]

In [988]:
# epsilon=0.05km in radians
eps_rad = 0.05 / kms_per_radian

In [989]:
#Run the clustering algorithm and return the point nearest to the centroid for each cluster
#Then append it to our dataframe
df_clustered = pd.DataFrame()
for key in DataFrameDict:
    df_clustered = df_clustered.append(dbscan_reduce(DataFrameDict[key], epsilon=eps_rad))

  after removing the cwd from sys.path.


Number of clusters: 45
Clustered 56 points down to 45 points, for 19.64% compression in 0.24 seconds.
Number of clusters: 22
Clustered 34 points down to 22 points, for 35.29% compression in 0.09 seconds.
Number of clusters: 11
Clustered 12 points down to 11 points, for 8.33% compression in 0.04 seconds.
Number of clusters: 55


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 78 points down to 55 points, for 29.49% compression in 0.17 seconds.
Number of clusters: 117


  after removing the cwd from sys.path.


Clustered 165 points down to 117 points, for 29.09% compression in 0.34 seconds.
Number of clusters: 51
Clustered 82 points down to 51 points, for 37.80% compression in 0.16 seconds.
Number of clusters: 18


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 30 points down to 18 points, for 40.00% compression in 0.07 seconds.
Number of clusters: 42
Clustered 107 points down to 42 points, for 60.75% compression in 0.15 seconds.
Number of clusters: 100


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 218 points down to 100 points, for 54.13% compression in 0.32 seconds.
Number of clusters: 10
Clustered 12 points down to 10 points, for 16.67% compression in 0.05 seconds.
Number of clusters: 17
Clustered 36 points down to 17 points, for 52.78% compression in 0.07 seconds.
Number of clusters: 49


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 76 points down to 49 points, for 35.53% compression in 0.15 seconds.
Number of clusters: 25
Clustered 58 points down to 25 points, for 56.90% compression in 0.09 seconds.
Number of clusters: 69


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 114 points down to 69 points, for 39.47% compression in 0.22 seconds.
Number of clusters: 32
Clustered 37 points down to 32 points, for 13.51% compression in 0.11 seconds.
Number of clusters: 8
Clustered 9 points down to 8 points, for 11.11% compression in 0.04 seconds.
Number of clusters: 35


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 48 points down to 35 points, for 27.08% compression in 0.12 seconds.
Number of clusters: 4
Clustered 5 points down to 4 points, for 20.00% compression in 0.03 seconds.
Number of clusters: 50
Clustered 92 points down to 50 points, for 45.65% compression in 0.15 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 45
Clustered 52 points down to 45 points, for 13.46% compression in 0.14 seconds.
Number of clusters: 40


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 58 points down to 40 points, for 31.03% compression in 0.13 seconds.
Number of clusters: 68
Clustered 100 points down to 68 points, for 32.00% compression in 0.21 seconds.
Number of clusters: 27
Clustered 35 points down to 27 points, for 22.86% compression in 0.10 seconds.
Number of clusters: 64


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 129 points down to 64 points, for 50.39% compression in 0.21 seconds.
Number of clusters: 93


  after removing the cwd from sys.path.


Clustered 222 points down to 93 points, for 58.11% compression in 0.31 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 30
Clustered 80 points down to 30 points, for 62.50% compression in 0.10 seconds.
Number of clusters: 117


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 245 points down to 117 points, for 52.24% compression in 0.40 seconds.
Number of clusters: 61
Clustered 98 points down to 61 points, for 37.76% compression in 0.19 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 41
Clustered 51 points down to 41 points, for 19.61% compression in 0.13 seconds.
Number of clusters: 51


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 65 points down to 51 points, for 21.54% compression in 0.16 seconds.
Number of clusters: 33
Clustered 51 points down to 33 points, for 35.29% compression in 0.11 seconds.
Number of clusters: 34


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 57 points down to 34 points, for 40.35% compression in 0.12 seconds.
Number of clusters: 91
Clustered 185 points down to 91 points, for 50.81% compression in 0.32 seconds.
Number of clusters: 56
Clustered 105 points down to 56 points, for 46.67% compression in 0.18 seconds.


  after removing the cwd from sys.path.


Number of clusters: 30
Clustered 39 points down to 30 points, for 23.08% compression in 0.10 seconds.
Number of clusters: 53


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 118 points down to 53 points, for 55.08% compression in 0.17 seconds.
Number of clusters: 53
Clustered 59 points down to 53 points, for 10.17% compression in 0.16 seconds.
Number of clusters: 55


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 72 points down to 55 points, for 23.61% compression in 0.17 seconds.
Number of clusters: 30
Clustered 44 points down to 30 points, for 31.82% compression in 0.11 seconds.
Number of clusters: 19
Clustered 22 points down to 19 points, for 13.64% compression in 0.07 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 18
Clustered 22 points down to 18 points, for 18.18% compression in 0.07 seconds.
Number of clusters: 52
Clustered 96 points down to 52 points, for 45.83% compression in 0.17 seconds.
Number of clusters: 7
Clustered 9 points down to 7 points, for 22.22% compression in 0.04 seconds.
Number of clusters: 21
Clustered 40 points down to 21 points, for 47.50% compression in 0.08 seconds.
Number of clusters: 53


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 92 points down to 53 points, for 42.39% compression in 0.17 seconds.
Number of clusters: 75


  after removing the cwd from sys.path.


Clustered 160 points down to 75 points, for 53.12% compression in 0.24 seconds.
Number of clusters: 2
Clustered 5 points down to 2 points, for 60.00% compression in 0.03 seconds.
Number of clusters: 41
Clustered 63 points down to 41 points, for 34.92% compression in 0.14 seconds.
Number of clusters: 68


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 116 points down to 68 points, for 41.38% compression in 0.21 seconds.
Number of clusters: 53
Clustered 71 points down to 53 points, for 25.35% compression in 0.16 seconds.
Number of clusters: 5
Clustered 7 points down to 5 points, for 28.57% compression in 0.03 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 34
Clustered 73 points down to 34 points, for 53.42% compression in 0.13 seconds.
Number of clusters: 39


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 69 points down to 39 points, for 43.48% compression in 0.14 seconds.
Number of clusters: 71
Clustered 238 points down to 71 points, for 70.17% compression in 0.27 seconds.
Number of clusters: 56
Clustered 114 points down to 56 points, for 50.88% compression in 0.20 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 73
Clustered 109 points down to 73 points, for 33.03% compression in 0.23 seconds.
Number of clusters: 32
Clustered 70 points down to 32 points, for 54.29% compression in 0.11 seconds.
Number of clusters: 29
Clustered 51 points down to 29 points, for 43.14% compression in 0.10 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 71
Clustered 89 points down to 71 points, for 20.22% compression in 0.21 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 27
Clustered 32 points down to 27 points, for 15.62% compression in 0.10 seconds.
Number of clusters: 59
Clustered 125 points down to 59 points, for 52.80% compression in 0.19 seconds.
Number of clusters: 49
Clustered 64 points down to 49 points, for 23.44% compression in 0.15 seconds.
Number of clusters: 57


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 95 points down to 57 points, for 40.00% compression in 0.18 seconds.
Number of clusters: 53
Clustered 78 points down to 53 points, for 32.05% compression in 0.16 seconds.
Number of clusters: 68


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 89 points down to 68 points, for 23.60% compression in 0.21 seconds.
Number of clusters: 91


  after removing the cwd from sys.path.


Clustered 114 points down to 91 points, for 20.18% compression in 0.27 seconds.
Number of clusters: 31
Clustered 33 points down to 31 points, for 6.06% compression in 0.11 seconds.
Number of clusters: 61


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 85 points down to 61 points, for 28.24% compression in 0.21 seconds.
Number of clusters: 81


  after removing the cwd from sys.path.


Clustered 143 points down to 81 points, for 43.36% compression in 0.25 seconds.
Number of clusters: 53
Clustered 56 points down to 53 points, for 5.36% compression in 0.16 seconds.
Number of clusters: 26


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 42 points down to 26 points, for 38.10% compression in 0.09 seconds.
Number of clusters: 51
Clustered 85 points down to 51 points, for 40.00% compression in 0.16 seconds.
Number of clusters: 56


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 70 points down to 56 points, for 20.00% compression in 0.18 seconds.
Number of clusters: 64


  after removing the cwd from sys.path.


Clustered 184 points down to 64 points, for 65.22% compression in 0.22 seconds.
Number of clusters: 4
Clustered 5 points down to 4 points, for 20.00% compression in 0.03 seconds.
Number of clusters: 32
Clustered 88 points down to 32 points, for 63.64% compression in 0.13 seconds.
Number of clusters: 15


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 31 points down to 15 points, for 51.61% compression in 0.07 seconds.
Number of clusters: 113


  after removing the cwd from sys.path.


Clustered 214 points down to 113 points, for 47.20% compression in 0.36 seconds.
Number of clusters: 40
Clustered 58 points down to 40 points, for 31.03% compression in 0.12 seconds.
Number of clusters: 47


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 78 points down to 47 points, for 39.74% compression in 0.16 seconds.
Number of clusters: 55
Clustered 100 points down to 55 points, for 45.00% compression in 0.18 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 23
Clustered 27 points down to 23 points, for 14.81% compression in 0.08 seconds.
Number of clusters: 57


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 70 points down to 57 points, for 18.57% compression in 0.17 seconds.
Number of clusters: 61
Clustered 107 points down to 61 points, for 42.99% compression in 0.19 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 43
Clustered 65 points down to 43 points, for 33.85% compression in 0.14 seconds.
Number of clusters: 11
Clustered 16 points down to 11 points, for 31.25% compression in 0.05 seconds.


  after removing the cwd from sys.path.


Number of clusters: 85
Clustered 131 points down to 85 points, for 35.11% compression in 0.26 seconds.
Number of clusters: 62


  after removing the cwd from sys.path.


Clustered 114 points down to 62 points, for 45.61% compression in 0.22 seconds.
Number of clusters: 13
Clustered 73 points down to 13 points, for 82.19% compression in 0.05 seconds.
Number of clusters: 25
Clustered 37 points down to 25 points, for 32.43% compression in 0.07 seconds.
Number of clusters: 17


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 19 points down to 17 points, for 10.53% compression in 0.09 seconds.
Number of clusters: 43
Clustered 53 points down to 43 points, for 18.87% compression in 0.19 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 49


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 113 points down to 49 points, for 56.64% compression in 0.20 seconds.
Number of clusters: 29
Clustered 36 points down to 29 points, for 19.44% compression in 0.10 seconds.
Number of clusters: 78
Clustered 146 points down to 78 points, for 46.58% compression in 0.25 seconds.
Number of clusters: 51
Clustered 77 points down to 51 points, for 33.77% compression in 0.17 seconds.
Number of clusters: 39


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 65 points down to 39 points, for 40.00% compression in 0.13 seconds.
Number of clusters: 43
Clustered 73 points down to 43 points, for 41.10% compression in 0.13 seconds.
Number of clusters: 32


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 44 points down to 32 points, for 27.27% compression in 0.11 seconds.
Number of clusters: 85


  after removing the cwd from sys.path.


Clustered 133 points down to 85 points, for 36.09% compression in 0.26 seconds.
Number of clusters: 10
Clustered 10 points down to 10 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 35
Clustered 36 points down to 35 points, for 2.78% compression in 0.11 seconds.
Number of clusters: 37


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 51 points down to 37 points, for 27.45% compression in 0.12 seconds.
Number of clusters: 36
Clustered 66 points down to 36 points, for 45.45% compression in 0.13 seconds.
Number of clusters: 73


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 114 points down to 73 points, for 35.96% compression in 0.26 seconds.
Number of clusters: 77


  after removing the cwd from sys.path.


Clustered 109 points down to 77 points, for 29.36% compression in 0.25 seconds.
Number of clusters: 118


  after removing the cwd from sys.path.


Clustered 190 points down to 118 points, for 37.89% compression in 0.41 seconds.
Number of clusters: 8
Clustered 9 points down to 8 points, for 11.11% compression in 0.05 seconds.
Number of clusters: 28
Clustered 34 points down to 28 points, for 17.65% compression in 0.10 seconds.
Number of clusters: 8
Clustered 8 points down to 8 points, for 0.00% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 18
Clustered 23 points down to 18 points, for 21.74% compression in 0.06 seconds.
Number of clusters: 22
Clustered 26 points down to 22 points, for 15.38% compression in 0.08 seconds.
Number of clusters: 9
Clustered 12 points down to 9 points, for 25.00% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 44
Clustered 68 points down to 44 points, for 35.29% compression in 0.15 seconds.
Number of clusters: 55
Clustered 85 points down to 55 points, for 35.29% compression in 0.16 seconds.
Number of clusters: 27
Clustered 33 points down to 27 points, for 18.18% compression in 0.08 seconds.
Number of clusters: 32
Clustered 39 points down to 32 points, for 17.95% compression in 0.10 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 32
Clustered 80 points down to 32 points, for 60.00% compression in 0.12 seconds.
Number of clusters: 21
Clustered 43 points down to 21 points, for 51.16% compression in 0.08 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 41
Clustered 62 points down to 41 points, for 33.87% compression in 0.13 seconds.
Number of clusters: 35
Clustered 58 points down to 35 points, for 39.66% compression in 0.12 seconds.
Number of clusters: 4
Clustered 5 points down to 4 points, for 20.00% compression in 0.03 seconds.
Number of clusters: 36
Clustered 59 points down to 36 points, for 38.98% compression in 0.12 seconds.
Number of clusters: 66


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 118 points down to 66 points, for 44.07% compression in 0.25 seconds.
Number of clusters: 28
Clustered 39 points down to 28 points, for 28.21% compression in 0.10 seconds.
Number of clusters: 19
Clustered 23 points down to 19 points, for 17.39% compression in 0.08 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 43
Clustered 62 points down to 43 points, for 30.65% compression in 0.15 seconds.
Number of clusters: 16


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 25 points down to 16 points, for 36.00% compression in 0.08 seconds.
Number of clusters: 33
Clustered 69 points down to 33 points, for 52.17% compression in 0.13 seconds.
Number of clusters: 23


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 28 points down to 23 points, for 17.86% compression in 0.11 seconds.
Number of clusters: 29
Clustered 37 points down to 29 points, for 21.62% compression in 0.11 seconds.
Number of clusters: 58


  after removing the cwd from sys.path.


Clustered 89 points down to 58 points, for 34.83% compression in 0.21 seconds.
Number of clusters: 58
Clustered 84 points down to 58 points, for 30.95% compression in 0.20 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 89
Clustered 178 points down to 89 points, for 50.00% compression in 0.32 seconds.
Number of clusters: 12
Clustered 20 points down to 12 points, for 40.00% compression in 0.04 seconds.
Number of clusters: 51
Clustered 79 points down to 51 points, for 35.44% compression in 0.15 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 58
Clustered 71 points down to 58 points, for 18.31% compression in 0.18 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 161
Clustered 210 points down to 161 points, for 23.33% compression in 0.48 seconds.
Number of clusters: 21
Clustered 31 points down to 21 points, for 32.26% compression in 0.07 seconds.
Number of clusters: 48


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 74 points down to 48 points, for 35.14% compression in 0.15 seconds.
Number of clusters: 18
Clustered 29 points down to 18 points, for 37.93% compression in 0.07 seconds.
Number of clusters: 41


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 79 points down to 41 points, for 48.10% compression in 0.14 seconds.
Number of clusters: 16
Clustered 18 points down to 16 points, for 11.11% compression in 0.07 seconds.
Number of clusters: 63
Clustered 97 points down to 63 points, for 35.05% compression in 0.20 seconds.
Number of clusters: 43
Clustered 52 points down to 43 points, for 17.31% compression in 0.13 seconds.
Number of clusters: 20
Clustered 32 points down to 20 points, for 37.50% compression in 0.07 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 27
Clustered 37 points down to 27 points, for 27.03% compression in 0.09 seconds.
Number of clusters: 28
Clustered 46 points down to 28 points, for 39.13% compression in 0.10 seconds.


  after removing the cwd from sys.path.


Number of clusters: 70
Clustered 102 points down to 70 points, for 31.37% compression in 0.23 seconds.
Number of clusters: 38
Clustered 52 points down to 38 points, for 26.92% compression in 0.13 seconds.
Number of clusters: 52


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 76 points down to 52 points, for 31.58% compression in 0.16 seconds.
Number of clusters: 29
Clustered 38 points down to 29 points, for 23.68% compression in 0.10 seconds.
Number of clusters: 53


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 82 points down to 53 points, for 35.37% compression in 0.16 seconds.
Number of clusters: 42
Clustered 59 points down to 42 points, for 28.81% compression in 0.14 seconds.
Number of clusters: 6
Clustered 6 points down to 6 points, for 0.00% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 13
Clustered 15 points down to 13 points, for 13.33% compression in 0.07 seconds.
Number of clusters: 16
Clustered 20 points down to 16 points, for 20.00% compression in 0.06 seconds.
Number of clusters: 30
Clustered 54 points down to 30 points, for 44.44% compression in 0.10 seconds.
Number of clusters: 29
Clustered 36 points down to 29 points, for 19.44% compression in 0.10 seconds.
Number of clusters: 35


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 57 points down to 35 points, for 38.60% compression in 0.13 seconds.
Number of clusters: 51
Clustered 78 points down to 51 points, for 34.62% compression in 0.17 seconds.
Number of clusters: 54


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 73 points down to 54 points, for 26.03% compression in 0.17 seconds.
Number of clusters: 84


  after removing the cwd from sys.path.


Clustered 133 points down to 84 points, for 36.84% compression in 0.26 seconds.
Number of clusters: 47
Clustered 64 points down to 47 points, for 26.56% compression in 0.15 seconds.
Number of clusters: 44


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 49 points down to 44 points, for 10.20% compression in 0.14 seconds.
Number of clusters: 24
Clustered 46 points down to 24 points, for 47.83% compression in 0.09 seconds.
Number of clusters: 40


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 52 points down to 40 points, for 23.08% compression in 0.13 seconds.
Number of clusters: 55
Clustered 64 points down to 55 points, for 14.06% compression in 0.17 seconds.
Number of clusters: 104


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 168 points down to 104 points, for 38.10% compression in 0.32 seconds.
Number of clusters: 19
Clustered 31 points down to 19 points, for 38.71% compression in 0.07 seconds.
Number of clusters: 42


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 46 points down to 42 points, for 8.70% compression in 0.14 seconds.
Number of clusters: 16
Clustered 26 points down to 16 points, for 38.46% compression in 0.06 seconds.
Number of clusters: 14
Clustered 17 points down to 14 points, for 17.65% compression in 0.06 seconds.
Number of clusters: 35


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 43 points down to 35 points, for 18.60% compression in 0.12 seconds.
Number of clusters: 16
Clustered 24 points down to 16 points, for 33.33% compression in 0.06 seconds.
Number of clusters: 12
Clustered 13 points down to 12 points, for 7.69% compression in 0.06 seconds.
Number of clusters: 17
Clustered 26 points down to 17 points, for 34.62% compression in 0.06 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 80
Clustered 95 points down to 80 points, for 15.79% compression in 0.24 seconds.
Number of clusters: 11
Clustered 15 points down to 11 points, for 26.67% compression in 0.05 seconds.
Number of clusters: 33
Clustered 36 points down to 33 points, for 8.33% compression in 0.10 seconds.
Number of clusters: 13


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 14 points down to 13 points, for 7.14% compression in 0.05 seconds.
Number of clusters: 25
Clustered 30 points down to 25 points, for 16.67% compression in 0.09 seconds.
Number of clusters: 16
Clustered 25 points down to 16 points, for 36.00% compression in 0.06 seconds.
Number of clusters: 25


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 26 points down to 25 points, for 3.85% compression in 0.09 seconds.
Number of clusters: 44
Clustered 74 points down to 44 points, for 40.54% compression in 0.14 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 17


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 27 points down to 17 points, for 37.04% compression in 0.07 seconds.
Number of clusters: 13
Clustered 14 points down to 13 points, for 7.14% compression in 0.05 seconds.
Number of clusters: 27
Clustered 44 points down to 27 points, for 38.64% compression in 0.10 seconds.
Number of clusters: 4
Clustered 5 points down to 4 points, for 20.00% compression in 0.03 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 22
Clustered 32 points down to 22 points, for 31.25% compression in 0.08 seconds.
Number of clusters: 16
Clustered 17 points down to 16 points, for 5.88% compression in 0.06 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 76


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 111 points down to 76 points, for 31.53% compression in 0.25 seconds.
Number of clusters: 20
Clustered 22 points down to 20 points, for 9.09% compression in 0.07 seconds.
Number of clusters: 116


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 165 points down to 116 points, for 29.70% compression in 0.35 seconds.
Number of clusters: 61


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 75 points down to 61 points, for 18.67% compression in 0.21 seconds.
Number of clusters: 39
Clustered 72 points down to 39 points, for 45.83% compression in 0.13 seconds.
Number of clusters: 63
Clustered 74 points down to 63 points, for 14.86% compression in 0.21 seconds.
Number of clusters: 21
Clustered 30 points down to 21 points, for 30.00% compression in 0.09 seconds.
Number of clusters: 7
Clustered 8 points down to 7 points, for 12.50% compression in 0.03 seconds.
Number of clusters: 4
Clustered 5 points down to 4 points, for 20.00% compression in 0.03 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 35
Clustered 43 points down to 35 points, for 18.60% compression in 0.13 seconds.
Number of clusters: 11
Clustered 12 points down to 11 points, for 8.33% compression in 0.05 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 17
Clustered 23 points down to 17 points, for 26.09% compression in 0.07 seconds.
Number of clusters: 26
Clustered 32 points down to 26 points, for 18.75% compression in 0.09 seconds.
Number of clusters: 31


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 44 points down to 31 points, for 29.55% compression in 0.10 seconds.
Number of clusters: 34
Clustered 56 points down to 34 points, for 39.29% compression in 0.13 seconds.
Number of clusters: 7
Clustered 8 points down to 7 points, for 12.50% compression in 0.05 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 22
Clustered 45 points down to 22 points, for 51.11% compression in 0.09 seconds.
Number of clusters: 14
Clustered 16 points down to 14 points, for 12.50% compression in 0.06 seconds.
Number of clusters: 37


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 50 points down to 37 points, for 26.00% compression in 0.12 seconds.
Number of clusters: 22
Clustered 29 points down to 22 points, for 24.14% compression in 0.08 seconds.
Number of clusters: 39


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 43 points down to 39 points, for 9.30% compression in 0.12 seconds.
Number of clusters: 64
Clustered 91 points down to 64 points, for 29.67% compression in 0.20 seconds.
Number of clusters: 10
Clustered 10 points down to 10 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 34
Clustered 52 points down to 34 points, for 34.62% compression in 0.12 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 33
Clustered 37 points down to 33 points, for 10.81% compression in 0.13 seconds.
Number of clusters: 51


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 56 points down to 51 points, for 8.93% compression in 0.18 seconds.
Number of clusters: 27
Clustered 39 points down to 27 points, for 30.77% compression in 0.09 seconds.
Number of clusters: 20
Clustered 21 points down to 20 points, for 4.76% compression in 0.08 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 41
Clustered 53 points down to 41 points, for 22.64% compression in 0.18 seconds.
Number of clusters: 18
Clustered 26 points down to 18 points, for 30.77% compression in 0.09 seconds.
Number of clusters: 55
Clustered 63 points down to 55 points, for 12.70% compression in 0.19 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 21
Clustered 29 points down to 21 points, for 27.59% compression in 0.07 seconds.
Number of clusters: 21
Clustered 27 points down to 21 points, for 22.22% compression in 0.08 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 41
Clustered 46 points down to 41 points, for 10.87% compression in 0.13 seconds.
Number of clusters: 11
Clustered 16 points down to 11 points, for 31.25% compression in 0.05 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 16
Clustered 22 points down to 16 points, for 27.27% compression in 0.07 seconds.
Number of clusters: 37
Clustered 50 points down to 37 points, for 26.00% compression in 0.13 seconds.
Number of clusters: 14
Clustered 14 points down to 14 points, for 0.00% compression in 0.06 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 48


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 91 points down to 48 points, for 47.25% compression in 0.16 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 30
Clustered 36 points down to 30 points, for 16.67% compression in 0.10 seconds.
Number of clusters: 3
Clustered 5 points down to 3 points, for 40.00% compression in 0.03 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 15
Clustered 16 points down to 15 points, for 6.25% compression in 0.06 seconds.
Number of clusters: 41


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 52 points down to 41 points, for 21.15% compression in 0.16 seconds.
Number of clusters: 72
Clustered 131 points down to 72 points, for 45.04% compression in 0.24 seconds.
Number of clusters: 18
Clustered 20 points down to 18 points, for 10.00% compression in 0.07 seconds.
Number of clusters: 39
Clustered 50 points down to 39 points, for 22.00% compression in 0.12 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 14
Clustered 22 points down to 14 points, for 36.36% compression in 0.06 seconds.
Number of clusters: 69
Clustered 81 points down to 69 points, for 14.81% compression in 0.21 seconds.
Number of clusters: 12
Clustered 14 points down to 12 points, for 14.29% compression in 0.05 seconds.
Number of clusters: 21
Clustered 41 points down to 21 points, for 48.78% compression in 0.08 seconds.
Number of clusters: 25


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 44 points down to 25 points, for 43.18% compression in 0.09 seconds.
Number of clusters: 12
Clustered 12 points down to 12 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 23
Clustered 28 points down to 23 points, for 17.86% compression in 0.08 seconds.
Number of clusters: 37


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 47 points down to 37 points, for 21.28% compression in 0.12 seconds.
Number of clusters: 14
Clustered 19 points down to 14 points, for 26.32% compression in 0.05 seconds.
Number of clusters: 31
Clustered 36 points down to 31 points, for 13.89% compression in 0.10 seconds.
Number of clusters: 21


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 28 points down to 21 points, for 25.00% compression in 0.08 seconds.
Number of clusters: 26
Clustered 33 points down to 26 points, for 21.21% compression in 0.09 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 27


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 30 points down to 27 points, for 10.00% compression in 0.10 seconds.
Number of clusters: 27
Clustered 31 points down to 27 points, for 12.90% compression in 0.10 seconds.
Number of clusters: 40


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 59 points down to 40 points, for 32.20% compression in 0.13 seconds.
Number of clusters: 22
Clustered 25 points down to 22 points, for 12.00% compression in 0.08 seconds.
Number of clusters: 15
Clustered 16 points down to 15 points, for 6.25% compression in 0.06 seconds.
Number of clusters: 18


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 22 points down to 18 points, for 18.18% compression in 0.07 seconds.
Number of clusters: 22
Clustered 22 points down to 22 points, for 0.00% compression in 0.08 seconds.
Number of clusters: 52


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 68 points down to 52 points, for 23.53% compression in 0.16 seconds.
Number of clusters: 45
Clustered 54 points down to 45 points, for 16.67% compression in 0.14 seconds.
Number of clusters: 12
Clustered 18 points down to 12 points, for 33.33% compression in 0.05 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 5
Clustered 6 points down to 5 points, for 16.67% compression in 0.04 seconds.
Number of clusters: 18
Clustered 20 points down to 18 points, for 10.00% compression in 0.07 seconds.
Number of clusters: 48


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 61 points down to 48 points, for 21.31% compression in 0.15 seconds.
Number of clusters: 26
Clustered 29 points down to 26 points, for 10.34% compression in 0.10 seconds.
Number of clusters: 4
Clustered 9 points down to 4 points, for 55.56% compression in 0.03 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 9
Clustered 14 points down to 9 points, for 35.71% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 11
Clustered 13 points down to 11 points, for 15.38% compression in 0.05 seconds.
Number of clusters: 26
Clustered 44 points down to 26 points, for 40.91% compression in 0.09 seconds.
Number of clusters: 49
Clustered 73 points down to 49 points, for 32.88% compression in 0.15 seconds.
Number of clusters: 71


  after removing the cwd from sys.path.


Clustered 94 points down to 71 points, for 24.47% compression in 0.21 seconds.
Number of clusters: 17
Clustered 20 points down to 17 points, for 15.00% compression in 0.07 seconds.
Number of clusters: 19
Clustered 22 points down to 19 points, for 13.64% compression in 0.09 seconds.
Number of clusters: 134


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 183 points down to 134 points, for 26.78% compression in 0.42 seconds.
Number of clusters: 31
Clustered 39 points down to 31 points, for 20.51% compression in 0.10 seconds.
Number of clusters: 72


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 96 points down to 72 points, for 25.00% compression in 0.21 seconds.
Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 46
Clustered 74 points down to 46 points, for 37.84% compression in 0.14 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 7
Clustered 14 points down to 7 points, for 50.00% compression in 0.04 seconds.
Number of clusters: 46
Clustered 57 points down to 46 points, for 19.30% compression in 0.14 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 39
Clustered 50 points down to 39 points, for 22.00% compression in 0.13 seconds.
Number of clusters: 7
Clustered 9 points down to 7 points, for 22.22% compression in 0.04 seconds.
Number of clusters: 18


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 18 points down to 18 points, for 0.00% compression in 0.07 seconds.
Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 40
Clustered 60 points down to 40 points, for 33.33% compression in 0.13 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 6
Clustered 6 points down to 6 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 8
Clustered 9 points down to 8 points, for 11.11% compression in 0.04 seconds.
Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 13
Clustered 13 points down to 13 points, for 0.00% compression in 0.06 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 45
Clustered 65 points down to 45 points, for 30.77% compression in 0.15 seconds.
Number of clusters: 15


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 15 points down to 15 points, for 0.00% compression in 0.06 seconds.
Number of clusters: 10
Clustered 12 points down to 10 points, for 16.67% compression in 0.04 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 19
Clustered 19 points down to 19 points, for 0.00% compression in 0.07 seconds.
Number of clusters: 15
Clustered 17 points down to 15 points, for 11.76% compression in 0.06 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 27
Clustered 31 points down to 27 points, for 12.90% compression in 0.08 seconds.
Number of clusters: 26
Clustered 28 points down to 26 points, for 7.14% compression in 0.10 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 14
Clustered 15 points down to 14 points, for 6.67% compression in 0.06 seconds.
Number of clusters: 25
Clustered 26 points down to 25 points, for 3.85% compression in 0.08 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 14
Clustered 17 points down to 14 points, for 17.65% compression in 0.06 seconds.
Number of clusters: 30
Clustered 36 points down to 30 points, for 16.67% compression in 0.09 seconds.
Number of clusters: 23


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 33 points down to 23 points, for 30.30% compression in 0.09 seconds.
Number of clusters: 10
Clustered 10 points down to 10 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 77


  after removing the cwd from sys.path.


Clustered 145 points down to 77 points, for 46.90% compression in 0.25 seconds.
Number of clusters: 15
Clustered 18 points down to 15 points, for 16.67% compression in 0.06 seconds.
Number of clusters: 13
Clustered 13 points down to 13 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 88


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 120 points down to 88 points, for 26.67% compression in 0.27 seconds.
Number of clusters: 13
Clustered 13 points down to 13 points, for 0.00% compression in 0.06 seconds.
Number of clusters: 33
Clustered 38 points down to 33 points, for 13.16% compression in 0.10 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 67
Clustered 89 points down to 67 points, for 24.72% compression in 0.21 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 11
Clustered 13 points down to 11 points, for 15.38% compression in 0.05 seconds.
Number of clusters: 37
Clustered 48 points down to 37 points, for 22.92% compression in 0.12 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 6
Clustered 6 points down to 6 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 28
Clustered 30 points down to 28 points, for 6.67% compression in 0.09 seconds.
Number of clusters: 27


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 38 points down to 27 points, for 28.95% compression in 0.10 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 21
Clustered 27 points down to 21 points, for 22.22% compression in 0.08 seconds.
Number of clusters: 16
Clustered 22 points down to 16 points, for 27.27% compression in 0.06 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 23
Clustered 23 points down to 23 points, for 0.00% compression in 0.09 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 17
Clustered 28 points down to 17 points, for 39.29% compression in 0.07 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 6
Clustered 6 points down to 6 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 13
Clustered 15 points down to 13 points, for 13.33% compression in 0.06 seconds.
Number of clusters: 6
Clustered 6 points down to 6 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 29
Clustered 35 points down to 29 points, for 17.14% compression in 0.10 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 7


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 20
Clustered 23 points down to 20 points, for 13.04% compression in 0.07 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 10
Clustered 11 points down to 10 points, for 9.09% compression in 0.05 seconds.
Number of clusters: 41
Clustered 48 points down to 41 points, for 14.58% compression in 0.13 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 19
Clustered 22 points down to 19 points, for 13.64% compression in 0.07 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 11
Clustered 11 points down to 11 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 21
Clustered 24 points down to 21 points, for 12.50% compression in 0.09 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 8
Clustered 8 points down to 8 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 14
Clustered 16 points down to 14 points, for 12.50% compression in 0.06 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 18
Clustered 23 points down to 18 points, for 21.74% compression in 0.08 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 6
Clustered 6 points down to 6 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 24
Clustered 33 points down to 24 points, for 27.27% compression in 0.08 seconds.
Number of clusters: 33
Clustered 47 points down to 33 points, for 29.79% compression in 0.11 seconds.
Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 44
Clustered 55 points down to 44 points, for 20.00% compression in 0.14 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 31
Clustered 39 points down to 31 points, for 20.51% compression in 0.11 seconds.
Number of clusters: 14
Clustered 14 points down to 14 points, for 0.00% compression in 0.06 seconds.
Number of clusters: 11


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 17 points down to 11 points, for 35.29% compression in 0.05 seconds.
Number of clusters: 31
Clustered 50 points down to 31 points, for 38.00% compression in 0.11 seconds.
Number of clusters: 12
Clustered 13 points down to 12 points, for 7.69% compression in 0.05 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 13
Clustered 15 points down to 13 points, for 13.33% compression in 0.05 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 3
Clustered 4 points down to 3 points, for 25.00% compression in 0.03 seconds.
Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 13


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 14 points down to 13 points, for 7.14% compression in 0.06 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 12
Clustered 12 points down to 12 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 20
Clustered 24 points down to 20 points, for 16.67% compression in 0.08 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 39
Clustered 44 points down to 39 points, for 11.36% compression in 0.12 seconds.
Number of clusters: 11
Clustered 11 points down to 11 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 14
Clustered 14 points down to 14 points, for 0.00% compression in 0.08 seconds.
Number of clusters: 7
Clustered 7 points down to 7 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 14


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 16 points down to 14 points, for 12.50% compression in 0.06 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 6
Clustered 8 points down to 6 points, for 25.00% compression in 0.04 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 16


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Clustered 17 points down to 16 points, for 5.88% compression in 0.06 seconds.
Number of clusters: 5
Clustered 6 points down to 5 points, for 16.67% compression in 0.04 seconds.
Number of clusters: 8
Clustered 12 points down to 8 points, for 33.33% compression in 0.04 seconds.
Number of clusters: 31
Clustered 52 points down to 31 points, for 40.38% compression in 0.11 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 14
Clustered 14 points down to 14 points, for 0.00% compression in 0.06 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.02 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 8
Clustered 8 points down to 8 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 14
Clustered 15 points down to 14 points, for 6.67% compression in 0.05 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 8
Clustered 9 points down to 8 points, for 11.11% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 13
Clustered 13 points down to 13 points, for 0.00% compression in 0.06 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 6
Clustered 6 points down to 6 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 9
Clustered 9 points down to 9 points, for 0.00% compression in 0.05 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.04 seconds.
Number of clusters: 9
Clustered 10 points down to 9 points, for 10.00% compression in 0.04 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 8
Clustered 9 points down to 8 points, for 11.11% compression in 0.04 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 1
Clustered 1 points down to 1 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 5
Clustered 5 points down to 5 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 2
Clustered 2 points down to 2 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.02 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 3
Clustered 3 points down to 3 points, for 0.00% compression in 0.03 seconds.
Number of clusters: 4
Clustered 4 points down to 4 points, for 0.00% compression in 0.03 seconds.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


In [990]:
idx = df_clustered.groupby(['id'])['number_of_points'].transform(max) == df_clustered['number_of_points']

In [991]:
df_clustered2 = df_clustered[idx]

In [1111]:
#Save a csv of our locations
df_clustered2.to_csv("C:/Users/alexd/Downloads/cgr_assignments.csv")

In [994]:
df_clustered2['row_order'] = df_clustered2.index

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [995]:
#If there are no clusters, take row 0.
df_clustered2 = df_clustered2[(df_clustered2.number_of_points > 1) | (df_clustered2.row_order == 0)]

In [996]:
#For competing clusters, take the earlier data point.
df_clustered2 = df_clustered2.groupby('id').first().reset_index()

In [1074]:
coords = df_clustered2.as_matrix(columns=['lat', 'lon'])
cluster = AgglomerativeClustering(n_clusters=37, affinity='euclidean', linkage='complete')
clustergroups = cluster.fit_predict(coords)
df_clustered2['clusters'] = clustergroups

  """Entry point for launching an IPython kernel.


In [1109]:
#This creates a map with the customers/assignments. I created the color assignments using trial and error.
locations = df_clustered2[['lat', 'lon']]
locationlist = locations.values.tolist()
colordict = {0: 'darkblue', 1: 'green', 2: 'red', 3: 'cadetblue', 4: 'darkred', 5: 'red', 6: 'lightgreen', 7: 'orange', 8: 'red', 9: 'green', 10: 'purple', 11: 'red', 12: 'cadetblue', 13: 'darkblue', 14: 'white', 15: 'purple', 16: 'darkblue', 17: 'orange', 18: 'darkblue', 19: 'green', 20: 'red', 21: 'darkblue', 22: 'red', 23: 'white', 24: 'red', 25: 'white', 26: 'green', 27: 'white', 28: 'darkblue', 29: 'orange', 30: 'green', 31: 'purple', 32: 'darkblue', 33: 'orange', 34: 'darkblue' , 35: 'green', 36: 'darkblue', 37: 'beige'}
map = folium.Map(location=[df_clustered2['lat'].mean(), df_clustered2['lon'].mean()], zoom_start=12)
for lat, lon, shop, cluster, name, phone in zip(df_clustered2['lat'], df_clustered2['lon'], df_clustered2['id'], df_clustered2['clusters'], df_clustered2['name'], df_clustered2['phone_number']):
    folium.CircleMarker([lat, lon], popup='Name: ' + str(name).capitalize() + '  Phone: ' + str(phone) + '  Cluster: ' + str(cluster) + '  ID: ' + str(shop), color=colordict[cluster], fill=True).add_to(map)
map

In [1116]:
#After reviewing and manually reassigning certain customers in excel, here is the final result.
df = pd.read_csv("C:/Users/alexd/Downloads/cgr_assignments.csv", header=0)

In [1120]:
locations = df[['lat', 'lon']]
locationlist = locations.values.tolist()
map = folium.Map(location=[df['lat'].mean(), df['lon'].mean()], zoom_start=12)
for lat, lon, shop, name, color in zip(df['lat'], df['lon'], df['id'], df['name'], df['colors']):
    folium.CircleMarker([lat, lon], popup='Name: ' + str(name).capitalize() + '  Region: ' + str(color).capitalize() + '  ID: ' + str(shop), color=(color), fill=True).add_to(map)
map

In [1121]:
map.save("C:/Users/alexd/Downloads/NW_map.html")