In [1]:
%matplotlib inline
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations
from sklearn.cluster import KMeans
import folium
# Read the location workbook into the DataFrame used by the rest of the notebook
source_workbook = 'Map_of_Schools_filter.xlsx'
data = pd.read_excel(source_workbook)

# Color lookup for map elements (the drawing cells read entries such as
# 'flow' and 'other' from it by key)
color_options = dict([
    ('Campus Name', 'red'),
    ('Category', 'yellow'),
    ('flow', 'purple'),
    ('cog', 'blue'),
    ('candidate', 'black'),
    ('other', 'gray'),
])
# Instantiate map centred on the mean coordinate of all points.
# The mean is converted to a plain [lat, lon] list because folium indexes the
# location positionally, which is unreliable on a label-indexed pandas Series.
# (folium.Map takes no `fit_bounds` argument; zooming is done by
# m.fit_bounds() below.)
m = folium.Map(location=data[['Latitude', 'Longitude']].mean().tolist())
# Add volume points
for _, row in data.iterrows():
    folium.CircleMarker(location=[row['Latitude'],
                                  row['Longitude']],
                        # Radius is the square root of the volume, so the
                        # marker's area is proportional to the volume
                        radius=(row['Volume']**0.5),
                        tooltip=f"{row['Campus Name']} {row['Volume']}").add_to(m)
# Zoom based on volume points
m.fit_bounds(data[['Latitude', 'Longitude']].values.tolist())
# Show the map
m

ModuleNotFoundError: No module named 'folium'

In [None]:
# The outbound shipments cost twice as much as inbound shipments
IB_OB_ratio = 2
def loc_type_mult(x):
    """Return the volume multiplier for a location type.

    Supply volume is the baseline and keeps a multiplier of 1, demand volume
    is scaled by ``IB_OB_ratio`` (read at call time), and any other location
    type gets 0 so that it drops out of volume-weighted computations.

    x: The location type. Matched case-insensitively. Non-string values
       (e.g. NaN or None coming from a blank spreadsheet cell) are converted
       with ``str`` first, so they fall into the "other" case instead of
       raising ``AttributeError``.
    Returns: 1 for 'supply', ``IB_OB_ratio`` for 'demand', 0 otherwise.
    """
    loc_type = str(x).lower()
    if loc_type == 'supply':
        # Baseline: the ratio is applied on the demand side only
        return 1
    if loc_type == 'demand':
        return IB_OB_ratio
    # Neither supply nor demand: remove entirely
    return 0
# Weight each row's volume by its location-type multiplier (IB-OB adjustment)
vol_multiplier = data['Location Type'].apply(str).apply(loc_type_mult)
data['Calc_Vol'] = vol_multiplier.mul(data['Volume'])

In [None]:
# Number of centers of gravity (K-means centroids) to compute
N_COGS = 2
coord_cols = ['Latitude', 'Longitude']
# Only rows with a positive adjusted volume take part in the fit
weighted_points = data.loc[data['Calc_Vol'] > 0]
# Fit volume-weighted K-means. n_init is pinned because the library default
# differs between scikit-learn versions, which would change the result.
kmeans = KMeans(n_clusters=N_COGS,
                n_init=10,
                random_state=0).fit(weighted_points[coord_cols],
                                    sample_weight=weighted_points['Calc_Vol'])
# Get centers of gravity from K-means
cogs = pd.DataFrame(kmeans.cluster_centers_, columns=coord_cols)
# Get volume assigned to each cluster
# NOTE(review): every row is assigned to a cluster here, including rows that
# were excluded from the fit (Calc_Vol <= 0), and the raw 'Volume' is summed
# rather than 'Calc_Vol' - confirm this is intended.
data['Cluster'] = kmeans.predict(data[coord_cols])
cogs = cogs.join(data.groupby('Cluster')['Volume'].sum())
# Include assigned COG coordinates in data by point.
# Columns left behind by a previous run of this cell are dropped first;
# otherwise the '_COG' suffix would produce duplicate column names on re-run.
stale_cog_cols = [col + '_COG' for col in cogs.columns]
data = data.drop(columns=stale_cog_cols, errors='ignore').join(cogs,
                                                               on='Cluster',
                                                               rsuffix='_COG')

In [None]:
# Draw a line from each supply/demand location to its assigned center of gravity
flow_types = ['demand', 'supply']
for _, point in data.iterrows():
    # Other location types get no flow line
    if str(point['Location Type']).lower() not in flow_types:
        continue
    origin = (point['Latitude'], point['Longitude'])
    assigned_cog = (point['Latitude_COG'], point['Longitude_COG'])
    flow_line = folium.PolyLine([origin, assigned_cog],
                                color=color_options['flow'],
                                weight=(point['Volume']**0.5),
                                opacity=0.8)
    flow_line.add_to(m)
                        
# Mark every center of gravity on the map, sized by the volume assigned to it
for _, cog in cogs.iterrows():
    assigned_volume = cog['Volume']
    cog_marker = folium.CircleMarker(location=[cog['Latitude'],
                                               cog['Longitude']],
                                     radius=(assigned_volume**0.5),
                                     color=color_options['other'],
                                     tooltip=assigned_volume)
    cog_marker.add_to(m)

# Display the updated map
m