In [10]:
%matplotlib inline
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations 
from sklearn.cluster import KMeans
import folium
# Load data
data = pd.read_csv('C:/Users\dmckenzie\Downloads\Dummy Data For Center Of Gravity.csv')

In [11]:
data.dtypes

Location Name     object
Latitude         float64
Longitude        float64
Volume           float64
Location Type     object
dtype: object

In [12]:
# Color options
color_options = {'demand': 'green',
                 'supply': 'blue',
                 'flow': 'black',
                 'cog': 'blue',
                 'candidate': 'black',
                 'other': 'gray'}
# Instantiate map
m = folium.Map(location=data[['Latitude', 'Longitude']].mean(),
               fit_bounds=[[data['Latitude'].min(),
                            data['Longitude'].min()], 
                           [data['Latitude'].max(),
                            data['Longitude'].max()]])
                            
# Add volume points
for _, row in data.iterrows():
    folium.CircleMarker(location=[row['Latitude'], 
                                  row['Longitude']],
                        radius=(row['Volume']**0.35),
                        color=color_options.get(str(row['Location Type']).lower(), 'gray'),
                        tooltip=str(row['Location Name'])+' '+str(row['Volume'])).add_to(m)
                                #row['Longitude']]).add_to(m)
        
# Zoom based on volume points
m.fit_bounds(data[['Latitude', 'Longitude']].values.tolist())
# Show the map
m

In [13]:
m.save("Healthy life expectancy.html")

In [14]:
# The outbound shipments cost twice as much as inbound shipments
IB_OB_ratio = 2
def loc_type_mult(x):
    """A function to get the volume multiplier based on the location type and the IB-OB ratio.
    x: The location type
    """
    if x.lower() == 'supply':
        # No need to divide since we are already multiplying the demand
        return 1
    elif x.lower() == 'demand':
        # Only apply multiplier to demand
        return IB_OB_ratio
    else:
        # If neither supply nor demand, remove entirely
        return 0
# Adjust volumes used in the computation based on IB-OB ratio
data['Calc_Vol'] = data['Location Type'].apply(str).apply(loc_type_mult)*data['Volume']

In [15]:
# Fit K-means for 2 centroids
kmeans = KMeans(n_clusters=2, 
                random_state=0).fit(data.loc[data['Calc_Vol']>0, ['Latitude', 
                                                                  'Longitude']], 
                                    sample_weight=data.loc[data['Calc_Vol']>0, 
                                                           'Calc_Vol'])
# Get centers of gravity from K-means
cogs = kmeans.cluster_centers_
cogs = pd.DataFrame(cogs, columns=['Latitude',
                                   'Longitude'])
# Get volume assigned to each cluster
data['Cluster'] = kmeans.predict(data[['Latitude', 'Longitude']])
cogs = cogs.join(data.groupby('Cluster')['Volume'].sum())
# Include assigned COG coordinates in data by point 
data = data.join(cogs, on='Cluster', rsuffix='_COG')

In [16]:
data

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol,Cluster,Latitude_COG,Longitude_COG,Volume_COG
0,"Mahattan, NY",40.7831,-73.9712,5000.0,Demand,10000.0,0,39.656208,-74.717415,17000.0
1,"New Haven, CT",41.2982,-72.9991,1000.0,Demand,2000.0,0,39.656208,-74.717415,17000.0
2,"Chicago, IL",41.8333,-88.0121,4000.0,Demand,8000.0,1,39.661377,-87.647985,9000.0
3,"Boston, MA",42.3142,-71.1103,3000.0,Demand,6000.0,0,39.656208,-74.717415,17000.0
4,"Raleigh, NC",35.8436,-78.7851,8000.0,Supply,8000.0,0,39.656208,-74.717415,17000.0
5,"Nashville, TN",36.1863,-87.0654,5000.0,Supply,5000.0,1,39.661377,-87.647985,9000.0
6,"Charlottesville, VA",38.04,-78.5199,,Candidate,,0,39.656208,-74.717415,17000.0
7,"Harrisburg, PA",40.3394,-77.0077,,Candidate,,0,39.656208,-74.717415,17000.0
8,"Columbus, OH",39.9828,-83.1309,,Candidate,,1,39.661377,-87.647985,9000.0
9,"Lafayette, IN",40.4049,-86.9282,,Candidate,,1,39.661377,-87.647985,9000.0


In [18]:
# Add flow lines to centers of gravity to map
for _, row in data.iterrows():
    # Flow lines
    if str(row['Location Type']).lower() in (['demand', 'supply']):
        folium.PolyLine([(row['Latitude'],
                          row['Longitude']),
                         (row['Latitude_COG'],
                          row['Longitude_COG'])],
                        color=color_options['flow'],
                        weight=(row['Volume']**0.25),
                        opacity=0.8).add_to(m)
                        
# Add centers of gravity to map
for _, row in cogs.iterrows():
    # New centers of gravity
    folium.CircleMarker(location=[row['Latitude'],
                                  row['Longitude']],
                        radius=(row['Volume']**0.35),
                        color=color_options['cog'],
                        tooltip=row['Volume']).add_to(m)
    
# Show map
m

KeyError: 'Latitude_COG'

In [17]:
%matplotlib inline
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations 
from tqdm import tqdm
from sklearn.cluster import KMeans
import folium
# Load data
data = pd.read_csv('C:/Users\dmckenzie\Downloads\Dummy Data For Center Of Gravity.csv')                             
# Color options
color_options = {'demand': 'red',
                 'supply': 'yellow',
                 'flow': 'black',
                 'cog': 'blue',
                 'candidate': 'green',
                 'other': 'gray'}
# Instantiate map
m = folium.Map(location=data[['Latitude', 'Longitude']].mean(),
               fit_bounds=[[data['Latitude'].min(),
                            data['Longitude'].min()], 
                           [data['Latitude'].max(),
                            data['Longitude'].max()]])
                            
# Add volume points
for _, row in data.iterrows():
    folium.CircleMarker(location=[row['Latitude'], 
                                  row['Longitude']],
                        radius=(20),
                        color=color_options.get(str(row['Location Type']).lower(), 'gray'),
                        tooltip=str(row['Location Name'])+' '+str(row['Volume'])).add_to(m)
                                #row['Longitude']]).add_to(m)
        
# Zoom based on volume points
m.fit_bounds(data[['Latitude', 'Longitude']].values.tolist())
# Show the map
m


In [177]:
data

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type
0,"Mahattan, NY",40.7831,-73.9712,5000.0,Demand
1,"New Haven, CT",41.2982,-72.9991,1000.0,Demand
2,"Chicago, IL",41.8333,-88.0121,4000.0,Demand
3,"Boston, MA",42.3142,-71.1103,3000.0,Demand
4,"Raleigh, NC",35.8436,-78.7851,8000.0,Supply
5,"Nashville, TN",36.1863,-87.0654,5000.0,Supply
6,"Charlottesville, VA",38.04,-78.5199,,Candidate
7,"Harrisburg, PA",40.3394,-77.0077,,Candidate
8,"Columbus, OH",39.9828,-83.1309,,Candidate
9,"Lafayette, IN",40.4049,-86.9282,,Candidate


In [178]:
n=2

In [179]:
data['Calc_Vol'] = data['Location Type'].apply(str).apply(loc_type_mult)*data['Volume']
cands = data.loc[data['Location Type'].str.lower()=='candidate']
locs = data.loc[data['Calc_Vol']>0]
total_dist = np.inf
best_cogs = []


In [180]:
data

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol
0,"Mahattan, NY",40.7831,-73.9712,5000.0,Demand,10000.0
1,"New Haven, CT",41.2982,-72.9991,1000.0,Demand,2000.0
2,"Chicago, IL",41.8333,-88.0121,4000.0,Demand,8000.0
3,"Boston, MA",42.3142,-71.1103,3000.0,Demand,6000.0
4,"Raleigh, NC",35.8436,-78.7851,8000.0,Supply,8000.0
5,"Nashville, TN",36.1863,-87.0654,5000.0,Supply,5000.0
6,"Charlottesville, VA",38.04,-78.5199,,Candidate,
7,"Harrisburg, PA",40.3394,-77.0077,,Candidate,
8,"Columbus, OH",39.9828,-83.1309,,Candidate,
9,"Lafayette, IN",40.4049,-86.9282,,Candidate,


In [181]:
cands

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol
6,"Charlottesville, VA",38.04,-78.5199,,Candidate,
7,"Harrisburg, PA",40.3394,-77.0077,,Candidate,
8,"Columbus, OH",39.9828,-83.1309,,Candidate,
9,"Lafayette, IN",40.4049,-86.9282,,Candidate,
10,"Buffalo, NY",42.8962,-78.9344,,Candidate,


In [182]:
locs

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol
0,"Mahattan, NY",40.7831,-73.9712,5000.0,Demand,10000.0
1,"New Haven, CT",41.2982,-72.9991,1000.0,Demand,2000.0
2,"Chicago, IL",41.8333,-88.0121,4000.0,Demand,8000.0
3,"Boston, MA",42.3142,-71.1103,3000.0,Demand,6000.0
4,"Raleigh, NC",35.8436,-78.7851,8000.0,Supply,8000.0
5,"Nashville, TN",36.1863,-87.0654,5000.0,Supply,5000.0


In [183]:
total_dist

inf

In [187]:
qq=list(combinations(cands.index, n))[0]

In [188]:
cands.loc[list(qq)]

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol
6,"Charlottesville, VA",38.04,-78.5199,,Candidate,
7,"Harrisburg, PA",40.3394,-77.0077,,Candidate,


In [189]:
locs

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol
0,"Mahattan, NY",40.7831,-73.9712,5000.0,Demand,10000.0
1,"New Haven, CT",41.2982,-72.9991,1000.0,Demand,2000.0
2,"Chicago, IL",41.8333,-88.0121,4000.0,Demand,8000.0
3,"Boston, MA",42.3142,-71.1103,3000.0,Demand,6000.0
4,"Raleigh, NC",35.8436,-78.7851,8000.0,Supply,8000.0
5,"Nashville, TN",36.1863,-87.0654,5000.0,Supply,5000.0


In [193]:
for i_l, r_l in locs.iterrows():
    print( i_l,r_l)

0 Location Name    Mahattan, NY
Latitude              40.7831
Longitude            -73.9712
Volume                 5000.0
Location Type          Demand
Calc_Vol              10000.0
Name: 0, dtype: object
1 Location Name    New Haven, CT
Latitude               41.2982
Longitude             -72.9991
Volume                  1000.0
Location Type           Demand
Calc_Vol                2000.0
Name: 1, dtype: object
2 Location Name    Chicago, IL
Latitude             41.8333
Longitude           -88.0121
Volume                4000.0
Location Type         Demand
Calc_Vol              8000.0
Name: 2, dtype: object
3 Location Name    Boston, MA
Latitude            42.3142
Longitude          -71.1103
Volume               3000.0
Location Type        Demand
Calc_Vol             6000.0
Name: 3, dtype: object
4 Location Name    Raleigh, NC
Latitude             35.8436
Longitude           -78.7851
Volume                8000.0
Location Type         Supply
Calc_Vol              8000.0
Name: 4, dtype: 

In [197]:
list(locs.iterrows())[0]

(0,
 Location Name    Mahattan, NY
 Latitude              40.7831
 Longitude            -73.9712
 Volume                 5000.0
 Location Type          Demand
 Calc_Vol              10000.0
 Name: 0, dtype: object)

In [171]:
data['Calc_Vol'] = data['Location Type'].apply(str).apply(loc_type_mult)*data['Volume']
cands = data.loc[data['Location Type'].str.lower()=='candidate']
locs = data.loc[data['Calc_Vol']>0]
total_dist = np.inf
best_cogs = []
# Loop to find best combination of candidate sites
for i in tqdm(list(combinations(cands.index, n))):
    temp_cands = cands.loc[list(i)]
    locs['Cluster'] = 0
    locs['Distance_COG'] = np.inf
    for i_l, r_l in locs.iterrows():
        for i_c, r_c in temp_cands.iterrows():
            # Get distance
            dist = (r_l['Latitude']-r_c['Latitude'])**2
            dist += (r_l['Longitude']-r_c['Longitude'])**2
            dist **= 0.5
            # Save values if distance is shorter
            if dist < locs.loc[i_l, 'Distance_COG']:
                # Save distance
                locs.loc[i_l, 'Distance_COG'] = dist
                # Save index of nearest point
                locs.loc[i_l, 'Cluster'] = i_c
    # Weight distance by volume
    locs['Weighted_Distance_COG'] = locs['Distance_COG'] * locs['Calc_Vol']
    # Save scenario if total weighted distance is smaller
    if locs['Weighted_Distance_COG'].sum() < total_dist:
        total_dist = locs['Weighted_Distance_COG'].sum()
        best_cogs = list(list(i))
# Get centers of gravity
cogs = cands.loc[best_cogs, ['Latitude',
                             'Longitude']]
# Reloop to get site assignment
locs['Cluster'] = 0
locs['Distance_COG'] = np.inf
for i_l, r_l in locs.iterrows():
    for i_c, r_c in cogs.iterrows():
        # Get distance
        dist = (r_l['Latitude']-r_c['Latitude'])**2
        dist += (r_l['Longitude']-r_c['Longitude'])**2
        dist **= 0.5
        # Save values if distance is shorter
        if dist < locs.loc[i_l, 'Distance_COG']:
            # Save distance
            locs.loc[i_l, 'Distance_COG'] = dist
            # Save index of nearest point
            locs.loc[i_l, 'Cluster'] = i_c
# Get volume assigned to each cog
cogs = cogs.join(locs.groupby('Cluster')['Volume'].sum())
# Include assigned COG coordinates in data by point 
data = data.join(locs['Cluster'])
data = data.join(cogs, on='Cluster', rsuffix='_COG')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[

In [172]:
# Add flow lines to centers of gravity to map
for _, row in data.iterrows():
    # Flow lines
    if str(row['Location Type']).lower() in (['demand', 'supply']):
        folium.PolyLine([(row['Latitude'],
                          row['Longitude']),
                         (row['Latitude_COG'],
                          row['Longitude_COG'])],
                        color=color_options['flow'],
                        weight=(row['Volume']**0.25),
                        opacity=0.8).add_to(m)
                        
# Add centers of gravity to map
for _, row in cogs.iterrows():
    # New centers of gravity
    folium.CircleMarker(location=[row['Latitude'],
                                  row['Longitude']],
                        radius=(row['Volume']**0.5),
                        color=color_options['cog'],
                        tooltip=row['Volume']).add_to(m)
    
# Show map
m

In [291]:
IDs=data[data['Location Type']=='Candidate'].index.values

In [281]:
IDs=data.index.values

In [292]:
dlist=[]
for i in IDs:
    TempFrame=pd.concat([data[data.index.values==i][['Latitude','Longitude']].reset_index(drop=True),
        data[(data.index.values!=i) & (data['Location Type']!='Candidate')][['Latitude','Longitude']].reset_index(drop=True)],
          axis=1,
          join='outer',
          ignore_index=True).ffill().iloc[1:,:].\
    rename(columns={0:'LA1',1:'LO1',2:'LA2',3:'LO2'})
    dist=sum((TempFrame['LA1']-TempFrame['LA2'])**2)
    dist+=sum((TempFrame['LO1']-TempFrame['LO2'])**2)
    dlist.append(dist)
    
dlist    

[300.11332135000004,
 320.77282707,
 347.47701919,
 556.8153094700004,
 343.76842486000004]

In [293]:
data[data['Location Type']=='Candidate']

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol
6,"Charlottesville, VA",38.04,-78.5199,,Candidate,
7,"Harrisburg, PA",40.3394,-77.0077,,Candidate,
8,"Columbus, OH",39.9828,-83.1309,,Candidate,
9,"Lafayette, IN",40.4049,-86.9282,,Candidate,
10,"Buffalo, NY",42.8962,-78.9344,,Candidate,


In [284]:
data.assign(Score=dlist)

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol,Score
0,"Mahattan, NY",40.7831,-73.9712,5000.0,Demand,10000.0,448.939746
1,"New Haven, CT",41.2982,-72.9991,1000.0,Demand,2000.0,517.499097
2,"Chicago, IL",41.8333,-88.0121,4000.0,Demand,8000.0,665.377494
3,"Boston, MA",42.3142,-71.1103,3000.0,Demand,6000.0,683.389523
4,"Raleigh, NC",35.8436,-78.7851,8000.0,Supply,8000.0,353.696523
5,"Nashville, TN",36.1863,-87.0654,5000.0,Supply,5000.0,617.574353
6,"Charlottesville, VA",38.04,-78.5199,,Candidate,,300.113321
7,"Harrisburg, PA",40.3394,-77.0077,,Candidate,,320.772827
8,"Columbus, OH",39.9828,-83.1309,,Candidate,,347.477019
9,"Lafayette, IN",40.4049,-86.9282,,Candidate,,556.815309


In [279]:
data[data['Location Type']!='Candidate']

Unnamed: 0,Location Name,Latitude,Longitude,Volume,Location Type,Calc_Vol
0,"Mahattan, NY",40.7831,-73.9712,5000.0,Demand,10000.0
1,"New Haven, CT",41.2982,-72.9991,1000.0,Demand,2000.0
2,"Chicago, IL",41.8333,-88.0121,4000.0,Demand,8000.0
3,"Boston, MA",42.3142,-71.1103,3000.0,Demand,6000.0
4,"Raleigh, NC",35.8436,-78.7851,8000.0,Supply,8000.0
5,"Nashville, TN",36.1863,-87.0654,5000.0,Supply,5000.0


In [276]:
TempFrame=pd.concat([data[data.index.values==IDs[2]][['Latitude','Longitude']].reset_index(drop=True),
        data[data.index.values!=IDs[2]][['Latitude','Longitude']].reset_index(drop=True)],
          axis=1,
          join='outer',
          ignore_index=True).\
    rename(columns={0:'LA1',1:'LO1',2:'LA2',3:'LO2'})
TempFrame

Unnamed: 0,LA1,LO1,LA2,LO2
0,41.8333,-88.0121,40.7831,-73.9712
1,,,41.2982,-72.9991
2,,,42.3142,-71.1103
3,,,35.8436,-78.7851
4,,,36.1863,-87.0654
5,,,38.04,-78.5199
6,,,40.3394,-77.0077
7,,,39.9828,-83.1309
8,,,40.4049,-86.9282
9,,,42.8962,-78.9344


In [273]:
data[data.index.values!=IDs[0]][['Latitude','Longitude']]

Unnamed: 0,Latitude,Longitude
1,41.2982,-72.9991
2,41.8333,-88.0121
3,42.3142,-71.1103
4,35.8436,-78.7851
5,36.1863,-87.0654
6,38.04,-78.5199
7,40.3394,-77.0077
8,39.9828,-83.1309
9,40.4049,-86.9282
10,42.8962,-78.9344


In [301]:
dist=sum((TempFrame['LA1']-TempFrame['LA2'])**2)
dist+=sum((TempFrame['LO1']-TempFrame['LO2'])**2)

In [302]:
dist**=.5

In [303]:
dist

18.540993092604293

In [309]:
pd.read_excel('C:/Users\dmckenzie\Downloads\00 PRDE School Data List_Jan-25-2022 V.1 DAN COPY.xslx', delimiter = ',', 
                             dtype = None, names = True)

TypeError: read_excel() got an unexpected keyword argument 'delimiter'

In [324]:
GOPR=pd.read_csv('C:/Users\dmckenzie\Downloads\Test.csv')

In [332]:
GOPR['MUNICIPALITY']=='BAYAMON'

0      False
1      False
2       True
3      False
4      False
       ...  
847    False
848    False
849    False
850    False
851    False
Name: MUNICIPALITY, Length: 852, dtype: bool

In [329]:
# Add centers of gravity to map
for _, row in GOPR.iterrows():
    # New centers of gravity
    folium.CircleMarker(location=[row['LATITUDE'],
                                  row['LONGITUDE']],
                        radius=10).add_to(m)

In [336]:
m = folium.Map(location=GOPR[GOPR['MUNICIPALITY']=='BAYAMON'].reset_index(drop=True)[['LATITUDE', 'LONGITUDE']].mean(),
               fit_bounds=[[GOPR[GOPR['MUNICIPALITY']=='BAYAMON'].reset_index(drop=True)['LATITUDE'].min(),
                            GOPR[GOPR['MUNICIPALITY']=='BAYAMON'].reset_index(drop=True)['LONGITUDE'].min()], 
                           [GOPR[GOPR['MUNICIPALITY']=='BAYAMON'].reset_index(drop=True)['LATITUDE'].max(),
                            GOPR[GOPR['MUNICIPALITY']=='BAYAMON'].reset_index(drop=True)['LONGITUDE'].max()]])

In [350]:
GOPR.iloc[:,18]

0      166.0
1      327.0
2      293.0
3      105.0
4       64.0
       ...  
847    241.0
848    219.0
849    196.0
850    389.0
851      NaN
Name: ENROLLMENT 2022-23, Length: 852, dtype: float64

In [359]:
BAYA=GOPR[GOPR['MUNICIPALITY']=='BAYAMON'].reset_index(drop=True)

In [None]:
BAYA

In [None]:
#basic demographics
#most different, regionally 

In [None]:
dlist=[]
for i in IDs:
    TempFrame=pd.concat([data[data.index.values==i][['Latitude','Longitude']].reset_index(drop=True),
        data[(data.index.values!=i) & (data['Location Type']!='Candidate')][['Latitude','Longitude']].reset_index(drop=True)],
          axis=1,
          join='outer',
          ignore_index=True).ffill().iloc[1:,:].\
    rename(columns={0:'LA1',1:'LO1',2:'LA2',3:'LO2'})
    dist=sum((TempFrame['LA1']-TempFrame['LA2'])**2)
    dist+=sum((TempFrame['LO1']-TempFrame['LO2'])**2)
    dlist.append(dist)
    
dlist    

In [361]:
m = folium.Map(location=BAYA[['LATITUDE', 'LONGITUDE']].mean(),
               fit_bounds=[[BAYA['LATITUDE'].min(),
                            BAYA['LONGITUDE'].min()], 
                           BAYA['LATITUDE'].max(),
                            BAYA['LONGITUDE'].max()])
# Add centers of gravity to map
for _, row in BAYA.iterrows():
    # New centers of gravity
    folium.CircleMarker(location=[row['LATITUDE'],
                                  row['LONGITUDE']],
                        radius=row['ENROLLMENT 2022-23']/60).add_to(m)
m    