In [5]:
import geopandas
import pandas as pd
import pgeocode
import plotly.graph_objects as go
nomi = pgeocode.Nominatim('ca')
import numpy as np
import folium
try:
    import haversine as hs
except:
    !pip install --user haversine
    import haversine as hs

In [2]:
data = pd.read_csv('data/Connected member addresses for network map November 2022 - Sheet1.csv', header=None, 
                   names=['Member', 'Address', 'City', 'Postal Code'])
data.iloc[56]['Postal Code'] = 'T0L 0Z0' # Someone entered this wihout a space
data.drop([36, 38], axis=0, inplace=True) # Centers in Germany and France
data

Unnamed: 0,Member,Address,City,Postal Code
0,Alberta Conference SDA Church,5816 Hwy 2A,Lacombe,T4L 2G5
1,Alberta Health Services,48 Quarry Park Blvd. SE,Calgary,T2C 5P2
2,Alberta Innovates,"1500, 10104 - 103 Avenue",Edmonton,T5J 0H8
3,Alberta University of the Arts,1407 - 14th Avenue NW,Calgary,T2N 4R3
4,Aspen View School Division,3600 - 48 Avenue,Athabasca,T9S 1M8
...,...,...,...,...
94,University of Lethbridge,4401 University Drive,Lethbridge,T1K 3M4
95,Wetaskiwin Regional Public Schools,5704 - 51 Street,Wetaskiwin,T9A 3G1
96,Whipcord Ltd,3528 30 Street North,Lethbridge,T1H 6Z4
97,Wild Rose School Division No.66,4912 - 43 Street,Rocky Mountain House,T4T 1P4


In [4]:
len(data['City'].unique())

44

In [57]:
lats = []
lons = []
for i in data['Postal Code']:
    location = nomi.query_postal_code(i)
    lats.append(location['latitude'])
    lons.append(location['longitude'])
data['Latitude'] = lats
data['Longitude'] = lons
data['geometry'] = geopandas.points_from_xy(data['lat'], data['lon'])
data.sort_values(by='lat', inplace=True)
data

Unnamed: 0,Member,Address,City,Postal Code,lat,lon,geometry,Latitude,Longitude
94,University of Lethbridge,4401 University Drive,Lethbridge,T1K 3M4,49.6511,-112.8351,POINT (49.651 -112.835),49.6511,-112.8351
54,Lethbridge College,3000 College Drive South,Lethbridge,T1K 1L6,49.6511,-112.8351,POINT (49.651 -112.835),49.6511,-112.8351
55,Lethbridge School District No.51,433 - 15 Street South,Lethbridge,T1J 2Z4,49.6581,-112.7484,POINT (49.658 -112.748),49.6581,-112.7484
21,City of Lethbridge,910 - 4th Avenue South,Lethbridge,T1J 0P6,49.6581,-112.7484,POINT (49.658 -112.748),49.6581,-112.7484
73,Palliser Regional Schools No.26,"101, 3305 - 18 Avenue North",Lethbridge,T1H 5S1,49.7000,-112.8186,POINT (49.700 -112.819),49.7000,-112.8186
...,...,...,...,...,...,...,...,...,...
70,Northland School Division No.61,"Bag 1400, 9809 - 77 Avenue",Peace River,T8S 1V2,56.2501,-117.2860,POINT (56.250 -117.286),56.2501,-117.2860
47,Holy Family Catholic Regional Division No.37,10307 - 99 Street,Peace River,T8S 1K1,56.2501,-117.2860,POINT (56.250 -117.286),56.2501,-117.2860
33,Fort McMurray Public School District,231 Hardin Street,Fort McMurray,T9H 2G2,56.6640,-111.1357,POINT (56.664 -111.136),56.6640,-111.1357
76,Peace River School Division No.10,"4702 - 51 Street, P.O. Box 380",Grimshaw,T0H 1W0,57.5403,-116.9153,POINT (57.540 -116.915),57.5403,-116.9153


In [58]:
def getCityLocation(city):
    location = data[data['City'] == city]#[['Latitude', 'Longitude']].mean()
    #location = nomi.query_location(city)
    #if not location['latitude'].mean() > 40:
    #    postal_code = data[data['City'] == city]['Postal Code'].unique()[0]
    #    location = nomi.query_postal_code(postal_code)
    return location['Latitude'].mean(), location['Longitude'].mean()

cities_dict = {}
for city in data['City'].unique():
    cities_dict[city] = getCityLocation(city)
cities = pd.DataFrame.from_dict(cities_dict, orient='index', columns=['Latitude', 'Longitude'])
cities.reset_index(inplace=True)
cities.rename(columns={'index':'Name'}, inplace=True)
cities

Unnamed: 0,Name,Latitude,Longitude
0,Lethbridge,49.674057,-112.803257
1,Taber,49.787,-112.146
2,Dunmore,49.835,-110.5203
3,Medicine Hat,50.0816,-110.5788
4,Brooks,50.5834,-111.8851
5,High River,50.5834,-113.8687
6,Morley,50.6314,-114.4089
7,Fort Macleod,50.6314,-114.4089
8,Calgary,51.044137,-114.023753
9,Hanna,50.9944,-111.4632


In [59]:
cities_map = folium.Map(location=[cities['Latitude'].mean(), cities['Longitude'].mean()], zoom_start=5)
for i in range(0,len(cities)):
    folium.Marker([cities.iloc[i]['Latitude'], cities.iloc[i]['Longitude']], popup=cities.iloc[i]['Name']).add_to(cities_map)
cities_map

In [62]:
cities.sort_values(by='Latitude', inplace=True)

distances_list = []
for i in range(len(cities)):
    for j in range(i+1, len(cities)):
        distances_list.append([cities.iloc[i]['Member'], cities.iloc[j]['Member'], 
                          hs.haversine((cities.iloc[i]['lat'], cities.iloc[i]['lon']), 
                                       (cities.iloc[j]['lat'], cities.iloc[j]['lon']))])

# add a row for the last cite to the first city
distances_list.append([cities.iloc[-1]['Member'], cities.iloc[0]['Member'], hs.haversine((cities.iloc[-1]['lat'], cities.iloc[-1]['lon']), (cities.iloc[0]['lat'], cities.iloc[0]['lon']))])


#cities['Distance to Next'] = cities['Latitude'].diff()
#cities

Unnamed: 0,Name,Latitude,Longitude,Distance to Next
0,Lethbridge,49.674057,-112.803257,
1,Taber,49.787,-112.146,0.112943
2,Dunmore,49.835,-110.5203,0.048
3,Medicine Hat,50.0816,-110.5788,0.2466
4,Brooks,50.5834,-111.8851,0.5018
5,High River,50.5834,-113.8687,0.0
6,Morley,50.6314,-114.4089,0.048
7,Fort Macleod,50.6314,-114.4089,0.0
9,Hanna,50.9944,-111.4632,0.363
8,Calgary,51.044137,-114.023753,0.049737


In [None]:
fig0 = go.Figure(data=go.Scattergeo(
    lat = data['lat'],
    lon = data['lon'],
    text = data['Member']
)
               )

fig0.update_layout(
    geo = dict(
        scope = 'north america',
        showland = True,
        #landcolor = "rgb(212, 212, 212)",
        landcolor = "rgb(255, 255, 255)",
        subunitcolor = "rgb(0, 0, 0)",
        countrycolor = "rgb(255, 255, 255)",
        showlakes = False,
        lakecolor = "rgb(255, 255, 255)",
        showsubunits = True,
        showcountries = True,
        resolution = 50,
        projection = dict(
            type = 'conic conformal',
            rotation_lon = -100
        ),
        lonaxis = dict(
            showgrid = True,
            gridwidth = 0.5,
            range= [ -140.0, -55.0 ],
            dtick = 5
        ),
        lataxis = dict (
            showgrid = True,
            gridwidth = 0.5,
            range= [ 20.0, 60.0 ],
            dtick = 5
        )
    ),
    title='Cybera Member locations',
    height=600,
    width=600,
    margin={"r":0,"t":0,"l":0,"b":0}
)
fig0.update_geos(lataxis_range=[48,61],
               lonaxis_range=[-120,-110])
fig0.show()
#fig.write_html('membermap.html')

In [None]:
#!pip install kaleido
#fig0.write_image('membermap.svg')

## Grouping dots together

In [None]:
def getCityLocation(city):
    location = nomi.query_location(city)
    return location['latitude'].mean(), location['longitude'].mean()

cities_data = pd.read_html('https://en.wikipedia.org/wiki/List_of_cities_in_Alberta')[0]
cities = cities_data['Name'].copy()
cities.drop(cities.tail(1).index,inplace=True) # drop the last row
cities.loc[len(cities)] = 'Fort McMurray' # add Fort McMurray
cities['Name'] = cities['Name'].str.split('[').str[0].str.split('(').str[0].str.strip()
cities['Coordinates'] = cities['Name'].apply(getCityLocation)
cities['Latitude'] = cities['Coordinates'].str[0]
cities['Longitude'] = cities['Coordinates'].str[1]
cities

In [None]:
cities_list = ['Banff','Calgary','Edmonton','Fort McMurray','Grande Prairie','Lethbridge','Lloydminster','Medicine Hat','Red Deer']

In [None]:
# folium map of cities
cities_map = folium.Map(location=[cities['Latitude'].mean(), cities['Longitude'].mean()], zoom_start=5)
for i in range(0,len(cities)):
    folium.Marker([cities.iloc[i]['Latitude'], cities.iloc[i]['Longitude']], popup=cities.iloc[i]['Name']).add_to(cities_map)
cities_map

In [None]:
try:
    import haversine as hs
except:
    !pip install --user haversine
    import haversine as hs

# Calculate the distance between each member
distances_list = []
for i in range(len(data)):
    for j in range(i+1, len(data)):
        distances_list.append([data.iloc[i]['Member'], data.iloc[j]['Member'], 
                          hs.haversine((data.iloc[i]['lat'], data.iloc[i]['lon']), 
                                       (data.iloc[j]['lat'], data.iloc[j]['lon']))])

# add a row for the last member to the first member
distances_list.append([data.iloc[-1]['Member'], data.iloc[0]['Member'], hs.haversine((data.iloc[-1]['lat'], data.iloc[-1]['lon']), (data.iloc[0]['lat'], data.iloc[0]['lon']))])

distances_between = pd.DataFrame(distances_list, columns=['Member', 'Member2', 'Distance'])

# join the latitude and longitude of each member to the distances
distances_between = distances_between.merge(data[['Member', 'lat', 'lon']], left_on='Member', right_on='Member', how='left')
distances_between

In [None]:
# for each member create a list of the other members that are within 100 km

def findNeighbours(m):
    return [m] + distances_between[(distances_between['Member'] == m) & (distances_between['Distance'] < 100)]['Member2'].values.tolist()

distances_between['Neighbours'] = distances_between['Member'].apply(findNeighbours)
distances_between

In [None]:
import numpy as np

def createRectangle(neighbours):
    latList = []
    lonList = []
    for place in neighbours:
        #print(place)
        latList.append(distances_between[distances_between['Member'] == place]['lat'].values[0])
        lonList.append(distances_between[distances_between['Member'] == place]['lon'].values[0])
    return [np.max(latList), np.min(latList), np.max(lonList), np.min(lonList)]

def findLatMidpoint(neighbours):
    latList = []
    for place in neighbours:
        latList.append(distances_between[distances_between['Member'] == place]['lat'].values[0])
    return np.mean(latList)

def findLonMidpoint(neighbours):
    lonList = []
    for place in neighbours:
        lonList.append(distances_between[distances_between['Member'] == place]['lon'].values[0])
    return np.mean(lonList)

#distances_between['Rectangle'] = distances_between['Neighbours'].apply(createRectangle)
#distances_between[['Midpoint Lat','Midpoint Lon']] = distances_between['Neighbours'].apply(findMidpoint)
#distances_between['Midpoint Lat'], distances_between['Midpoint Lon'] = distances_between['Neighbours'].apply(findMidpoint) #findMidpoint(distances_between['Neighbours'])

distances_between['Midpoint Lat'] = distances_between['Neighbours'].apply(findLatMidpoint)
distances_between['Midpoint Lon'] = distances_between['Neighbours'].apply(findLonMidpoint)

print('There are now {} unique points'.format(len(distances_between['Midpoint Lat'].unique())))

distances_between

In [None]:
median_latitude = distances_between['lat'].median()
median_longitude = distances_between['lon'].median()
new_map = folium.Map(location=[median_latitude, median_longitude], zoom_start=6, tiles='stamen terrain')
for i in distances_between.itertuples():
    folium.Marker([i.lat, i.lon], popup=i.Member).add_to(new_map)
display(new_map)

### Try different size rectangles

In [None]:
separation_distance = 200

def findNeighbours(m):
    return [m] + distances_between[(distances_between['Member'] == m) & (distances_between['Distance'] < separation_distance)]['Member2'].values.tolist()

distances_between['Neighbours'] = distances_between['Member'].apply(findNeighbours)

import numpy as np

def findLatMidpoint(neighbours):
    latList = []
    for place in neighbours:
        latList.append(distances_between[distances_between['Member'] == place]['lat'].values[0])
    return np.mean(latList)

def findLonMidpoint(neighbours):
    lonList = []
    for place in neighbours:
        lonList.append(distances_between[distances_between['Member'] == place]['lon'].values[0])
    return np.mean(lonList)

distances_between['Midpoint Lat'] = distances_between['Neighbours'].apply(findLatMidpoint)
distances_between['Midpoint Lon'] = distances_between['Neighbours'].apply(findLonMidpoint)

print('There are now {} unique points'.format(len(distances_between['Midpoint Lat'].unique())))

In [None]:
# this will only work if they are sorted, but it's not great
# we should elimilate one of the two members in a pair that is too close
# but there are lots of groups that are three or more members close together

separation_distance = 150

previous_row = None
rows_to_remove = []
for row in data.sort_values(by='lat').itertuples():
    #print(row.Member, row.lat, row.lon)
    if previous_row is not None:
        #print(previous_row.Member, row.Member, 
        d = hs.haversine((previous_row.lat, previous_row.lon), (row.lat, row.lon))
        if d < separation_distance:
            #print('remove row', row.Index)
            rows_to_remove.append(row.Index)
    previous_row = row
filtered_distances = data.drop(rows_to_remove, axis=0)
import folium
#from folium.plugins import FastMarkerCluster
median_latitude = filtered_distances['lat'].median()
median_longitude = filtered_distances['lon'].median()
new_map = folium.Map(location=[median_latitude, median_longitude], zoom_start=6, tiles='stamen terrain')
#new_map.add_child(FastMarkerCluster(filtered_distances[['lat','lon']].values.tolist()))
for i in filtered_distances.itertuples():
    folium.Marker([i.lat, i.lon], popup=i.Member).add_to(new_map)
display(new_map)

In [None]:
len(distances_list[distances_list['Distance'] < 20].sort_values('Distance')['Member1'].unique())

In [None]:
# map the members that are close to each other
distances_list['Member 1 Lat'] = distances_list['Member1'].map(distances_list.set_index('Member')['lat'])
distances_list['Member 1 Lon'] = distances_list['Member1'].map(distances_list.set_index('Member')['lon'])
distances_list

In [None]:
distances_list[(distances_list['Member1']=='Alberta Conference SDA Church') & (distances_list['Member2']=='Alberta Conference SDA Church')]

In [None]:
fig0 = go.Figure(data=go.Scattergeo(
    lat = distances_list['Member 1 Lat'],
    lon = distances_list['Member 1 Lon'],
    text = distances_list['Member1']
)
               )

fig0.update_layout(
    geo = dict(
        scope = 'north america',
        showland = True,
        #landcolor = "rgb(212, 212, 212)",
        landcolor = "rgb(255, 255, 255)",
        subunitcolor = "rgb(0, 0, 0)",
        countrycolor = "rgb(255, 255, 255)",
        showlakes = False,
        lakecolor = "rgb(255, 255, 255)",
        showsubunits = True,
        showcountries = True,
        resolution = 50,
        projection = dict(
            type = 'conic conformal',
            rotation_lon = -100
        ),
        lonaxis = dict(
            showgrid = True,
            gridwidth = 0.5,
            range= [ -140.0, -55.0 ],
            dtick = 5
        ),
        lataxis = dict (
            showgrid = True,
            gridwidth = 0.5,
            range= [ 20.0, 60.0 ],
            dtick = 5
        )
    ),
    title='Cybera Member locations',
    height=600,
    width=600,
    margin={"r":0,"t":0,"l":0,"b":0}
)
fig0.update_geos(lataxis_range=[48,61],
               lonaxis_range=[-120,-110])
fig0.show()

## Finding scale distances on a 4x8 sheet of plywood

In [None]:
x_range = [distances_list['lon'].max(), distances_list['lon'].min()]
y_range = [distances_list['lat'].max(), distances_list['lat'].min()]
distances_list['x'] = (distances_list['lon'] - x_range[1]) / (x_range[0] - x_range[1])
distances_list['y'] = (distances_list['lat'] - y_range[1]) / (y_range[0] - y_range[1])
distances_list['x inches'] = distances_list['x'] * 46
distances_list['y inches'] = distances_list['y'] * 46
distances_list

In [None]:
import plotly.express as px
fig2 = px.scatter(distances_list, x='x inches', y='y inches', hover_name='Member', hover_data=['Address', 'City', 'Postal Code'], height=800, width=400)
fig2.update_xaxes(range=[0, 48])
fig2.update_yaxes(range=[0, 96])
fig2.show()
fig0.show()