### Importing libraries

In [28]:
import pandas as pd
import numpy as np

In [29]:
#!pip install geopy 
#!pip install Nominatim

In [30]:
# Nominatim generates longitude, latitude of most of the city,state around the world
from  geopy.geocoders import Nominatim
geolocator = Nominatim(timeout=None)

  This is separate from the ipykernel package so we can avoid doing imports until


In [31]:
data=pd.read_csv('ANPA data.csv')
data.head()

Unnamed: 0,City,Country,State,Current Affiliation,Field of Study/Interest,Level
0,Worcester,US,MA,Worcester State,Physics,Lifetime Membership
1,Pittsburgh,US,PA,NETL,Applied Physics,Lifetime Membership
2,,US,Illinois,Argonne National Laboratory,"Accelerator Physics, Optics, Laser-Plasma, Com...",-
3,Lubbock,US,Texas,"Texas Tech University, CMS experiment at the L...",High Energy Physics,Lifetime Membership
4,Kansas City,US,Missouri,University of Missouri-Kansas City,Solid-state Physics,Lifetime Membership


In [32]:
data['City']=data['City'].fillna('') # replacing nan with " " was helpful when trying to generate location in next step
data.head()

Unnamed: 0,City,Country,State,Current Affiliation,Field of Study/Interest,Level
0,Worcester,US,MA,Worcester State,Physics,Lifetime Membership
1,Pittsburgh,US,PA,NETL,Applied Physics,Lifetime Membership
2,,US,Illinois,Argonne National Laboratory,"Accelerator Physics, Optics, Laser-Plasma, Com...",-
3,Lubbock,US,Texas,"Texas Tech University, CMS experiment at the L...",High Energy Physics,Lifetime Membership
4,Kansas City,US,Missouri,University of Missouri-Kansas City,Solid-state Physics,Lifetime Membership


In [33]:
data.loc[data['Country'] != 'US','Country']= 'US'

In [34]:
# combine city, state, and country to create new columns address
data['address']=data['City'].astype(str)+','+data['State'].astype(str)+ ','+data['Country'].astype(str)
data.head()

Unnamed: 0,City,Country,State,Current Affiliation,Field of Study/Interest,Level,address
0,Worcester,US,MA,Worcester State,Physics,Lifetime Membership,"Worcester,MA,US"
1,Pittsburgh,US,PA,NETL,Applied Physics,Lifetime Membership,"Pittsburgh,PA,US"
2,,US,Illinois,Argonne National Laboratory,"Accelerator Physics, Optics, Laser-Plasma, Com...",-,",Illinois,US"
3,Lubbock,US,Texas,"Texas Tech University, CMS experiment at the L...",High Energy Physics,Lifetime Membership,"Lubbock,Texas,US"
4,Kansas City,US,Missouri,University of Missouri-Kansas City,Solid-state Physics,Lifetime Membership,"Kansas City,Missouri,US"


In [35]:
data.shape

(379, 7)

In [36]:
## Drop duplicate addresses
data = data.drop_duplicates(subset= ["City","Country","State"], keep = 'first')

In [37]:
# getting new column with longitude and latitude based off address 
data['Location']=data['address'].apply(lambda x:geolocator.geocode(x))
data.head()

Unnamed: 0,City,Country,State,Current Affiliation,Field of Study/Interest,Level,address,Location
0,Worcester,US,MA,Worcester State,Physics,Lifetime Membership,"Worcester,MA,US","(Worcester, Worcester County, Massachusetts, U..."
1,Pittsburgh,US,PA,NETL,Applied Physics,Lifetime Membership,"Pittsburgh,PA,US","(Pittsburgh, Allegheny County, Pennsylvania, U..."
2,,US,Illinois,Argonne National Laboratory,"Accelerator Physics, Optics, Laser-Plasma, Com...",-,",Illinois,US","(Illinois, South Denley Drive, Dallas, Dallas ..."
3,Lubbock,US,Texas,"Texas Tech University, CMS experiment at the L...",High Energy Physics,Lifetime Membership,"Lubbock,Texas,US","(Lubbock, Lubbock County, Texas, United States..."
4,Kansas City,US,Missouri,University of Missouri-Kansas City,Solid-state Physics,Lifetime Membership,"Kansas City,Missouri,US","(Kansas City, Jackson County, Missouri, United..."


In [38]:
# It seems the location columns has returned null for 10 entries.
data.isnull().sum()

City                       0
Country                    0
State                      1
Current Affiliation        0
Field of Study/Interest    1
Level                      0
address                    0
Location                   0
dtype: int64

In [39]:
data[data['Location'].isnull()]
# There are mismatch in the city, country, and state. For example,  the code didn't find the valid address for "Bedford, Texas,NP". 
# We have 10 such entries where location is returned as None. 
#I will discard these 10 entries for now and use only rest of the valid entries for creating map.

Unnamed: 0,City,Country,State,Current Affiliation,Field of Study/Interest,Level,address,Location


In [40]:
data.loc[data['Level'].isin(['TN', 'MI', 'California', 'NC', 'Michigan', 'Florida', 'Illinois', 'TX']),"Level"]="Lifetime Membership"
data.loc[data["Level"]=='-',"Level"]="Student Membership"
data['Level'].value_counts()

Lifetime Membership    80
Student Membership     74
First Time Student     58
Regular Membership     28
Name: Level, dtype: int64

In [41]:
# selecting only valid entries
data1=data[data['Location'].notnull()]
data1.head()

Unnamed: 0,City,Country,State,Current Affiliation,Field of Study/Interest,Level,address,Location
0,Worcester,US,MA,Worcester State,Physics,Lifetime Membership,"Worcester,MA,US","(Worcester, Worcester County, Massachusetts, U..."
1,Pittsburgh,US,PA,NETL,Applied Physics,Lifetime Membership,"Pittsburgh,PA,US","(Pittsburgh, Allegheny County, Pennsylvania, U..."
2,,US,Illinois,Argonne National Laboratory,"Accelerator Physics, Optics, Laser-Plasma, Com...",Student Membership,",Illinois,US","(Illinois, South Denley Drive, Dallas, Dallas ..."
3,Lubbock,US,Texas,"Texas Tech University, CMS experiment at the L...",High Energy Physics,Lifetime Membership,"Lubbock,Texas,US","(Lubbock, Lubbock County, Texas, United States..."
4,Kansas City,US,Missouri,University of Missouri-Kansas City,Solid-state Physics,Lifetime Membership,"Kansas City,Missouri,US","(Kansas City, Jackson County, Missouri, United..."


In [42]:
# creating two new columns for latitude and longitude from the location columns
data1['Latitude']=data1['Location'].apply(lambda x:x.latitude)
data1['Longitude']=data1['Location'].apply(lambda x:x.longitude)
data1.head()

Unnamed: 0,City,Country,State,Current Affiliation,Field of Study/Interest,Level,address,Location,Latitude,Longitude
0,Worcester,US,MA,Worcester State,Physics,Lifetime Membership,"Worcester,MA,US","(Worcester, Worcester County, Massachusetts, U...",42.262562,-71.801888
1,Pittsburgh,US,PA,NETL,Applied Physics,Lifetime Membership,"Pittsburgh,PA,US","(Pittsburgh, Allegheny County, Pennsylvania, U...",40.441694,-79.990086
2,,US,Illinois,Argonne National Laboratory,"Accelerator Physics, Optics, Laser-Plasma, Com...",Student Membership,",Illinois,US","(Illinois, South Denley Drive, Dallas, Dallas ...",32.72318,-96.805191
3,Lubbock,US,Texas,"Texas Tech University, CMS experiment at the L...",High Energy Physics,Lifetime Membership,"Lubbock,Texas,US","(Lubbock, Lubbock County, Texas, United States...",33.563521,-101.879336
4,Kansas City,US,Missouri,University of Missouri-Kansas City,Solid-state Physics,Lifetime Membership,"Kansas City,Missouri,US","(Kansas City, Jackson County, Missouri, United...",39.100105,-94.578142


### Map using Folium

In [43]:
import folium

In [44]:
world_map=folium.Map(location=[23.63, -102.35],tiles='Stamen Terrain' ,zoom_start=4)
world_map

In [45]:
data1.columns

Index(['City', 'Country', 'State', 'Current Affiliation',
       'Field of Study/Interest', 'Level', 'address', 'Location', 'Latitude',
       'Longitude'],
      dtype='object')

In [46]:
data1.Level.unique() # I want to use as Level feature as color code in the map. Looking at the unique values.

array(['Lifetime Membership', 'Student Membership', 'Regular Membership',
       'First Time Student'], dtype=object)

In [47]:
#assigning color to each features 
color_map={'Lifetime Membership':'red','Student Membership':'green',
       'Regular Membership':'blue', 'First Time Student':'orange',
       'General Term Membership':'yellow','-':'gray'}

In [48]:
# instantiate a feature group for the incidents in the dataframe
incidents = folium.map.FeatureGroup()

# loop through the 100 crimes and add each to the incidents feature group
for index,record in data1.iterrows():
  folium.Marker(location=[record[8],
  record[9]],
  popup=record[5],
  icon=folium.Icon(color=color_map[record[5]],
  icon='')
  ).add_to(incidents)
# add incidents to map
world_map.add_child(incidents)

In [49]:
# Creating legends
import branca

legend_html = '''
{% macro html(this, kwargs) %}
<div style="
    position: fixed; 
    bottom: 10px;
    left: 20px;
    width: 250px;
    height: 200px;
    z-index:9999;
    font-size:14px;
    font-weight:bold;
    background-color:white;
    opacity: .85;
    ">
    <p><a style="color:#FF0000 ;font-size:150%;margin-left:20px;">&marker;</a>&emsp;Lifetime Membership</p>
    <p><a style="color:#32CD32;font-size:150%;margin-left:20px;">&marker;</a>&emsp;Student Membership</p>
    <p><a style="color:#00BFFF;font-size:150%;margin-left:20px;">&marker;</a>&emsp;Regular Membership</p>
    <p><a style="color:#FFA500;font-size:150%;margin-left:20px;">&marker;</a>&emsp;First Time Student</p>
    <p><a style="color:#FFFF00;font-size:150%;margin-left:20px;">&marker;</a>&emsp;General Term Membership</p>
</div>
{% endmacro %}
'''
legend = branca.element.MacroElement()
legend._template = branca.element.Template(legend_html)
folium.LayerControl().add_to(world_map)
world_map.get_root().add_child(legend)
world_map

In [50]:
world_map.save('Anpa_members.html') # saving html for interactive maps.