# Neuromap Project Neurohackademy 2019

In [1]:
# Import the libraries used throughout the notebook and set up the geocoder
import pandas as pd
import numpy as np
import folium
import geopy
from geopy.geocoders import Nominatim
# Shared Nominatim geocoder; the geocoding cells below call geolocator.geocode(...)
# on it. The user_agent string names this application.
geolocator = Nominatim(user_agent="neuromap")

In [2]:
# Raise pandas' display limits so wide/long frames are shown without truncation
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [3]:
# Cell values that should be read as missing (NaN) when loading the survey file
missing_values = ["  ", "-"]

# Load the survey responses; the file is read with latin-1 encoding
df = pd.read_csv(
    'NeuroMap-38responses.csv',
    encoding='latin-1',
    na_values=missing_values,
)

# Uncomment to preview the loaded responses
#df.head(20)

In [4]:
# Clean the data.
# Step 1: swap the long survey-question headers for short column names.
# The names are laid out one career stage per row.
short_column_names = [
    'timestamp', 'consent', 'name', 'birthplace', 'birth_year', 'gender',
    'undergrad_deg', 'undergrad_loc', 'undergrad_inst', 'undergrad_research',
    'ra_qual', 'ra_lm_loc', 'ra_lm_inst', 'ra_lm_research',
    'masters_qual', 'masters_loc', 'masters_inst', 'masters_research',
    'phd_qual', 'phd_loc', 'phd_inst', 'phd_research',
    'post_doc_qual', 'postdoc_loc', 'postdoc_inst', 'postdoc_research',
    'faculty_qual', 'faculty_loc', 'faculty_inst', 'faculty_research',
    'google_scholar',
]
df.columns = short_column_names

# Step 2: rewrite individual free-text location answers into a single,
# plainly written place name. Each mapping is {answer as entered: replacement}.
replace_dict_undergrad = {
    "Tijuana, Baja California, MÌ©xico": "Tijuana, Baja California, Mexico",
    'Raleigh & Chapel Hill, North Carolina, USA': 'Chapel Hill, North Carolina, USA'
}
replace_dict_research = {
    'Central Institute of Chemistry and Mechanics': 'Nagatinskaya, Moscow, Russia'
}
replace_dict_phd = {
    'Rio de Janeiro, Brazil AND Montreal, Canada': 'Rio de Janeiro, Brazil'
}

# Apply each mapping to the location column it belongs to
for column_name, fixes in (
    ('undergrad_loc', replace_dict_undergrad),
    ('ra_lm_loc', replace_dict_research),
    ('phd_loc', replace_dict_phd),
):
    df[column_name] = df[column_name].replace(fixes)

In [5]:
# Geocode every respondent's birthplace into latitude/longitude.
# `lat` and `long` hold one float per row of df, in row order; the hometown
# map cells index them by row position.
lat = []
long = []
for home_location in df.loc[:, "birthplace"]:
    if pd.isnull(home_location):
        # A blank answer must not be sent to the geocoder as if it were a place name
        raise ValueError("Missing birthplace")
    location_1 = geolocator.geocode(home_location, timeout=10)
    if location_1 is None:
        # No match found: fail clearly instead of an AttributeError on None
        raise ValueError("Geocode failed")

    lat.append(location_1.latitude)
    long.append(location_1.longitude)

# Save latitude and longitude as separate float columns in the dataframe.
# (Each cast is written back to its own column; the longitude cast previously
# overwrote the latitude column.)
df['HometownLatitude'] = lat
df['HometownLatitude'] = df['HometownLatitude'].astype('float')

df['HometownLongitude'] = long
df['HometownLongitude'] = df['HometownLongitude'].astype('float')
#df.head(20)

In [10]:
#print(home_location)
# NOTE(review): lat_list is first assigned in a later cell (the undergrad
# geocoding loop), so on a fresh top-to-bottom run this cell raises NameError.
# The output shown below came from an out-of-order execution.
lat_list

[41.8239891,
 45.886548,
 -34.4243941,
 22.2793278,
 42.2681569,
 38.9719384,
 -34.6075616,
 55.7504461,
 18.9387711,
 37.4443293,
 40.4258686,
 32.527002,
 25.0375198,
 29.8693496,
 42.0128695,
 37.5666791,
 32.5010188,
 None,
 52.1518157,
 36.1556805,
 33.5778631,
 34.0536909,
 33.7872568,
 -22.9110137,
 45.421106,
 31.778345,
 35.9131542,
 55.0282171,
 29.9499323,
 43.0821793,
 35.7006177,
 40.7127281,
 38.8339578,
 43.653963,
 31.778345,
 43.157285,
 34.0966764,
 51.4893335]

In [None]:
#print(locs)

In [7]:
# Birthplace text for each respondent; used as the marker popups
locs = df.loc[:,"birthplace"]

# Empty map that will hold the hometown markers
birth_map = folium.Map()

# One marker per respondent: position from lat/long (same row order as locs),
# popup showing the birthplace as entered
for row_number, place_name in enumerate(locs):
    marker = folium.Marker([lat[row_number], long[row_number]], popup=place_name)
    marker.add_to(birth_map)

#birth_map

In [None]:
# Same hometown plot as the marker map, drawn with circle outlines instead
locs = df.loc[:,"birthplace"]

# Fresh map for the circle version
m = folium.Map()

# One unfilled crimson circle per respondent, popup showing the birthplace
for row_number, place_name in enumerate(locs):
    hometown_circle = folium.Circle(
        radius=300,
        location=[lat[row_number], long[row_number]],
        popup=place_name,
        color='crimson',
        fill=False,
    )
    hometown_circle.add_to(m)

# Display the map
m

In [8]:
# Geocode every respondent's undergraduate city.
# lat_list / long_list get one entry per row of df, in row order; rows with no
# undergrad location get None so the lists stay aligned with the dataframe.
lat_list = []
long_list = []
for undergrad_location in df['undergrad_loc']:
    if pd.isnull(undergrad_location):
        # Missing answer: nothing to look up
        lat2 = None
        long2 = None
    else:
        location_1 = geolocator.geocode(undergrad_location, timeout=10)
        if location_1 is None:
            # Stop on an unmatched place name rather than store a bad coordinate
            raise ValueError("Geocode failed")
        lat2 = location_1.latitude
        long2 = location_1.longitude

    lat_list.append(lat2)
    long_list.append(long2)

# Save latitude and longitude as separate float columns in the dataframe.
# (Each cast is written back to its own column; the longitude cast previously
# overwrote the latitude column.)
df['UndergradLatitude'] = lat_list
df['UndergradLatitude'] = df['UndergradLatitude'].astype('float')

df['UndergradLongitude'] = long_list
df['UndergradLongitude'] = df['UndergradLongitude'].astype('float')
#df.head(40)

In [11]:
# Prepare the undergrad data for plotting by dropping the missing entries.
# Subset of df with only the undergrad coordinate columns, rows with NaN removed
df_undergrad = df.loc[:,"UndergradLatitude":"UndergradLongitude"]
df_undergrad = df_undergrad.dropna()
undergrad_locs = df.loc[:,"undergrad_loc"]
#print(df_undergrad)

# Remove EVERY missing entry from all three sequences. list.remove(None) would
# drop only the first None, leaving the coordinate lists longer than, and out
# of step with, the location labels whenever more than one answer is missing.
lat_list = [value for value in lat_list if value is not None]
long_list = [value for value in long_list if value is not None]
undergrad_locs = [x for x in undergrad_locs if pd.notna(x)]

In [None]:
# Eyeball the three sequences after the missing entries were removed
for cleaned_values in (lat_list, long_list, undergrad_locs):
    print(cleaned_values)

# Show the container type of the location labels
type(undergrad_locs)

In [None]:
# Plot the undergrad locations
undergrad_map = folium.Map()

# One marker per remaining location; coordinates are taken from the same
# position in lat_list / long_list, popup shows the location text
for position, place_name in enumerate(undergrad_locs):
    marker = folium.Marker([lat_list[position], long_list[position]], popup=place_name)
    marker.add_to(undergrad_map)

# Display the map
undergrad_map

In [None]:
# Geocode every respondent's research-assistant (RA) location.
# lat_list / long_list get one entry per row of df, in row order; rows with no
# RA location get None so the lists stay aligned with the dataframe.
lat_list = []
long_list = []
for research_location in df['ra_lm_loc']:
    # Per-row values use their own names: `lat` and `long` are the hometown
    # coordinate lists that other cells index by row, so they must not be
    # rebound to single values here.
    if pd.isnull(research_location):
        research_lat = None
        research_long = None
    else:
        location_1 = geolocator.geocode(research_location, timeout=10)
        if location_1 is None:
            # Stop on an unmatched place name rather than store a bad coordinate
            raise ValueError("Geocode failed")
        research_lat = location_1.latitude
        research_long = location_1.longitude

    lat_list.append(research_lat)
    long_list.append(research_long)
    print((research_lat, research_long, research_location))

# Save latitude and longitude as separate float columns in the dataframe.
# (Each cast is written back to its own column; the longitude cast previously
# overwrote the latitude column.)
df['RA_LM_Latitude'] = lat_list
df['RA_LM_Latitude'] = df['RA_LM_Latitude'].astype('float')

df['RA_LM_Longitude'] = long_list
df['RA_LM_Longitude'] = df['RA_LM_Longitude'].astype('float')
#df.head(40)


In [None]:
# Prepare the research-position data for plotting by dropping missing entries.
# Subset of df with only the RA coordinate columns, rows with NaN removed
df_research = df.loc[:,"RA_LM_Latitude":"RA_LM_Longitude"]
df_research = df_research.dropna()
research_locs = df.loc[:,"ra_lm_loc"]

# Drop only the None placeholders. filter(None, ...) would also discard any
# coordinate equal to 0.0 (it removes every falsy value), which would shift the
# remaining coordinates against the location labels.
lat_list = [value for value in lat_list if value is not None]
long_list = [value for value in long_list if value is not None]
research_locs = [x for x in research_locs if pd.notna(x)]

In [None]:
# Eyeball the three sequences after the missing entries were removed
for cleaned_values in (lat_list, long_list, research_locs):
    print(cleaned_values)

# Show the container type of the location labels
type(research_locs)

In [None]:
# Plot the research-position locations
research_map = folium.Map()

# One marker per remaining location; coordinates are taken from the same
# position in lat_list / long_list, popup shows the location text
for position, place_name in enumerate(research_locs):
    marker = folium.Marker([lat_list[position], long_list[position]], popup=place_name)
    marker.add_to(research_map)

# Display the map
research_map

In [None]:
# Geocode the PhD/doctoral locations and plot them on a map.
# lat_list / long_list get one entry per row of df, in row order; rows with no
# PhD location get None so the lists stay aligned with the dataframe.
lat_list = []
long_list = []
for phd_location in df['phd_loc']:
    # Per-row values use their own names: `lat` and `long` are the hometown
    # coordinate lists that other cells index by row, so they must not be
    # rebound to single values here.
    if pd.isnull(phd_location):
        phd_lat = None
        phd_long = None
    else:
        location_1 = geolocator.geocode(phd_location, timeout=10)
        if location_1 is None:
            # Stop on an unmatched place name rather than store a bad coordinate
            raise ValueError("Geocode failed")
        phd_lat = location_1.latitude
        phd_long = location_1.longitude

    lat_list.append(phd_lat)
    long_list.append(phd_long)
    print((phd_lat, phd_long, phd_location))

# Save latitude and longitude as separate float columns in the dataframe.
# (Each cast is written back to its own column; the longitude cast previously
# overwrote the latitude column.)
df['PHD_Latitude'] = lat_list
df['PHD_Latitude'] = df['PHD_Latitude'].astype('float')

df['PHD_Longitude'] = long_list
df['PHD_Longitude'] = df['PHD_Longitude'].astype('float')
#df.head(40)

# Subset of df with only the PhD coordinate columns, rows with NaN removed.
# The columns are selected by name: a "PHD_Longitude":"PHD_Latitude" label
# slice runs backwards relative to the order the columns were created in and
# selects nothing.
df_phd = df.loc[:, ["PHD_Latitude", "PHD_Longitude"]]
df_phd = df_phd.dropna()
phd_locs = df.loc[:,"phd_loc"]

# Drop only the None placeholders. filter(None, ...) would also discard any
# coordinate equal to 0.0 and shift the rest against the location labels.
lat_list = [value for value in lat_list if value is not None]
long_list = [value for value in long_list if value is not None]
phd_locs = [x for x in phd_locs if pd.notna(x)]

# Plot the PhD locations: one marker per remaining location, popup = location text
phd_map = folium.Map()
for phd_location in range(len(phd_locs)):
    folium.Marker([lat_list[phd_location], long_list[phd_location]], popup=phd_locs[phd_location]).add_to(phd_map)

# Display the map
phd_map


In [None]:
# Practice cell: draw folium's two circle types at one fixed point

# The single coordinate every object in this cell is placed at
practice_lat = -34.4243941
practice_lon = 150.89385

# Map centred on the practice point
m = folium.Map(location=[practice_lat, practice_lon], zoom_start=13)

# Outline-only crimson Circle
outline_circle = folium.Circle(
    location=[practice_lat, practice_lon],
    radius=100,
    popup='The Waterfront',
    color='crimson',
    fill=False,
)
outline_circle.add_to(m)

# Filled blue CircleMarker at the same point
filled_marker = folium.CircleMarker(
    location=[practice_lat, practice_lon],
    radius=50,
    popup='Laurelhurst Park',
    color='#3186cc',
    fill=True,
    fill_color='#3186cc',
)
filled_marker.add_to(m)

# Display the map
m

In [None]:
from ipyleaflet import (Map, basemaps, basemap_to_tiles, Circle, LayerGroup, LayersControl)

# Hometown circles again, this time with ipyleaflet and a layer switcher.
# Expects `lat` and `long` to be the per-row hometown coordinate lists
# (same row order as the birthplace column).
locs = df.loc[:,"birthplace"]

m = Map(zoom=0)

# Build the same set of hometown circles twice, once per colour
crimson_circles = [
    Circle(location=[lat[home_location], long[home_location]],
           radius=300, color='crimson', fill=False)
    for home_location in range(len(locs))
]
blue_circles = [
    Circle(location=[lat[home_location], long[home_location]],
           radius=300, color='blue', fill=False)
    for home_location in range(len(locs))
]

# Each colour goes into its own named LayerGroup on the one map. A Map is not a
# layer, so a second Map cannot be passed to add_layer(); named groups are what
# the LayersControl lists and toggles.
m.add_layer(LayerGroup(name='Hometowns (crimson)', layers=crimson_circles))
m.add_layer(LayerGroup(name='Hometowns (blue)', layers=blue_circles))

m.add_control(LayersControl(position='topright'))
m

In [93]:
#Dasha's attempt
import ipyleaflet as lf
from ipyleaflet import (Map, basemaps, basemap_to_tiles, Circle, LayersControl)
from ipyleaflet import Map, FullScreenControl

# Experiment: put a single circle and a named group of circles on one map and
# expose them through a LayersControl.
m = Map(zoom=1)

# Stand-alone layer: one filled green circle at the first hometown coordinate
c0 = Circle(
    location=[lat[0], long[0]],
    radius=1000,
    color='green',
    fill=True,
    name="green",
)
m.add_layer(c0)

# Two filled blue circles taken from positions 1 and 3 of lat_list / long_list
ctest1 = Circle(
    location=[lat_list[1], long_list[1]],
    radius=300,
    color='blue',
    fill=True,
)
ctest2 = Circle(
    location=[lat_list[3], long_list[3]],
    radius=300,
    color='blue',
    fill=True,
)

# Bundle the blue circles into one named group and add it as a single layer
bluecircles = lf.LayerGroup(name='blue circles', layers=[ctest1, ctest2])
m.add_layer(bluecircles)

# Layer switcher at the control's default position
m.add_control(LayersControl())

# Display the map
m


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

In [89]:
#coords

In [90]:
#long

In [None]:
from ipyleaflet import (
    Map, basemaps, basemap_to_tiles,
    WMSLayer, LayersControl
)

# Demo map with two tile overlays and a layer switcher
mp = Map(center=(50, 354), zoom=4)

# Overlay 1: NASA GIBS MODIS Terra true-colour tiles for the given date string
nasa_layer = basemap_to_tiles(basemaps.NASAGIBS.ModisTerraTrueColorCR, "2018-03-30")

# Overlay 2: the "nasa:bluemarble" layer served over WMS
wms = WMSLayer(
    url="https://demo.boundlessgeo.com/geoserver/ows?",
    layers="nasa:bluemarble",
    name="nasa:bluemarble"
)

# Add the overlays in the same order as before, then the switcher control
for overlay in (nasa_layer, wms):
    mp.add_layer(overlay)
mp.add_control(LayersControl())

# Display the map
mp

In [21]:
# Display the undergrad location labels left after the missing answers were removed
undergrad_locs

['Providence, RI, USA',
 'Rovereto, Trentino, Italy',
 'Wollongong, NSW, Australia',
 'Hong Kong',
 'Ann Arbor, MI, USA',
 'Lawrence, KS, United States',
 'Buenos Aires, Argentina',
 'Moscow/Russia',
 'Mumbai, India',
 'Palo Alto, CA, USA',
 'West Lafayette, IN, United States',
 'Auburn, AL, USA',
 'Taipei, Taiwan',
 'Roorkee, India',
 'Annandale-on-Hudson, NY, USA',
 'Seoul, South Korea',
 'Tijuana, Baja California, Mexico',
 'Leiden, the Netherlands',
 'Tulsa, OK, USA',
 'Lubbock, Texas, USA',
 'Los Angeles, California, USA',
 'Orange, CA, USA',
 'Rio de Janeiro, Rio de Janeiro, Brazil',
 'Ottawa, Canada',
 'Jerusalem, Israel',
 'Chapel Hill, North Carolina, USA',
 'Novosibirsk, Russia',
 'New Orleans, LA, USA',
 'Saratoga Springs, NY, USA',
 'Tehran, Tehran, Iran',
 'New York, NY, USA',
 'colorado springs, CO, US',
 'Toronto, Ontario, Canada',
 'Jerusalem, Israel',
 'Rochester, NY . USA',
 'Claremont, CA, USA',
 'London, UK']