# Neuromap Project Neurohackademy 2019

In [32]:
# install libraries/ set up script
import pandas as pd
import numpy as np
import folium
import geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="neurohackmap")
#! pip install ipyleaflet 
import ipyleaflet as lf
from ipyleaflet import (Map, basemaps, basemap_to_tiles, Circle, LayersControl, FullScreenControl)

In [2]:
# sets the defaults for viewing the dataframe in the jupyter notebook
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [3]:
# define missing values
missing_values = ["  ", "-"]

#load in the questionnaire response data from a csv file 
df = pd.read_csv('NeuroMap 2.csv', na_values = missing_values, encoding='latin-1')

#optionally view the data
#df.head(20)

In [4]:
## data cleaning steps 
## rename column questions to shorter
df.columns = ['timestamp','consent','name','birthplace','birth_year', 'gender',
              'undergrad_deg','undergrad_loc',  'undergrad_inst','undergrad_research',
              'ra_qual','ra_lm_loc', 'ra_lm_inst', 'ra_lm_research',
              'masters_qual','masters_loc', 'masters_inst', 'masters_research', 
              'phd_qual','phd_loc', 'phd_inst', 'phd_research',
              'post_doc_qual', 'postdoc_loc' ,'postdoc_inst','postdoc_research', 
            'faculty_qual', 'faculty_loc', 'faculty_inst', 'faculty_research',
              'google_scholar']

## during code development, irregularities in input style that caused geopy errors were identified.
## these have been manually corrected as per below
replace_dict_birth = {
    "Tijuana, Baja California, MÌ©xico": "Tijuana, Baja California, Mexico",
    "Ìävreux, Normandy, France": "Normandy, France"
}
df['birthplace'] = df['birthplace'].replace(replace_dict_birth) 


replace_dict_undergrad = {
    'Tijuana, Baja California, MÌ©xico': 'Tijuana, Baja California, Mexico',
    'Raleigh & Chapel Hill, North Carolina, USA': 'Chapel Hill, North Carolina, USA'
}
df['undergrad_loc'] = df['undergrad_loc'].replace(replace_dict_undergrad) 

replace_dict_research = {
    'Central Institute of Chemistry and Mechanics': 'Nagatinskaya, Moscow, Russia',
    'Boston, MA & San Francisco, CA': 'San Francisco, CA'    
}

df['ra_lm_loc'] = df['ra_lm_loc'].replace(replace_dict_research)

replace_dict_phd = {
    'Rio de Janeiro, Brazil AND Montreal, Canada': 'Rio de Janeiro, Brazil'
}
df['phd_loc'] = df['phd_loc'].replace(replace_dict_phd) 


replace_dict_masters = {
    'Neuroscience': 'Strasbourg, Alsace, France',
    'Mexico city, MÌ©xico': 'Tijuana, Baja California, Mexico'
}
df['masters_loc'] = df['masters_loc'].replace(replace_dict_masters)


replace_dict_postdoc = {
    'New Haven, CT, United States AND Durham, NC, United States': 'New Haven, CT, United States',
    '1) York, UK; 2) Sussex, UK': 'York, UK',
    'Seattle, WA and Cambridge, MA':'Seattle, WA'
}
df['postdoc_loc'] = df['postdoc_loc'].replace(replace_dict_postdoc)


In [6]:
# The following code uses geopy to city, state, country information to get latitude and longitude
# This is done for birth_location, undergrad_location, research position location, PhD location and 
# post doc location

# Birth_location is done first
# First create empty structures for your lat and long values
lat_list = []
long_list = []

#loop through birthplace locations
for birth_location in df['birthplace']:
    #print(location_1)
    
    if pd.isnull(birth_location):
        lat = None
        long = None #this section is to manage missing data
    else:
        location_1 = geolocator.geocode(birth_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed") # this section is to manage data in wrong format
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,birth_location))
        
#Saving lat and long in separate columns in the dataframe    
df['HometownLatitude'] = lat_list
df['HometownLatitude'] = df['HometownLatitude'].astype('float')

df['HometownLongitude'] = long_list
df['HometownLongitude'] = df['HometownLongitude'].astype('float')
df.head(20)

#Get the column with hometown location
locs = df.loc[:,"birthplace"]

#Plot hometown locations on the map!
birth_map = folium.Map()

#Loop through locations and add the markers on the map
for home_location in range(len(locs)): 
    folium.Marker([lat_list[home_location], long_list[home_location]], popup=locs[home_location]).add_to(birth_map)
    
# output the map
#birth_map


(43.5073034, 16.4414229, 'Split, Croatia')
(45.4384958, 10.9924122, 'Verona, Veneto, Italy')
(52.2385239, -0.902493, 'Northampton, United Kingdom')
(22.2793278, 114.1628131, 'Hong Kong')
(41.8755616, -87.6244212, 'Chicago, IL, USA')
(37.6922361, -97.3375448, 'Wichita, KS, United States')
(-34.6075616, -58.437076, 'Buenos Aires, Argentina')
(58.014965, 56.246723, 'Perm/Russia')
(18.9387711, 72.8353355, 'Mumbai, India')
(39.1014537, -84.5124602, 'Cincinnati, Ohio')
(40.4167022, -86.8752869, 'Lafayette, IN, United States')
(39.0840054, -77.1527573, 'Rockville, MD, USA')
(25.0375198, 121.5636796, 'Taipei, Taiwan')
(16.5087586, 80.6185102, 'Vijayawada, India')
(40.6501038, -73.9495823, 'Brooklyn, NY')
(35.1799528, 129.0752365, 'Busan, South Korea')
(32.5010188, -116.9646629, 'Tijuana, Baja California, Mexico')
(31.2525238, 34.7905787, "Be'er Sheva, Israel")
(52.1518157, 4.48110886662043, 'Leiden, the Netherlands')
(38.7604815, -92.5617875, 'Missouri, USA')
(33.9137085, -98.4933873, 'Wichita

In [7]:
#Check your dataframe  
#df.head(60)

Unnamed: 0,timestamp,consent,name,birthplace,birth_year,gender,undergrad_deg,undergrad_loc,undergrad_inst,undergrad_research,ra_qual,ra_lm_loc,ra_lm_inst,ra_lm_research,masters_qual,masters_loc,masters_inst,masters_research,phd_qual,phd_loc,phd_inst,phd_research,post_doc_qual,postdoc_loc,postdoc_inst,postdoc_research,faculty_qual,faculty_loc,faculty_inst,faculty_research,google_scholar,HometownLatitude,HometownLongitude
0,2019/08/06 12:34:12 PM MDT,Yes,Antonija Kolobaric,"Split, Croatia",1994.0,Female,Experimental Psychology,"Providence, RI, USA",Brown University,"Theory of mind, suicide, development, adolesce...",Yes,"New Haven, CT, USA",Yale University,"early course psychosis, thalamus, mental illne...",No,,,,Yes,"Pittsburgh, PA, USA",University of Pittsburgh,"development, psychosis, fMRI, thalamus",No,,,,No,,,,,43.507303,16.441423
1,2019/08/06 12:37:29 PM MDT,Yes,Giacomo,"Verona, Veneto, Italy",1988.0,Male,Cognitive Psychology,"Rovereto, Trentino, Italy",Unversity of Trento,"healthy, behavioral tracking with IR markers",No,,,,Yes,"Rovereto, Trentino, Italy",University of Trento,"healthy, eyetracking on pointlight displays",Yes,"Rovereto, Trentino, Italy",University of Trento,"healthy, fMRI, movement decoding",Yes,"London, Ontario, Canada",Western University,"healthy, fMRI, skill learning, motor represent...",No,,,,https://scholar.google.com/citations?user=IYbg...,45.438496,10.992412
2,2019/08/06 12:38:08 PM MDT,Yes,Hannah Coyle,"Northampton, United Kingdom",1991.0,Female,Psychology,"Wollongong, NSW, Australia","University of Wollongong, Australia","cannabis use, EEG, working memory",Yes,"Melbourne, Victoria, Australia",Monash University,"depression, TMS, DLPFC",No,,,,Yes,"Melbourne, Victoria, Australia",Monash University,"mild traumatic brain injury, TMS-EEG, neuropsy...",No,,,,No,,,,https://scholar.google.com/citations?view_op=l...,52.238524,-0.902493
3,2019/08/06 12:39:18 PM MDT,Yes,JKL,Hong Kong,1986.0,Male,Psychology,Hong Kong,University of Hong Kong,"sexual health, clinical, survey",Yes,"Birmingham, UK",University of Birmingham,"stroke, cognitive assessment battery",Yes,"Birmingham, UK",University of Birmingham,"developmental disabilities, eating, cognitive ...",Yes,"Birmingham, UK",University of Birmingham,"temporal, parietal areas; lesion analysis, fmr...",Yes,"Reading, UK",University of Reading,"reward network, striatum, medial frontal areas...",No,,,,,22.279328,114.162813
4,2019/08/06 12:51:26 PM MDT,Yes,Liz B,"Chicago, IL, USA",1992.0,Female,Communications & Psychology,"Ann Arbor, MI, USA",University of Michigan,"Communication neuroscience, social influence, ...",Yes,"Philadelphia, PA, USA",University of Pennsylvania,"communication neuroscience, smokers, fMRI, tD...",No,,,,Yes,"Philadelphia, PA, USA",Temple University,"adolescent decision-making, fMRI, reward proce...",No,,,,No,,,,,41.875562,-87.624421
5,2019/08/06 12:53:20 PM MDT,Yes,Kendra Seaman,"Wichita, KS, United States",1979.0,Female,Biology and Psychology,"Lawrence, KS, United States",University of Kansas,,No,,,,Yes,"Washington, DC, United States",The Catholic University of America,Mental models in science education,Yes,"Washington, DC",The Catholic University of America,aging and experiential decision making,Yes,"New Haven, CT, United States",Yale University and Duke University,Neural bases of Aging and Decision making,Yes,"Dallas, TX, United States",University of Texas at Dallas,"Aging, Decision making, Learning",,37.692236,-97.337545
6,2019/08/06 12:53:47 PM MDT,Yes,Martina,"Buenos Aires, Argentina",2019.0,Female,Psychology,"Buenos Aires, Argentina",Favaloro University,"psychology, cognitive-neuroscience, eeg",Yes,"Buenos Aires, Argentina",INECO and University of San Martin,"fmri, complex-systems, eeg, language-processing",Yes,"Buenos Aires, Argentina",Favaloro University,"eeg, language-processing, bilingualism",Yes,"Frankfurt, Germany",Max-Planck-Institute AE,"MEG, fMRI, predictive-processing, schema proce...",No,,,,No,,,,https://scholar.google.com/citations?user=X_n9...,-34.607562,-58.437076
7,2019/08/06 12:54:19 PM MDT,Yes,Sofya,Perm/Russia,1988.0,Female,Applied Maths and Physics,Moscow/Russia,Moscow Institite of Physics and Technology,"brain-computer interface, EEG, Matlab, motor i...",Yes,"Nagatinskaya, Moscow, Russia",research assistant,"brain-computer interface , EEG, Matlab, motor ...",Yes,"Strasbourg, Alsace, France",University of Strasbourg,"schizophrenia, animal model, thalamo-cortical ...",Yes,Paris/France,Paris Descartes,"neuroimaging, brain development, myelin, pytho...",Yes,"Perm, Russia",Higher School of Economics,"neuroimaging, stroke, python, classifications",Yes,"Perm, Russia",Higher School of Economics,"diffusion MRI, MRI relaxometry, TMS, EEG, stro...",,58.014965,56.246723
8,2019/08/06 12:54:41 PM MDT,Yes,Aarti Nair,"Mumbai, India",1984.0,Female,BA in Psychology and Anthropology,"Mumbai, India",St. Xaviers College,Globalization and Gender Bias,Yes,"Philadelphia, USA",Childrens hospital of Philadelphia (CHOP),"Autism, DTI, imaging, neuropsychology",Yes,"Charlotte, North Carolina",UNC Charlotte,"EEG, PTSD, memory suppression",Yes,"San Diego, CA, USA",UCSD,"Autism, multimodal imaging, thalamus, neuropsy...",Yes,"Los Angeles, CA, USA",UCLA,"Autism, psychosis, multimodal imaging, social ...",No,,,,https://scholar.google.com/citations?user=AAqZ...,18.938771,72.835335
9,2019/08/06 12:55:57 PM MDT,Yes,Mai Nguyen,"Cincinnati, Ohio",1990.0,Female,"BA Psychology, BS Biology","Palo Alto, CA, USA",Stanford,"vision, retinotopy, eccentricity bias, face pe...",Yes,"New York City, New York, USA",New York University,"vision, retinotopy, broadband gamma, human fMRI",Yes,"Cambridge, UK",University of Cambridge,"vision, autism, children, behavioral",Yes,"Princeton, NJ, USA",Princeton University,"communication, naturalistic, human fMRI, neur...",No,,,,No,,,,,39.101454,-84.51246


In [8]:
# Undergrad location is done second
# First create empty structures for your lat and long values
lat_list = []
long_list = []

#loop through undergrad locations
for undergrad_location in df['undergrad_loc']:
    #print(location_1)
    
    if pd.isnull(undergrad_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(undergrad_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,undergrad_location))
        
# #Saving lat and long in separate columns in the original dataframe    
df['UndergradLatitude' ] = lat_list
df['UndergradLatitude'] = df['UndergradLatitude'].astype('float')

df['UndergradLongitude'] = long_list
df['UndergradLongitude'] = df['UndergradLongitude'].astype('float')

# because there are missing values which mean the mapping won't work you need to create a subset df

#create a subset of df
df_undergrad = df.loc[:,"UndergradLatitude":"UndergradLongitude"] 
df_undergrad = df_undergrad.dropna()

# get undergrad locations 
undergrad_locs = df.loc[:,"undergrad_loc"]
#print(df_undergrad)

#modify lat_list and long_list to remove missing values
lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
undergrad_locs = [x for x in undergrad_locs if pd.notna(x)]

# check formats are correct with the NA values removed
print(lat_list)
print(long_list)
print(undergrad_locs)
type(undergrad_locs)

# undergrad locations plotting 
undergrad_map = folium.Map()
#Loop through locations and add the markers on the map
for undergrad_location in range(len(undergrad_locs)): 
    folium.Marker([lat_list[undergrad_location], long_list[undergrad_location]],
                  popup=undergrad_locs[undergrad_location]).add_to(undergrad_map)
    
#display map
#undergrad_map

(41.8239891, -71.4128343, 'Providence, RI, USA')
(45.886548, 11.0452369, 'Rovereto, Trentino, Italy')
(-34.4243941, 150.89385, 'Wollongong, NSW, Australia')
(22.2793278, 114.1628131, 'Hong Kong')
(42.2681569, -83.7312291, 'Ann Arbor, MI, USA')
(38.9719384, -95.2359496, 'Lawrence, KS, United States')
(-34.6075616, -58.437076, 'Buenos Aires, Argentina')
(55.7504461, 37.6174943, 'Moscow/Russia')
(18.9387711, 72.8353355, 'Mumbai, India')
(37.4443293, -122.1598465, 'Palo Alto, CA, USA')
(40.4258686, -86.9080655, 'West Lafayette, IN, United States')
(32.527002, -85.4367484053398, 'Auburn, AL, USA')
(25.0375198, 121.5636796, 'Taipei, Taiwan')
(29.8693496, 77.8902124, 'Roorkee, India')
(42.0128695, -73.9081901, 'Annandale-on-Hudson, NY, USA')
(37.5666791, 126.9782914, 'Seoul, South Korea')
(32.5010188, -116.9646629, 'Tijuana, Baja California, Mexico')
(None, None, nan)
(52.1518157, 4.48110886662043, 'Leiden, the Netherlands')
(36.1556805, -95.9929113, 'Tulsa, OK, USA')
(33.5778631, -101.855166

In [9]:
# Research assistant and lab manager position locations 
# First create empty structures for your lat and long values
lat_list = []
long_list = []

# Loop through rows to get the longitude and latitude of the RA_locations (be aware of missing data)
for research_location in df['ra_lm_loc']:
    #print(location_1)
    
    if pd.isnull(research_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(research_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,research_location))
        
# #Saving lat and long in separate columns in the dataframe    
df['RA_LM_Latitude' ] = lat_list
df['RA_LM_Latitude'] = df['RA_LM_Latitude'].astype('float')

df['RA_LM_Longitude'] = long_list
df['RA_LM_Longitude'] = df['RA_LM_Longitude'].astype('float')
#df.head(40)

# create a map with research position locations
df_research = df.loc[:,"RA_LM_Latitude":"RA_LM_Longitude"] #create a subset of df to deal with the na problem
df_research = df_research.dropna()
research_locs = df.loc[:,"ra_lm_loc"]
#print(df_undergrad)

lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
research_locs = [x for x in research_locs if pd.notna(x)]

# check formats are correct with the NA values removed
print(lat_list)
print(long_list)
print(research_locs)
type(research_locs)

# research locations plotting 
research_map = folium.Map()
#Loop through locations and add the markers on the map
for research_location in range(len(research_locs)): 
    folium.Marker([lat_list[research_location], long_list[research_location]],
                  popup=research_locs[research_location]).add_to(research_map)

#display map
#research_map

(41.3082138, -72.9250518, 'New Haven, CT, USA')
(None, None, nan)
(-37.8142176, 144.9631608, 'Melbourne, Victoria, Australia')
(52.4775396, -1.894053, 'Birmingham, UK')
(39.9527237, -75.1635262, 'Philadelphia, PA, USA')
(None, None, nan)
(-34.6075616, -58.437076, 'Buenos Aires, Argentina')
(55.6828925, 37.6223775, 'Nagatinskaya, Moscow, Russia')
(39.9527237, -75.1635262, 'Philadelphia, USA')
(40.7127281, -74.0060152, 'New York City, New York, USA')
(None, None, nan)
(None, None, nan)
(25.0375198, 121.5636796, 'Taipei, Taiwan')
(None, None, nan)
(40.7127281, -74.0060152, 'New York, NY')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(37.4443293, -122.1598465, 'Palo Alto, California, USA')
(45.5202471, -122.6741949, 'Portland, OR, USA')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(37.8708393, -122.2728639, 'Berkeley, CA, USA')
(37.7792808, -122.4192363, 'San Francisco, CA, USA')
(35

In [10]:
# PhD position locations 
# create empty structures
lat_list = []
long_list = []
for phd_location in df['phd_loc']:
    #print(location_1)
    
# Loop through rows to get the longitude and latitude of the RA_locations (be aware of missing data)    
    if pd.isnull(phd_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(phd_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,phd_location))
        
# #Saving lat and long in separate columns in the dataframe    
df['PHD_Latitude' ] = lat_list
df['PHD_Latitude'] = df['PHD_Latitude'].astype('float')

df['PHD_Longitude'] = long_list
df['PHD_Longitude'] = df['PHD_Longitude'].astype('float')

#create a subset of df to deal with the na problem
df_phd = df.loc[:,"PHD_Longitude":"PHD_Latitude"] 
df_phd = df_phd.dropna()
phd_locs = df.loc[:,"phd_loc"]
#print(df_undergrad)

#remove NA values
lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
phd_locs = [x for x in phd_locs if pd.notna(x)]

# research locations plotting 
phd_map = folium.Map()
#Loop through locations and add the markers on the map
for phd_location in range(len(phd_locs)): 
    folium.Marker([lat_list[phd_location], long_list[phd_location]],
                  popup=phd_locs[phd_location]).add_to(phd_map)

#display map    
#phd_map

(40.4416941, -79.9900861, 'Pittsburgh, PA, USA')
(45.886548, 11.0452369, 'Rovereto, Trentino, Italy')
(-37.8142176, 144.9631608, 'Melbourne, Victoria, Australia')
(52.4775396, -1.894053, 'Birmingham, UK')
(39.9527237, -75.1635262, 'Philadelphia, PA, USA')
(38.8950092, -77.0365625, 'Washington, DC')
(50.1106444, 8.6820917, 'Frankfurt, Germany')
(48.8566101, 2.3514992, 'Paris/France')
(32.7174209, -117.1627714, 'San Diego, CA, USA')
(40.3492744, -74.6592958, 'Princeton, NJ, USA')
(47.6038321, -122.3300624, 'Seattle, WA, United States')
(25.7742658, -80.1936589, 'Miami, FL, USA')
(53.9590555, -1.0815361, 'York, UK')
(53.550341, 10.000654, 'Hamburg, Germany')
(40.4416941, -79.9900861, 'Pittsburgh, PA')
(37.5666791, 126.9782914, 'Seoul, South Korea')
(None, None, nan)
(51.5073219, -0.1276474, 'London, United Kingdom')
(52.1518157, 4.48110886662043, 'Leiden, the Netherlands')
(42.3602534, -71.0582912, 'Boston, MA, USA')
(34.4221319, -119.7026673, 'Santa Barbara, California, USA')
(43.703622,

In [11]:
# Masters locations 
lat_list = []
long_list = []
# Loop through rows to get the longitude and latitude of the PostDoc_locations (be aware of missing data)
for masters_location in df['masters_loc']:
   
    if pd.isnull(masters_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(masters_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,masters_location))
        
# #Saving lat and long in separate columns in the dataframe    
df['Masters_Latitude' ] = lat_list
df['Masters_Latitude'] = df['Masters_Latitude'].astype('float')

df['Masters_Longitude'] = long_list
df['Masters_Longitude'] = df['Masters_Longitude'].astype('float')
#df.head(40)

# create a map with research position locations
df_masters = df.loc[:,"Masters_Longitude":"Masters_Latitude"] #create a subset of df to deal with the na problem
df_masters = df_masters.dropna()
masters_locs = df.loc[:,"masters_loc"]
#print(df_undergrad)

lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
masters_locs = [x for x in masters_locs if pd.notna(x)]

# research locations plotting 
masters_map = folium.Map()
#Loop through locations and add the markers on the map
for masters_location in range(len(masters_locs)): 
    folium.Marker([lat_list[masters_location], long_list[masters_location]],
                  popup=masters_locs[masters_location]).add_to(masters_map)

#display map    
#masters_map

(None, None, nan)
(45.886548, 11.0452369, 'Rovereto, Trentino, Italy')
(None, None, nan)
(52.4775396, -1.894053, 'Birmingham, UK')
(None, None, nan)
(38.8950092, -77.0365625, 'Washington, DC, United States')
(-34.6075616, -58.437076, 'Buenos Aires, Argentina')
(48.584614, 7.7507127, 'Strasbourg, Alsace, France')
(35.2270869, -80.8431268, 'Charlotte, North Carolina')
(52.2034823, 0.1235817, 'Cambridge, UK')
(None, None, nan)
(25.7742658, -80.1936589, 'Miami, FL, USA')
(53.9590555, -1.0815361, 'York, UK')
(55.9521476, -3.1889908, 'Edinburgh, United Kingdom')
(None, None, nan)
(None, None, nan)
(32.5010188, -116.9646629, 'Tijuana, Baja California, Mexico')
(32.0804808, 34.7805274, 'Tel Aviv, Israel')
(52.1518157, 4.48110886662043, 'Leiden, the Netherlands')
(42.3602534, -71.0582912, 'Boston, MA, USA')
(33.5778631, -101.8551665, 'Lubbock, Texas, USA')
(None, None, nan)
(45.5202471, -122.6741949, 'Portland, OR, USA')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(4

In [12]:
# Post Doc locations 
lat_list = []
long_list = []
# Loop through rows to get the longitude and latitude of the PostDoc_locations (be aware of missing data)
for postdoc_location in df['postdoc_loc']:
   
    if pd.isnull(postdoc_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(postdoc_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,postdoc_location))
        
# #Saving lat and long in separate columns in the dataframe    
df['PostDoc_Latitude' ] = lat_list
df['PostDoc_Latitude'] = df['PostDoc_Latitude'].astype('float')

df['PostDoc_Longitude'] = long_list
df['PostDoc_Longitude'] = df['PostDoc_Longitude'].astype('float')
#df.head(40)

# create a map with research position locations
df_postdoc = df.loc[:,"PostDoc_Longitude":"PostDoc_Latitude"] #create a subset of df to deal with the na problem
df_postdoc = df_postdoc.dropna()
postdoc_locs = df.loc[:,"postdoc_loc"]
#print(df_undergrad)

lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
postdoc_locs = [x for x in postdoc_locs if pd.notna(x)]

# research locations plotting 
postdoc_map = folium.Map()
#Loop through locations and add the markers on the map
for postdoc_location in range(len(postdoc_locs)): 
    folium.Marker([lat_list[postdoc_location], long_list[postdoc_location]],
                  popup=postdoc_locs[postdoc_location]).add_to(postdoc_map)

#display map    
#postdoc_map


(None, None, nan)
(42.9537654, -81.2291529, 'London, Ontario, Canada')
(None, None, nan)
(51.45149525, -0.983634249936538, 'Reading, UK')
(None, None, nan)
(41.3082138, -72.9250518, 'New Haven, CT, United States')
(None, None, nan)
(58.014965, 56.246723, 'Perm, Russia')
(34.0536909, -118.2427666, 'Los Angeles, CA, USA')
(None, None, nan)
(39.9527237, -75.1635262, 'Philadelphia, PA, United States')
(None, None, nan)
(53.9590555, -1.0815361, 'York, UK')
(None, None, nan)
(41.3082138, -72.9250518, 'New Haven, CT')
(38.545379, -121.7445835, 'Davis, California, US')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(44.9706705, -93.268217735811, 'Minneapolis, Minnesota, USA')
(40.0149856, -105.2705456, 'Boulder, CO, USA')
(None, None, nan)
(None, None, nan)
(42.9537654, -81.2291529, 'London, Ontario, Canada')
(37.8708393, -122.2728639, 'Berkeley, CA, USA')
(47.6038321, -122.3300624, 'Seattle, WA')
(37.8708393, -122.

In [13]:
# Create a new dataframe with just lat and long info 
loc_df = df.loc[:,'HometownLatitude':'PostDoc_Longitude']
# View the dataframe 
#loc_df.head(20)

Unnamed: 0,HometownLatitude,HometownLongitude,UndergradLatitude,UndergradLongitude,RA_LM_Latitude,RA_LM_Longitude,PHD_Latitude,PHD_Longitude,Masters_Latitude,Masters_Longitude,PostDoc_Latitude,PostDoc_Longitude
0,43.507303,16.441423,41.823989,-71.412834,41.308214,-72.925052,40.441694,-79.990086,,,,
1,45.438496,10.992412,45.886548,11.045237,,,45.886548,11.045237,45.886548,11.045237,42.953765,-81.229153
2,52.238524,-0.902493,-34.424394,150.89385,-37.814218,144.963161,-37.814218,144.963161,,,,
3,22.279328,114.162813,22.279328,114.162813,52.47754,-1.894053,52.47754,-1.894053,52.47754,-1.894053,51.451495,-0.983634
4,41.875562,-87.624421,42.268157,-83.731229,39.952724,-75.163526,39.952724,-75.163526,,,,
5,37.692236,-97.337545,38.971938,-95.23595,,,38.895009,-77.036563,38.895009,-77.036563,41.308214,-72.925052
6,-34.607562,-58.437076,-34.607562,-58.437076,-34.607562,-58.437076,50.110644,8.682092,-34.607562,-58.437076,,
7,58.014965,56.246723,55.750446,37.617494,55.682893,37.622377,48.85661,2.351499,48.584614,7.750713,58.014965,56.246723
8,18.938771,72.835335,18.938771,72.835335,39.952724,-75.163526,32.717421,-117.162771,35.227087,-80.843127,34.053691,-118.242767
9,39.101454,-84.51246,37.444329,-122.159847,40.712728,-74.006015,40.349274,-74.659296,52.203482,0.123582,,


In [14]:
# Faculty locations 
lat_list = []
long_list = []
# Loop through rows to get the longitude and latitude of the PostDoc_locations (be aware of missing data)
for faculty_location in df['faculty_loc']:
   
    if pd.isnull(faculty_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(faculty_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,faculty_location))
        
# #Saving lat and long in separate columns in the dataframe    
df['Faculty_Latitude' ] = lat_list
df['Faculty_Latitude'] = df['Faculty_Latitude'].astype('float')

df['Faculty_Longitude'] = long_list
df['Faculty_Longitude'] = df['Faculty_Longitude'].astype('float')
#df.head(40)

# create a map with research position locations
df_faculty = df.loc[:,"Faculty_Longitude":"Faculty_Latitude"] #create a subset of df to deal with the na problem
df_faculty = df_faculty.dropna()
faculty_locs = df.loc[:,"faculty_loc"]
#print(df_undergrad)

lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
faculty_locs = [x for x in faculty_locs if pd.notna(x)]

# research locations plotting 
faculty_map = folium.Map()
#Loop through locations and add the markers on the map
for faculty_location in range(len(faculty_locs)): 
    folium.Marker([lat_list[faculty_location], long_list[faculty_location]],
                  popup=faculty_locs[faculty_location]).add_to(faculty_map)

#display map    
#faculty_map


(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(32.7762719, -96.7968559, 'Dallas, TX, United States')
(None, None, nan)
(58.014965, 56.246723, 'Perm, Russia')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(35.5306348, -92.7907215, 'Austin, TX, USA')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(47.6038321, -122.3300624, 'Seattle, WA, USA')
(42.2625932, -71.8022934, 'Worcester, MA USA')
(35.5306348, -92.7907215, 'Austin, TX, USA')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(51.4893335, -0.144055084527687, 'London, UK')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, No

In [43]:
m = Map(zoom=1, fitWorld=True)
rad = 100

# hometown
hometown = []
for ii in range(len(df)):
   coords = df.HometownLatitude[ii],df.HometownLongitude[ii]
   c = Circle()
   c.radius = rad
   c.location = coords
   c.color = 'blue'
   c.fill = True
   c.name = "Hometown location"
   hometown.append(c)
ht = lf.LayerGroup(name='hometown', layers=hometown)
m.add_layer(ht)

# undergrad
undergrad = []
for ii in range(len(df)):
   coords = df.UndergradLatitude[ii],df.UndergradLongitude[ii]
   c = Circle()
   c.radius = rad
   c.location = coords
   c.color = 'green'
   c.fill = True
   c.name = "Undergrad location"
   undergrad.append(c)
ug = lf.LayerGroup(name='undergrad', layers=undergrad)
m.add_layer(ug)

# research positions
research = []
for ii in range(len(df)):
   coords = df.RA_LM_Latitude[ii],df.RA_LM_Longitude[ii]
   c = Circle()
   c.radius = rad
   c.location = coords
   c.color = 'orange'
   c.fill = True
   c.name = "RA location"
   research.append(c)
ra = lf.LayerGroup(name='research', layers=research)
m.add_layer(ra)

# master
master = []
for ii in range(len(df)):
   coords = df.Masters_Latitude[ii],df.Masters_Longitude[ii]
   c = Circle()
   c.radius = rad
   c.location = coords
   c.color = 'black'
   c.fill = True
   c.name = "Master location"
   master.append(c)
ms = lf.LayerGroup(name='master', layers=master)
m.add_layer(ms)

# phd
phd = []
for ii in range(len(df)):
   coords = df.PHD_Latitude[ii],df.PHD_Longitude[ii]
   c = Circle()
   c.radius = rad
   c.location = coords
   c.color = 'purple'
   c.fill = True
   c.name = "PhD location"
   phd.append(c)
ph = lf.LayerGroup(name='phd', layers=phd)
m.add_layer(ph)

# postdoc
postdoc = []
for ii in range(len(df)):
   coords = df.PostDoc_Latitude[ii],df.PostDoc_Longitude[ii]
   c = Circle()
   c.radius = rad
   c.location = coords
   c.color = 'red'
   c.fill = True
   c.name = "Postdoc location"
   postdoc.append(c)
psd = lf.LayerGroup(name='postdoc', layers=postdoc)
m.add_layer(psd)

# faculty
faculty = []
for ii in range(len(df)):
   coords = df.Faculty_Latitude[ii],df.Faculty_Longitude[ii]
   c = Circle()
   c.radius = rad
   c.location = coords
   c.color = 'white'
   c.fill = True
   c.name = "Faculty location"
   faculty.append(c)
fc = lf.LayerGroup(name='faculty', layers=faculty)
m.add_layer(fc)

m.add_control(LayersControl())
m

AttributeError: 'Map' object has no attribute '_build_map'