# Neuromap Project Neurohackademy 2019

In [1]:
# install libraries/ set up script
import pandas as pd
import numpy as np
import folium
import geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="neuromap")

In [2]:
# sets the defaults for viewing the dataframe
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [3]:
#identify missing values
missing_values = ["  ", "-"]
df = pd.read_csv('NeuroMap-38responses.csv', na_values = missing_values, encoding='latin-1')

#view data
#df.head(20)

In [4]:
# #clean data
# # rename column questions to shorter
df.columns = ['timestamp','consent','name','birthplace','birth_year', 'gender',
              'undergrad_deg','undergrad_loc',  'undergrad_inst','undergrad_research',
              'ra_qual','ra_lm_loc', 'ra_lm_inst', 'ra_lm_research',
              'masters_qual','masters_loc', 'masters_inst', 'masters_research', 
              'phd_qual','phd_loc', 'phd_inst', 'phd_research',
              'post_doc_qual', 'postdoc_loc' ,'postdoc_inst','postdoc_research', 
            'faculty_qual', 'faculty_loc', 'faculty_inst', 'faculty_research',
              'google_scholar']

# change white spaces (blanks) to NaN
#df.replace(r'^\s+$', np.nan, regex=True)

replace_dict_undergrad = {
    "Tijuana, Baja California, MÌ©xico": "Tijuana, Baja California, Mexico",
    'Raleigh & Chapel Hill, North Carolina, USA': 'Chapel Hill, North Carolina, USA'
}
df['undergrad_loc'] = df['undergrad_loc'].replace(replace_dict_undergrad) 

replace_dict_research = {
    'Central Institute of Chemistry and Mechanics': 'Nagatinskaya, Moscow, Russia'
}
df['ra_lm_loc'] = df['ra_lm_loc'].replace(replace_dict_research)

replace_dict_phd = {
    'Rio de Janeiro, Brazil AND Montreal, Canada': 'Rio de Janeiro, Brazil'
}
df['phd_loc'] = df['phd_loc'].replace(replace_dict_phd) 

In [5]:
#Loop through rows to get the longitude and latitude of the hometowns
lat=[]
long=[]
for home_location in df.loc[:,"birthplace"]:
    location_1 = geolocator.geocode(home_location, timeout=10)
    #print((location_1.latitude, location_1.longitude))
    
    lat.append(location_1.latitude)
    long.append(location_1.longitude)

#Saving lat and long in separate columns in the dataframe    
df['HometownLatitude'] = lat
df['HometownLatitude'] = df['HometownLatitude'].astype('float')

df['HometownLongitude'] = long
df['HometownLatitude'] = df['HometownLongitude'].astype('float')
#df.head(20)

In [6]:
print(home_location)

Trento, Trentino, Italy


In [7]:
#print(locs)

In [8]:
#Get the column with hometown location
locs = df.loc[:,"birthplace"]

#Plot hometown locations on the map!
birth_map = folium.Map()

#(location=[location_1.latitude, location_1.longitude])

#Loop through locations and add the markers on the map
for home_location in range(len(locs)): 
    folium.Marker([lat[home_location], long[home_location]], popup=locs[home_location]).add_to(birth_map)

birth_map

In [9]:
#Try plotting the same but with different markers (circles!)
locs = df.loc[:,"birthplace"]
#Plot hometown locations on the map!
m = folium.Map()
#Loop through locations and add the markers on the map
for home_location in range(len(locs)):
   folium.Circle(
   radius=300,
   location=[lat[home_location], long[home_location]],
   popup=locs[home_location],
   color='crimson',
   fill=False,
).add_to(m)
m

In [10]:
#Loop through rows to get the longitude and latitude of the undergrad_cities (be aware of missing data)
lat_list = []
long_list = []
for undergrad_location in df['undergrad_loc']:
    #print(location_1)
    
    if pd.isnull(undergrad_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(undergrad_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    #print((lat,long,location))
        
# #Saving lat and long in separate columns in the dataframe    
df['UndergradLatitude' ] = lat_list
df['UndergradLatitude'] = df['UndergradLatitude'].astype('float')

df['UndergradLongitude'] = long_list
df['UndergradLatitude'] = df['UndergradLongitude'].astype('float')
#df.head(40)

In [11]:
# create a map with undergrad locations
df_undergrad = df.loc[:,"UndergradLatitude":"UndergradLongitude"] #create a subset of df to deal with the na problem
df_undergrad = df_undergrad.dropna()
undergrad_locs = df.loc[:,"undergrad_loc"]
#print(df_undergrad)

if None in lat_list: lat_list.remove(None)
if None in long_list: long_list.remove(None)
undergrad_locs = [x for x in undergrad_locs if pd.notna(x)]

In [12]:
# check formats are correct with the NA values removed
print(lat_list)
print(long_list)
print(undergrad_locs)
type(undergrad_locs)

[41.8239891, 45.886548, -34.4243941, 22.2793278, 42.2681569, 38.9719384, -34.6075616, 55.7504461, 18.9387711, 37.4443293, 40.4258686, 32.527002, 25.0375198, 29.8693496, 42.0128695, 37.5666791, 32.5010188, 52.1518157, 36.1556805, 33.5778631, 34.0536909, 33.7872568, -22.9110137, 45.421106, 31.778345, 35.9131542, 55.0282171, 29.9499323, 43.0821793, 35.7006177, 40.7127281, 38.8339578, 43.653963, 31.778345, 43.157285, 34.0966764, 51.4893335]
[-71.4128343, 11.0452369, 150.89385, 114.1628131, -83.7312291, -95.2359496, -58.437076, 37.6174943, 72.8353355, -122.1598465, -86.9080655, -85.4367484053398, 121.5636796, 77.8902124, -73.9081901, 126.9782914, -116.9646629, 4.48110886662043, -95.9929113, -101.8551665, -118.2427666, -117.8503088, -43.2093727, -75.690308, 35.2250786, -79.05578, 82.9234509, -90.0701156, -73.7853915, 51.4013785, -74.0060152, -104.8253485, -79.387207, 35.2250786, -77.615214, -117.7197785, -0.144055084527687]
['Providence, RI, USA', 'Rovereto, Trentino, Italy', 'Wollongong, NS

list

In [13]:
# undergrad locations plotting 
undergrad_map = folium.Map()
#Loop through locations and add the markers on the map
for undergrad_location in range(len(undergrad_locs)): 
    folium.Marker([lat_list[undergrad_location], long_list[undergrad_location]], popup=undergrad_locs[undergrad_location]).add_to(undergrad_map)
    
#display map
undergrad_map

In [14]:
# create RA locations 
# Loop through rows to get the longitude and latitude of the RA_locations (be aware of missing data)
lat_list = []
long_list = []
for research_location in df['ra_lm_loc']:
    #print(location_1)
    
    if pd.isnull(research_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(research_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    print((lat,long,research_location))
        
# #Saving lat and long in separate columns in the dataframe    
df['RA_LM_Latitude' ] = lat_list
df['RA_LM_Latitude'] = df['RA_LM_Latitude'].astype('float')

df['RA_LM_Longitude'] = long_list
df['RA_LM_Latitude'] = df['RA_LM_Longitude'].astype('float')
#df.head(40)


(41.3082138, -72.9250518, 'New Haven, CT, USA')
(None, None, nan)
(-37.8142176, 144.9631608, 'Melbourne, Victoria, Australia')
(52.4775396, -1.894053, 'Birmingham, UK')
(39.9527237, -75.1635262, 'Philadelphia, PA, USA')
(None, None, nan)
(-34.6075616, -58.437076, 'Buenos Aires, Argentina')
(55.6828925, 37.6223775, 'Nagatinskaya, Moscow, Russia')
(39.9527237, -75.1635262, 'Philadelphia, USA')
(40.7127281, -74.0060152, 'New York City, New York, USA')
(None, None, nan)
(None, None, nan)
(25.0375198, 121.5636796, 'Taipei, Taiwan')
(None, None, nan)
(40.7127281, -74.0060152, 'New York, NY')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(37.4443293, -122.1598465, 'Palo Alto, California, USA')
(45.5202471, -122.6741949, 'Portland, OR, USA')
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(None, None, nan)
(37.8708393, -122.2728639, 'Berkeley, CA, USA')
(37.7792808, -122.4192363, 'San Francisco, CA, USA')
(35

In [15]:
# create a map with research position locations
df_research = df.loc[:,"RA_LM_Latitude":"RA_LM_Longitude"] #create a subset of df to deal with the na problem
df_research = df_research.dropna()
research_locs = df.loc[:,"ra_lm_loc"]
#print(df_undergrad)

lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
research_locs = [x for x in research_locs if pd.notna(x)]

In [16]:
# check formats are correct with the NA values removed
print(lat_list)
print(long_list)
print(research_locs)
type(research_locs)

[41.3082138, -37.8142176, 52.4775396, 39.9527237, -34.6075616, 55.6828925, 39.9527237, 40.7127281, 25.0375198, 40.7127281, 37.4443293, 45.5202471, 37.8708393, 37.7792808, 35.7006177, 40.4416941, 38.8950092, 52.5170365, 34.0536909]
[-72.9250518, 144.9631608, -1.894053, -75.1635262, -58.437076, 37.6223775, -75.1635262, -74.0060152, 121.5636796, -74.0060152, -122.1598465, -122.6741949, -122.2728639, -122.4192363, 51.4013785, -79.9900861, -77.0365625, 13.3888599, -118.2427666]
['New Haven, CT, USA', 'Melbourne, Victoria, Australia', 'Birmingham, UK', 'Philadelphia, PA, USA', 'Buenos Aires, Argentina', 'Nagatinskaya, Moscow, Russia', 'Philadelphia, USA', 'New York City, New York, USA', 'Taipei, Taiwan', 'New York, NY', 'Palo Alto, California, USA', 'Portland, OR, USA', 'Berkeley, CA, USA', 'San Francisco, CA, USA', 'Tehran, Tehran, Iran', 'Pittsburgh, PA, USA', 'Washington, DC, US', 'Berlin, Germany', 'Los Angeles, CA, USA']


list

In [17]:
# research locations plotting 
research_map = folium.Map()
#Loop through locations and add the markers on the map
for research_location in range(len(research_locs)): 
    folium.Marker([lat_list[research_location], long_list[research_location]], popup=research_locs[research_location]).add_to(research_map)

#display map
research_map

In [18]:
# create PHD/Doctoral locations 
# Loop through rows to get the longitude and latitude of the RA_locations (be aware of missing data)
lat_list = []
long_list = []
for phd_location in df['phd_loc']:
    #print(location_1)
    
    if pd.isnull(phd_location):
        lat = None
        long = None
    else:
        location_1 = geolocator.geocode(phd_location, timeout=10)
        if location_1 is None:
            raise ValueError("Geocode failed")
        lat = location_1.latitude
        long = location_1.longitude
    
    lat_list.append(lat)
    long_list.append(long)
    print((lat,long,phd_location))
        
# #Saving lat and long in separate columns in the dataframe    
df['PHD_Latitude' ] = lat_list
df['PHD_Latitude'] = df['PHD_Latitude'].astype('float')

df['PHD_Longitude'] = long_list
df['PHD_Latitude'] = df['PHD_Longitude'].astype('float')
#df.head(40)

# create a map with research position locations
df_phd = df.loc[:,"PHD_Longitude":"PHD_Latitude"] #create a subset of df to deal with the na problem
df_phd = df_phd.dropna()
phd_locs = df.loc[:,"phd_loc"]
#print(df_undergrad)

lat_list = list(filter(None, lat_list))
long_list = list(filter(None, long_list))
phd_locs = [x for x in phd_locs if pd.notna(x)]

# research locations plotting 
phd_map = folium.Map()
#Loop through locations and add the markers on the map
for phd_location in range(len(phd_locs)): 
    folium.Marker([lat_list[phd_location], long_list[phd_location]], popup=phd_locs[phd_location]).add_to(phd_map)

#display map    
phd_map


(40.4416941, -79.9900861, 'Pittsburgh, PA, USA')
(45.886548, 11.0452369, 'Rovereto, Trentino, Italy')
(-37.8142176, 144.9631608, 'Melbourne, Victoria, Australia')
(52.4775396, -1.894053, 'Birmingham, UK')
(39.9527237, -75.1635262, 'Philadelphia, PA, USA')
(38.8950092, -77.0365625, 'Washington, DC')
(50.1106444, 8.6820917, 'Frankfurt, Germany')
(48.8566101, 2.3514992, 'Paris/France')
(32.7174209, -117.1627714, 'San Diego, CA, USA')
(40.3492744, -74.6592958, 'Princeton, NJ, USA')
(47.6038321, -122.3300624, 'Seattle, WA, United States')
(25.7742658, -80.1936589, 'Miami, FL, USA')
(53.9590555, -1.0815361, 'York, UK')
(53.550341, 10.000654, 'Hamburg, Germany')
(40.4416941, -79.9900861, 'Pittsburgh, PA')
(37.5666791, 126.9782914, 'Seoul, South Korea')
(None, None, nan)
(51.5073219, -0.1276474, 'London, United Kingdom')
(52.1518157, 4.48110886662043, 'Leiden, the Netherlands')
(42.3602534, -71.0582912, 'Boston, MA, USA')
(34.4221319, -119.7026673, 'Santa Barbara, California, USA')
(43.703622,

In [19]:
# practice plotting with circles 

m = folium.Map(
    location=[-34.4243941, 150.89385],
    zoom_start=13
)

folium.Circle(
    radius=100,
    location=[-34.4243941, 150.89385],
    popup='The Waterfront',
    color='crimson',
    fill=False,
).add_to(m)

folium.CircleMarker(
    location=[-34.4243941, 150.89385],
    radius=50,
    popup='Laurelhurst Park',
    color='#3186cc',
    fill=True,
    fill_color='#3186cc'
).add_to(m)


m

In [20]:
df.head(20)

Unnamed: 0,timestamp,consent,name,birthplace,birth_year,gender,undergrad_deg,undergrad_loc,undergrad_inst,undergrad_research,ra_qual,ra_lm_loc,ra_lm_inst,ra_lm_research,masters_qual,masters_loc,masters_inst,masters_research,phd_qual,phd_loc,phd_inst,phd_research,post_doc_qual,postdoc_loc,postdoc_inst,postdoc_research,faculty_qual,faculty_loc,faculty_inst,faculty_research,google_scholar,HometownLatitude,HometownLongitude,UndergradLatitude,UndergradLongitude,RA_LM_Latitude,RA_LM_Longitude,PHD_Latitude,PHD_Longitude
0,2019/08/06 12:34:12 pm GMT-6,Yes,Antonija Kolobaric,"Split, Croatia",1994.0,Female,Experimental Psychology,"Providence, RI, USA",Brown University,"Theory of mind, suicide, development, adolesce...",Yes,"New Haven, CT, USA",Yale University,"early course psychosis, thalamus, mental illne...",No,,,,Yes,"Pittsburgh, PA, USA",University of Pittsburgh,"development, psychosis, fMRI, thalamus",No,,,,No,,,,,16.441423,16.441423,-71.412834,-71.412834,-72.925052,-72.925052,-79.990086,-79.990086
1,2019/08/06 12:37:29 pm GMT-6,Yes,Giacomo,"Verona, Veneto, Italy",1988.0,Male,Cognitive Psychology,"Rovereto, Trentino, Italy",Unversity of Trento,"healthy, behavioral tracking with IR markers",No,,,,Yes,"Rovereto, Trentino, Italy",University of Trento,"healthy, eyetracking on pointlight displays",Yes,"Rovereto, Trentino, Italy",University of Trento,"healthy, fMRI, movement decoding",Yes,"London, Ontario, Canada",Western University,"healthy, fMRI, skill learning, motor represent...",No,,,,https://scholar.google.com/citations?user=IYbg...,10.992412,10.992412,11.045237,11.045237,,,11.045237,11.045237
2,2019/08/06 12:38:08 pm GMT-6,Yes,Hannah Coyle,"Northampton, United Kingdom",1991.0,Female,Psychology,"Wollongong, NSW, Australia","University of Wollongong, Australia","cannabis use, EEG, working memory",Yes,"Melbourne, Victoria, Australia",Monash University,"depression, TMS, DLPFC",No,,,,Yes,"Melbourne, Victoria, Australia",Monash University,"mild traumatic brain injury, TMS-EEG, neuropsy...",No,,,,No,,,,https://scholar.google.com/citations?view_op=l...,-0.902493,-0.902493,150.89385,150.89385,144.963161,144.963161,144.963161,144.963161
3,2019/08/06 12:39:18 pm GMT-6,Yes,JKL,Hong Kong,1986.0,Male,Psychology,Hong Kong,University of Hong Kong,"sexual health, clinical, survey",Yes,"Birmingham, UK",University of Birmingham,"stroke, cognitive assessment battery",Yes,"Birmingham, UK",University of Birmingham,"developmental disabilities, eating, cognitive ...",Yes,"Birmingham, UK",University of Birmingham,"temporal, parietal areas; lesion analysis, fmr...",Yes,"Reading, UK",University of Reading,"reward network, striatum, medial frontal areas...",No,,,,,114.162813,114.162813,114.162813,114.162813,-1.894053,-1.894053,-1.894053,-1.894053
4,2019/08/06 12:51:26 pm GMT-6,Yes,Liz B,"Chicago, IL, USA",1992.0,Female,Communications & Psychology,"Ann Arbor, MI, USA",University of Michigan,"Communication neuroscience, social influence, ...",Yes,"Philadelphia, PA, USA",University of Pennsylvania,"communication neuroscience, smokers, fMRI, tD...",No,,,,Yes,"Philadelphia, PA, USA",Temple University,"adolescent decision-making, fMRI, reward proce...",No,,,,No,,,,,-87.624421,-87.624421,-83.731229,-83.731229,-75.163526,-75.163526,-75.163526,-75.163526
5,2019/08/06 12:53:20 pm GMT-6,Yes,Kendra Seaman,"Wichita, KS, United States",1979.0,Female,Biology and Psychology,"Lawrence, KS, United States",University of Kansas,,No,,,,Yes,"Washington, DC, United States",The Catholic University of America,Mental models in science education,Yes,"Washington, DC",The Catholic University of America,aging and experiential decision making,Yes,"New Haven, CT, United States AND Durham, NC, U...",Yale University and Duke University,Neural bases of Aging and Decision making,Yes,"Dallas, TX, United States",University of Texas at Dallas,"Aging, Decision making, Learning",,-97.337545,-97.337545,-95.23595,-95.23595,,,-77.036563,-77.036563
6,2019/08/06 12:53:47 pm GMT-6,Yes,Martina,"Buenos Aires, Argentina",2019.0,Female,Psychology,"Buenos Aires, Argentina",Favaloro University,"psychology, cognitive-neuroscience, eeg",Yes,"Buenos Aires, Argentina",INECO and University of San Martin,"fmri, complex-systems, eeg, language-processing",Yes,"Buenos Aires, Argentina",Favaloro University,"eeg, language-processing, bilingualism",Yes,"Frankfurt, Germany",Max-Planck-Institute AE,"MEG, fMRI, predictive-processing, schema proce...",No,,,,No,,,,https://scholar.google.com/citations?user=X_n9...,-58.437076,-58.437076,-58.437076,-58.437076,-58.437076,-58.437076,8.682092,8.682092
7,2019/08/06 12:54:19 pm GMT-6,Yes,Sofya,Perm/Russia,1988.0,Female,Applied Maths and Physics,Moscow/Russia,Moscow Institite of Physics and Technology,"brain-computer interface, EEG, Matlab, motor i...",Yes,"Nagatinskaya, Moscow, Russia",research assistant,"brain-computer interface , EEG, Matlab, motor ...",Yes,Neuroscience,University of Strasbourg,"schizophrenia, animal model, thalamo-cortical ...",Yes,Paris/France,Paris Descartes,"neuroimaging, brain development, myelin, pytho...",Yes,"Perm, Russia",Higher School of Economics,"neuroimaging, stroke, python, classifications",Yes,"Perm, Russia",Higher School of Economics,"diffusion MRI, MRI relaxometry, TMS, EEG, stro...",,56.246723,56.246723,37.617494,37.617494,37.622377,37.622377,2.351499,2.351499
8,2019/08/06 12:54:41 pm GMT-6,Yes,Aarti Nair,"Mumbai, India",1984.0,Female,BA in Psychology and Anthropology,"Mumbai, India",St. Xaviers College,Globalization and Gender Bias,Yes,"Philadelphia, USA",Childrens hospital of Philadelphia (CHOP),"Autism, DTI, imaging, neuropsychology",Yes,"Charlotte, North Carolina",UNC Charlotte,"EEG, PTSD, memory suppression",Yes,"San Diego, CA, USA",UCSD,"Autism, multimodal imaging, thalamus, neuropsy...",Yes,"Los Angeles, CA, USA",UCLA,"Autism, psychosis, multimodal imaging, social ...",No,,,,https://scholar.google.com/citations?user=AAqZ...,72.835335,72.835335,72.835335,72.835335,-75.163526,-75.163526,-117.162771,-117.162771
9,2019/08/06 12:55:57 pm GMT-6,Yes,Mai Nguyen,"Cincinnati, Ohio",1990.0,Female,"BA Psychology, BS Biology","Palo Alto, CA, USA",Stanford,"vision, retinotopy, eccentricity bias, face pe...",Yes,"New York City, New York, USA",New York University,"vision, retinotopy, broadband gamma, human fMRI",Yes,"Cambridge, UK",University of Cambridge,"vision, autism, children, behavioral",Yes,"Princeton, NJ, USA",Princeton University,"communication, naturalistic, human fMRI, neur...",No,,,,No,,,,,-84.51246,-84.51246,-122.159847,-122.159847,-74.006015,-74.006015,-74.659296,-74.659296
