## WPI International Students Exhibit 2022: Interactive Maps

WPI Archives and Special Collections presents the digital map companion to the WPI International Students Exhibit 2022.

This Jupyter Notebook documents the data manipulation used to turn biographical information from WPI's archives into the exhibit's map visualizations. The exhibit maps are:

1. [Map 1](#001): Hometown of WPI International Students (1876-1943)
2. [Map 2](#002): Number of WPI International Students from Each Country (1876-1943)
3. [Map 3](#003): Number of WPI International Students from Each Country by Year (1876-1943)

*For more information, please contact Emily Ping O'Brien at epobrien@wpi.edu or archives@wpi.edu.*

In [1]:
#import pandas for data analysis and manipulation
#import python mapping libraries: plotly, plotly express, folium
#note: the cells visible in this notebook only reference pd and px; the plotly and folium names are not used by them

import plotly
import plotly.express as px
import pandas as pd
from plotly.offline import init_notebook_mode
import folium

#initialize plotly's offline notebook mode before any figure is shown
init_notebook_mode(connected = True)

In [2]:
#load the spreadsheet of WPI International students (1876-1943)
#alternate location when the file is kept in a data folder: '../data/wpiintlstudentsmap2.xlsx'
#country latitude, longitude, and name are cast to strings (they are concatenated into text keys in a later cell)
#the class column becomes classof because class is a reserved word in python
isdf = (
    pd.read_excel('wpiintlstudentsmap2.xlsx')
    .astype({'clat': str, 'clong': str, 'country1': str})
    .rename(columns={'class': 'classof'})
)

#spot check one record, then show the dataframe
print(isdf.values[69])
print(isdf)

['Huang, Chen S.' 'China' 'China' 'nan' 'nan' 'China' 35.8617 104.1954
 1910.0]
                      sname                      home1     country1  \
0    Abadjieff, Ivan Varban     T. Pazordjik, Bulgaria     Bulgaria   
1         Aguirre, Frank M.           Cienfuegos, Cuba         Cuba   
2        Ajemian, Shahin A.                    Armenia      Armenia   
3         Alfonso, Fernando                   Colombia     Colombia   
4            Allen, Kenneth      Yarmouth, Nova Scotia       Canada   
..                      ...                        ...          ...   
183      Yrigoyen, Juan I.   Guatemala City, Guatemala    Guatemala   
184  Yuan, Harold Hsiang-Ho   Tang Shan, Chihli, China        China   
185       Yuan, Tsong-Kyien            Hangchow, China        China   
186              Yuan, Tung              Peking, China        China   
187     Zaragoza, Jose Luis        Manila, Philippines  Philippines   

          clat        clong           home2       hlat       hlong 

<a id="001"></a>

In [3]:
#Map 1: one point per International student, placed at the home city/town
#(or at the country when no city/town was provided); point color follows the year graduated
#columns are referenced by name, so plotly express looks them up in isdf
fig = px.scatter_geo(
    isdf,
    lat='hlat',
    lon='hlong',
    color='classof',
    hover_name='sname',
    hover_data=['home1', 'classof'],
    labels={'classof':'Class of','hlat':'Latitude','hlong':'Longitude','home1':'Country'},
    title='Map 1. Hometown of WPI International Students (1876-1943)',
    #'conic conformal' was tried as an alternate projection
    projection='winkel tripel',
)

fig.show()

In [4]:
#build the text keys that the later cells count on

#year as text: the float year prints like 1910.0, so keep only the first 4 characters
#(every piece of a concatenated key must be a string)
isdf['classof'] = isdf['classof'].astype(str).str[:4]

#key for the map of all international students from each country: country name|latitude|longitude
isdf['cll'] = isdf['country1'].str.cat([isdf['clat'], isdf['clong']], sep='|')

#key for the map of students from each country by year: the country key plus |year
isdf['cllclass'] = isdf['cll'] + '|' + isdf['classof']

In [5]:
#count the students from each country, keyed on the concatenated country|latitude|longitude value
#groupby().size() counts the rows per key directly; unlike pivot(index='sname', columns='cll', ...)
#it does not raise a ValueError when two records share the same student name and country key,
#and it does not build a students-by-countries table just to count its columns
countc = isdf.groupby('cll').size()

#convert the countc series to a dataframe: cll becomes a column and the counts are named country_count
countdf = countc.reset_index(name='country_count')

In [6]:
#break cll back apart on the | delimiter into 3 columns: country name, latitude, and longitude
countdf[['c2', 'latc', 'lonc']] = countdf['cll'].str.split(pat='|', expand=True)

<a id="002"></a>

In [7]:
#Map 2: bubble map, one circle per country; the circle size follows the number of students
#from that country from before WWII
#columns are referenced by name, so plotly express looks them up in countdf
figc = px.scatter_geo(
    countdf,
    lat='latc',
    lon='lonc',
    size='country_count',
    color='c2',
    hover_name='c2',
    hover_data=['country_count'],
    labels={'c2':'Country','country_count':'Number from Country', 'latc':'Latitude','lonc':'Longitude'},
    title='Map 2. Number of WPI International Students from Each Country (1876-1943)',
    #'conic conformal' was tried as an alternate projection
    projection='winkel tripel',
)

figc.show()

In [8]:
#count the students for each country and class-of year, keyed on the concatenated
#country|latitude|longitude|year value
#groupby().size() counts the rows per key directly; unlike pivot(index='sname', columns='cllclass', ...)
#it does not raise a ValueError when two records share the same student name and key
countclass = isdf.groupby('cllclass').size()

#convert the series to a dataframe and make the cllclass index a column
#the series is unnamed, so the count column is labeled 0 (it is renamed to class_count in the next cell)
classdf = countclass.to_frame().reset_index()

In [9]:
#give the count column (labeled 0 after the series-to-dataframe conversion) a readable name
classdf = classdf.rename(columns={0: 'class_count'})

#break cllclass apart on the | delimiter into 4 columns: country name, latitude, longitude, and class of year
classdf[['cc', 'cclat', 'cclon', 'ccyear']] = classdf['cllclass'].str.split(pat='|', expand=True)

In [10]:
#remove records where year is blank (a blank year shows up as the text 'nan')
#.copy() makes the filtered result an independent dataframe, so the columns added below are not
#written onto a slice of the previous classdf (avoids pandas' SettingWithCopyWarning)
classdf = classdf[classdf['ccyear'] != 'nan'].copy()
#change year to datetime so that it can be sorted in ascending order; this yields YYYY-MM-DD values
#the explicit format tells pandas the text is a 4-digit year instead of leaving it to guess
classdf['ccyear2'] = pd.to_datetime(classdf['ccyear'], format='%Y')
#change ccyear to just the YEAR without month and day
classdf['ccyear3'] = classdf['ccyear2'].dt.year
#sort records by year (the animated map in a later cell is built from this dataframe)
classdf = classdf.sort_values(by=['ccyear3'])

<a id="003"></a>

In [11]:
#Map 3: animated map of student countries, one animation frame per 'Class of' year
#the circle size follows the number of students from the country in that year
#columns are referenced by name, so plotly express looks them up in classdf
figclass = px.scatter_geo(
    classdf,
    lat='cclat',
    lon='cclon',
    size='class_count',
    color='ccyear',
    animation_frame='ccyear',
    hover_name='cc',
    hover_data=['class_count', 'ccyear'],
    labels={'class_count':'Number from Country','ccyear':'Class of', 'cclat':'Latitude','cclon':'Longitude'},
    title='Map 3. Number of WPI International Students from Each Country by Year (1876-1943)',
    #'winkel tripel' was tried as an alternate projection
    projection='natural earth',
)

#slow down the animation: set both the frame duration and the transition duration
#on the first button of the first update menu to 1500
first_button_args = figclass.layout.updatemenus[0].buttons[0].args[1]
first_button_args['frame']['duration'] = 1500
first_button_args['transition']['duration'] = 1500

figclass.show()