# Visualize Faculty and Publication Counts on a World Map

## Fetch Location Info

The following cells were not run on the local machine because of proxy-related connection errors. They were run on Google Colab and GitHub Codespaces instead.

In [None]:
import pandas as pd
import glob
import re

def get_region_and_field(file_name):
    """Extract the region and field tags from an institutions CSV file name.

    The region is the text between ``Institutions_`` and the next underscore.
    The field is the last underscore-separated token before ``.csv``.
    For example, ``Institutions_world_ai.csv`` yields ``('world', 'ai')``.

    Args:
        file_name: File name or path of the CSV file.

    Returns:
        A ``(region, field)`` tuple of strings.

    Raises:
        ValueError: If ``file_name`` does not follow the expected pattern.
    """
    # Raw strings keep ``\.`` a regex escape instead of an invalid string escape.
    region_match = re.search(r'Institutions_(.*?)_', file_name)
    stem_match = re.search(r'_(.*?)\.csv', file_name)
    if region_match is None or stem_match is None:
        raise ValueError(
            "Unexpected file name {!r}: expected "
            "'Institutions_<region>_..._<field>.csv'".format(file_name)
        )

    # Get region
    region = region_match.group(1)
    # Get field: the last token of everything matched before ``.csv``
    field = stem_match.group(1).split('_')[-1]
    return region, field

# Collect every per-field institutions CSV.
# A forward slash is used as the separator: glob accepts it on Windows as well
# as on Linux (Colab / Codespaces), whereas a backslash is a literal character
# on Linux and also forms an invalid string escape in a normal string literal.
all_files = glob.glob('detailed/Institutions_*_*.csv')
df_list = []
for file in all_files:
    # Only the field tag is needed here; the region part of the name is unused.
    _, field = get_region_and_field(file)
    df = pd.read_csv(file)
    # Tag every row with the field parsed from the file name.
    df['field'] = field
    df_list.append(df)

In [None]:
import os

from geopy.geocoders import Nominatim

# One geocoder client is shared by all frames instead of being rebuilt per frame.
geolocator = Nominatim(user_agent="my_geocoder")

# Make sure the output directory exists before the slow geocoding starts,
# so that the results are not lost to a failed write at the end.
os.makedirs('WithLocation', exist_ok=True)

for df in df_list:
    field = df['field'][0]

    # Query each university exactly once; a falsy result is stored as None.
    df['location'] = df['University'].apply(
        lambda name: geolocator.geocode(name) or None
    )

    # Extract latitude and longitude from the location object
    df['lat'] = df['location'].apply(lambda x: x.latitude if x else None)
    df['lon'] = df['location'].apply(lambda x: x.longitude if x else None)

    # Rows without coordinates are kept in the saved file. The previous
    # trailing ``df = df.dropna(...)`` ran after this write and only rebound
    # the loop variable, so it never changed df_list or the CSV.
    df.to_csv('WithLocation/location_{}.csv'.format(field), index = False)

``` python
# output of the cell on colab
print(df_list[0])

"""
    Rank                             University  Faculty Count  \
0      1                Northeastern University              8   
1      2        Univ. of California - San Diego              9   
2      3        Georgia Institute of Technology             17   
3      4             Carnegie Mellon University             12   
4      5    University of Massachusetts Amherst              8   
5      6                               TU Delft              7   
6      7          Pennsylvania State University             10   
7      8  Massachusetts Institute of Technology              8   
8      9                 University of Michigan              9   
9     10                    Columbia University              9   
10    11                  Ohio State University              6   
11    12  Univ. of Illinois at Urbana-Champaign             10   
12    13     California Institute of Technology              2   
13    13                     Max Planck Society              4   
14    15                 Stony Brook University              8   
15    15        Univ. of California - Riverside              8   
16    17      University of Southern California              4   
17    17      University of Wisconsin - Madison              8   
18    19                  University of Chicago              4   
19    20               IMDEA Networks Institute              5   
20    20                    Tsinghua University             17   
21    22  University of Maryland - College Park              6   
22    23               University of Washington              6   
23    24                             ETH Zurich              6   
24    25                                   EPFL              7   
25    26                   Princeton University              8   
26    27                Northwestern University              4   
27    27                  University of Waikato              3   
28    29                                  KAIST              6   
29    30                      Purdue University              8   

    Publication Count field  \
0                15.0  metr   
1                13.3  metr   
2                12.0  metr   
3                 9.8  metr   
4                 9.0  metr   
5                 8.5  metr   
6                 8.3  metr   
7                 7.3  metr   
8                 7.2  metr   
9                 6.9  metr   
10                6.5  metr   
11                6.3  metr   
12                6.2  metr   
13                6.2  metr   
14                6.1  metr   
15                6.1  metr   
16                6.0  metr   
17                6.0  metr   
18                5.9  metr   
19                5.7  metr   
20                5.7  metr   
21                5.6  metr   
22                5.1  metr   
23                4.9  metr   
24                4.8  metr   
25                4.6  metr   
26                4.5  metr   
27                4.5  metr   
28                4.4  metr   
29                4.3  metr   

                                             location        lat         lon  
0   (Northeastern University, 360, Huntington Aven...  42.339070  -71.087838  
1   (University of California, San Diego, 9500, Gi...  32.879244 -117.231125  
2   (Georgia Tech, Ferst Drive Northwest, Atlanta,...  33.776095  -84.398808  
3   (Carnegie Mellon University, Zebina Way, Belle...  40.444190  -79.942719  
4   (University of Massachusetts Amherst, North Ha...  42.386938  -72.529915  
5   (Technische Universiteit Delft, Rijksstraatweg...  51.998827    4.373960  
6   (Touro College, 320, West 31st Street, Manhatt...  40.750536  -73.995675  
7   (Massachusetts Institute of Technology, Bishop...  42.358253  -71.096627  
8   (University of Michigan, 500, South State Stre...  42.294214  -83.710039  
9   (Columbia University, West 120th Street, Manha...  40.807756  -73.961649  
10  (The Ohio State University at Mansfield, 1760,...  40.798811  -82.578229  
11  (Atkins Golf Club at the University of Illinoi...  40.090629  -88.176418  
12  (California Institute of Technology, San Pasqu...  34.137014 -118.125288  
13  (SKA-MPI (Max Planck Society Precursor), Karee... -30.717988   21.413052  
14  (Stony Brook University, Town of Brookhaven, S...  40.909883  -73.121550  
15  (University of California, Riverside, 900, Uni...  33.964257 -117.339786  
16  (University of Southern California, Flower Str...  34.021883 -118.285867  
17  (University of Wisconsin-Madison, South Allen ...  43.080274  -89.430959  
18  (The University of Chicago, 5801, South Ellis ...  41.791397  -87.600844  
19                                               None        NaN         NaN  
20  (清华大学, 30, 双清路, 五道口, 清华园街道, 东升镇, 海淀区, 北京市, 100...  40.002290  116.320963  
21  (University of Maryland, College Park, Baltimo...  38.990412  -76.943859  
22  (University of Washington, Yesler Swamp Trail,...  47.655623 -122.302722  
23  (ETH Zürich, 130, Binzmühlestrasse, Oerlikon, ...  47.413218    8.537491  
24  (École Polytechnique Fédérale de Lausanne, Rou...  46.518659    6.566562  
25  (Princeton University, Alexander Road, Princet...  40.338675  -74.658365  
26  (Northwestern University, 633, Clark Street, D...  42.055716  -87.675295  
27  (University of Waikato, Tralee Place, Ruakura,... -37.785874  175.316766  
28  (카이스트안경, 명동8나길, 충무로1가, 명동, 중구, 서울특별시, 04536, 대...  37.561230  126.983240  
29  (Purdue University, 610, West Lafayette, Tippe...  40.430028  -86.926421  

"""
```

## Visualize the data with Dash and Plotly


### Generate data dict

This portion was run on Colab. To run it locally, refer to the following section.

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import numpy as np
import glob
from utils import get_region_and_field

In [17]:
# Comment this cell out when running on Colab.
# Reload the per-field tables previously saved under ../WithLocation.
# NOTE: the frames keep the order returned by glob, and the integer values of
# the dropdown in the Dash layout index into that order.
all_files = glob.glob('../WithLocation/location_*.csv')
df_list = [pd.read_csv(csv_path) for csv_path in all_files]

In [18]:
def _nan_to_zero(values):
    """Return a copy of ``values`` with every NaN entry replaced by 0."""
    return [0 if np.isnan(value) else value for value in values]


# One plotting frame per loaded table, in the same order as df_list.
all_list = []

for source in df_list:
    # Rename the columns for plotting and replace missing numbers with 0.
    # NOTE(review): a missing coordinate becomes latitude/longitude 0, so a
    # university that could not be geocoded is drawn at (0, 0) on the map.
    plot_frame = pd.DataFrame({
        'University': source['University'].to_list(),
        'Latitude': _nan_to_zero(source['lat'].to_list()),
        'Longitude': _nan_to_zero(source['lon'].to_list()),
        'FacultyCount': _nan_to_zero(source['Faculty Count'].to_list()),
        'PublicationCount': _nan_to_zero(source['Publication Count'].to_list()),
        'Field': source['field'].to_list(),
    })
    all_list.append(plot_frame)

In [19]:
# Sanity check: number of per-field tables that were loaded.
print(len(df_list))

12


### Call Dash

In [20]:
# Dash app initialization
app = dash.Dash(__name__)

# Dash app layout: a title, a field selector and the map itself.
app.layout = html.Div([
    html.H1("World Map with Heat Maps for Faculty Count and Publication Count"),

    dcc.Dropdown(
        id = 'field-dropdown',
        # Each value is a positional index into all_list, so these numbers are
        # only correct for the order in which the tables were loaded.
        # NOTE(review): that order comes from glob; re-check the mapping if the
        # files or the machine change.
        options = [
            {'label': 'All', 'value': 3},
            {'label': 'Artificial Intelligence', 'value': 9},
            {'label': 'Networks', 'value': 11},
            {'label': 'Architecture', 'value': 2},
            {'label': 'Database', 'value': 1},
            {'label': 'HPC', 'value': 5},
            {'label': 'Metrics', 'value': 0},
            {'label': 'Operating System', 'value': 6},
            {'label': 'Other', 'value': 7},
            {'label': 'Programming Language', 'value': 4},
            {'label': 'System', 'value': 8},
            {'label': 'Theory', 'value': 10},
        ],
        # Start on the "All" entry.
        value = 3,
        style = {'width': '85%'}
    ),
    dcc.Graph(id='world-map'),
])

# Dash app callback to update the map based on the selected field
@app.callback(
    Output('world-map', 'figure'),
    [Input('field-dropdown', 'value')]
)
def update_map(selected_field):
    """Build the world map for the table chosen in the dropdown.

    Args:
        selected_field: Value of the ``field-dropdown`` component, used as a
            positional index into ``all_list``. It is ``None`` after the user
            clears the dropdown.

    Returns:
        The Plotly figure for the selected table, or ``dash.no_update`` when
        nothing is selected, so that the current map stays on screen.

    Side effects:
        Writes the selected table to ``tmp.csv`` and exports the figure to
        ``../images/region_analysis/world_map_<field>.html`` on every call.
    """
    # A cleared dropdown sends None; indexing all_list with it would raise.
    if selected_field is None:
        return dash.no_update

    selected_df = all_list[selected_field]
    field = selected_df['Field'][0]
    # NOTE(review): looks like a leftover debug dump; confirm before removing.
    selected_df.to_csv("tmp.csv")
    fig = px.scatter_geo(selected_df, 
                         lat='Latitude', 
                         lon='Longitude', 
                         text='University', 
                         size='FacultyCount', 
                         color='PublicationCount',
                         title = 'World Map in {}'.format(field),
                         labels={'FacultyCount': 'Faculty Count', 'PublicationCount': 'Publication Count'},
                         hover_data=['FacultyCount', 'PublicationCount'],
                         color_continuous_scale='Viridis',  # Adjust the color scale as needed
                        #  projection="natural earth",
                         custom_data=['University', 'FacultyCount', 'PublicationCount']  # Specify the data to include in custom_data
    )
    
    # Update the hovertemplate to exclude lat and lon
    fig.update_traces(
        hovertemplate='<br>'.join([
            'University: %{customdata[0]}',
            'Faculty Count: %{customdata[1]}',
            'Publication Count: %{customdata[2]}'
        ])
    )
    
    # Keep a standalone HTML copy of the map for the selected field.
    fig.write_html("../images/region_analysis/world_map_{}.html".format(field))


    return fig

# Run the Dash app
if __name__ == '__main__':
    # Newer Dash releases replace ``run_server`` with ``run``; prefer ``run``
    # when it exists and fall back to ``run_server`` on older installations.
    launch = getattr(app, 'run', None) or app.run_server
    launch(debug=True, port=8051)