In [None]:
import pandas as pd 
import os
import IPython

import json
# Load the raw JSON records and wrap them in a DataFrame (`data`).
# The encoding is given explicitly: JSON text is UTF-8, whereas open() would otherwise
# fall back to the platform's locale encoding and could mis-decode non-ASCII text.
with open("to_from_data.json", "r", encoding="utf-8") as read_file:
    rankings = json.load(read_file)
data = pd.DataFrame(rankings)

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

import qeds
# Apply the matplotlib style provided by qeds.themes to the figures drawn below.
qeds.themes.mpl_style();

import cartopy.crs as ccrs #cartopy documentation: https://scitools.org.uk/cartopy/docs/latest/
import cartopy.feature as cfeature

#import geopandas as gpd
#from shapely.geometry import Point

In [None]:
# Read the JSON file with pandas' own parser. The path is relative to the working
# directory, like the open("to_from_data.json") call in the first cell, so the notebook
# does not depend on one machine's absolute directory layout.
inst_data = pd.read_json("to_from_data.json")

In [None]:
# Preview the frame. Presumably the same records as `data` above (both read to_from_data.json) - TODO confirm dtypes match.
inst_data

In [None]:
# Drop observations that are missing any origin or destination geocoordinate.
coordinate_columns = ["to_latitude", "to_longitude", "latitude", "longitude"]
inst_data = inst_data.dropna(subset=coordinate_columns)

In [None]:
# Shape of the array of distinct `name` values (author's note: 33 unique categories / names).
inst_data["name"].unique().shape

In [None]:
# Shape of the array of distinct `description` values (author's note: 10 unique recruiter types / descriptions).
inst_data["description"].unique().shape

In [None]:
# Distinct values of the `rank` column. Bracket access is required because
# DataFrame.rank is a method; append .shape to get the count instead of the values.
inst_data["rank"].unique()

In [None]:
# Pair up (longitude, latitude) for the origin and the destination of every row.
# `assign` returns a new frame rather than writing columns into the dropna-filtered
# frame, which avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
inst_data = inst_data.assign(
    from_coordinates=list(zip(inst_data.longitude, inst_data.latitude)),
    to_coordinates=list(zip(inst_data.to_longitude, inst_data.to_latitude)),
)

In [None]:
# Distinct category ids present in the data (open question from the author: is category 28 missing?).
# To check, uncomment: inst_data[inst_data.category_id == 28]
inst_data["category_id"].unique()

In [None]:
# Split inst_data into one sub-frame per distinct category_id, keyed by that id.
data_subsets = {
    category: inst_data[inst_data.category_id == category]
    for category in inst_data.category_id.unique()
}

## Maps by Applicant Primary Field (Names) - using scatter-plot arrow proxy

In [None]:
# Note: the points converging in the Gulf of Guinea sit at (0, 0), the intersection of the Equator and the Prime Meridian.

# Missing coordinates were probably coded as (0, 0); rows with NaN coordinates were already dropped above.

# FIXME: some institutions (e.g. IMF, CBO, BEA) are coded as (0, 0). To list them, uncomment and run the next line:
#inst_data.loc[inst_data["to_latitude"] == 0]

In [None]:
# One world map per applicant primary field (category_id). Every row of the category's
# sub-frame is drawn as a blue circle at (longitude, latitude), a red triangle at
# (to_longitude, to_latitude), and a line joining the two.
for j in inst_data.category_id.unique():
    subset = data_subsets[j]

    fig = plt.figure(figsize=(25, 20))
    ax = fig.add_subplot(projection=ccrs.PlateCarree())  # Projection list: https://scitools.org.uk/cartopy/docs/latest/crs/projections.html

    ax.coastlines()
    ax.add_feature(cfeature.OCEAN)
    ax.add_feature(cfeature.BORDERS)
    ax.add_feature(cfeature.LAND)
    ax.set_title(f"category_id_{j}: {subset['name'].unique()[0]}")

    # Walk the sub-frame's own rows instead of looking each index label up in inst_data again.
    coordinates = subset[["longitude", "latitude", "to_longitude", "to_latitude"]]
    for row in coordinates.itertuples(index=False):
        ax.scatter(row.longitude, row.latitude, transform=ccrs.Geodetic(), color="blue", marker="o")
        ax.plot([row.longitude, row.to_longitude], [row.latitude, row.to_latitude], transform=ccrs.Geodetic())
        # Ideally the end marker would be a real arrow head, e.g. a cartopy adaptation of
        # https://stackoverflow.com/questions/45512429/python-basemap-drawgreatcircle-with-arrow-end-cap?rq=1
        ax.scatter(row.to_longitude, row.to_latitude, transform=ccrs.Geodetic(), color="red", marker="^")

    # Render and release each figure before building the next one, so the large
    # 25x20 figures do not pile up in memory while the loop runs.
    plt.show()
    plt.close(fig)


## Maps by Applicant Primary Field (Names) - using annotation arrow proxy

In [None]:
# Variant with real arrow heads: each row is drawn as a curved annotation arrow from
# (longitude, latitude) to (to_longitude, to_latitude). The curve is matplotlib's
# "arc3" connection style, not a great-circle path.

# FIXME: need to work on colour using colormaps

for j in inst_data.category_id.unique():
    subset = data_subsets[j]

    fig = plt.figure(figsize=(25, 20))
    ax = fig.add_subplot(projection=ccrs.Mercator())  # Projection list: https://scitools.org.uk/cartopy/docs/latest/crs/projections.html

    ax.coastlines()
    ax.add_feature(cfeature.OCEAN)
    ax.add_feature(cfeature.BORDERS)
    ax.add_feature(cfeature.LAND)
    ax.set_title(f"category_id_{j}: {subset['name'].unique()[0]}")

    # annotate() takes a matplotlib transform, not a cartopy CRS, so the lon/lat
    # coordinate system is converted for this axes once per figure.
    transform = ccrs.PlateCarree()._as_mpl_transform(ax)
    arrow_style = dict(arrowstyle="->", connectionstyle="arc3, rad = -0.8", color="red")

    coordinates = subset[["longitude", "latitude", "to_longitude", "to_latitude"]]
    for row in coordinates.itertuples(index=False):
        ax.annotate(
            " ",
            xy=(row.to_longitude, row.to_latitude),
            xytext=(row.longitude, row.latitude),
            arrowprops=arrow_style,
            xycoords=transform,
            ha="right",
            va="top",
            annotation_clip=False,
        )

    # Render and release each figure before building the next one, so the large
    # 25x20 figures do not pile up in memory while the loop runs.
    plt.show()
    plt.close(fig)


## Maps by Recruiter Type and Applicant Primary Field 

In [None]:
# Example slice: rows of category 1 whose recruiter_type is 1.
category_one = data_subsets[1]
category_one[category_one.recruiter_type == 1]

In [None]:
# One world map per (applicant primary field, recruiter type) pair. Every matching row is
# drawn as a blue circle at (longitude, latitude), a red triangle at
# (to_longitude, to_latitude), and a line joining the two.
for j in inst_data.category_id.unique():
    category_rows = data_subsets[j]
    for k in category_rows.recruiter_type.unique():
        # Filter once and reuse the result for both the title and the plotted rows.
        recruiter_rows = category_rows[category_rows.recruiter_type == k]

        fig = plt.figure(figsize=(25, 20))
        ax = fig.add_subplot(projection=ccrs.PlateCarree())  # Projection list: https://scitools.org.uk/cartopy/docs/latest/crs/projections.html

        ax.coastlines()
        ax.add_feature(cfeature.OCEAN)
        ax.add_feature(cfeature.BORDERS)
        ax.add_feature(cfeature.LAND)
        ax.set_title(
            f"category_id_{j}: {category_rows['name'].unique()[0]}"
            f"; recruiter_type_{k}: {recruiter_rows['description'].unique()[0]}"
        )

        coordinates = recruiter_rows[["longitude", "latitude", "to_longitude", "to_latitude"]]
        for row in coordinates.itertuples(index=False):
            ax.scatter(row.longitude, row.latitude, transform=ccrs.Geodetic(), color="blue", marker="o")
            ax.plot([row.longitude, row.to_longitude], [row.latitude, row.to_latitude], transform=ccrs.Geodetic())
            # Ideally the end marker would be a real arrow head, e.g. a cartopy adaptation of
            # https://stackoverflow.com/questions/45512429/python-basemap-drawgreatcircle-with-arrow-end-cap?rq=1
            ax.scatter(row.to_longitude, row.to_latitude, transform=ccrs.Geodetic(), color="red", marker="^")

        # Render and release each figure before building the next one; this nested loop
        # creates one large figure per category/recruiter pair.
        plt.show()
        plt.close(fig)
     

## Interactive Map - no arrows

In [None]:
import plotly.graph_objects as go

In [None]:
# Build one de-duplicated table of institutions (latitude, longitude, name, rank) by
# stacking the origin columns and the destination columns under shared column names.
from_location_data = inst_data[["latitude", "longitude", "from_institution_name", "rank"]]
to_location_data = inst_data[["to_latitude", "to_longitude", "to_name", "to_rank"]]

location_data_1 = from_location_data.rename(
    columns={"from_institution_name": "institution_name"}
)
location_data_2 = to_location_data.rename(
    columns={
        "to_latitude": "latitude",
        "to_longitude": "longitude",
        "to_name": "institution_name",
        "to_rank": "rank",
    }
)

location_data = (
    pd.concat([location_data_1, location_data_2], ignore_index=True)
    .loc[:, ["latitude", "longitude", "institution_name", "rank"]]
    .drop_duplicates(ignore_index=True)
)

In [None]:
location_data

In [None]:
# Start an empty plotly figure; the following cells add traces to it and set its layout.
# Note: this rebinds `fig`, which the map loops above used for matplotlib figures.
fig = go.Figure()

In [None]:
# Plot every institution as a small marker. Scattergeo's `text` takes one string per
# point, so name and rank are combined into a single label per row instead of passing
# a two-column DataFrame.
hover_labels = (
    location_data["institution_name"].astype(str)
    + " (rank: "
    + location_data["rank"].astype(str)
    + ")"
)
fig.add_trace(
    go.Scattergeo(
        lon=location_data["longitude"],
        lat=location_data["latitude"],
        hoverinfo="text",
        text=hover_labels,
        mode="markers",
        marker=dict(
            size=2,
            color="rgb(255, 0, 0)",
            line=dict(width=3, color="rgba(68, 68, 68, 0)"),
        ),
    )
)

In [None]:
# Draw every origin -> destination connection as a red line.
# All segments share the same style, so they go into ONE trace with a None entry after
# each pair to break the line, instead of one trace per row (which makes the figure
# slow to build and render).
line_lons = []
line_lats = []
for lon_from, lon_to, lat_from, lat_to in zip(
    inst_data["longitude"],
    inst_data["to_longitude"],
    inst_data["latitude"],
    inst_data["to_latitude"],
):
    line_lons.extend([lon_from, lon_to, None])
    line_lats.extend([lat_from, lat_to, None])

# The trailing semicolon keeps this cell from rendering the figure, as before.
fig.add_trace(
    go.Scattergeo(
        lon=line_lons,
        lat=line_lats,
        mode="lines",
        line=dict(width=1, color="red"),
    )
);

# Idea (not implemented): per-line opacity scaled by a count column, e.g.
# opacity = float(df["count"][i]) / float(df["count"].max())
# That would need one trace per line again, since opacity is set per trace.

In [None]:
# Title, legend and base-map styling for the interactive figure.
geo_options = dict(
    projection_type="equirectangular",
    showland=True,
    landcolor="rgb(243, 243, 243)",
    countrycolor="rgb(204, 204, 204)",
)
fig.update_layout(title_text="Interactive Map", showlegend=False, geo=geo_options)

# To add markers, see https://plotly.com/python/marker-style/

## Ranking Scatterplot

In [None]:
import seaborn as sns

In [None]:
# Keep only the rows of `data` where both `rank` and `to_rank` are present.
rank_columns = ["rank", "to_rank"]
data_clean_rank = data.dropna(subset=rank_columns)

In [None]:
# Scatter `to_rank` against `rank` with seaborn's fitted regression line, then recolour
# the first line on the axes red.
fig, ax = plt.subplots(figsize=(10, 10))
rank_scat = sns.regplot(data=data_clean_rank, x="rank", y="to_rank", ax=ax)
regression_line = rank_scat.get_lines()[0]
regression_line.set_color("red")