In [None]:
import pandas as pd
import numpy as np
import altair as alt

In [None]:
from geopy.distance import distance as geopy_distance
# Cache of already-computed distances, keyed by (coordinate, coordinate) pairs.
memo_distances = {}
def distance(c1, c2):
    """
    Returns the geopy distance between two latlong coordinates, in km.

    Results are memoised in ``memo_distances``. A freshly computed value is
    stored under both ``(c1, c2)`` and ``(c2, c1)`` so that the reverse
    lookup is a cache hit as well. Both arguments are used as dictionary
    keys and therefore have to be hashable (e.g. tuples).
    """
    try:
        return memo_distances[c1, c2]
    except KeyError:
        # Not cached yet: fall through and compute it below.
        pass
    km = geopy_distance(c1, c2).km
    memo_distances[c1, c2] = km
    memo_distances[c2, c1] = km
    return km

def get_cost(a_lat, a_long, b_lat, b_long):
    """
    Computes the carbon cost, in kg, of flying between points a and b.

    The distance in km between the two latitude/longitude pairs is scaled by
    a distance-dependent emission factor, by a radiative factor index and by
    a final factor of 2 (presumably to account for the return trip -- TODO
    confirm).
    """
    km = distance((a_lat, a_long), (b_lat, b_long))
    # (exclusive upper bound in km, emission factor) for the shorter bands;
    # any distance at or beyond the last bound uses the long-distance factor.
    bands = ((785, 0.14735), (3700, 0.08728))
    per_km = next((factor for limit, factor in bands if km < limit), 0.077610)
    rfi = 1.891
    return km * per_km * rfi * 2


In [None]:
# Sanity check of get_cost on a single pair of coordinates.
get_cost(a_lat=39.95233, a_long=-75.16379, b_lat=-33.86785, b_long=151.20732)

In [None]:
# Continent codes shared by both tables. "NA" is a real code here, which is
# presumably why the CSVs are read with na_filter=False: pandas would
# otherwise parse the string "NA" as a missing value.
continents_cat = pd.CategoricalDtype(categories=["NA", "EU", "OC", "AS", "AF", "SA"])

# Participants, indexed by (participant, conference, year).
ps = (
    pd.read_csv("../output/test_real/raw_participants.csv", na_filter=False)
    .set_index(["participant", "conference", "year"])
    .astype({"continent": continents_cat})
)

# Conferences, indexed by (conference, year).
cs = (
    pd.read_csv("../output/test_real/raw_confs.csv", na_filter=False)
    .set_index(["conference", "year"])
    .astype({"continent": continents_cat})
)

In [None]:
# Attach each conference's attributes to every participation row. Columns
# present in both tables are disambiguated with the "_part" / "_conf"
# suffixes (e.g. lat_part / lat_conf).
# NOTE: `all` shadows the Python builtin of the same name; the name is kept
# because the cells below refer to it.
all = ps.join(cs, on=["conference", "year"], lsuffix="_part", rsuffix="_conf")

# Declaring otypes lets np.vectorize skip its type-probing call on the first
# row and keeps this cell working when the joined frame is empty (without
# otypes, np.vectorize raises on size-0 inputs).
all["cost"] = np.vectorize(get_cost, otypes=[float])(
    all["lat_part"], all["long_part"], all["lat_conf"], all["long_conf"]
)

## Cost per conference

In [None]:
# Move "participant" out of the index so rows are keyed by (conference, year).
per_conf = all.reset_index(level="participant")

# Mean cost and number of non-null cost values for each (conference, year).
cost_per_conference = (
    per_conf
    .groupby(["conference", "year"])["cost"]
    .agg(["mean", "count"])
)

In [None]:
# Display the joined participant/conference table for inspection.
all

## Locality of participation

In [None]:
# Keep only the two continent columns; "participant" moves from the index
# into a regular column, leaving (conference, year) as the index.
locality = all[["continent_part", "continent_conf"]].reset_index(level="participant")

In [None]:
# Share of participants coming from each continent, per (conference, year):
# one row per conference edition, one column per participant continent.
locality_norm = (
    locality
    .groupby(["conference", "year"])["continent_part"]
    .value_counts(normalize=True)
    .unstack(fill_value=0)
)
# Continent hosting each conference edition, aligned on the (conference, year) index.
locality_norm["continent"] = cs["continent"]
# "local" = share of participants whose continent matches the host continent.
locality_norm["local"] = locality_norm.apply(lambda row: row[row["continent"]], axis=1)
locality_norm

### Graph of the origin of participants for each conference

In [None]:
# Number of participants from each continent for every (conference, year),
# as a Series named "participants".
loc_per_conf = (
    all[["continent_part"]]
    .groupby(["conference", "year"])["continent_part"]
    .value_counts()
    .rename("participants")
)
loc_per_conf

In [None]:
def plot_conf(data, conf):
    """
    Display a bar chart of participant counts per year for one conference.

    Bars are coloured by the participants' continent of origin.

    Parameters
    ----------
    data : pandas.Series
        Participant counts named "participants". Its first index level holds
        the conference; the remaining levels are "year" and "continent_part".
    conf
        Conference key to select from ``data``; also used as the chart title.

    The chart is rendered with ``.display()``; nothing is returned.
    """
    # Selecting on the first index level leaves (year, continent_part);
    # reset_index turns those levels into columns Altair can encode.
    conf_counts = data.loc[conf].reset_index()
    (
        alt.Chart(conf_counts, title=conf)
        .mark_bar()
        .encode(
            # Years are discrete labels: an ordinal axis gives one bar per
            # year rather than thin bars on a continuous numeric scale.
            x="year:O",
            y="sum(participants):Q",
            color="continent_part:N",
        )
        .display()
    )

In [None]:
# Conference label of every row of loc_per_conf, taken from its index.
loc_per_conf.index.to_frame().loc[:, "conference"]

In [None]:
# One chart per distinct conference, in order of first appearance in the index.
for conf in loc_per_conf.index.get_level_values("conference").unique():
    plot_conf(loc_per_conf, conf)