In [None]:
import pandas as pd
import numpy as np
import altair as alt

In [None]:
from geopy.distance import distance as geopy_distance
# Cache of already computed distances, keyed by the ordered pair of coordinates.
memo_distances = {}


def distance(c1, c2):
    """
    Return the distance in kilometres between two (lat, long) coordinates.

    Results are memoised in ``memo_distances``. A freshly computed value is
    stored under both ``(c1, c2)`` and ``(c2, c1)``, so the reverse lookup is
    served from the cache as well. Both coordinates must be hashable
    (e.g. tuples) because they are used as dictionary keys.
    """
    try:
        return memo_distances[(c1, c2)]
    except KeyError:
        pass  # cache miss: compute below

    km = geopy_distance(c1, c2).km
    memo_distances[(c1, c2)] = km
    memo_distances[(c2, c1)] = km
    return km

def get_cost(a_lat, a_long, b_lat, b_long):
    """
    Compute the carbon cost, in kg, of flying between points a and b.

    The distance (km) between ``(a_lat, a_long)`` and ``(b_lat, b_long)`` is
    multiplied by a distance-dependent emission factor, a radiative factor
    index, and a final factor of 2.

    NOTE(review): the factor of 2 presumably accounts for a round trip, and
    the emission factors look like kg CO2 per passenger-km -- confirm against
    the data source these constants were taken from.
    """
    radiative_factor_index = 1.891
    # (exclusive upper bound on the distance in km, emission factor) pairs,
    # checked in ascending order; anything longer uses the long-haul factor.
    distance_bands = ((785, 0.14735), (3700, 0.08728))
    long_haul_factor = 0.077610

    dist = distance((a_lat, a_long), (b_lat, b_long))

    emission_factor = long_haul_factor
    for upper_km, band_factor in distance_bands:
        if dist < upper_km:
            emission_factor = band_factor
            break

    return dist * emission_factor * radiative_factor_index * 2


In [None]:
# Sanity check of get_cost on one long-distance pair of coordinates.
# NOTE(review): the coordinates look like Philadelphia and Sydney -- confirm.
get_cost(39.95233, -75.16379, -33.86785, 151.20732)

In [None]:
# Continent codes used by both tables, stored as a shared categorical dtype.
continents_cat = pd.CategoricalDtype(categories=["NA", "EU", "OC", "AS", "AF", "SA"])

# na_filter=False stops pandas from parsing strings such as "NA" as missing
# values; "NA" is one of the continent codes above.
# Participants are keyed by (participant, conference, year).
ps = (
    pd.read_csv("../output/test/raw_participants.csv", na_filter=False)
    .set_index(["participant", "conference", "year"])
    .astype({"continent": continents_cat})
)

# Conferences are keyed by (conference, year).
cs = (
    pd.read_csv("../output/test/raw_confs.csv", na_filter=False)
    .set_index(["conference", "year"])
    .astype({"continent": continents_cat})
)

In [None]:
# One row per (participant, conference, year): the participant's columns
# (suffix "_part") next to the columns of the conference attended ("_conf").
# NOTE: `all` shadows the Python builtin of the same name; the name is kept
# because the following cells refer to it.
all = ps.join(cs, on=["conference", "year"], lsuffix="_part", rsuffix="_conf")

# DataFrame.drop_duplicates() compares column values only and ignores the
# index. Called directly, it would merge *different* participants that share
# the same location columns at the same conference. Moving the key into the
# columns first means only truly repeated records are dropped.
all = (
    all.reset_index()
    .drop_duplicates()
    .set_index(["participant", "conference", "year"])
    .sort_index()
)

# otypes is given explicitly so that an empty table yields an empty float
# column instead of np.vectorize raising on size-0 input.
all["cost"] = np.vectorize(get_cost, otypes=[float])(
    all["lat_part"], all["long_part"], all["lat_conf"], all["long_conf"]
)

## Cost per conference

In [None]:
# Move the participant out of the index so rows are keyed by (conference, year).
per_conf = all.reset_index(level="participant")

# Mean travel cost and number of rows for every (conference, year).
cost_per_conference = (
    per_conf
    .groupby(["conference", "year"])["cost"]
    .agg(["mean", "count"])
)

## Locality of participation

In [None]:
# Keep only the two continent columns, indexed by (conference, year) with the
# participant turned into a regular column.
locality = all[["continent_part", "continent_conf"]].reset_index(level="participant")

In [None]:
# For every (conference, year): the fraction of participants coming from each continent.
locality_norm = locality.groupby(by=["conference", "year"]).continent_part.value_counts(normalize=True)
# Pivot the continents into columns; combinations that do not occur become 0.
locality_norm = locality_norm.unstack(fill_value=0)
# Continent of the conference itself, aligned on the (conference, year) index of cs.
locality_norm["continent"] = cs["continent"]
# "local" = share of participants whose continent is the conference's continent
# (row-wise lookup of the column named by that row's "continent" value).
locality_norm["local"] = locality_norm.apply(lambda r: r[r["continent"]], axis=1)

### Graph of the origin of participants for each conference

In [None]:
# Number of participants per continent for every (conference, year),
# returned as a Series named "participants".
loc_per_conf = (
    all[["continent_part"]]
    .groupby(["conference", "year"])["continent_part"]
    .value_counts()
    .rename("participants")
)

In [None]:
def plot_conf(data, conf):
    """
    Display a bar chart of the participants of one conference.

    ``data.loc[conf]`` selects the rows of the conference ``conf``; after
    resetting the index the result must provide the fields ``year``,
    ``participants`` and ``continent_part``. The chart is titled with
    ``conf``, shows the summed participants per year coloured by the
    participants' continent, and is rendered immediately with ``.display()``.
    Nothing is returned.
    """
    conf_rows = data.loc[conf].reset_index()
    encodings = {
        "x": "year",
        "y": "sum(participants)",
        "color": "continent_part",
    }
    alt.Chart(conf_rows, title=conf).mark_bar().encode(**encodings).display()

In [None]:
# Intentionally disabled: uncomment the loop below to render one chart per conference.
#for conf in loc_per_conf.index.droplevel([1,2]).unique():
    #plot_conf(loc_per_conf, conf)

## Cross-participation

In [None]:
# Build a participant x event indicator matrix, where an event is a
# (conference, year) pair.
x = all.reset_index("participant")[["participant"]]
# Collapse the (conference, year) MultiIndex into a single index of tuples.
x.index = x.index.to_flat_index()
# reset_index() puts those tuples into a column called "index".
x = x.reset_index().set_index(["index", "participant"])
# Mark every (event, participant) pair that occurs with 1 ...
x["count"] = 1
# ... and pivot the events into columns; pairs that do not occur become 0.
x = x.unstack(0, fill_value=0)
# Drop the "count" level so the columns are just the event tuples.
x.columns = x.columns.droplevel(0)


import itertools
# For every ordered pair of events (including an event with itself): the
# element-wise maximum of the two indicator columns is 1 for a participant
# present at either event, so its sum is the size of the union of attendees.
yv = {}
for i,j in itertools.product(x.columns.values, x.columns.values):
    yv[(i,j)] = np.maximum(x[i], x[j]).sum()
# One row per pair of events, with the union size in the "count" column.
y = pd.DataFrame(data=yv.values(), index=yv.keys(), columns=["count"])
y.index = y.index.set_names(["left", "right"])


In [None]:
# One row per attendance: the participant as the only column, indexed by a
# flat (conference, year) tuple.
cross = all.reset_index(level="participant").loc[:, ["participant"]]
cross.index = cross.index.to_flat_index()

# Re-key by participant; the event tuple lands in a column named "index".
cross = cross.reset_index().set_index("participant")

# Self-join on the participant: every participant contributes one row for
# each pair of events attended ("index_l", "index_r").
cross = cross.join(cross, lsuffix="_l", rsuffix="_r")
cross

In [None]:
# Number of participants shared by each pair of events, added to y as the
# "participant" column (aligned on y's index).
y["participant"] = (
    cross.reset_index()
    .groupby(["index_l", "index_r"])["participant"]
    .count()
)

# Pairs of events without any common participant get 0 instead of NaN.
cross_count_pair = y.fillna(0)
cross_count_pair

In [None]:
# Turn the (left, right) index into columns and express the shared
# participants as a percentage of the participants attending either event.
cross_count_pair = cross_count_pair.reset_index().assign(
    percentage=lambda pairs: 100. * pairs["participant"].div(pairs["count"], fill_value=0)
)
cross_count_pair

In [None]:
# Heatmap of the cross-participation percentage for every pair of events.
(
    alt.Chart(cross_count_pair)
    .mark_rect()
    .encode(
        x=alt.X("left:O"),
        y=alt.Y("right:O"),
        color=alt.Color("percentage:Q"),
    )
)