In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [6]:
# Load the preprocessed table and derive the two string columns used below.
df = pd.read_csv("data/preprocessed.csv")
# Keep the first 10 characters of "d" (presumably the YYYY-MM-DD part of a
# timestamp -- confirm against the CSV). The vectorized .str accessor
# propagates missing values instead of raising on them.
df["d"] = df["d"].str[:10]
# First 4 characters of the truncated value (presumably the year).
df["y"] = df["d"].str[:4]

In [7]:
# Speakers whose accumulated quotation share is plotted.
speakers = ['craig wright',
            'jamie dimon',
            'vitalik buterin',
            'tyler winklevoss',
            'mark karpeles',
            'andreas antonopoulos']

# d2o maps each date to the running total of "o" over ALL rows up to and
# including that date. It is computed before df is filtered so that it can
# serve as the denominator of a speaker's share.
# Rows sharing a date are summed first: building the dict straight from the
# rows would keep only the last row of every date and understate the total.
daily_o = df.groupby("d")["o"].sum().sort_index()
d2o = daily_o.cumsum().to_dict()
# Grand total of "o"; kept because the previous version of this cell left
# `total` defined with the final running value.
total = daily_o.sum()

# Keep only the rows attributed to one of the selected speakers.
df = df[df["s"].isin(speakers)]

# Per-speaker lists of "o" values and of dates, both in the row order of df.
speaker2o = df.groupby("s")["o"].apply(list).to_dict()
speaker2d = df.groupby("s")["d"].apply(list).to_dict()

# For every selected speaker: (dates in ascending order, running sum of "o"
# divided element-wise by the value d2o holds for each of those dates).
speaker2pairs = {}
for name, counts in speaker2o.items():
    if name not in speakers:
        continue
    # Sort (date, o) pairs by date only; the sort is stable, so rows sharing
    # a date keep their original relative order.
    by_date = sorted(zip(speaker2d[name], counts), key=lambda pair: pair[0])
    dates, values = zip(*by_date)
    denominators = np.array([d2o[day] for day in dates])
    speaker2pairs[name] = (dates, np.cumsum(values) / denominators)

# One two-column frame ("date", "o") per speaker, built from the
# (dates, values) pairs computed above.
df_ = {
    speaker: pd.DataFrame(pair).T.rename(columns={0: "date", 1: "o"})
    for speaker, pair in speaker2pairs.items()
}

# One line per speaker.
fig = go.Figure()
for speaker, frame in df_.items():
    trace = go.Scatter(x=frame["date"], y=frame["o"], name=speaker)
    fig.add_trace(trace)

# Axis labels and title.
fig.update_xaxes(title="Date")
fig.update_yaxes(title="Accumulated quotation share")
fig.update_layout(title="Accumulated quotation share by selected speakers")

# Export the interactive figure as HTML, then render it inline.
fig.write_html('docs/_includes/accumulated_quotation_share_by_speaker.html')

fig.show()

In [8]:
# Reload the full table: an earlier cell rebinds `df` to a speaker-filtered
# subset, and the occupation analysis below needs every row.
df = pd.read_csv("data/preprocessed.csv")
# Keep the first 10 characters of "d" (presumably the YYYY-MM-DD part of a
# timestamp -- confirm against the CSV). The vectorized .str accessor
# propagates missing values instead of raising on them.
df["d"] = df["d"].str[:10]
# First 4 characters of the truncated value (presumably the year).
df["y"] = df["d"].str[:4]

In [9]:
# Per-occupation lists of "o" values and of dates, both in the row order of df.
by_occupation = df.groupby("occupation")
occ2o = by_occupation["o"].apply(list).to_dict()
occ2d = by_occupation["d"].apply(list).to_dict()

# Column-oriented dict of the two columns: {"d": {row: date}, "o": {row: o}}.
d2o = df[["d", "o"]].to_dict()

In [10]:
# Collapse the column-oriented dict ({"d": {row: date}, "o": {row: o}}) into
# a date -> total "o" mapping. Rows sharing a date are summed; a plain dict
# comprehension would silently keep only the last row of each date.
date_by_row, o_by_row = d2o["d"], d2o["o"]
per_date = {}
for row in o_by_row:
    day = date_by_row[row]
    per_date[day] = per_date.get(day, 0) + o_by_row[row]
d2o = per_date

In [11]:
# Turn the per-date values of d2o into a running total over ascending dates.
# `total` ends up holding the final running value (0 if d2o is empty).
total = 0
cumulative = {}
for day in sorted(d2o):
    total = d2o[day] + total
    cumulative[day] = total
d2o = cumulative

In [12]:
# Total "o" per occupation.
occ2total = {}
for occ, counts in occ2o.items():
    occ2total[occ] = sum(counts)

# Occupation names ordered by ascending total (stable for ties); the last
# five are the ones with the largest totals.
ranked = sorted(occ2total, key=occ2total.get)
best_occs = ranked[-5:]

In [13]:
# For each occupation in best_occs: (dates in ascending order, running sum
# of "o"). all_dates collects every date seen, duplicates included.
occ2pairs = {}
all_dates = []
for occ, counts in occ2o.items():
    if occ not in best_occs:
        continue
    # Sort (date, o) pairs by date only; the sort is stable, so rows sharing
    # a date keep their original relative order.
    by_date = sorted(zip(occ2d[occ], counts), key=lambda pair: pair[0])
    dates, values = zip(*by_date)
    occ2pairs[occ] = (dates, np.cumsum(values))
    all_dates.extend(dates)

In [14]:
# Plot the running sum of "o" over time, one line per occupation in occ2pairs.
fig = go.Figure()
# NOTE(review): this rebinds `df` from the loaded DataFrame to a dict of
# per-occupation frames, so earlier cells cannot be re-run without reloading
# the CSV. The name is kept in case later cells rely on it.
df = {}
for occ, (dates, cum_o) in occ2pairs.items():
    # Build the frame column-wise so "o" keeps its numeric dtype (transposing
    # a two-row frame would make both columns object dtype).
    df[occ] = pd.DataFrame({"date": list(dates), "o": cum_o})
    fig.add_trace(go.Scatter(x=df[occ]["date"], y=df[occ]["o"], name=occ))

# Label the figure so it can be read on its own, matching the speaker plot.
fig.update_layout(title="Accumulated quotations by top occupations")
fig.update_xaxes(title="Date")
fig.update_yaxes(title="Accumulated quotations")

fig.show()