In [1]:
import pandas as pd
import numpy as np
import datetime

# Yearly quote dumps, one pickle per year (2015 through 2020).
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
data_pickles = [
    '../data2015.pkl',
    '../data2016.pkl',
    '../data2017.pkl',
    '../data2018.pkl',
    '../data2019.pkl',
    '../data2020.pkl',
]

# Load every yearly frame, then stack them row-wise into one frame.
# The original row labels of each yearly frame are kept as-is.
dfs = list(map(pd.read_pickle, data_pickles))
df = pd.concat(dfs)
df

Unnamed: 0,quoteID,quotation,speaker,date,numOccurrences
5265,2015-08-19-056730,It's very important to understand how the ocea...,Helen McGregor,2015-08-19 11:46:00,2
5891,2015-09-28-067347,man-made emissions are an insignificant part o...,John Abraham,2015-09-28 23:16:52,1
7868,2015-09-29-052783,In the final months before crucial climate tal...,Bill de Blasio,2015-09-29 16:10:01,1
9207,2015-10-29-104475,The Baker Polito Administration is fully devot...,Peter Lorenz,2015-10-29 10:49:41,3
9709,2015-05-07-042445,The general narrative is addressing climate ch...,Dallas Burtraw,2015-05-07 17:55:43,2
...,...,...,...,...,...
5240963,2020-02-06-089077,The expected increase in severe weather due to...,Deb Gardner,2020-02-06 22:38:10,1
5241586,2020-02-05-114487,We will be an energetic champion of free trade...,Dominic Raab,2020-02-05 16:30:31,2
5243208,2020-02-10-057505,"Obama got health care, Trump got his tax cut, ...",Tom Steyer,2020-02-10 11:00:08,1
5243271,2020-02-19-061285,Our colleagues will also continue to work toge...,Markus Dohle,2020-02-19 08:37:21,1


In [2]:
# Weekly bins on the `date` column, anchored so that every bin ends on a Monday.
weekly_bins = pd.Grouper(key='date', freq='W-MON')

# `.count()` yields, per week and per column, the number of non-null rows, so
# `val` is the number of quotation rows in that week.  Using `.sum()` on
# `numOccurrences` instead would weight each quote by its occurrence count
# (presumably how many times it was reported — confirm against the dataset docs).
df_time = (
    df.groupby(weekly_bins, as_index=True)
      .count()
      .rename(columns={'numOccurrences': 'val'})
)


In [3]:
# Load the hand-curated event list (semicolon-separated; dates are day/month/year).
ev_raw = pd.read_csv("./events.csv", delimiter=';')
event_dates = pd.to_datetime(ev_raw['Date'], format="%d/%m/%Y")

# `date_ann` is the Monday that closes the event's week: a 'W-MON' period ends on
# Monday, and normalizing its end timestamp gives that Monday at midnight.  This
# uses the same 'W-MON' anchor as the weekly bins of the quote time series, so an
# event can be looked up there by `date_ann`.
# Vectorized `.dt` accessors replace the former row-wise `.apply` through Python
# `date` objects; a missing date now simply propagates as NaT.
ev = ev_raw.assign(
    Date=event_dates,
    date_ann=event_dates.dt.to_period('W-MON').dt.end_time.dt.normalize(),
)

ev


Unnamed: 0,Name,Date,Type,Comment,Unnamed: 4,Unnamed: 5,Unnamed: 6,date_ann
0,COP 21,2015-12-07,1,,,,,2015-12-07
1,COP 22,2016-11-07,1,,,,,2016-11-07
2,COP 23,2017-11-12,1,,,,,2017-11-13
3,COP 24,2018-12-02,1,,,,,2018-12-03
4,COP 25,2019-12-09,1,,,,,2019-12-09
5,G20 2016,2016-09-05,1,,,,,2016-09-05
6,G20 2017,2017-07-07,1,,,,,2017-07-10
7,G20 2018,2018-11-30,1,,,,,2018-12-03
8,G20 2019,2019-06-28,1,,,,,2019-07-01
9,Primary elections 2016,2016-05-30,2,Février à Juin 2016,24/04/2016 -,,,2016-05-30


In [43]:
def add_annotation(fig, df_time, date, text, bgcolor):
    """Attach an arrow annotation to ``fig`` at the series value for ``date``.

    Args:
        fig: Figure-like object exposing ``add_annotation(**kwargs)``.
        df_time: Frame indexed by date with a ``val`` column; the annotation's
            y position is ``df_time.loc[date].val``.
        date: Index label in ``df_time``; also used as the x position.
        text: Passed as the annotation's ``hovertext``.
        bgcolor: Background colour of the annotation box.

    Raises:
        KeyError: If ``date`` is not a label of ``df_time``'s index.
    """
    label_font = dict(
        family="Courier New, monospace",
        size=12,
        color="#ffffff",
    )
    # Anchor the arrow on the series value at the given date.
    anchor_y = df_time.loc[date].val

    fig.add_annotation(
        x=date,
        y=anchor_y,
        hovertext=text,
        showarrow=True,
        align="center",
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        borderwidth=4,
        borderpad=4,
        opacity=0.8,
        bgcolor=bgcolor,
        font=label_font,
    )

In [70]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()

# Marker colours, one per event category.
c_cop = "#2f669c"
c_pol = "#9c342f"
c_cli = "#2f9c3d"

# `colors` and `topics` are parallel: position k describes events whose `Type` is k + 1.
colors = [c_cop, c_pol, c_cli]
topics = ["Governmental Events", "Politics", "Climate Catastrophe"]

# Base layer: the weekly series itself (hidden from the legend).
fig.add_trace(go.Scatter(x=df_time.index, y=df_time.val, name="", showlegend=False))

# One marker trace per event category, placed on the series at each event's week.
for type_code, (color, topic) in enumerate(zip(colors, topics), start=1):
    ev_s = ev.loc[ev.Type == type_code]
    # `reindex` instead of `.loc`: an event whose week is absent from `df_time`
    # (e.g. outside the covered date range) gets a NaN y value and is simply not
    # drawn, rather than aborting the whole cell with a KeyError.
    marker_y = df_time['val'].reindex(ev_s['date_ann'])
    fig.add_trace(go.Scatter(
        x=ev_s['date_ann'],
        y=marker_y,
        mode="markers",
        marker=dict(size=10, color=color),
        name=topic,
        showlegend=True,
        hoverinfo="text",
        text=ev_s['Name'],
    ))

# Arrow annotations are intentionally disabled; the markers above already carry
# the event names as hover text.  To re-enable them:
# for _, event in ev.iterrows():
#     add_annotation(fig, df_time, event['date_ann'], event['Name'], colors[event['Type'] - 1])

# Range slider below the plot plus quick-zoom buttons above it.
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=[
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all"),
        ]
    ),
)
fig.update_layout(
    width=800,
    height=600,
    xaxis_title="Week (ending Monday)",
    yaxis_title="Quotations per week",
)
fig.show()

In [45]:
# Write the current `fig` to an HTML file next to the notebook.
# NOTE(review): this cell's execution count (45) is lower than that of the cell
# building the figure (70), so the file on disk may predate the figure shown
# above — re-run this cell after rebuilding `fig`.
fig.write_html('./time_series.html')