In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Load the three raw inputs (news article counts, Google Trends values and
# Google Analytics traffic) from the shared data directory, one frame each.
news_df, trends_df, traffic_df = map(
    pd.read_csv,
    (
        '../data/google_news_weekly_timeseries.csv',
        '../data/google_trends_data.csv',
        '../data/google_analytics_traffic.csv',
    ),
)

In [3]:
# Parse the timestamp column of the news and trends frames into real
# datetimes so they can be plotted on a time axis and used as join keys.
for frame, column in ((news_df, 'datetime'), (trends_df, 'week')):
    frame[column] = pd.to_datetime(frame[column])

In [4]:
# Parse the traffic dates into datetimes; the weekly resampling that follows
# needs a datetime-typed column to group on.
traffic_df['date'] = traffic_df['date'].pipe(pd.to_datetime)

In [5]:
# Roll the traffic data up to weekly user totals so it can be compared with
# the news and trends series.
#
# - `pd.Grouper(key='date', ...)` groups directly on the column, so there is
#   no need to move 'date' into the index first and back out afterwards.
# - The uppercase 'W' alias is the canonical weekly frequency; the lowercase
#   'w' spelling is flagged as deprecated by newer pandas releases.
#
# The result has exactly two columns: 'date' (the weekly bin label) and
# 'users' (the sum of users within that bin).
traffic_df = (
    traffic_df
    .groupby(pd.Grouper(key='date', freq='W'))['users']
    .sum()
    .reset_index()
)

In [6]:
# Weekly site traffic on its own, as a quick look at the series before it is
# combined with the others. The title and axis labels let the figure stand
# alone when the notebook is skimmed; the title wording matches the
# corresponding axis of the combined chart further down.
fig = go.Figure(
    go.Scatter(
        x=traffic_df['date'],
        y=traffic_df['users'],
    )
)

fig.update_layout(
    title_text="Weekly Site Visits",
    xaxis_title="Week",
    yaxis_title="Users",
)

fig.show()

In [7]:
# Google Trends values on their own, as a quick look at the series before it
# is combined with the others. The title and axis labels let the figure stand
# alone when the notebook is skimmed; the title wording matches the
# corresponding axis of the combined chart further down.
fig = go.Figure(
    go.Scatter(
        x=trends_df['week'],
        y=trends_df['trend_value'],
    )
)

fig.update_layout(
    title_text="Weekly Google Trend Rank",
    xaxis_title="Week",
    yaxis_title="Trend value",
)

fig.show()

In [8]:
# News article counts on their own, as a quick look at the series before it
# is combined with the others. The title and axis labels let the figure stand
# alone when the notebook is skimmed; the title wording matches the
# corresponding axis of the combined chart further down.
fig = go.Figure(
    go.Scatter(
        x=news_df['datetime'],
        y=news_df['article_count'],
    )
)

fig.update_layout(
    title_text="Weekly Article Count",
    xaxis_title="Week",
    yaxis_title="Articles",
)

fig.show()

In [9]:
# Combine the three series into one frame keyed on the news timestamp.
# Both joins are inner joins, so only timestamps present in all three
# sources survive; the right-hand key column is dropped after each join
# because it duplicates 'datetime'.
merge_df_1 = news_df.merge(
    traffic_df,
    how='inner',
    left_on='datetime',
    right_on='date',
).drop(columns=['date'])

merged_df = pd.merge(
    left=merge_df_1,
    right=trends_df,
    how='inner',
    left_on='datetime',
    right_on='week',
).drop(columns='week')

# Make sure the join key is datetime-typed in the combined frame.
merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])

# Preview the first rows of the combined frame.
merged_df.head()

Unnamed: 0,datetime,article_count,users,trend_value
0,2020-03-29,1,661,0
1,2020-04-05,0,264,0
2,2020-04-12,0,349,0
3,2020-04-19,1,356,0
4,2020-04-26,0,327,0


In [10]:
# Persist the combined weekly frame next to the raw inputs; the positional
# index is left out of the file.
merged_df.to_csv(
    path_or_buf='../data/combined_weekly_data.csv',
    index=False,
)

In [11]:
# Overlay the three weekly series on a shared time axis. Each series gets
# its own y-axis because the three columns are different quantities and
# would not be comparable on a single scale.
fig = go.Figure()

# Article counts use the primary (left) y-axis.
fig.add_trace(
    go.Scatter(
        x=merged_df['datetime'],
        y=merged_df['article_count'],
        name='articles',
    )
)

# Site visits use a second y-axis drawn on the right edge of the plot area.
fig.add_trace(
    go.Scatter(
        x=merged_df['datetime'],
        y=merged_df['users'],
        name='site_visits',
        yaxis="y2",
    )
)

# Google Trends values use a third, free-floating y-axis further right.
fig.add_trace(
    go.Scatter(
        x=merged_df['datetime'],
        y=merged_df['trend_value'],
        name='google_trends',
        yaxis="y3",
    )
)

# Axis titles use the nested `title=dict(text=...)` form. The legacy
# `titlefont` axis attribute is not accepted by newer plotly releases, and
# the font dicts were empty anyway, so they are omitted.
fig.update_layout(
    # Shrink the x-axis so there is room on the right for the extra y-axes.
    xaxis=dict(domain=[0, 0.85]),
    yaxis=dict(
        title=dict(text="Weekly Article Count"),
    ),
    yaxis2=dict(
        title=dict(text="Weekly Site Visits"),
        anchor="x",
        overlaying="y",
        side="right",
    ),
    yaxis3=dict(
        title=dict(text="Weekly Google Trend Rank"),
        # "free" + position places this axis at a fixed fraction of the
        # figure width, inside the space freed up by the x-axis domain.
        anchor="free",
        overlaying="y",
        side="right",
        position=0.95,
    ),
)

# Fixed canvas size and margins so the exported figure is laid out the same
# regardless of the viewer's window size.
fig.update_layout(
    title_text="Trends",
    autosize=False,
    margin=dict(l=50, r=50, t=100, b=100, pad=4),
    width=1000,
    height=500,
)

fig.show()

In [12]:
# Export the most recently built figure as a standalone HTML file.
fig.write_html(
    file='../figs/weekly_timeseries_comparison.html',
)