In [None]:
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Functions

In [None]:
def plot_news_frequency(
        df: pd.DataFrame,
        title: str,
    ) -> plt.Figure:
    """Plot the number of articles per quarter as a bar chart.

    Parameters
    ----------
    df : pd.DataFrame
        One row per article. The index must be datetime-like so that it can
        be resampled; the columns are not read. The frame is left unmodified.
    title : str
        Title shown above the axes.

    Returns
    -------
    plt.Figure
        The newly created figure (10x5 inches, 200 dpi) holding the chart.

    Notes
    -----
    Sets ``plt.rcParams['font.family']`` to ``'Arial'``. This is a global
    matplotlib setting and stays in effect after the function returns.
    """
    # Configure the font *before* creating the figure: text objects pick up
    # the font family when they are created, so setting it afterwards would
    # leave the first figure with a different font than later ones.
    plt.rcParams['font.family'] = 'Arial'

    fig, ax = plt.subplots(figsize=(10, 5), dpi=200)

    # Count rows per quarter directly instead of adding a helper 'count'
    # column, which would silently modify the caller's DataFrame.
    # NOTE(review): pandas >= 2.2 prefers the alias 'QE' over 'Q'; switch once
    # the minimum supported pandas version allows it.
    frequency = df.resample('Q').size()

    # Bars are centred on the resampled bin labels; width is in x-axis data
    # units (days for matplotlib dates).
    ax.bar(frequency.index, frequency.values, width=75, color='firebrick')

    ax.set_title(title, fontsize=13)
    ax.set_xlabel('Date', fontsize=13)
    ax.set_ylabel('Number of articles per quarter', fontsize=13)

    # Only view years on x-axis
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    # Rotate via the axes object rather than the pyplot "current axes" state
    # so the setting also applies to tick labels created later at draw time.
    ax.tick_params(axis='x', labelrotation=45)

    return fig


### News frequency

In [None]:
# Locations used by this cell. DATA_DIR is machine-specific; change it here
# (in one place) when running the notebook elsewhere.
DATA_DIR = Path(r'C:\Users\joneh\master_thesis\data\news')
IMAGE_DIR = Path('images')

# The Guardian articles: index by publication timestamp with any timezone
# information dropped, so the index is tz-naive.
df = pd.read_csv(DATA_DIR / 'TG_CrudeANDOil.csv')
df.index = pd.to_datetime(df['datetime']).dt.tz_localize(None)

# The New York Times articles: same treatment, timestamp column is 'pub_date'.
df_NYT = pd.read_csv(DATA_DIR / 'NYT_CrudeANDOil.csv')
df_NYT.index = pd.to_datetime(df_NYT['pub_date']).dt.tz_localize(None)

fig_TG = plot_news_frequency(df, 'The Guardian news data frequency')
fig_NYT = plot_news_frequency(df_NYT, 'The New York Times news data frequency')

# save plot as png
# Create the output folder first; savefig does not create missing directories
# and would otherwise fail on a fresh checkout.
IMAGE_DIR.mkdir(parents=True, exist_ok=True)
fig_TG.savefig(IMAGE_DIR / 'TG_news_freq.png', bbox_inches='tight')
fig_NYT.savefig(IMAGE_DIR / 'NYT_news_freq.png', bbox_inches='tight')