In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.font_manager as fm
import matplotlib.ticker as mtick
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib as mpl
# Select the PDF backend; the figures built below are written to disk with savefig.
mpl.use('pdf')

# Outlets covered by the analysis, each paired with the abbreviation that is
# later used as its x-tick label. The order of this table determines the order
# of both `media` and `shorts`, which are consumed as parallel lists.
_outlets = [
    ('junge Welt', 'JW'),
    ('NachDenkSeiten', 'NDS'),
    ('taz', 'taz'),
    ('Süddeutsche Zeitung', 'SZ'),
    ('stern TV', 'St'),
    ('DER SPIEGEL', 'Sp'),
    ('Der Tagesspiegel', 'TS'),
    ('ARD', 'ARD'),
    ('ZDF', 'ZDF'),
    ('ZDFheute Nachrichten', 'ZDFh'),
    ('Bayerischer Rundfunk', 'BR'),
    ('ntv Nachrichten', 'ntv'),
    ('RTL', 'RTL'),
    ('FOCUS Online', 'FOCUS'),
    ('ZEIT ONLINE', 'ZEIT'),
    ('faz', 'faz'),
    ('WELT', 'WELT'),
    ('BILD', 'BILD'),
    ('NZZ Neue Zürcher Zeitung', 'NZZ'),
    ('Junge Freiheit', 'JF'),
    ('COMPACTTV', 'CTV'),
]
media = [full_name for full_name, _ in _outlets]
shorts = [abbreviation for _, abbreviation in _outlets]

# Font file looked up inside Matplotlib's own data directory.
# NOTE(review): `fprop` is not referenced anywhere in the visible cells (they
# pass `fpath` directly) -- confirm whether it is still needed.
fpath = Path(mpl.get_data_path()) / 'fonts' / 'ttf' / 'cmu_serif_roman.ttf'
fprop = fm.FontProperties(fname=fpath)

# Figure size in inches, shared by every figure saved from this notebook.
# NOTE(review): the golden ratio is ~1.618; 1.614 may be a typo -- confirm.
width = 6.2
height = width / 1.614

# Global Matplotlib defaults: serif text set in cmr10, ASCII minus signs,
# and 12 pt axes titles and tick labels.
font = {'family': 'serif', 'serif': ['cmr10']}
plt.rc('font', family=font['family'], serif=font['serif'])
plt.rcParams['axes.unicode_minus'] = False
plt.rc('axes', titlesize=12)
for _tick_group in ('xtick', 'ytick'):
    plt.rc(_tick_group, labelsize=12)

In [None]:
# Load the combined topic table and make sure 'date' holds real timestamps.
# NOTE(review): read_pickle can execute arbitrary code while loading, so this
# file must come from a trusted source.
topics_pickle = '../data/topics_combined.pkl'
df = pd.read_pickle(topics_pickle)
df = df.assign(date=pd.to_datetime(df['date']))

In [None]:
def _topic_rows(frame, topic_label):
    """Return the rows of ``frame`` whose 'topic' column equals ``topic_label``."""
    return frame[frame['topic'] == topic_label]


def _videos_per_day(frame, topic_label):
    """Count, per 'date', the rows of one topic that have ``minute == 0``.

    Presumably there is exactly one ``minute == 0`` row per video, so this is
    the number of videos published per day -- TODO confirm against the data.
    """
    rows = _topic_rows(frame, topic_label)
    return rows[rows['minute'] == 0].groupby('date').size()


def _rows_in_topic(frame, topic_label):
    """Sum the per-'date' row counts of one topic over all dates."""
    return _topic_rows(frame, topic_label).groupby('date').size().sum()


# Topic identifiers exactly as they appear in the 'topic' column.
_FLUCHT = '7_flüchtlinge_menschen_ukraine_grenze'
_KLIMA = '10_co2_klimaschutz_klima_energien'
_JUSTIZ = '3_polizei_polizisten_täter_beamten'

# Daily series used by the time-series figure.
flucht_over_time = _videos_per_day(df, _FLUCHT)
klima_over_time = _videos_per_day(df, _KLIMA)
justiz_over_time = _videos_per_day(df, _JUSTIZ)

# Total number of rows per topic, regardless of 'minute'.
flucht_minutes = _rows_in_topic(df, _FLUCHT)
klima_minutes = _rows_in_topic(df, _KLIMA)
justiz_minutes = _rows_in_topic(df, _JUSTIZ)

In [None]:
# One panel per topic, stacked vertically on a shared date axis.
# Each entry: (daily series, bar colour, legend label).
panels = [
    (flucht_over_time, 'red', 'Flucht'),
    (klima_over_time, 'green', 'Klima'),
    (justiz_over_time, 'blue', 'Justiz'),
]

fig, axs = plt.subplots(len(panels), 1, sharex=True)
fig.subplots_adjust(left=.09, bottom=.13, right=.98, top=.92)

for panel_ax, (series, colour, _) in zip(axs, panels):
    # Bar width is given in x-axis data units.
    panel_ax.bar(series.index, series.values, color=colour, width=5.0)
    # Pass the on/off flag positionally: the `b=` keyword was renamed to
    # `visible=` in Matplotlib 3.5 and later removed, so `grid(b=True)` fails
    # on current releases, while the positional form works on every version.
    panel_ax.grid(True, which='major', color='k', linestyle='--', alpha=0.5)

# Title on the top panel, y-label on the middle one, x-label on the bottom one.
axs[0].set_title('Veröffentlichungen pro Tag und Themenbereich im Zeitverlauf', fontsize=12, font=fpath)
axs[1].set_ylabel('Anzahl der veröffentlichten Videos pro Tag', fontsize=12, font=fpath)
axs[2].set_xlabel('Zeitpunkt der Veröffentlichung', fontsize=12, font=fpath)

# A single figure-level legend built from colour patches, one per topic.
legend_handles = [mpatches.Patch(color=colour) for _, colour, _ in panels]
labels = [label for _, _, label in panels]
fig.legend(handles=legend_handles, labels=labels, loc="upper left", title='Themenbereich', bbox_to_anchor=(0.084, 0.928))

fig.set_size_inches(width, height)
fig.savefig('../results/themenbereiche_zeitverlauf.pdf')

In [None]:
# Print the per-topic totals: first the summed daily video counts, then the
# per-topic row totals computed alongside them.
video_totals = {
    'Flucht': flucht_over_time.sum(),
    'Klima': klima_over_time.sum(),
    'Justiz': justiz_over_time.sum(),
}
minute_totals = {
    'Flucht': flucht_minutes,
    'Klima': klima_minutes,
    'Justiz': justiz_minutes,
}

count_lines = [f'{name} count: {total} videos' for name, total in video_totals.items()]
print('\n'.join(count_lines) + '\n\n')

minute_lines = [f'{name} minutes: {total}' for name, total in minute_totals.items()]
print('\n'.join(minute_lines) + '\n')

In [None]:
# Display name -> identifier as stored in the 'topic' column.
topics_of_interest = {
    'Flucht':'7_flüchtlinge_menschen_ukraine_grenze',
    'Klima':'10_co2_klimaschutz_klima_energien',
    'Justiz':'3_polizei_polizisten_täter_beamten',
}

# Share of each medium's videos that fall into each topic of interest.
# Only rows with minute == 0 are counted, for the totals and the per-topic
# counts alike (presumably one such row per video -- TODO confirm).
video_rows = df[df['minute'] == 0]

# Videos per medium, in the order of `media`; media absent from the data get 0.
videos_per_medium = video_rows.groupby('medium').size().reindex(media, fill_value=0)
# A medium without any videos has no defined share: use NaN as its denominator
# so the result is NaN instead of raising ZeroDivisionError.
share_denominator = videos_per_medium.where(videos_per_medium > 0)

topic_shares = {}
for topic, topic_string in topics_of_interest.items():
    topic_videos = (
        video_rows[video_rows['topic'] == topic_string]
        .groupby('medium')
        .size()
        .reindex(media, fill_value=0)
    )
    topic_shares[topic] = topic_videos / share_denominator

# Rows are the topics, columns the media (in `media` order); values are float
# fractions in [0, 1], or NaN for media without videos.
topic_percentage_df = pd.DataFrame(topic_shares).T.reindex(columns=media)


In [None]:
# Grouped bar chart: for every medium, the percentage of its videos that
# belong to each of the three topics.
X = np.arange(len(media))
barwidth = 0.25

fig, ax = plt.subplots()
fig.subplots_adjust(left=.11, bottom=.23, right=.99, top=.92)

# (row label in topic_percentage_df, bar colour, horizontal offset in bar widths).
# The three bars of a group are centred on X + barwidth/2, where the tick sits.
bar_specs = [
    ('Flucht', 'r', -0.5),
    ('Klima', 'g', 0.5),
    ('Justiz', 'b', 1.5),
]
for topic_label, colour, offset in bar_specs:
    # Convert explicitly to float so the scaling works for object-dtype rows too.
    percentages = topic_percentage_df.loc[topic_label].to_numpy(dtype=float) * 100
    ax.bar(X + offset*barwidth, percentages, color=colour, width=barwidth, edgecolor='black', label=topic_label)

# All tick labels start out grey; the outlets listed here are drawn in black.
# They are selected by abbreviation rather than by list position so that the
# highlighting survives a reordering of `shorts`.
# NOTE(review): the criterion behind this selection is not documented -- confirm.
highlighted_shorts = {
    'NDS', 'taz', 'Sp', 'ARD', 'ZDF', 'ZDFh', 'BR', 'ntv',
    'FOCUS', 'faz', 'WELT', 'BILD', 'CTV',
}
plt.xticks(X + barwidth/2, shorts, rotation=55, font=fpath, fontsize=12, color='grey')
for tick_label, short in zip(ax.get_xticklabels(), shorts):
    if short in highlighted_shorts:
        tick_label.set_color('black')

plt.yticks(font=fpath, fontsize=12)
# The bar heights are already scaled to 0-100, matching PercentFormatter's default.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
# `fontsize` is ignored by legend() whenever `prop` is given, so the size has
# to be carried by the FontProperties object itself.
ax.legend(title='Themenbereich', prop=fm.FontProperties(fname=fpath, size=12))
# Pass the on/off flag positionally: the `b=` keyword was renamed to `visible=`
# in Matplotlib 3.5 and later removed, so `grid(b=True)` fails on current releases.
ax.grid(True, which='major', color='k', linestyle='--', alpha=0.5)
ax.set_title('Veröffentlichungen pro Medium und Themenbereich', fontsize=12, font=fpath)
ax.set_ylabel('Prozentualer Anteil des Themenbereichs', fontsize=12, font=fpath)
ax.set_xlabel('Medium', fontsize=12, font=fpath)

fig.set_size_inches(width, height)
fig.savefig('../results/themenbereiche_prozentual.pdf')