In [1]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np

# Load the two pre-built data sets from local pickle files.
# NOTE(review): unpickling can execute arbitrary code stored in the file, so these
# paths must only ever point at trusted, locally generated data.
# `topics` is read below through its "category" column.
topics = pd.read_pickle("data/topics.pkl")
# `shows` is read below through "title", "quarter", "weekday", "desc_length" and "num_topics".
shows = pd.read_pickle("data/sendungen.pkl")

In [2]:
# Bar chart: number of episodes per show type.

# Show types to plot, in the left-to-right order of the bars.
show_order = ["tagesschau", "tagesthemen", "bericht aus berlin", "tagesthemen extra"]

# One-column frame: index = show type, column "title" = number of episodes.
# The Series name and index name are set explicitly because the names produced by
# `value_counts()` differ between pandas versions (pandas >= 2.0 calls the result
# "count" and names the index after the source column), while the plot below
# refers to the column as "title" and to the unnamed index as "index".
# `reindex(..., fill_value=0)` both orders the bars and keeps a show type with no
# episodes as a zero-height bar instead of raising a KeyError.
show_counts = (
    shows["title"]
    .value_counts()
    .reindex(show_order, fill_value=0)
    .rename("title")
    .rename_axis(None)
    .to_frame()
    .rename(index={"bericht aus berlin": "Bericht aus Berlin"})
)

fig = px.bar(
    show_counts,
    y="title",
    title="Anzahl Shows nach Typ",
    color=["blue", "green", "red", "magenta"],
    # Without "identity" plotly express treats the colour names as category labels
    # and paints the bars with the template's default palette instead.
    color_discrete_map="identity",
    text="title",
    height=400,
    width=500,
    labels={"title": "Anzahl Shows", "index": "Showtyp"},
    template="plotly_dark",
)
fig.update_layout(showlegend=False)

fig.show()

In [3]:
# Line chart: median description length per quarter, one line per show type.
relevant_shows = ["tagesschau", "tagesthemen"]

# Collect one per-quarter frame per show and concatenate once at the end.
# `DataFrame.append` no longer exists in pandas >= 2.0, and a single `pd.concat`
# also avoids re-copying the accumulated frame on every iteration.
per_show_medians = []
for show_type in relevant_shows:
    show_episodes = shows[shows["title"] == show_type]
    by_quarter = show_episodes.groupby("quarter")["desc_length"].median().to_frame()
    by_quarter["mode"] = show_type  # the "mode" column drives the line colour / legend
    per_show_medians.append(by_quarter)
plot_df = pd.concat(per_show_medians).reset_index()

fig = px.line(
    plot_df,
    x="quarter",
    y="desc_length",
    color="mode",
    template="plotly_dark",
    labels={"desc_length": "Description Length", "quarter": "Year"},
    range_y=[100, 700],
    width=1000,
    height=500,
)

# Tick labels: show the year on every label ending in "1" and a blank elsewhere.
# The labels are derived from the plotted frame (not from all of `shows`) so their
# order and count match the positions used as `tickvals`; by default plotly orders
# a categorical axis by first appearance in the trace data, which is exactly the
# order `drop_duplicates()` yields here.
# NOTE(review): assumes quarter labels are strings shaped like "<year>/<quarter>" —
# the code only relies on `endswith("1")` and `split("/")`; confirm against the data.
plotted_quarters = plot_df["quarter"].drop_duplicates()
x_labels = [
    label.split("/")[0] if label.endswith("1") else " "
    for label in plotted_quarters
]

fig.update_layout(
    xaxis=dict(
        tickmode="array",
        tickvals=list(range(len(x_labels))),
        ticktext=x_labels,
    ),
    legend_title_text="Show",
)

fig.write_html("plots/as_html/description_length_by_show.html")  # save as html
fig.write_image("plots/as_png/description_length_by_show.png")  # save as png

fig.show()

In [4]:
# Bar chart: mean description length per weekday, Monday through Sunday.
by_weekday = (
    shows.groupby("weekday")["desc_length"]
    .mean()
    .to_frame()
    .reindex(
        ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    )
)
# Round to the nearest integer before casting: a bare `astype(int)` truncates, so a
# mean of 349.9 would be labelled 349.
# NOTE(review): the cast raises if a weekday is missing from the data, because the
# reindex above would then introduce NaN.
by_weekday["desc_length"] = by_weekday["desc_length"].round().astype(int)

fig = px.bar(
    by_weekday,
    y="desc_length",
    title="Description length by Weekday",
    text="desc_length",
    height=500,
    width=800,
    labels={"desc_length": "Description Length", "weekday": "Weekday"},
    template="plotly_dark",
)
fig.show()

In [5]:
# Bar chart: mean number of topics per show for each weekday, Monday through Sunday.
by_weekday = (
    shows.groupby("weekday")["num_topics"]
    .mean()
    .to_frame()
    .reindex(
        ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    )
)
by_weekday["num_topics"] = by_weekday["num_topics"].round(2)

fig = px.bar(
    by_weekday,
    y="num_topics",
    # Title and axis label name the metric actually plotted (num_topics); they
    # previously read "Description length", which belongs to the desc_length chart.
    title="Number of topics by Weekday",
    text="num_topics",
    height=500,
    width=700,
    labels={"num_topics": "Number of Topics", "weekday": "Weekday"},
    template="plotly_dark",
)
fig.show()

In [6]:
# Pie chart: share of topics per category, with the "Lottozahlen" category left out.
# The frame keeps the categories as its index and also exposes them as a regular
# "category" column so the pie can use them as slice names.
topic_distribution = (
    topics["category"]
    .value_counts()
    .to_frame()
    .rename(columns={"category": "count"})
    .drop("Lottozahlen")
    .assign(category=lambda frame: frame.index)
)

# `update_traces` returns the figure it was called on, so the call can be chained.
fig = px.pie(
    topic_distribution,
    names="category",
    values="count",
    title="Meldungen nach Kategorie",
    template="plotly_dark",
    height=750,
    width=750,
).update_traces(textinfo="percent+label", textposition="inside")

fig.show()