In [15]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np

# Load the two pickled DataFrames that the plotting cells below read from.
# NOTE(review): unpickling can execute arbitrary code; only load files that were
# produced by this project and never pickles from an untrusted source.
topics = pd.read_pickle("data/topics.pkl")
shows = pd.read_pickle("data/sendungen.pkl")

In [13]:
# Bar chart: number of broadcasts per show type.
#
# The frame is built with explicit "show" / "count" columns so the cell does not
# depend on how `value_counts().to_frame()` names its column (pandas 2.0 renamed
# it to "count", which breaks `y="title"`).
show_order = ["tagesschau", "tagesthemen", "bericht aus berlin", "tagesthemen extra"]
bar_colors = ["blue", "green", "red", "magenta"]

show_counts = (
    shows["title"]
    .value_counts()
    # Fixes the bar order; a show type with no rows is plotted as 0.
    .reindex(show_order, fill_value=0)
    .rename(index={"bericht aus berlin": "Bericht aus Berlin"})
    .rename_axis("show")
    .reset_index(name="count")
)

fig = px.bar(
    show_counts,
    x="show",
    y="count",
    # Passing a list of colour names to `color` only creates categories that get
    # the default palette. Colouring by show and supplying the palette through
    # `color_discrete_sequence` is what actually applies the named colours.
    color="show",
    color_discrete_sequence=bar_colors,
    text="count",
    title="Anzahl Shows nach Typ",
    height=1500,
    width=2400,
    labels={"count": "Anzahl Shows", "show": "Showtyp"},
)
fig.update_layout(
    showlegend=False,
    font={"size": 30},
    title_x=0.5,
)

# Export the figure; the PNG export needs a static image backend (e.g. kaleido).
fig.write_html("plots/as_html/anzahl_shows_nach_typ.html")
fig.write_image("plots/as_png/anzahl_shows_nach_typ.png")

In [62]:
# Line chart: median description length per quarter, one line per show type.
relevant_shows = ["tagesschau", "tagesthemen", "tagesthemen extra"]

# Collect one per-quarter frame per show and concatenate once.
# `DataFrame.append` was removed in pandas 2.0, and growing a frame inside a loop
# is quadratic anyway.
per_show_frames = []
for show_type in relevant_shows:
    # This name stays bound after the loop (to the last show type's episodes).
    show_episodes = shows[shows["title"] == show_type]
    by_quarter = show_episodes.groupby("quarter")["desc_length"].median().to_frame()
    by_quarter["mode"] = show_type
    per_show_frames.append(by_quarter)
plot_df = pd.concat(per_show_frames).reset_index()

fig = px.line(
    plot_df,
    x="quarter",
    y="desc_length",
    color="mode",
    labels={"desc_length": "Description Length", "quarter": "Year"},
    width=2000,
    height=1000,
)

# Derive the tick labels from the quarters that are actually plotted, in sorted
# order, and pin the axis to that same category order. The tick positions below
# are category indices, so labels and categories must line up one-to-one.
# Only the first quarter of each year is labelled (with the year); the rest are blank.
quarters = sorted(plot_df["quarter"].unique())
x_labels = [quarter.split("/")[0] if quarter.endswith("1") else " " for quarter in quarters]

fig.update_layout(
    xaxis=dict(
        type="category",
        categoryorder="array",
        categoryarray=quarters,
        tickmode="array",
        tickvals=list(range(len(x_labels))),
        ticktext=x_labels,
    ),
    title="Median Beschreibungslänge nach Quartal",
    title_x=0.5,
    font={"size": 30},
    legend_title_text="Show",
)

# Export the figure; the PNG export needs a static image backend (e.g. kaleido).
fig.write_html("plots/as_html/description_length_by_show.html")
fig.write_image("plots/as_png/description_length_by_show.png")

fig.show()

In [64]:

# Line chart: median description length per quarter for a single show type.
#
# Select the episodes explicitly instead of relying on a variable left over from
# an earlier cell's loop. "tagesthemen extra" is the subset that leftover variable
# holds after a fresh Restart & Run All.
# NOTE(review): confirm this is the show this plot is meant to display.
single_show = "tagesthemen extra"
show_episodes = shows[shows["title"] == single_show]

by_quarter = (
    show_episodes.groupby("quarter")["desc_length"]
    .median()
    .to_frame()
    .reset_index()
)

fig = px.line(
    by_quarter,
    x="quarter",
    y="desc_length",
    labels={"desc_length": "Description Length", "quarter": "Year"},
    width=2000,
    height=1000,
)

# Tick labels come from the quarters that are actually plotted, in sorted order,
# and the axis is pinned to that same category order so the index-based tick
# positions match. Only the first quarter of each year is labelled.
quarters = sorted(by_quarter["quarter"].unique())
x_labels = [quarter.split("/")[0] if quarter.endswith("1") else " " for quarter in quarters]

fig.update_layout(
    xaxis=dict(
        type="category",
        categoryorder="array",
        categoryarray=quarters,
        tickmode="array",
        tickvals=list(range(len(x_labels))),
        ticktext=x_labels,
    ),
    title="Median Beschreibungslänge nach Quartal",
    title_x=0.5,
    font={"size": 30},
)

# Write to dedicated files: the previous paths were identical to those of the
# multi-show chart, so this cell silently overwrote that export.
fig.write_html("plots/as_html/description_length_tagesthemen_extra.html")
fig.write_image("plots/as_png/description_length_tagesthemen_extra.png")

In [5]:
# Bar chart: mean number of topics per broadcast, by weekday (Monday to Sunday).
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

by_weekday = (
    shows.groupby("weekday")["num_topics"]
    .mean()
    .round(2)
    # groupby sorts weekdays alphabetically; restore calendar order.
    .reindex(weekday_order)
    .to_frame()
)

fig = px.bar(
    by_weekday,
    y="num_topics",
    # The plotted quantity is the mean of `num_topics`; the earlier title and axis
    # label said "Description length", which mislabelled the chart.
    title="Average number of topics by weekday",
    text="num_topics",
    height=500,
    width=700,
    labels={"num_topics": "Average number of topics", "weekday": "Weekday"},
    template="plotly_dark",
)
fig.show()

In [38]:
# Pie chart: share of news items per category; the "Lottozahlen" category is left out.
category_counts = topics["category"].value_counts().drop("Lottozahlen")
topic_distribution = category_counts.to_frame(name="count")
# Expose the category labels (the index) as a regular column for plotly.
topic_distribution["category"] = topic_distribution.index

fig = px.pie(
    topic_distribution,
    names="category",
    values="count",
    title="Meldungen nach Kategorie",
    height=1500,
    width=1500,
)
fig.update_traces(textinfo="percent+label", textposition="inside")
fig.update_layout(title_x=0.5, font=dict(size=30))

# Export as HTML and as PNG, then display inline.
fig.write_html("plots/as_html/meldungen_nach_kategorie.html")
fig.write_image("plots/as_png/meldungen_nach_kategorie.png")
fig.show()

In [65]:
# Horizontal bar chart: mean topic text length per category, shortest first;
# the "Lottozahlen" category is left out.

# Character count of every topic text, stored as a new column on `topics`.
topics["topic_length"] = topics["topic"].str.len()

mean_length = topics.groupby("category")["topic_length"].mean().drop("Lottozahlen")
by_category = mean_length.to_frame().reset_index().sort_values("topic_length", ascending=True)
# Round only after sorting so the order reflects the unrounded means.
by_category["topic_length"] = by_category["topic_length"].round()

fig = px.bar(
    by_category,
    x="topic_length",
    y="category",
    orientation="h",
    text="topic_length",
    title="Beschreibungslänge nach Kategorie",
    labels={"topic_length": "Beschreibungslänge", "category": "Kategorie"},
    height=600,
    width=1500,
)
fig.update_layout(title_x=0.5, font=dict(size=20))

# Export as HTML and as PNG.
fig.write_html("plots/as_html/beschreibungslänge_nach_kategorie.html")
fig.write_image("plots/as_png/beschreibungslänge_nach_kategorie.png")