In [2]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Load the show and topic tables from the project's data directory.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load files that were produced by this project.
shows = pd.read_pickle("../data/sendungen.pkl")
# Keep only rows whose "article" value is non-empty. The .copy() turns the
# filtered slice into an independent frame so later column assignments are safe.
shows = shows[shows["article"].str.len() > 0].copy()
topics = pd.read_pickle("../data/topics.pkl")
# Drop topics whose text contains "das wetter" (case-insensitive). Copy the
# result because a later cell assigns topics["topic_length"] in place, which
# would otherwise raise SettingWithCopyWarning on a slice.
topics = topics[~topics["topic"].str.lower().str.contains("das wetter")].copy()

In [2]:
# Bar chart: number of episodes per show type.
show_order = ["tagesschau", "tagesthemen", "bericht aus berlin", "tagesthemen extra"]

# Name the counts column and clear the index name explicitly:
# value_counts() names its result "title" on pandas < 2.0 but "count" (with an
# index named "title") on pandas >= 2.0, so relying on the default breaks
# `y="title"` below. `.loc` raises KeyError if one of the shows is missing.
show_counts = (
    shows["title"]
    .value_counts()
    .loc[show_order]
    .rename("title")
    .rename_axis(None)
    .to_frame()
    .rename(index={"bericht aus berlin": "Bericht aus Berlin"})
)

fig = px.bar(
    show_counts,
    y="title",  # x is left unset, so px uses the (unnamed) index, labelled "index"
    title="Anzahl Shows nach Typ",
    color=["blue", "green", "red", "magenta"],
    # Without "identity" px treats the list above as category labels and
    # assigns its default palette instead of the named colors.
    color_discrete_map="identity",
    text="title",
    height=1500,
    width=2400,
    labels={"title": "Anzahl Shows", "index": "Showtyp"},
)
fig.update_layout(
    showlegend=False,
    font={"size": 30},
    title_x=0.5,
)

fig.write_html("../plots/as_html/anzahl_shows_nach_typ.html")  # save as html
fig.write_image("../plots/as_png/anzahl_shows_nach_typ.png")  # save as png

In [6]:
# Line chart: mean description length per quarter, one line per show type.
relevant_shows = ["tagesschau", "tagesthemen", "tagesthemen extra"]

# Collect one aggregated frame per show and concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
per_show_frames = []
for show_type in relevant_shows:
    show_episodes = shows[shows["title"] == show_type]
    by_quarter = show_episodes.groupby("quarter")["desc_length"].mean().to_frame()
    by_quarter["mode"] = show_type
    per_show_frames.append(by_quarter)
plot_df = pd.concat(per_show_frames).reset_index()

# Quarters that are actually plotted. The axis order is pinned to this list so
# that the positional tick values below line up with the categories.
# NOTE(review): assumes quarter labels sort chronologically as strings
# (year before the "/", as the tick-label logic below also expects) - confirm.
quarters = sorted(plot_df["quarter"].unique())

fig = px.line(
    plot_df,
    x="quarter",
    y="desc_length",
    color="mode",
    category_orders={"quarter": quarters},
    labels={"desc_length": "Description Length", "quarter": "Year"},
    width=2000,
    height=1000,
)

# Label only the first quarter of each year (with the year); every other tick
# gets a blank label so the axis stays readable.
x_labels = [label.split("/")[0] if label.endswith("1") else " " for label in quarters]

fig.update_layout(
    xaxis=dict(
        tickmode="array",
        tickvals=list(range(len(x_labels))),  # category positions on the x axis
        ticktext=x_labels,
    ),
    title="Durchschnittliche Beschreibungslänge nach Quartal",
    title_x=0.5,
    font={"size": 30},
)
fig.update_layout(legend_title_text='Show')

fig.write_html("../plots/as_html/description_length_by_show.html")  # save as html
fig.write_image("../plots/as_png/description_length_by_show.png")  # save as png

In [7]:
# Line chart: mean number of topics per episode and quarter, one line per show type.
relevant_shows = ["tagesschau", "tagesthemen", "tagesthemen extra"]

# Collect one aggregated frame per show and concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
per_show_frames = []
for show_type in relevant_shows:
    show_episodes = shows[shows["title"] == show_type]
    by_quarter = show_episodes.groupby("quarter")["num_topics"].mean().to_frame()
    by_quarter["mode"] = show_type
    per_show_frames.append(by_quarter)
plot_df = pd.concat(per_show_frames).reset_index()

# Quarters that are actually plotted. The axis order is pinned to this list so
# that the positional tick values below line up with the categories.
# NOTE(review): assumes quarter labels sort chronologically as strings
# (year before the "/", as the tick-label logic below also expects) - confirm.
quarters = sorted(plot_df["quarter"].unique())

fig = px.line(
    plot_df,
    x="quarter",
    y="num_topics",
    color="mode",
    category_orders={"quarter": quarters},
    labels={"num_topics": "Anzahl Themen", "quarter": "Year"},
    width=2000,
    height=1000,
)

# Label only the first quarter of each year (with the year); every other tick
# gets a blank label so the axis stays readable.
x_labels = [label.split("/")[0] if label.endswith("1") else " " for label in quarters]

fig.update_layout(
    xaxis=dict(
        tickmode="array",
        tickvals=list(range(len(x_labels))),  # category positions on the x axis
        ticktext=x_labels,
    ),
    # Title spelling corrected ("Durschnittliche" -> "Durchschnittliche").
    title="Durchschnittliche Themen pro Sendung",
    title_x=0.5,
    font={"size": 30},
)
fig.update_layout(legend_title_text='Show')
fig.write_image("../plots/as_png/num_topics_per_show.png")  # save as png

In [5]:
# Pie chart: number of topics per category, leaving out the "Lottozahlen" category.
category_counts = topics["category"].value_counts()
topic_distribution = category_counts.to_frame().rename(columns={"category": "count"})
topic_distribution = topic_distribution.drop("Lottozahlen")
# Expose the category labels (currently the index) as a regular column for px.
topic_distribution["category"] = topic_distribution.index

fig = px.pie(
    topic_distribution,
    names="category",
    values="count",
    title="Meldungen nach Kategorie",
    height=1500,
    width=1500,
)
# Draw percentage and category label inside each slice.
fig.update_traces(textinfo="percent+label", textposition="inside")
fig.update_layout(title_x=0.5, font={"size": 30})

fig.write_html("../plots/as_html/meldungen_nach_kategorie.html")  # save as html
fig.write_image("../plots/as_png/meldungen_nach_kategorie.png")  # save as png

In [6]:
# Horizontal bar chart: mean topic text length per category (without "Lottozahlen").
topics["topic_length"] = topics["topic"].str.len()

mean_length = topics.groupby("category")["topic_length"].mean().drop("Lottozahlen")
by_category = mean_length.to_frame().reset_index()
by_category = by_category.sort_values("topic_length", ascending=True)
# Round only after sorting so the bar order is based on the exact means.
by_category["topic_length"] = by_category["topic_length"].round()

fig = px.bar(
    by_category,
    x="topic_length",
    y="category",
    orientation="h",
    text="topic_length",
    title="Beschreibungslänge nach Kategorie",
    labels={"topic_length": "Beschreibungslänge", "category": "Kategorie"},
    height=600,
    width=1500,
)
fig.update_layout(title_x=0.5, font={"size": 20})

fig.write_html("../plots/as_html/beschreibungslänge_nach_kategorie.html")  # save as html
fig.write_image("../plots/as_png/beschreibungslänge_nach_kategorie.png")  # save as png

In [7]:
# Bar chart: mean number of topics per episode for each show type (rounded).
show_order = ["tagesschau", "tagesthemen", "tagesthemen extra"]
num_topics_per_show = (
    shows.groupby("title")["num_topics"]
    .mean()
    .reindex(show_order)  # fixes the bar order; a missing show becomes NaN
    .round()
    .to_frame()
    .reset_index()
)

fig = px.bar(
    num_topics_per_show,
    title="Durchschnittliche Themen pro Sendung",
    x="title",
    y="num_topics",
    text="num_topics",
    height=1500,
    width=2400,
    color=["blue", "red", "green"],
    # Without "identity" px treats the list above as category labels and
    # assigns its default palette instead of the named colors.
    color_discrete_map="identity",
    labels={"title": "Showtyp", "num_topics": "Avg Topic Count"},
)
fig.update_layout(
    showlegend=False,
    font={"size": 30},
    title_x=0.5,
)

fig.write_image("../plots/as_png/topic_counts_per_show.png")