In [1]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the two Excel workbooks the notebook works with: the cleaned topics
# table and the table of shows ("sendungen").
topics, shows = (
    pd.read_excel(workbook)
    for workbook in ("data/cleaned.xlsx", "data/sendungen.xlsx")
)

In [3]:
# Look at description length over time: aggregate `desc_length` per quarter
# with both the median and the mean, then stack the two results into one
# long-format frame (columns: quarter, desc_length, mode) so that each
# aggregate can be drawn as its own line in a single plot.
by_quarter = shows.groupby("quarter")

desc_len_development_median = by_quarter["desc_length"].median().to_frame()
desc_len_development_median["mode"] = "Median"

desc_len_development_mean = by_quarter["desc_length"].mean().to_frame()
desc_len_development_mean["mode"] = "Mean"

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the supported
# replacement and yields the same frame (median rows first, then mean rows).
# `reset_index()` turns the "quarter" group key back into a regular column
# without mutating the frame in place, so re-running the cell is safe.
desc_len_df = pd.concat(
    [desc_len_development_median, desc_len_development_mean]
).reset_index()

In [4]:
# Plot description length over time: one line per aggregate ("Median" and
# "Mean"), with the quarter on the x axis.

fig = px.line(
    desc_len_df,
    x = "quarter",
    y = "desc_length",
    color = 'mode',
    template = "plotly_dark",
    labels = {"desc_length": "Description Length", "quarter": "Year"},
    range_y = [200, 600],
    width = 1500,
    height = 500
    )

# Only quarters whose label ends in "1" get a visible tick label (the part
# before the "/"); all other ticks are blanked out to keep the axis readable.
# Presumably the quarter strings look like "<year>/<quarter>" - verify against
# the data.
#
# The labels are derived from the plotted frame rather than from `shows`: the
# ticks below are addressed by position, so the label order has to match the
# order in which the quarters appear in `desc_len_df` (sorted by the groupby),
# not the order of first appearance in `shows`. This also skips missing
# quarter values, which the groupby drops and which have no `.endswith`.
plotted_quarters = desc_len_df["quarter"].unique()
x_labels = [
    label.split("/")[0] if label.endswith("1") else " "
    for label in plotted_quarters
]

fig.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickvals = list(range(len(x_labels))),  # positions on the categorical axis
        ticktext = x_labels
    )
)

fig.write_html("plots/as_html/description_length.html") # save as html
fig.write_image("plots/as_png/description_length.png") # save as png

fig.show()