In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime

from scripts.visualizer import Visualizer

In [9]:
### Visualization setup
# Calendar names in chronological order; the list order is what the ordered
# categoricals and the weekday chart's x-axis rely on.
weekdays = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

# Each entry pairs a short source name with the CSV file of that site's articles.
path_aktualne = ("aktualne", "data/articles_Aktualne.cz.csv")
path_idnes = ("idnes", "data/articles_iDnes.cz.csv")
file_paths = [
    path_aktualne,
    path_idnes,
]

v = Visualizer(
    files=file_paths,
    verbose=True,
    dropna=True,
)

100%|██████████| 2/2 [00:00<00:00, 34.58it/s]

Data from data/articles_Aktualne.cz.csv successfully uploaded.
Data from data/articles_iDnes.cz.csv successfully uploaded.
Visualizer successfully initialized.





In [10]:
# Fetch the (first day, last day) span of the loaded articles.
# NOTE(review): judging by the recorded output, set_range=True also crops the
# visualizer's data to that span - confirm against Visualizer.visualizerDateRange.
date_range = v.visualizerDateRange(
    get_range=True,
    set_range=True,
)
date_range

Data were successfully cropped and range from 2020-02-26 to 2020-05-01.


('2020-02-26', '2020-05-01')

In [11]:
def extendDataFrame(data):
    '''
        Adds calendar columns derived from the "date" and "time" columns.

        The frame is modified in place (and returned for convenience):
          - "date"    is converted to datetime,
          - "month"   is the month name as an ordered categorical (order taken
                      from the notebook-level `months` list),
          - "weekday" is the day name as an ordered categorical (order taken
                      from the notebook-level `weekdays` list),
          - "hour"    is the hour extracted from the "time" column.

        Parameters:
            data: DataFrame with a "date" and a "time" column, both parseable
                  by pd.to_datetime.

        Returns:
            The same DataFrame object, with the columns above added/updated.
    '''

    # Work with date
    data["date"] = pd.to_datetime(data["date"])
    data["month"] = pd.Categorical(data["date"].dt.month_name(), categories=months, ordered=True)
    data["weekday"] = pd.Categorical(data["date"].dt.day_name(), categories=weekdays, ordered=True)

    # Work with time: read it from the frame that was passed in, not from the
    # global visualizer, so the function works on any frame with a "time" column.
    data["hour"] = pd.to_datetime(data["time"]).dt.hour

    return data

# extendDataFrame writes its new columns onto the frame it is given and returns
# that same frame, so data_ext is the extended v.data rather than a separate copy
# (presumably v.data hands back the same object on each access - TODO confirm).
data_ext = extendDataFrame(data=v.data)

In [39]:
# Total number of articles per source.
# The counts are looked up by source label instead of by position: positional
# [0]/[1] access on a string-labelled Series is deprecated in pandas and silently
# depends on the alphabetical order of the source names. A source with no
# articles yields 0 instead of raising.
articles_per_source = data_ext.groupby("source").size()
count_articles_aktualne = articles_per_source.get("aktualne", 0)
count_articles_idnes = articles_per_source.get("idnes", 0)

In [57]:
# Pie chart: share of all articles contributed by each news site
title_text = "Number of articles from %s to %s." % (date_range[0], date_range[1])
labels = ['Aktuálně.cz', 'iDnes.cz']
values = [count_articles_aktualne, count_articles_idnes]

source_share = go.Pie(
    labels=labels,
    values=values,
    textinfo='label+percent',
    marker={"colors": ["#636df6", "#ee553a"]},
)
fig = go.Figure(data=[source_share])
fig.update_layout(title_text=title_text)
fig.show()

In [41]:
# Number of articles per (weekday, source) pair, sorted by weekday order.
# "weekday" is an ordered categorical, and the weekday chart pairs these counts
# with the `weekdays` list by position. observed=False is spelled out so that all
# weekday categories are kept in the result (a weekday without articles counts
# as 0) regardless of the pandas default, which is deprecated and changing.
days_count = (
    data_ext[["weekday", "source"]]
    .groupby(["weekday", "source"], observed=False)
    .size()
    .sort_index()
)

In [61]:
# Weekday figure: stacked bars, one trace per news site
title_text = "Number of articles from %s to %s by day of the week." % (date_range[0], date_range[1])

weekday_traces = [
    go.Bar(name=site_name, x=weekdays, y=days_count.xs(source_key, level=1))
    for site_name, source_key in (('Aktuálně.cz', "aktualne"), ('iDnes.cz', "idnes"))
]
fig1 = go.Figure(data=weekday_traces)

# Stack the two sites on top of each other and label the axes
fig1.update_layout(
    barmode='stack',
    title_text=title_text,
    xaxis=dict(title='Weekdays'),
    yaxis=dict(title='Number of Articles'),
)
fig1.show()

In [43]:
# Number of articles per (hour, source) pair.
# Unstacking with fill_value=0 and stacking back gives every hour that occurs in
# the data an entry for each source, with 0 where a source published nothing.
per_hour_and_source = data_ext[["hour", "source"]].groupby(["hour", "source"]).size()
hours_by_source = per_hour_and_source.unstack(fill_value=0)
hours_count = hours_by_source.stack()
hours_count = hours_count.sort_index()

In [62]:
# Hour figure: stacked bars, one trace per news site
title_text = "Number of articles from %s to %s by the hour." % (date_range[0], date_range[1])

# Hours present in the data, taken from the first index level of hours_count
hour_list = list(hours_count.index.unique(level=0))

hour_traces = [
    go.Bar(name=site_name, x=hour_list, y=hours_count.xs(source_key, level=1))
    for site_name, source_key in (('Aktuálně.cz', "aktualne"), ('iDnes.cz', "idnes"))
]
fig2 = go.Figure(data=hour_traces)

# Stack the two sites and show a tick for every hour
fig2.update_layout(
    barmode='stack',
    title_text=title_text,
    xaxis=dict(title='Hours', tickmode='linear'),
    yaxis=dict(title='Number of Articles'),
)
fig2.show()