# Data Visualisation

In [None]:
# Import pandas (this can be skipped if it was already imported earlier in the session).
import pandas as pd
# Load the DataFrame that was saved as a pickle file.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
df_long = pd.read_pickle('data/df_long.pkl')

In [None]:
# Preview the first rows of df_long (head() shows five rows by default).
df_long.head()

## Plotting with Pandas

In [None]:
# Keep only the rows whose 'branch' is 'Albany Park', then order them by index.
albany = (
    df_long
    .loc[df_long['branch'].eq('Albany Park')]
    .sort_index()
)

In [None]:
# Preview the first rows of the Albany Park subset (five rows by default).
albany.head()

In [None]:
# With no arguments, DataFrame.plot() draws a line chart of the numeric
# columns against the index.
albany.plot()

In [None]:
# Plot only the 'circulation' column; a line chart is the default plot kind.
albany['circulation'].plot()

## Use Pandas for More Detailed Charts

In [None]:
# Same line chart, customised through keyword arguments:
# a title, axis labels, a 10x5 figure size and an explicit line colour.
albany['circulation'].plot(
    title='Circulation Count Over Time',
    xlabel='Date',
    ylabel='Circulation Count',
    figsize=(10, 5),
    color='blue',
)

### Changing plot types

In [None]:
# Switch the chart type with `kind`: an area plot drawn at 50% opacity.
albany['circulation'].plot(
    kind='area',
    alpha=0.5,
    title='Circulation Count Area Plot at Albany Park',
    xlabel='Date',
    ylabel='Circulation Count',
)

In [None]:
# Histogram of the circulation values, grouped into 20 bins.
# NOTE(review): older pandas releases may ignore `xlabel` for histogram
# plots - confirm against the installed version.
albany['circulation'].plot(
    kind='hist',
    bins=20,
    xlabel='Circulation Count',
    title='Distribution of Circulation Counts at Albany Park',
)

## Use Plotly for interactive plots

In [None]:
import plotly.express as px

In [None]:
# Creating a line plot for a few selected branches to avoid clutter
# Restrict the data to a handful of branches so the line chart stays readable,
# then order the rows by 'date'.
# NOTE(review): 'date' must resolve to a column or a named index level; the
# Albany Park subset is ordered with sort_index() instead - confirm both
# refer to the same ordering.
selected_branches = (
    df_long
    .loc[
        df_long['branch'].isin([
            'Altgeld',
            'Archer Heights',
            'Austin',
            'Austin-Irving',
            'Avalon',
        ])
    ]
    .sort_values(by='date')
)

In [None]:
# Interactive line chart: one coloured line per branch, with the
# DataFrame index on the x-axis and circulation on the y-axis.
fig = px.line(
    selected_branches,
    x=selected_branches.index,
    y='circulation',
    color='branch',
    title='Circulation Over Time for Selected Branches',
)
fig.show()

## Bar plots with Plotly

In [None]:
# Sum circulation per branch; reset_index() turns the 'branch' group keys
# back into a regular column so it can be referenced by name below.
total_circulation_by_branch = (
    df_long
    .groupby('branch')['circulation']
    .sum()
    .reset_index()
)

# Bar chart with one bar per branch showing its total circulation.
fig = px.bar(
    total_circulation_by_branch,
    title='Total Circulation by Branch',
    x='branch',
    y='circulation',
)
fig.show()