$\textbf{PROGRAMMING ASSIGNMENT - Lecture 2}$
---

Instruction: Choose a dataset and perform data storytelling. Choose the proper data visualizations and do not use words; your graphs must tell the story.

---

Data: [Metro Interstate Traffic Volume](https://archive.ics.uci.edu/dataset/492/metro+interstate+traffic+volume)

### $\textbf{Analysis of the Year with the Highest Traffic Volume along the Metro Interstate}$

In [154]:
%matplotlib inline
import pandas as pd
import plotly.graph_objects as go
import plotly.subplots as sp

In [155]:
# Load the traffic/weather records; `date_time` is parsed into datetimes so it
# can later serve as a DatetimeIndex for resampling.
csv_path = "./datasets/metro-interstate-traffic-volume.csv"
df = pd.read_csv(csv_path, parse_dates=["date_time"])

# Print each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   holiday              61 non-null     object        
 1   temp                 48204 non-null  float64       
 2   rain_1h              48204 non-null  float64       
 3   snow_1h              48204 non-null  float64       
 4   clouds_all           48204 non-null  int64         
 5   weather_main         48204 non-null  object        
 6   weather_description  48204 non-null  object        
 7   date_time            48204 non-null  datetime64[ns]
 8   traffic_volume       48204 non-null  int64         
dtypes: datetime64[ns](1), float64(3), int64(2), object(3)
memory usage: 3.3+ MB


In [156]:
# Summary statistics for the numeric and datetime columns.
# NOTE(review): the saved output shows temp min = 0.0 and rain_1h max = 9831.3,
# which look like outliers — confirm before trusting aggregates that include them.
df.describe()

Unnamed: 0,temp,rain_1h,snow_1h,clouds_all,date_time,traffic_volume
count,48204.0,48204.0,48204.0,48204.0,48204,48204.0
mean,281.20587,0.334264,0.000222,49.362231,2016-01-05 10:46:16.773711616,3259.818355
min,0.0,0.0,0.0,0.0,2012-10-02 09:00:00,0.0
25%,272.16,0.0,0.0,1.0,2014-02-06 11:45:00,1193.0
50%,282.45,0.0,0.0,64.0,2016-06-11 03:30:00,3380.0
75%,291.806,0.0,0.0,90.0,2017-08-11 06:00:00,4933.0
max,310.07,9831.3,0.51,100.0,2018-09-30 23:00:00,7280.0
std,13.338232,44.789133,0.008168,39.01575,,1986.86067


In [157]:
# Count missing values per column; in the saved output only `holiday` has any
# (48143 of 48204 rows).
df.isna().sum()

holiday                48143
temp                       0
rain_1h                    0
snow_1h                    0
clouds_all                 0
weather_main               0
weather_description        0
date_time                  0
traffic_volume             0
dtype: int64

In [158]:
# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,scattered clouds,2012-10-02 09:00:00,5545
1,,289.36,0.0,0.0,75,Clouds,broken clouds,2012-10-02 10:00:00,4516
2,,289.58,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 11:00:00,4767
3,,290.13,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 12:00:00,5026
4,,291.14,0.0,0.0,75,Clouds,broken clouds,2012-10-02 13:00:00,4918


In [159]:
# Build a timestamp-indexed copy for resampling. `weather_description` is left
# out because the aggregations below only use `weather_main`. `df` itself is
# not modified.
timeseries_df = (
    df.set_index("date_time")
      .drop(columns=["weather_description"])
)
timeseries_df.head()

Unnamed: 0_level_0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,traffic_volume
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2012-10-02 09:00:00,,288.28,0.0,0.0,40,Clouds,5545
2012-10-02 10:00:00,,289.36,0.0,0.0,75,Clouds,4516
2012-10-02 11:00:00,,289.58,0.0,0.0,90,Clouds,4767
2012-10-02 12:00:00,,290.13,0.0,0.0,90,Clouds,5026
2012-10-02 13:00:00,,291.14,0.0,0.0,75,Clouds,4918


In [160]:
def _most_frequent(series):
    """Return the most common value in ``series``, or ``pd.NA`` if there is none.

    ``Series.mode()`` skips missing values and returns its result sorted, so an
    empty or all-missing bucket yields ``pd.NA`` and ties resolve to the first
    value in sort order.
    """
    modes = series.mode()  # evaluated once per bucket instead of twice
    return modes.iloc[0] if not modes.empty else pd.NA


# Per-column reducers, reused by every resample in this notebook.
aggregations = {
    "temp": "mean",
    "rain_1h": "sum",
    "snow_1h": "sum",
    "clouds_all": "mean",
    "weather_main": _most_frequent,
    "holiday": _most_frequent,
    "traffic_volume": "mean",
}

yearly_df = timeseries_df.resample("YE").agg(aggregations)

# "YE" labels each bucket with its year-END timestamp (e.g. 2012-12-31). Plotting
# those timestamps puts every point next to the *following* year's tick, so the
# calendar year is plotted instead.
# The data runs from 2012-10-02 to 2018-09-30 (see the describe() output), so the
# first and last points cover partial years.
fig = go.Figure()
fig.add_trace(go.Scatter(x=yearly_df.index.year, y=yearly_df['traffic_volume']))
fig.update_layout(title='Yearly Traffic Volume at the Metro Interstate (2012 - 2018)',
                   xaxis_title='Year',
                   xaxis_tickmode='linear',
                   xaxis_dtick=1,  # one tick per calendar year
                   yaxis_title='Average Traffic Volume')
fig.show()

In [161]:
# Collapse the records to one row per calendar day using the shared reducers,
# then keep only the 2018 rows (partial-string indexing on the DatetimeIndex).
daily_df = timeseries_df.resample("D").agg(aggregations)
daily_2018_df = daily_df.loc["2018"]

# Two y-axes are needed because traffic volume and temperature are on very
# different scales.
fig = sp.make_subplots(specs=[[{"secondary_y": True}]])

# Left axis: daily mean traffic volume.
fig.add_trace(
    go.Scatter(x=daily_2018_df.index, y=daily_2018_df['traffic_volume'], name="Traffic Volume", mode="lines"),
    secondary_y=False
)

# Right axis: daily mean temperature.
fig.add_trace(
    go.Scatter(x=daily_2018_df.index, y=daily_2018_df['temp'], name="Temperature", mode="lines"),
    secondary_y=True
)

fig.update_layout(title='Daily Traffic Volume vs Temperature Trends along Metro Interstate in 2018', xaxis_title='Date')
fig.update_yaxes(title_text="Traffic Volume", secondary_y=False)
# State the unit, as the rainfall (mm) and cloud-cover (%) figures do; the
# dataset reports `temp` in Kelvin (values around 270-310).
fig.update_yaxes(title_text="Temperature (K)", secondary_y=True)
fig.show()

In [162]:
# Number of 2018 days on which each weather category was the day's most
# frequent `weather_main` value.
unique_categories_counts = daily_2018_df['weather_main'].value_counts()

weather_pie = go.Pie(
    labels=unique_categories_counts.index,
    values=unique_categories_counts.values,
)
fig = go.Figure(data=[weather_pie])
fig.update_layout(title='Daily Weather Distribution for the Year 2018 along the Metro Interstate')
fig.show()

In [163]:
# Average the daily traffic means within each daily weather category and rank
# the categories from busiest to quietest.
daily_traffic_grouped = daily_2018_df.groupby('weather_main')['traffic_volume']
traffic_volume_by_weather = daily_traffic_grouped.mean().sort_values(ascending=False)

fig = go.Figure()
fig.add_trace(go.Bar(x=traffic_volume_by_weather.index, y=traffic_volume_by_weather.values))
fig.update_layout(
    title='Average Traffic Volume per Weather Condition for the Year 2018 along the Metro Interstate',
    xaxis_title='Weather',
    yaxis_title='Average Traffic Volume',
)

fig.show()

In [164]:
# Overlay 2018 daily mean traffic volume (left axis) with the daily rainfall
# total (right axis); `rain_1h` is summed per day by the shared aggregations.
dates_2018 = daily_2018_df.index
traffic_trace = go.Scatter(x=dates_2018, y=daily_2018_df['traffic_volume'], name="Traffic Volume", mode="lines")
rain_trace = go.Scatter(x=dates_2018, y=daily_2018_df['rain_1h'], name="Rainfall", mode="lines")

fig = sp.make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(traffic_trace, secondary_y=False)
fig.add_trace(rain_trace, secondary_y=True)

fig.update_layout(
    title='Daily Traffic Volume vs Rainfall (mm) Trends along Metro Interstate in 2018',
    xaxis_title='Date',
)
fig.update_yaxes(title_text="Traffic Volume", secondary_y=False)
fig.update_yaxes(title_text="Rainfall", secondary_y=True)
fig.show()

In [165]:
# Overlay 2018 daily mean traffic volume (left axis) with daily mean cloud
# cover (right axis).
fig = sp.make_subplots(specs=[[{"secondary_y": True}]])

# (column, legend name, axis title, drawn on the secondary axis?)
overlay_specs = [
    ("traffic_volume", "Traffic Volume", "Traffic Volume", False),
    ("clouds_all", "Cloud Cover", "Cloud Cover (%)", True),
]
for column, legend_name, axis_title, on_secondary in overlay_specs:
    fig.add_trace(
        go.Scatter(x=daily_2018_df.index, y=daily_2018_df[column], name=legend_name, mode="lines"),
        secondary_y=on_secondary,
    )
    fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary)

fig.update_layout(
    title='Daily Traffic Volume vs Cloud Cover Trends along Metro Interstate in 2018',
    xaxis_title='Date',
)
fig.show()