# Task 2.6: Creating Dashboards with Python
#### 1. Install libraries
#### 2. Import Data
#### 3. Data Wrangling
#### 4. Use plotly to produce a bar chart for the most popular stations in New York.
#### 5. Create a dual-axis line chart for the aggregated bike trips and temperatures in plotly.

### 1. Install Libraries

In [None]:
# Importing libraries
import os
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime as dt 

### 2. Import Data

In [None]:
# Load the LaGuardia CSV; its first column is used as the DataFrame index
df = pd.read_csv(
    'LaGuardia_data.csv',
    index_col=0,
)

In [None]:
# Inspect the dtype pandas assigned to each column
df.dtypes

In [None]:
# Preview the first 10 rows
df.head(10)

### 3. Data Wrangling

In [None]:
# Parse 'date' as a datetime, then derive an integer 'month' column from it
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['month'] = df['date'].dt.month.astype('int')

In [None]:
# Preview the first rows to check the new 'month' column
df.head()

In [None]:
# Create the season column.
# Month-to-season mapping: Dec-Feb -> winter, Mar-May -> spring,
# Jun-Aug -> summer, any other month (Sep-Nov) -> fall.
# The spring test must be inclusive (`3 <= month`): with a strict `3 < month`
# March matches no earlier branch and is labelled "fall".
df['season'] = [
    "winter" if month in (12, 1, 2)
    else "spring" if 3 <= month <= 5
    else "summer" if 6 <= month <= 8
    else "fall"
    for month in df['month']
]

In [None]:
# Preview the first rows to check the new 'season' column
df.head()

### 4. Use plotly to produce a bar chart for the most popular stations in New York.

In [None]:
## Count trips per start station and keep the 20 busiest stations.
## Every row gets a constant 'value' of 1, so summing it per group gives the row count.
df['value'] = 1
df_groupby_bar = df.groupby('start_station_name', as_index=False)['value'].sum()
top_20 = df_groupby_bar.nlargest(n=20, columns='value')

In [None]:
# Plain bar chart: one bar per top-20 start station, height = trip count
bar_trace = go.Bar(x=top_20['start_station_name'], y=top_20['value'])
fig = go.Figure(bar_trace)
fig.show()

In [None]:
# Same bar chart, with each bar shaded by its trip count on the 'Blues' colorscale
bar_marker = {'color': top_20['value'], 'colorscale': 'Blues'}
fig = go.Figure(
    go.Bar(
        x=top_20['start_station_name'],
        y=top_20['value'],
        marker=bar_marker,
    )
)
fig.show()

In [None]:
## Add the title, axis titles and a fixed size to the bar chart.
## update_layout stays the last expression so the notebook cell renders the figure.
bar_layout = dict(
    title='Top 20 most popular bike stations in New York',
    xaxis_title='Start stations',
    yaxis_title='Sum of trips',
    width=900,
    height=600,
)
fig.update_layout(**bar_layout)

In [None]:
# Write the top 20 stations to 'top_20.csv' in the working directory
top_20.to_csv(path_or_buf='top_20.csv')

### 5. Create a dual-axis line chart for the aggregated bike trips and temperatures in plotly.

In [None]:
# Keep only the columns needed for the line chart and renumber the rows from 0
plot_columns = ['date', 'bike_rides_daily', 'avgTemp']
df_filtered = df[plot_columns].reset_index(drop=True)

In [None]:
# Preview the first rows of the three-column frame
df_filtered.head()

In [None]:
# Thin the data by keeping every 100th row of the filtered DataFrame
df_sampled = df_filtered.iloc[::100]

In [None]:
# Dual-axis line chart: bike rides on the primary y-axis, temperature on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

# One entry per trace: (column, legend name, line colour, use the secondary y-axis?)
trace_specs = [
    ('bike_rides_daily', 'Daily bike rides', 'blue', False),
    ('avgTemp', 'Daily temperature', 'red', True),
]
for column, legend_name, line_colour, on_secondary in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=df_sampled['date'],
            y=df_sampled[column],
            name=legend_name,
            line={'color': line_colour},
        ),
        secondary_y=on_secondary,
    )

# Titles for the figure, the shared x-axis and both y-axes
fig.update_layout(
    template='plotly',
    title='Citi Bike Rides vs Temperatures New York 2022',
    xaxis_title='Date',
    yaxis_title='Bike Rides',
    yaxis2_title='Temperature (°F)',
)

fig.show()

In [None]:
# Save the sampled date / rides / temperature rows used for the line chart
df_sampled.to_csv(path_or_buf='daily_rides_vs_temperature.csv')

In [None]:
# Print whether "Citi_bike Trips Aggregated.html" exists (path is relative to the working directory)
html_exists = os.path.exists("Citi_bike Trips Aggregated.html")
print(html_exists)

### Reduce the row and column count

In [None]:
# List all column names currently in df
df.columns

In [None]:
# Create a copy of df without the columns listed below
columns_to_drop = [
    'ride_id', 'rideable_type', 'started_at', 'ended_at',
    'start_station_id', 'end_station_id',
    'start_lat', 'start_lng', 'end_lat', 'end_lng',
    'member_casual', '_merge', 'month',
]
df_1 = df.drop(columns=columns_to_drop)

In [None]:
# Check which columns remain in the reduced copy
df_1.columns

In [None]:
# Preview the first rows of the reduced copy
df_1.head()

### Create random split

In [None]:
# Creating random split: seed NumPy's global generator, draw one uniform
# number per row of df_1, and flag a row True when its draw is <= 0.92
np.random.seed(32)
row_draws = np.random.rand(len(df_1))
red = row_draws <= 0.92

In [None]:
# Keep the rows whose mask value is False
small = df_1.loc[~red]

In [None]:
# (rows, columns) of the mask-based subset
small.shape

In [None]:
# Total memory used by df_1 in bytes; deep=True also measures the contents of object columns
df_1.memory_usage(deep=True).sum()

In [None]:
# Rebind `small` to a 1% random sample of df_1; the fixed random_state makes it reproducible
small = df_1.sample(random_state=32, frac=0.01)

In [None]:
# (rows, columns) of the 1% sample
small.shape

In [None]:
# Total memory used by the sample in bytes; deep=True also measures the contents of object columns
small.memory_usage(deep=True).sum()

In [None]:
# Put the sampled rows in ascending date order
small = small.sort_values(by="date")

In [None]:
# Preview the first rows of the date-sorted sample
small.head()

In [None]:
# Save the reduced sample; index=False leaves the row index out of the file
small.to_csv(path_or_buf='reduced_data_to_plot_7.csv', index=False)

In [None]:
import seaborn as sns

#plt.rcParams['agg.path.chunksize'] = 101

# Matplotlib dual-axis chart of the reduced sample:
# bike rides on the left y-axis, average temperature on the right y-axis.
fig, ax = plt.subplots(figsize=(10, 5))

# Left axis: daily bike rides (ax.plot returns a list holding the single line handle)
line1 = ax.plot(
    small['date'],
    small['bike_rides_daily'],
    color="navy",
    label="Bike Rides Daily",
)[0]
ax.set_xlabel("Year 2022", fontsize=14)
ax.set_ylabel("Bike rides daily", color="navy", fontsize=14)

# Right axis, sharing the x-axis with `ax`: average temperature
ax2 = ax.twinx()
line2 = ax2.plot(
    small['date'],
    small["avgTemp"],
    color="red",
    label="Average Temperatures",
)[0]
ax2.set_ylabel("Average temperatures", color="red", fontsize=14)

# A single legend covering the lines from both axes
lines = [line1, line2]
labels = [handle.get_label() for handle in lines]
ax.legend(lines, labels, loc="upper left", fontsize=12)

plt.title("Temperature and trips in 2022", fontsize=18)
plt.show()

In [None]:
# Plotly dual-axis chart built from the reduced sample `small`:
# daily bike rides on the primary y-axis, average temperature on the secondary one.
fig_2 = make_subplots(specs=[[{"secondary_y": True}]])

# Each marker dict carries exactly one 'color' entry. A dict literal keeps only
# the last value given for a repeated key, so listing a data column and then a
# fixed colour under the same 'color' key silently discards the column.
fig_2.add_trace(
    go.Scatter(
        x=small['date'],
        y=small['bike_rides_daily'],
        name='Daily bike rides',
        marker={'color': 'blue'},
    ),
    secondary_y=False,
)

fig_2.add_trace(
    go.Scatter(
        x=small['date'],
        y=small['avgTemp'],
        name='Daily temperature',
        marker={'color': 'red'},
    ),
    secondary_y=True,
)

# update_layout stays the last expression so the notebook cell renders the figure
fig_2.update_layout(
    title='Daily bike trips and temperatures in New York 2022',
    height=600,
)
    
#st.plotly_chart(fig_2, use_container_width=True)