# Creating a Dashboard with Python

## Import Libraries

In [1]:
# Import libraries
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

## Wrangle Data

In [3]:
# Import data
# Read the prepared CSV from a path relative to the notebook's working directory;
# index_col = 0 makes the file's first column the DataFrame index instead of a data column.
df = pd.read_csv('Data/Prepared Data/NY_merged_agg.csv', index_col = 0)

In [4]:
# Checking data types
# The output below shows `date`, `start_time` and `end_time` loaded as object columns,
# so `date` has to be parsed before any datetime accessor can be used on it.
df.dtypes

ride_id                object
rideable_type          object
start_time             object
end_time               object
start_station_name     object
start_station_id       object
end_station_name       object
end_station_id         object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
date                   object
avgTemp_C             float64
avgTemp_F             float64
daily_rides             int64
_merge                 object
dtype: object

In [5]:
# Creating a month column
# Parse the 'YYYY-MM-DD' strings into datetimes, then derive the calendar month
# number from them and store it as a plain integer column.
df['date'] = pd.to_datetime(df['date'], format = '%Y-%m-%d')
df['month'] = df['date'].dt.month.astype('int')

In [6]:
# Creating a season column
# Month -> season lookup, applied with a vectorized Series.map rather than a
# Python-level loop over every row of the dataframe.
# NOTE(review): the boundaries below reproduce the existing rule exactly
# (winter = Dec-Apr, spring = May only, summer = Jun-Sep, fall = Oct-Nov).
# April being labelled "winter" looks unusual - confirm this is intentional.
season_by_month = {
    12: "winter", 1: "winter", 2: "winter", 3: "winter", 4: "winter",
    5: "spring",
    6: "summer", 7: "summer", 8: "summer", 9: "summer",
    10: "fall", 11: "fall",
}
df['season'] = df['month'].map(season_by_month)

In [7]:
# Checking dataframe
# Preview the first rows to confirm the new `month` and `season` columns are present
df.head()

Unnamed: 0,ride_id,rideable_type,start_time,end_time,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,date,avgTemp_C,avgTemp_F,daily_rides,_merge,month,season
0,115C78C3039FFA89,electric_bike,2022-01-01 09:21:14,2022-01-01 09:35:46,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,2022-01-01,11.6,52.88,592,both,1,winter
1,7FFD810CAA7A919E,classic_bike,2022-01-01 02:43:56,2022-01-01 02:43:57,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-01,11.6,52.88,592,both,1,winter
2,E715E8432031B72C,classic_bike,2022-01-01 02:13:33,2022-01-01 02:18:42,Essex Light Rail,JC038,Washington St,JC098,40.712774,-74.036486,40.724294,-74.035483,member,2022-01-01,11.6,52.88,592,both,1,winter
3,BF1B7B1E1961A87B,electric_bike,2022-01-01 17:18:46,2022-01-01 18:55:25,Grand St,JC102,W 27 St & 7 Ave,6247.06,40.715178,-74.037683,40.746647,-73.993915,casual,2022-01-01,11.6,52.88,592,both,1,winter
4,4A01F0E53C6F4386,electric_bike,2022-01-01 11:23:32,2022-01-01 11:29:27,Christ Hospital,JC034,Hoboken Terminal - Hudson St & Hudson Pl,HB101,40.734786,-74.050444,40.735938,-74.030305,member,2022-01-01,11.6,52.88,592,both,1,winter


## Bar Charts with Plotly

In [8]:
# Count rows per start station and keep the 20 stations with the highest counts
df['value'] = 1  # constant 1 per row, so summing it per group yields a row count
df_group = df.groupby('start_station_name', as_index=False)['value'].sum()
top20 = df_group.nlargest(n=20, columns='value')

In [None]:
# Creating bar chart of top 20 stations
# One bar per station; each bar is coloured by its own count using the
# reversed viridis colour scale.
top20_bars = go.Bar(
    x = top20['start_station_name'],
    y = top20['value'],
    marker = dict(color = top20['value'], colorscale = 'viridis_r'),
)
fig = go.Figure(top20_bars)
fig.show()

In [None]:
# Adding details to bar chart
# Collect the title, axis labels and figure size first, then apply them in one call.
# update_layout stays the last expression of the cell so the updated figure is displayed.
bar_layout = dict(
    title = 'Top 20 Most Popular Bike Stations in New York',
    xaxis_title = 'Start stations',
    yaxis_title = 'Total trips',
    width = 900,
    height = 600,
)
fig.update_layout(**bar_layout)

In [18]:
# Exporting top20 dataframe to .csv
# Written to the current working directory. With to_csv's default settings the
# DataFrame index is saved too, as an unnamed first column.
top20.to_csv('top20.csv')

## Line Chart in Plotly

In [None]:
# Creating line chart
# Dual-axis figure: ride counts on the primary (left) y-axis and temperature
# on the secondary (right) y-axis, both plotted against the date.
# NOTE(review): both traces are built from every row of `df`. If `daily_rides`
# and `avgTemp_F` are constant within a date (the preview rows suggest so),
# plotting one row per date would give the same picture with far fewer points - confirm.

fig_2 = make_subplots(specs = [[{"secondary_y": True}]])

# Each trace uses one fixed colour. A dict literal keeps only the last value of
# a repeated key, so 'color' is given exactly once per marker.
fig_2.add_trace(
    go.Scatter(x = df['date'], y = df['daily_rides'], name = 'Daily Bike Rides',
               marker = {'color': 'navy'}),
    secondary_y = False
)

fig_2.add_trace(
    go.Scatter(x = df['date'], y = df['avgTemp_F'], name = 'Daily Temperature',
               marker = {'color': 'red'}),
    secondary_y = True
)

In [None]:
# Adding details to line chart
# Axis titles and tick labels are coloured to match their trace (navy = rides,
# red = temperature). The title colour is set through the nested
# `title = dict(text=..., font=...)` form, because the older `titlefont`
# shorthand is deprecated and no longer accepted by recent Plotly releases.
fig_2.update_layout(
    title = 'Daily Bike Trips and Temperature in New York 2022',
    xaxis_title = 'Month',
    yaxis=dict(
        title=dict(text="Total Rides", font=dict(color="navy")),
        tickfont=dict(color="navy")
    ),
    yaxis2=dict(
        title=dict(text="Average Temperature (\u00B0F)", font=dict(color="red")),
        tickfont=dict(color="red"),
        overlaying="y",   # draw the second axis on top of the first one
        side="right"
    ),
    width = 1100, height = 400
)

## Reducing Data

In [25]:
# Checking columns in dataframe
# Lists every column currently in `df`, including the `month`, `season` and
# `value` columns added by earlier cells, to decide which ones to drop.
df.columns

Index(['ride_id', 'rideable_type', 'start_time', 'end_time',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'avgTemp_C', 'avgTemp_F', 'daily_rides',
       '_merge', 'month', 'season', 'value'],
      dtype='object')

In [26]:
# Removing columns to keep only those necessary for dashboard
# drop() returns a new dataframe, so `df` itself keeps all of its columns.
columns_not_needed = [
    'ride_id', 'rideable_type',
    'start_time', 'end_time',
    'start_station_id', 'end_station_id',
    'start_lat', 'start_lng', 'end_lat', 'end_lng',
    'member_casual', 'avgTemp_C', '_merge',
]
df_1 = df.drop(columns = columns_not_needed)

In [27]:
# Checking columns of new dataframe
# Confirms that only the columns kept for the dashboard remain in `df_1`
df_1.columns

Index(['start_station_name', 'end_station_name', 'date', 'avgTemp_F',
       'daily_rides', 'month', 'season', 'value'],
      dtype='object')

In [28]:
# Exporting reduced dataset
# Written to the current working directory. With to_csv's default settings the
# DataFrame index is saved too, as an unnamed first column.
df_1.to_csv('reduced_data.csv')