In [606]:
import numpy as np
import datetime as dt 
import csv
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [620]:
df = pd.read_csv('tbd_stats.csv')
attendance = pd.read_csv('sat_attendance.csv')
weather = pd.read_csv('weather.csv')
df['date'] = pd.to_datetime(df['date'])
attendance['date'] = pd.to_datetime(attendance['date'])
weather['date'] = pd.to_datetime(weather['date'])
weather = weather.loc[weather["date"].isin(df["date"])]

# weather = weather.merge(df, how='inner', on="date")


In [608]:
fig = px.line(
    df, y="date", x="num_riders", 
    markers=True, 
    title="TBD 2024 Ride Attendance"
)
fig.update_yaxes(
    fixedrange=False,
    dtick="M1",
    # tickformat="%b",
    range=[pd.to_datetime('2024-12-31'), pd.to_datetime('2024-01-01')]
)
fig.update_xaxes(dtick="2",range=[2,20])
fig.update_layout(
    title_x=0.5,
    width=450,
    height=800,
    margin=dict(l=5, r=5, t=35, b=5),  # left, right, top, bottom margins
    yaxis=dict(title=None),
    xaxis_title='Number of Riders',
)
fig.update_traces(connectgaps=True, textposition='top center')
fig.show()

In [609]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(x=df["date"], y=df["num_riders"], name="Number of Riders"),
)

fig.add_trace(
    go.Scatter(x=weather["date"], y=weather[" AvgTemperature"], name="Avg Temperature"),
    secondary_y=True,
)

fig.update_xaxes(
    fixedrange=False,
    dtick="M1",
    # tickformat="%b",
    range=[pd.to_datetime('2024-01-01'),pd.to_datetime('2024-12-31'), ]
)
fig.update_layout(
    title_x=0.5,
    title_text='TBD Ridership and Average Temperature',
    # width=450,
    # height=800,
    margin=dict(l=5, r=5, t=35, b=5),  # left, right, top, bottom margins
    xaxis=dict(title=None),
    yaxis_title='Number of Riders',
    showlegend=False
)
fig.update_traces(connectgaps=True, textposition='top center')
fig.show()

In [622]:
coffee_shops = df[['coffee_shop', 'route']]
coffee_shops.dropna(inplace=True)
shops, visit = np.unique(coffee_shops, return_counts=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [623]:
sunburst_coffee = coffee_shops.groupby(['coffee_shop','route']).size().reset_index().rename(columns={0:'count'})
fig = px.sunburst(sunburst_coffee, path=['route', 'coffee_shop', ], values='count')
fig.update_layout(
    width=450,
    height=800,
    margin=dict(l=5, r=5, t=5, b=5),  # left, right, top, bottom margins
)
fig.show()

## Attendance Dataframe Setup
Setting up two dataframes for rider leaderboard activity 
- attendance_leaderboard df for all riders
- attendance_regular df for riders who joined more than 3 times this year


In [612]:
result = [attendance.loc[i, :].values.tolist()[2:] for i in df.index]
# Cleaning 'nan' and unknown names (??) from data
arr = np.array(result)
arr = arr.flatten()
arr = arr[arr != 'nan']
arr = arr[arr != '??']
names, counts = np.unique(arr, return_counts=True)
# Creating dataframes
attendance_leaderboard = pd.DataFrame({'Name': names, 'Count': counts})
attendance_leaderboard = attendance_leaderboard.sort_values(['Count'], ascending=[False])
attendance_leaderboard = attendance_leaderboard.reset_index(drop=True)

In [613]:
# Calculating data for riding streak and hiatus streak

weekly_attendance = pd.Series(result)
best_streak = {}
longest_break = {}
for name in names:
    attended = 0
    absent = np.nan
    longest_streak = 0
    longest_hiatus = 0
    for week in weekly_attendance:
        week = pd.Series(week)
        if pd.isnull(week[0]): 
            continue
        elif week.str.contains(name, regex=False).any(): 
            attended += 1
            if attended > longest_streak: longest_streak = attended
            if not pd.isnull(absent) and absent > longest_hiatus: longest_hiatus = absent
            absent = 0
        else: 
            attended = 0
            absent += 1
    if absent > longest_hiatus: longest_hiatus = absent # Include current hiatus streak
    best_streak[name] = longest_streak
    longest_break[name] = longest_hiatus

In [614]:
# Adding streak column
streak_df = pd.DataFrame.from_dict(best_streak, orient='index', columns=['Streak'] )
streak_df = streak_df.sort_values(['Streak'], ascending=[False])
streak_df = streak_df.reset_index()
streak_df = streak_df.rename(columns={"index": "Name", "Streak":"Streak"})
attendance_leaderboard = attendance_leaderboard.merge(streak_df, on="Name", how='left')

In [615]:
# Adding hiatus column 
hiatus_df = pd.DataFrame.from_dict(longest_break, orient='index', columns=['Hiatus'] )
hiatus_df = hiatus_df.sort_values(['Hiatus'], ascending=[False])
hiatus_df = hiatus_df.reset_index()
hiatus_df = hiatus_df.rename(columns={"index": "Name", "Hiatus":"Hiatus"})
attendance_leaderboard = attendance_leaderboard.merge(hiatus_df, on="Name", how='left')


In [616]:
# Creating df for regular (ride count > 3)
attendance_regular = attendance_leaderboard[attendance_leaderboard.Count > 3]

## Rider Leaderboards

In [625]:
fig = px.bar(attendance_regular, y="Name", x="Count", title="Rider Attendance Leaderboard", text_auto=True)
fig.update_layout(
    title_x=0.5,
    width=450,
    height=800,
    margin=dict(l=5, r=5, t=50, b=5),  # left, right, top, bottom margins
    yaxis=dict(title=None, categoryorder='total ascending'),
    xaxis_title='Number of Rides',
    barmode='stack', 
)

fig.show()

In [627]:
fig = px.bar(attendance_regular, y="Name", x="Streak", title="Rider Streak Leaderboard", text_auto=True)
fig.update_layout(
    title_x=0.5,
    width=450,
    height=800,
    margin=dict(l=10, r=10, t=50, b=10),  # left, right, top, bottom margins
    yaxis=dict(title=None, categoryorder='total ascending'),
    xaxis_title='Longest Ride Streak',
    barmode='stack', 
)

fig.show()

In [629]:
fig = px.bar(attendance_regular, y="Name", x="Hiatus", title="Rider Hiatus Leaderboard", text_auto=True)
fig.update_layout(
    title_x=0.5,
    width=450,
    height=800,
    margin=dict(l=10, r=10, t=50, b=10),  # left, right, top, bottom margins
    yaxis=dict(title=None, categoryorder='total descending'),
    xaxis_title='Longest Ride Hiatus',
    barmode='stack', 
)

fig.show()