In [640]:
import numpy as np
import datetime as dt 
import csv
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [641]:
# Read the three CSV inputs: ride log, per-ride attendance sheet, weather export.
df = pd.read_csv('tbd_stats.csv')
attendance = pd.read_csv('sat_attendance.csv')
weather = pd.read_csv('weather.csv')

# Parse each table's 'date' column as datetimes so the tables can be matched on it.
for table in (df, attendance, weather):
    table['date'] = pd.to_datetime(table['date'])

# Keep only weather rows whose date also appears in the ride log.
ride_day_mask = weather["date"].isin(df["date"])
weather = weather.loc[ride_day_mask]

In [847]:
# Print mean then median, first for ride distance and then for rider turnout.
for column in ('miles', 'num_riders'):
    print(df[column].mean())
    print(df[column].median())

21.3075
19.7
9.95
9.5


# TBD Attendance
The attendance graph and animation

In [812]:
# Static line chart of riders per ride over 2024, exported as attendance.png.
# NOTE(review): df.iloc[1:-2] drops the first row and the last two rows of the
# ride log — presumably rows without usable attendance; confirm.
fig = px.line(
    df.iloc[1:-2], x="date", y="num_riders",
    markers=True,
    title="TBD 2024 Ride Attendance",
    # line_shape='spline'
)
# One tick per month, labelled with the abbreviated month name.
fig.update_xaxes(
    fixedrange=False,
    dtick="M1",
    tickformat="%b",
    range=[pd.to_datetime('2024-01-01'), pd.to_datetime('2024-12-31')],
    gridcolor='silver'
)
fig.update_yaxes(
    dtick="2",
    range=[2, 19],
    gridcolor='silver'
)
fig.update_layout(
    title_x=0.5,
    width=450,
    height=800,
    margin=dict(l=5, r=5, t=50, b=5),  # left, right, top, bottom margins
    # x carries dates (month ticks are self-explanatory); y carries the rider
    # count, so that is the axis that gets the 'Number of Riders' title.
    xaxis=dict(title=None),
    yaxis_title='Number of Riders',
    plot_bgcolor='#fffcf7',
    paper_bgcolor='#fffcf7',
    # template='simple_white',
)
fig.update_traces(
    connectgaps=True,  # draw through rides with a missing rider count
    line=dict(width=5),
)
fig.show()
fig.write_image('attendance.png', format='png')

In [743]:
def more_plots(num_riders, multiplier):
    """Linearly interpolate extra points between consecutive values.

    Every gap between two neighbouring values is divided into ``multiplier``
    equal steps. An input of length ``n`` (n >= 1) therefore produces
    ``(n - 1) * multiplier + 1`` values: each original value followed by
    ``multiplier - 1`` interpolated ones, with the final value appended
    unchanged. Elements only need to support ``-``, ``/ int`` and ``+``
    (this notebook also passes a list of dates through it).

    Args:
        num_riders: Indexable sequence of values to densify.
        multiplier: Number of steps per gap; ``1`` returns the values as-is.

    Returns:
        A new list with the interpolated values.

    Raises:
        ValueError: If ``multiplier`` is smaller than 1.
    """
    if multiplier < 1:
        raise ValueError(f"multiplier must be >= 1, got {multiplier!r}")
    if len(num_riders) == 0:
        # Nothing to interpolate; previously this hit IndexError on num_riders[-1].
        return []

    multiplied_riders = []
    for i in range(len(num_riders) - 1):
        start = num_riders[i]
        increment = (num_riders[i + 1] - start) / multiplier
        # extend() keeps this linear instead of rebuilding the list every gap.
        multiplied_riders.extend(start + j * increment for j in range(multiplier))
    multiplied_riders.append(num_riders[-1])  # Add last val
    return multiplied_riders

In [814]:
# Number of points per gap between rides; 1 keeps the data as recorded.
multiplier = 1
df_x2 = df[['date', 'had_ride', 'num_riders']].dropna()
new_num_riders = more_plots(list(df_x2['num_riders']), multiplier)
new_date = more_plots(list(df_x2['date']), multiplier)
# Repeat every row `multiplier` times so had_ride lines up with the
# interpolated values, then trim to the interpolated length.
# The old slice `.iloc[:(multiplier-1)*-1]` evaluated to `.iloc[:0]` when
# multiplier == 1, which discarded every row (and with it the had_ride values).
df_x2 = (
    pd.DataFrame(np.repeat(df_x2.values, multiplier, axis=0))
    .iloc[:len(new_num_riders)]
    .copy()
)
df_x2['num_riders'] = new_num_riders
df_x2['date'] = new_date
# Positional column 1 of the repeated values is had_ride (see column order above).
df_x2 = df_x2.rename(columns={1: "had_ride"})


In [836]:

NUM_WEEKS=52
RETAIL_COLORS = ['#BF3200']

# Build the long-form animation table: frame k contains every row of df_x2 up
# to and including row k, and is only created when row k had a ride.
# Slices are collected in a list and concatenated once (concat inside the loop
# re-copies the growing frame on every iteration).
frame_slices = []
for index in range(len(df_x2)):
    if df_x2.iloc[index].had_ride:
        df_slicing = df_x2.iloc[:index+1].copy()
        df_slicing['frame'] = index
        frame_slices.append(df_slicing)
df_indexed = pd.concat(frame_slices) if frame_slices else pd.DataFrame()

# Marker layer: each frame shows only the most recent ride as a dot.
scatter_plot = px.scatter(
    df_indexed,
    y='num_riders',
    x='date',
    animation_frame='frame',
    color_discrete_sequence=RETAIL_COLORS
)
for frame in scatter_plot.frames:
    for data in frame.data:
        data.update(mode='markers',
                    showlegend=True,
                    opacity=1)
        # Keep just the last point of the frame.
        data['x'] = np.take(data['x'], [-1])
        data['y'] = np.take(data['y'], [-1])

# Line layer: each frame shows the attendance curve up to that ride.
line_plot = px.line(
    df_indexed,
    y='num_riders',
    x='date',
    animation_frame='frame',
    color_discrete_sequence=RETAIL_COLORS,
    width=450,
    height=400,
    # line_shape='spline', # Make a line graph curvy
)
# Hide line plot legend to avoid duplication with scatter plot's legend
line_plot.update_traces(showlegend=False, connectgaps=True,)

for frame in line_plot.frames:
    for data in frame.data:
        data.update(mode='lines', opacity=0.8, showlegend=False)

# Merge both layers frame by frame; layout (size, play button) comes from the line figure.
combined_plot = go.Figure(
    data=line_plot.data + scatter_plot.data,
    frames=[
        go.Frame(data=line_frame.data + scatter_frame.data, name=scatter_frame.name)
        for line_frame, scatter_frame in zip(line_plot.frames, scatter_plot.frames)
    ],
    layout=line_plot.layout
)

combined_plot.update_xaxes(
    fixedrange=False,
    dtick="M1",
    tickformat="%b",
    range=[pd.to_datetime('2024-01-01'), pd.to_datetime('2024-12-31')],
    gridcolor='silver'
)
combined_plot.update_yaxes(dtick="2",range=[0,20],gridcolor='silver')
combined_plot.update_traces(
    line=dict(width=5),
    marker=dict(size=20)) # Play with marker size and line width
combined_plot.update_layout(
    margin=dict(l=5, r=5, t=50, b=5),  # left, right, top, bottom margins
    # x is the date axis; the rider count lives on y, so the title belongs there.
    xaxis=dict(title=None),
    yaxis_title='Number of Riders',
    plot_bgcolor='#fffcf7',
    paper_bgcolor='#fffcf7',
)
# Remove the frame slider; only the play/pause buttons remain.
combined_plot['layout'].pop("sliders")
# Speed up playback by patching the arguments of the first (play) button.
combined_plot.layout.updatemenus[0].buttons[0]['args'][1]['frame']['duration'] = 150
combined_plot.layout.updatemenus[0].buttons[0]['args'][1]['transition']['duration'] = 20
# NOTE(review): Plotly's animation options document `redraw` under `frame`,
# not `transition`, so this assignment may have no effect — confirm intent.
combined_plot.layout.updatemenus[0].buttons[0]['args'][1]['transition']['redraw'] = False
combined_plot.show()


In [837]:
# Render one PNG per animation frame into attendance_animation/<frame>.png.
# The highest frame id is the 'frame' value of the last row of df_indexed
# (the old `iloc[[0.-1]]` indexed with the float -1.0 and then called int() on
# a one-element Series, which pandas has deprecated).
num_frames = int(df_indexed['frame'].iloc[-1])
for frame in range(num_frames + 1):
    frame_rows = df_indexed.loc[df_indexed['frame'] == frame]
    if frame_rows.empty:
        # Frame ids are only created for rows that had a ride, so gaps are
        # expected; skip them explicitly instead of via a bare `except`.
        continue
    fig = px.line(
        frame_rows,
        x="date", y="num_riders",
        markers=False,
        # line_shape='spline',
        color_discrete_sequence=['#BF3200']
    )
    # Highlight the most recent ride with a single marker at the end of the line.
    fig.add_scatter(x = [fig.data[0].x[-1]], y = [fig.data[0].y[-1]],
                        mode = 'markers',
                        marker = {'size':15, 'color':'#BF3200'},
                        showlegend = False,)
    fig.update_xaxes(
        fixedrange=False,
        dtick="M1",
        tickformat="%b",
        # gridcolor='silver',
        range=[pd.to_datetime('2024-01-01'),pd.to_datetime('2024-12-31')]
    )
    fig.update_yaxes(dtick="2",range=[2.1,19],gridcolor='silver',)
    fig.update_layout(
        width=450,
        height=400,
        margin=dict(l=5, r=5, t=50, b=5),  # left, right, top, bottom margins
        xaxis=dict(title=None),
        yaxis_title='Number of Riders',
        plot_bgcolor='#fffcf7',
        paper_bgcolor='#fffcf7',
    )
    fig.update_traces(
        connectgaps=True,
        line=dict(width=5),
    )
    # fig.show()
    # NOTE(review): assumes the attendance_animation/ directory already exists.
    fig.write_image(f'attendance_animation/{frame}.png', format='png')




Calling int on a single element Series is deprecated and will raise a TypeError in the future. Use int(ser.iloc[0]) instead



In [838]:
import imageio as iio
# Stitch the exported PNGs into a GIF, one image per distinct frame id.
# df_indexed.frame holds one entry per *row*, so frame k appears k+1 times;
# unique() (order of first appearance) loads each PNG exactly once instead of
# stacking hundreds of duplicate images in memory.
frame_idx = df_indexed.frame.unique()
frames = np.stack([iio.imread(f"attendance_animation/{x}.png") for x in frame_idx], axis=0)
# NOTE(review): the unit of `duration` (seconds vs. milliseconds per frame)
# depends on the installed imageio version — confirm the playback speed.
iio.mimsave("tbd_attendance.gif", frames, format = 'GIF', duration=5)





# Temperature: no correlation with attendance

In [859]:
# Dual-axis chart: riders on the primary y-axis, temperatures on the secondary one.
fig = make_subplots(specs=[[{"secondary_y": True}]])

ridership_trace = go.Scatter(x=df["date"], y=df["num_riders"], name="Number of Riders")
fig.add_trace(ridership_trace, secondary_y=False)

# (weather column, legend label) pairs; the column names start with a space.
temperature_series = (
    (" MaxTemperature", "Max Temperature"),
    (" AvgTemperature", "Avg Temperature"),
    (" MinTemperature", "Min Temperature"),
)
for column, label in temperature_series:
    fig.add_trace(
        go.Scatter(x=weather["date"], y=weather[column], name=label),
        secondary_y=True,
    )

year_start = pd.to_datetime('2024-01-01')
year_end = pd.to_datetime('2024-12-31')
fig.update_xaxes(
    fixedrange=False,
    dtick="M1",
    # tickformat="%b",
    range=[year_start, year_end]
)
fig.update_layout(
    title_x=0.5,
    # title_text='TBD Ridership and Average Temperature',
    # width=450,
    # height=800,
    margin=dict(l=5, r=5, t=35, b=5),  # left, right, top, bottom margins
    xaxis=dict(title=None),
    yaxis_title='Number of Riders',
    showlegend=False
)
fig.update_traces(connectgaps=True)
fig.show()
fig.write_image(f'ridership_vs_temp.png', format='png')

## Ride with warmest and coldest temperature

Warmest ride: 89.5F

Coldest ride: 35F

# Sunburst Distribution of Routes and Coffee Shops

In [649]:
# Coffee shop / route pairs for rides where both are recorded.
# dropna() is chained on the selection so a new frame is created; calling
# dropna(inplace=True) on the column slice triggers SettingWithCopyWarning.
coffee_shops = df[['coffee_shop', 'route']].dropna()
# NOTE(review): np.unique flattens both columns, so `shops`/`visit` mix coffee
# shop names and route names in one tally — confirm that is intended.
shops, visit = np.unique(coffee_shops, return_counts=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [835]:
# Tally rides per (coffee shop, route) pair and draw them as a sunburst with
# routes on the inner ring and coffee shops on the outer ring.
pair_counts = coffee_shops.groupby(['coffee_shop','route']).size()
sunburst_coffee = pair_counts.reset_index(name='count')

fig = px.sunburst(sunburst_coffee, path=['route', 'coffee_shop'], values='count')
transparent = 'rgba(0,0,0,0)'
fig.update_layout(
    width=450,
    height=800,
    margin=dict(l=5, r=5, t=100, b=5),  # left, right, top, bottom margins
    paper_bgcolor=transparent,
    plot_bgcolor=transparent,
)
fig.update_traces(textinfo='label')
fig.show()
fig.write_image(f'coffee_distribution.png', format='png')


# Attendance Dataframe Setup
Set up two dataframes for the rider leaderboards:
- `attendance_leaderboard`: all riders
- `attendance_regular`: riders who joined more than 5 rides this year


In [651]:
# Per ride: the attendance row without its first two columns, i.e. the rider cells.
# Rows are looked up in `attendance` by the index labels of `df`.
result = [attendance.loc[row_label, :].values.tolist()[2:] for row_label in df.index]

# Flatten to one array of cells, then discard empty cells ('nan') and unknown names ('??').
rider_cells = np.array(result).flatten()
is_known_name = (rider_cells != 'nan') & (rider_cells != '??')
arr = rider_cells[is_known_name]
names, counts = np.unique(arr, return_counts=True)

# Leaderboard: one row per rider, most rides first.
attendance_leaderboard = (
    pd.DataFrame({'Name': names, 'Count': counts})
    .sort_values(['Count'], ascending=[False])
    .reset_index(drop=True)
)

In [652]:
# Calculating data for riding streak and hiatus streak
#   best_streak[name]   -> longest run of consecutive rides attended
#   longest_break[name] -> longest run of consecutive rides missed, counted
#                          only after the rider's first attended ride

weekly_attendance = pd.Series(result)

# One set of rider names per week that had a ride; weeks whose first rider
# cell is null are skipped. Built once instead of once per rider, and
# membership is an exact name match: the previous `str.contains` also matched
# substrings, crediting a rider with rides of anyone whose name contains theirs.
# Cells are converted with str() so they compare equal to the entries of `names`.
ride_weeks = [
    {str(cell) for cell in week}
    for week in weekly_attendance
    if len(week) > 0 and not pd.isnull(week[0])
]

best_streak = {}
longest_break = {}
for name in names:
    attended = 0
    absent = np.nan  # stays NaN until the first attended ride, so earlier weeks don't count as hiatus
    longest_streak = 0
    longest_hiatus = 0
    for riders in ride_weeks:
        if name in riders:
            attended += 1
            if attended > longest_streak: longest_streak = attended
            if not pd.isnull(absent) and absent > longest_hiatus: longest_hiatus = absent
            absent = 0
        else:
            attended = 0
            absent += 1
    if absent > longest_hiatus: longest_hiatus = absent # Include current hiatus streak
    best_streak[name] = longest_streak
    longest_break[name] = longest_hiatus

In [653]:
# Adding streak column
# One row per rider with their best streak, longest first.
streak_df = (
    pd.DataFrame.from_dict(best_streak, orient='index', columns=['Streak'])
    .sort_values(['Streak'], ascending=[False])
    .reset_index()
    .rename(columns={"index": "Name"})
)
# Drop a 'Streak' column left over from an earlier run of this cell; otherwise
# re-running produces Streak_x / Streak_y and the charts can't find 'Streak'.
attendance_leaderboard = (
    attendance_leaderboard
    .drop(columns=['Streak'], errors='ignore')
    .merge(streak_df, on="Name", how='left')
)

In [654]:
# Adding hiatus column
# One row per rider with their longest hiatus, longest first.
hiatus_df = (
    pd.DataFrame.from_dict(longest_break, orient='index', columns=['Hiatus'])
    .sort_values(['Hiatus'], ascending=[False])
    .reset_index()
    .rename(columns={"index": "Name"})
)
# Drop a 'Hiatus' column left over from an earlier run of this cell; otherwise
# re-running produces Hiatus_x / Hiatus_y and the charts can't find 'Hiatus'.
attendance_leaderboard = (
    attendance_leaderboard
    .drop(columns=['Hiatus'], errors='ignore')
    .merge(hiatus_df, on="Name", how='left')
)


In [851]:
# Creating df for regular riders (ride count > 5); this subset feeds the
# leaderboard charts and the per-rider sunbursts.
attendance_regular = attendance_leaderboard[attendance_leaderboard.Count > 5]

# Rider Leaderboards
- Rider Attendance
- Rider Streak
- Rider Hiatus Streak

In [None]:
# Horizontal bars: one per regular rider, length = number of rides attended.
transparent = 'rgba(0,0,0,0)'
leaderboard_layout = dict(
    title_x=0.5,
    width=600,
    height=800,
    margin=dict(l=5, r=5, t=50, b=5),  # left, right, top, bottom margins
    yaxis=dict(title=None, categoryorder='total ascending'),
    xaxis_title='Number of Rides',
    barmode='stack',
    paper_bgcolor=transparent,
    plot_bgcolor=transparent,
)

fig = px.bar(
    attendance_regular,
    x="Count", y="Name",
    title="Rider Attendance Leaderboard",
    text_auto=True,
)
fig.update_layout(**leaderboard_layout)

fig.show()
fig.write_image(f'attendance_leaderboard.png', format='png')

In [853]:
# Rider Streak Leaderboard
# Horizontal bars: one per regular rider, length = best streak.
fig = px.bar(attendance_regular, x="Streak", y="Name", text_auto=True)

streak_margin = dict(l=10, r=10, t=50, b=10)  # left, right, top, bottom margins
streak_yaxis = dict(title=None, categoryorder='total ascending')
fig.update_layout(
    title_x=0.5,
    width=600,
    height=800,
    margin=streak_margin,
    yaxis=streak_yaxis,
    xaxis_title='Ride Streak Length',
    barmode='stack',
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()
fig.write_image(f'streak_leaderboard.png', format='png')


In [855]:
# Shortest Rider Hiatus
# Horizontal bars: one per regular rider, length = longest hiatus; categories
# are ordered by 'total descending'.
hiatus_layout = {
    'title_x': 0.5,
    'width': 600,
    'height': 800,
    'margin': dict(l=10, r=10, t=50, b=10),  # left, right, top, bottom margins
    'yaxis': dict(title=None, categoryorder='total descending'),
    'xaxis_title': 'Ride Hiatus Length',
    'barmode': 'stack',
    'paper_bgcolor': 'rgba(0,0,0,0)',
    'plot_bgcolor': 'rgba(0,0,0,0)',
}
fig = px.bar(attendance_regular, y="Name", x="Hiatus", text_auto=True)
fig.update_layout(**hiatus_layout)

fig.show()
fig.write_image(f'hiatus_leaderboard.png', format='png')


In [659]:
# Attendance sheet joined with each ride's coffee shop and route (matched on date).
ride_details = df[['date', 'coffee_shop', 'route']]
attendance_by_rider = (
    attendance
    .drop(columns='had_ride')
    .set_index('date')
    .merge(ride_details, how='inner', on='date')
)
# Keep only rows where 'Unnamed: 2' is filled in.
# NOTE(review): presumably the first rider-name column, i.e. rides with at
# least one recorded rider — verify against the CSV header.
has_first_cell = attendance_by_rider['Unnamed: 2'].notna()
attendance_by_rider = attendance_by_rider[has_first_cell]
attendance_dict = attendance_by_rider.to_dict('split')

In [660]:
# Boolean attendance matrix: one row per ride, one column per rider with more
# than two rides; True where the rider's name appears in that ride's row.
frequent_names = attendance_leaderboard.loc[attendance_leaderboard.Count > 2, 'Name']
ride_rows = [row.values for _, row in attendance_by_rider.iterrows()]

attendance_by_rider_dict = {
    name: [name in row_values for row_values in ride_rows]
    for name in frequent_names
}

regular_rider_attendance = pd.DataFrame(attendance_by_rider_dict)
# Index the matrix by ride date (assigned positionally from attendance_by_rider).
regular_rider_attendance = regular_rider_attendance.set_index(attendance_by_rider['date'])

In [842]:
# Sanity check: (rows = rides kept in attendance_by_rider, columns = riders with Count > 2).
# The hard-coded new_rider flag list in the "New Riders" cell must have one entry per column.
regular_rider_attendance.shape

(40, 36)

## Individual Sunbursts

In [661]:
# Per regular rider: sunburst of the routes and coffee shops of the rides they attended.
# Showing/saving is currently disabled, so the figures are built but not output.
ride_details = df[['date', 'coffee_shop', 'route']]
for name in attendance_regular['Name']:
    attended = regular_rider_attendance[[name]].merge(ride_details, how='inner', on='date')
    rider_dist = attended.loc[attended[name] == True]
    rider_coffee = (
        rider_dist
        .groupby(['coffee_shop', 'route'])
        .size()
        .reset_index(name='count')
    )
    # Constant column so the rider's name becomes the root ring of the sunburst.
    rider_coffee['name'] = name
    fig = px.sunburst(rider_coffee, path=['name', 'route', 'coffee_shop'], values='count')

    fig.update_traces(insidetextorientation='horizontal')
    fig.update_layout(
        width=450,
        height=800,
        margin=dict(l=5, r=5, t=5, b=5),  # left, right, top, bottom margins
    )
    # fig.show()
    # fig.write_image(f'rider_sunbursts/{name}_ride_sunbursts.png', format='png')


## New Riders

In [662]:
# Hand-maintained flags, positionally aligned with regular_rider_attendance.columns:
# True marks a rider treated as new this year.
new_rider=[False, False, False, False, False, True, False, True, True, False, True, True, False, True, False, False, False, True, False, True, False, False, True, True, True, True, True, True, False, True, False, False, False, True, True, False]

# Date of each rider's first True entry in the attendance matrix.
first_attended = regular_rider_attendance.idxmax()
first_dates = (
    pd.DataFrame(first_attended)
    .reset_index()
    .rename(columns={"index": "name", 0: "date"})
)
rider_flags = pd.DataFrame(data={'name': regular_rider_attendance.columns, 'new_rider': new_rider})
first_ride = first_dates.merge(rider_flags, on='name')

# Keep only new riders and attach the ride details of their first ride.
first_ride = first_ride[first_ride['new_rider'] == True]
first_ride = first_ride.merge(df)

# Number of new riders per first-ride date.
first_ride_count = first_ride.groupby(['date']).size().reset_index(name='count')

In [663]:
# Pie chart of which routes new riders took on their first ride.
route_counts = first_ride.groupby(['route']).size()
first_ride_routes = route_counts.reset_index(name='count')

fig = px.pie(first_ride_routes, names='route', values='count')
pie_layout = dict(
    title_x=0.5,
    width=450,
    height=800,
    margin=dict(l=5, r=5, t=35, b=5),  # left, right, top, bottom margins
    yaxis=dict(title=None),
    xaxis_title='Number of Riders',
    showlegend=False,
)
fig.update_layout(**pie_layout)
fig.update_traces(textposition='inside', textinfo='value+label')
fig.show()
fig.write_image('new_rider_route_distribution.png', format='png')


In [664]:
# Bar chart of new riders per first-ride date; dates run down the y-axis
# (the range is given end-of-year first, so January is at the top).
fig = px.bar(
    first_ride_count,
    x='count', y='date',
    title="TBD 2024 New Attendees",
)
# fig.add_trace(go.Scatter(x=df["num_riders"], y=df["date"], mode='lines'))

year_end = pd.to_datetime('2024-12-31')
year_start = pd.to_datetime('2024-01-01')
fig.update_yaxes(
    fixedrange=False,
    dtick="M1",
    # tickformat="%b",
    range=[year_end, year_start]
)

new_attendee_layout = dict(
    title_x=0.5,
    width=450,
    height=800,
    margin=dict(l=5, r=5, t=35, b=5),  # left, right, top, bottom margins
    yaxis=dict(title=None),
    xaxis_title='Number of Riders',
    showlegend=False,
)
fig.update_layout(**new_attendee_layout)
fig.show()

In [665]:
# SUMPRODUCT: Total miles * num_riders i.e. "Total TBD miles this year"
# Per-ride rider-miles (distance times riders), summed over all rides.
rider_miles_per_ride = df['miles'] * df['num_riders']
rider_miles_per_ride.sum()

np.float64(8467.6)