In [None]:
# Model the usage of MTB trails as a function of weather conditions
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import utils


In [None]:
# Load the raw inputs: per-segment metadata and the ride logs.
md = utils.get_segment_metadata()
rl_ = utils.get_ridelogs()

# Trim junk: keep only the metadata columns used further down.
md = md.loc[:, ['id', 'name', 'closest_ims']].copy()

In [None]:
# Tabulate ridelog data: one row per date, one column per segment_id.
rl2 = pd.pivot_table(rl_, index='date', values='effort_count', columns='segment_id')
# Going through `.values` drops the index name, so the later `reset_index()`
# yields a column literally called 'index' (the melt step relies on that).
rl2.index = pd.DatetimeIndex(rl2.index.values)

# Resample daily, interpolate missing values, back-fill whatever is still
# missing at the start, and diff against the previous day.
# `.bfill()` replaces the deprecated `fillna(method='bfill')`.
daily = rl2.resample('1D').interpolate().bfill().diff()
# TODO: clip values to non-negative
daily

In [None]:
# Normalize by day-of-week average.
# Reshape the wide daily table (one column per segment) into long form:
# one row per (date, segment_id) with the ride count in `rides`.
all_segs = daily.columns
d2 = daily.reset_index()
d3 = d2.melt(id_vars='index', value_vars=all_segs)
d4 = d3.rename(columns={'index': 'date', 'value': 'rides'})
d4['weekday'] = d4['date'].dt.weekday

# Average only the ride counts. Calling `.mean()` on the whole frame may,
# depending on the pandas version, also average the `date` column, which would
# then collide with `date` in the merge below (producing date_x / date_y).
by_dow = (
    d4.groupby(['segment_id', 'weekday'])[['rides']]
    .mean()
    .rename(columns={'rides': 'rides_dow'})
)
d5 = d4.merge(by_dow, how='left', on=['segment_id', 'weekday'])

# normalize (nrides = normalized rides)
# negative values might come up if Strava removes rides
# positive values which are too high are not useful for the analysis
# Assign the clipped result back: `d5['nrides'].clip(..., inplace=True)` works
# on a temporary Series and does not reliably update `d5` itself.
d5['nrides'] = (d5['rides'] / d5['rides_dow']).clip(lower=0, upper=2)
d5

In [None]:
# Sanity check: for every segment column, print the date on which the daily
# difference peaks, then display the peak value itself.
print(daily.idxmax(axis=0))
daily.max(axis=0)

In [None]:
# Attach each segment's closest IMS station (plus its id and name) to the
# normalized ride table; segments missing from the metadata keep NaN there.
d6 = d5.merge(
    md.loc[:, ['id', 'closest_ims', 'name']],
    how='left',
    left_on='segment_id',
    right_on='id',
)

# Rain measurements for the rows in d6 (lookup logic lives in utils).
rain_days = utils.get_rain_days(d6)

In [None]:
# Add rain measurements, matched on (station, date).
data = d6.merge(rain_days, how='left', on=['closest_ims', 'date'])

# Cumulative measures of rainfall, computed per segment in date order.
data = data.sort_values('date')
rain_by_segment = data.groupby('segment_id')['rain_mm']

# `transform` returns a result aligned with `data`'s own index. With `apply`,
# newer pandas prepends the group key to the index, so the assignment below
# would no longer line up with the frame.
# NOTE(review): rolling(7) defaults to min_periods=7, so any window containing
# a NaN `rain_mm` ends up as 0 after fillna - confirm this is intended if
# `rain_days` only lists days that actually had rain.
data['rain_7d'] = rain_by_segment.transform(lambda x: x.rolling(7).sum()).fillna(0)
# assumes utils.bathtub returns one value per input row - TODO confirm
data['soil_moisture'] = rain_by_segment.transform(utils.bathtub)
df_orig = data.copy()

In [None]:
#data = df_orig.query("segment_id == '7774409'").copy()

In [None]:
#data.groupby('segment_id').sum().sort_values('rides')
#df_orig

In [None]:
def bathtub_set(data_, capacity, drainage):
    """Run ``utils.bathtub_`` on the ``rain_mm`` column of ``data_``.

    Parameters
    ----------
    data_ : mapping/DataFrame with a ``rain_mm`` entry exposing ``.values``
    capacity, drainage : forwarded unchanged as keyword arguments
        (presumably the bathtub size and its per-step drain - see utils).

    Returns
    -------
    Whatever ``utils.bathtub_`` returns for the given rainfall values.
    """
    rain_mm = data_['rain_mm'].values
    return utils.bathtub_(rain_mm, capacity=capacity, drainage=drainage)

def best_bathtub(data_):
    """Grid-search the bathtub parameters that best track normalized rides.

    For every (capacity, drainage) pair on a fixed grid, the bathtub output for
    ``data_`` is rank-correlated (Spearman, NaNs omitted) with ``data_['nrides']``.
    The pair with the largest absolute correlation wins.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Needs an ``nrides`` column, plus whatever ``bathtub_set`` reads.

    Returns
    -------
    pandas.Series
        The winning grid row with entries ``capacity``, ``drainage``, ``corr``
        and ``abscorr``.

    Raises
    ------
    ValueError
        If no grid point produced a finite correlation (for example when
        ``nrides`` is entirely NaN), instead of failing later on a NaN index.
    """
    # Work on a private copy so nothing downstream can touch the caller's frame.
    mydata = data_.copy()
    nrides = mydata['nrides'].values  # loop-invariant, so computed once
    out = []

    for c in range(0, 80, 4):
        for d in range(0, 25):
            # TODO: downsample the observations with no moisture and many rides
            corr, _ = spearmanr(nrides, bathtub_set(mydata, c, d), nan_policy='omit')
            out.append([c, d, corr])

    cdf = pd.DataFrame(out, columns=['capacity', 'drainage', 'corr'])
    cdf['abscorr'] = cdf['corr'].abs()
    if cdf['abscorr'].isna().all():
        raise ValueError('no finite correlation for any (capacity, drainage) pair')
    # idxmax returns an index *label*, so look the row up with .loc, not .iloc.
    return cdf.loc[cdf['abscorr'].idxmax()]

In [None]:
# Fit the bathtub parameters separately for every segment and collect one
# record (best grid row + segment id) per segment.
df = df_orig.copy()
out = []
for seg in df['segment_id'].unique():
    mydata = df[df['segment_id'] == seg]
    res = {**best_bathtub(mydata).to_dict(), 'segment_id': seg}
    out.append(res)

In [None]:
# One row per segment: best-fit parameters, the capacity/drainage ratio,
# and the segment metadata joined on the segment id.
params = (
    pd.DataFrame(out)
    .assign(dfactor=lambda fitted: fitted['capacity'] / fitted['drainage'])
    .merge(md, how='left', left_on='segment_id', right_on='id')
)
params

In [None]:
# Set the seaborn style *before* creating the figure: axes styles are read
# when the axes are built, so setting it afterwards would not affect this plot.
sns.set_style('ticks')
fig, ax = plt.subplots(figsize=(10, 10))
sns.scatterplot(data=params, x='capacity', y='drainage', size='abscorr', hue='abscorr', ax=ax)
ax.set_title('Best-fit bathtub parameters per segment');


In [None]:

# One scatter panel per segment: normalized rides against the soil moisture
# produced by that segment's best-fit bathtub parameters.
names = df['segment_id'].unique()
# squeeze=False keeps the result indexable even when there is only one segment;
# taking the single column gives back a 1-D array of axes.
fig, ax = plt.subplots(figsize=(10, 60), nrows=len(names), ncols=1, squeeze=False)
ax = ax[:, 0]

for vi, seg in enumerate(names):
    mydata = df.query("segment_id == @seg").copy()
    # Look the parameters up by `segment_id`: the merged `id` column is NaN for
    # segments that are missing from the metadata.
    p = params.query("segment_id == @seg")
    capacity = p.iloc[0].capacity  # was misspelled `capactiy`, leaving `capacity` undefined
    drainage = p.iloc[0].drainage
    name = mydata.iloc[0]['name']
    mydata['soil_moisture'] = bathtub_set(mydata, capacity, drainage)
    sns.scatterplot(data=mydata,
                    y='nrides', x='soil_moisture',
                    hue='segment_id', marker='o',
                    ax=ax[vi]).set_title('%s c=%d d=%d' % (name, capacity, drainage))