In [1]:
# Model the usage of MTB trails as a function of weather conditions
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import utils


In [2]:
# Load the inputs: per-segment metadata and the raw ride logs
md = utils.get_segment_metadata()
rl_ = utils.get_ridelogs()

# Trim junk: keep only the metadata columns this notebook refers to
md = md.loc[:, ['id', 'name', 'closest_ims']].copy()

data/ridelogs/segments-20201124.json
data/ridelogs/202012.json
data/ridelogs/202011.json


In [3]:
# Tabulate ridelog data: one row per date, one column per segment.
# pivot_table aggregates duplicate (date, segment) entries with its default
# aggfunc (mean).
rl2 = pd.pivot_table(rl_, index='date', values='effort_count', columns='segment_id')

# Use a real DatetimeIndex so the table can be resampled. Plain assignment
# instead of set_index(..., inplace=True) keeps the cell idempotent on re-run.
rl2.index = pd.DatetimeIndex(rl2.index.values)

# Resample daily, interpolate missing values, back-fill whatever is still
# missing at the start, then diff against the previous day.
# `.bfill()` replaces the deprecated `fillna(method='bfill')`.
# NOTE(review): the diff presumably turns a cumulative effort_count into
# rides-per-day -- confirm against utils.get_ridelogs.
daily = rl2.resample('1D').interpolate().bfill().diff()

In [4]:
# Prepare the ride table for joining with rainfall data.
# Steps, in order:
#   1. wide -> long: one row per (date, segment) with the daily ride delta
#   2. give the melted columns meaningful names
#   3. attach each segment's closest IMS station from the metadata
d5 = (
    daily
    .reset_index()
    .melt(id_vars='index', value_vars=daily.columns)
    .rename(columns={'index': 'date', 'value': 'rides'})
    .merge(md[['id', 'closest_ims']], how='left', left_on=['segment_id'], right_on=['id'])
)

# Fetch the rain measurements for the stations/dates present in d5
rain_days = utils.get_rain_days(d5)

42##2020/12/01
42##2020/11/30
42##2020/12/06
42##2020/12/05
42##2020/11/26
42##2020/11/25
42##2020/11/27
42##2020/11/26
42##2020/11/28
42##2020/11/27
42##2020/11/29
42##2020/11/28
42##2020/11/30
42##2020/11/29
42##2020/12/02
42##2020/12/01
42##2020/12/03
42##2020/12/02
42##2020/12/04
42##2020/12/03
42##2020/12/05
42##2020/12/04


In [25]:
# Add rain measurements: match every (station, date) row with its rainfall,
# then order chronologically so the per-segment windows below run forward in time.
data = (
    d5
    .merge(rain_days, how='left', on=['closest_ims', 'date'])
    .sort_values('date')
)

# Cumulative measures of rainfall, computed independently for every segment.
# `transform` (rather than `apply`) returns a result aligned to `data`'s own
# index; with pandas >= 2.0 `apply` prepends the group key to the index and the
# column assignment fails.
rain_by_segment = data.groupby('segment_id')['rain_mm']

# Rolling 7-row rainfall total; incomplete windows (and windows containing
# missing rain values) come out as NaN and are reported as 0.
data['rain_7d'] = rain_by_segment.transform(lambda x: x.rolling(7).sum()).fillna(0)

# NOTE(review): assumes utils.bathtub returns one value per input row -- confirm.
data['soil_moisture'] = rain_by_segment.transform(utils.bathtub)

# Keep a pristine copy so later cells can filter without losing the full table
df_orig = data.copy()

In [6]:
#data = df_orig.query("segment_id == '7774409'").copy()

In [7]:
#data.groupby('segment_id').sum().sort_values('rides')

In [21]:
def bathtub_set(data_, capacity, drainage):
    """Run the bathtub model over the rainfall column of ``data_``.

    Passes the raw ``rain_mm`` values to ``utils.bathtub_`` together with the
    given ``capacity`` and ``drainage`` and returns whatever that helper
    returns.
    """
    rainfall = data_['rain_mm'].values
    return utils.bathtub_(rainfall, capacity=capacity, drainage=drainage)

def best_bathtub(data_, capacities=range(0, 80, 4), drainages=range(0, 25)):
    """Grid-search the bathtub parameters that best track the ride counts.

    For every (capacity, drainage) pair, the series produced by
    ``bathtub_set`` is rank-correlated (Spearman, NaNs omitted) with
    ``data_['rides']``; the pair with the largest absolute correlation wins.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Observations with a ``rides`` column plus the ``rain_mm`` column that
        ``bathtub_set`` reads.
    capacities, drainages : iterable of numbers, optional
        Candidate values to scan. The defaults reproduce the original
        hard-coded grid (capacity 0..76 step 4, drainage 0..24).

    Returns
    -------
    pandas.Series
        ``capacity``, ``drainage``, ``corr`` and ``abscorr`` of the best pair.

    Raises
    ------
    ValueError
        If the grid is empty or no pair yields a defined correlation (e.g. the
        ride counts are constant), instead of failing obscurely in the lookup.
    """
    # Work on a private copy so the caller's frame is never touched.
    mydata = data_.copy()
    rides = mydata['rides'].values  # loop-invariant, look it up once
    drainages = list(drainages)     # re-used for every capacity
    out = []

    for c in capacities:
        for d in drainages:
            # TODO:
            # 1. downsample the observations with no moisture and many rides
            # 2. normalize the ride counts per day of week
            corr, _ = spearmanr(rides, bathtub_set(mydata, c, d), nan_policy='omit')
            out.append([c, d, corr])

    cdf = pd.DataFrame(out, columns=['capacity', 'drainage', 'corr'])
    if cdf['corr'].isna().all():
        raise ValueError('no (capacity, drainage) pair produced a defined correlation')

    cdf['abscorr'] = cdf['corr'].abs()
    # idxmax returns an index *label*, so the matching lookup is .loc, not .iloc
    return cdf.loc[cdf['abscorr'].idxmax()]

In [30]:
# Fit the best bathtub parameters separately for every segment.
# Iterates over `df_orig` (the full table saved after the rainfall features
# were added); the previous `df` was never defined in this notebook, so the
# cell failed on a fresh kernel.
out = []
for seg in df_orig['segment_id'].unique():
    seg_rows = df_orig[df_orig['segment_id'] == seg]
    res = best_bathtub(seg_rows).to_dict()
    res['segment_id'] = seg
    out.append(res)

In [36]:
# One row per segment holding its best-fitting bathtub parameters, plus the
# capacity/drainage ratio (inf wherever the best drainage is 0).
params = pd.DataFrame(out).assign(
    dfactor=lambda best: best['capacity'] / best['drainage']
)

# Attach segment names and stations; kept as the last expression so the
# merged table is displayed as the cell output.
params.merge(md, how='left', left_on='segment_id', right_on='id')

Unnamed: 0,capacity,drainage,corr,abscorr,segment_id,dfactor,id,name,closest_ims
0,8.0,3.0,-0.694668,0.694668,1248017,2.666667,1248017,העלייה מרון,44
1,8.0,5.0,-0.524641,0.524641,18952377,1.6,18952377,שימרי,44
2,16.0,0.0,0.467708,0.467708,3808938,inf,3808938,קפוצינו,205
3,4.0,0.0,-0.259224,0.259224,17443790,inf,17443790,Last Climb Carmel Cup,42
4,28.0,2.0,-0.568656,0.568656,4202076,14.0,4202076,פאמפ טראק ריש לקיש,186
5,8.0,5.0,-0.52094,0.52094,4267589,1.6,4267589,מה עכשיו טיפוס,44
6,16.0,3.0,-0.733348,0.733348,17421855,5.333333,17421855,ברמים בירידה,78
7,36.0,3.0,-0.365942,0.365942,5230474,12.0,5230474,יוסי 2,67
8,4.0,3.0,-0.664216,0.664216,7774409,1.333333,7774409,fun in the forest,67
9,16.0,3.0,-0.64002,0.64002,1717839,5.333333,1717839,סוללים צפון מערבה,186


In [None]:
# Scatter of capacity vs. drainage, with marker size taken from 'negcorr' and
# colour from 'abscorr'.
# NOTE(review): `cdf` only ever exists as a local variable inside
# best_bathtub(), and no 'negcorr' column is created anywhere in this notebook,
# so this cell raises NameError on a fresh kernel.
# TODO: expose the full correlation grid (and define negcorr) before plotting.
fig, ax = plt.subplots(figsize=(10,10))
sns.set_style('ticks')
sns.scatterplot(data=cdf, x='capacity', y='drainage', size='negcorr', hue='abscorr', ax=ax)


In [None]:
# Compare rides vs. modelled soil moisture for a few hand-picked
# (capacity, drainage) pairs, one panel per pair.
v = [[12, 7], [4, 3], [10, 2], [20, 4]]

fig, ax = plt.subplots(figsize=(10,40), nrows=len(v), ncols=1)

for vi, (capacity, drainage) in enumerate(v):
    # Recompute soil moisture per segment for this parameter pair. The original
    # call omitted the data argument and discarded the result, so every panel
    # would have shown the same pre-computed 'soil_moisture' column.
    # NOTE(review): assumes utils.bathtub_ returns one value per input row and
    # that `data` is already in date order within each segment -- confirm.
    moisture = data.groupby('segment_id')['rain_mm'].transform(
        lambda rain: bathtub_set(rain.to_frame('rain_mm'), capacity, drainage)
    )
    plot_data = data.assign(soil_moisture=moisture)

    sns.scatterplot(data=plot_data,
                    y='rides', x='soil_moisture',
                    hue='segment_id', marker='o',
                    ax=ax[vi]).set_title(f'c={capacity} d={drainage}')
