In [None]:
# Model the usage of MTB trails as a function of weather conditions
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import spearmanr

In [None]:
# Load the training table and limit the usage ratio to the interval [0, 1].
data = pd.read_csv('data/tolearn.csv')
# Assign the clipped column back instead of calling clip(inplace=True) on the
# column selection: that chained in-place call works on a temporary object
# under pandas Copy-on-Write and would leave `data` unclipped.
data['relative_usage'] = data['relative_usage'].clip(lower=0, upper=1)

In [None]:
# Order rows by date so the per-segment rolling window below runs over rows
# in chronological order.
data.sort_values('date', inplace=True)

# Rainfall summed over a window of 7 consecutive rows within each segment
# (this equals 7 days only if there is one row per segment per day -- TODO confirm).
# Rows without a full window yield NaN from rolling() and are set to 0.
# transform() returns a result aligned to the original row index, so the
# assignment works regardless of how groupby.apply() treats group keys
# (pandas >= 2.0 prepends the group key to the index of apply() results).
data['rain_7d'] = (
    data.groupby('segment_id')['rain_mm']
    .transform(lambda x: x.rolling(7).sum())
    .fillna(0)
)

# Add accumulated rainfall using a bathtub model:
#  Daily new rainfall is added to the ground, up to the ground's capacity (any excess is lost to groundwater flow).
#  Additionally, the ground is drained at a constant rate per day.

def bathtub(v, capacity=10, drainage=3):
    """Accumulate rainfall into a running level using a bathtub model.

    For each value in ``v`` (in order) the new level is::

        max(0, min(capacity, previous_level + value) - drainage)

    i.e. the value is added to the previous level, the sum is capped at
    ``capacity``, ``drainage`` is subtracted, and the result is floored at 0.
    The level starts at 0 before the first value.

    Parameters
    ----------
    v : pandas.Series
        Rainfall values, iterated in their existing order.
    capacity : number, default 10
        Upper bound applied to the level before draining.
    drainage : number, default 3
        Amount removed from the level at every step.

    Returns
    -------
    pandas.Series
        Float series of levels, one per input value, carrying ``v``'s index.
    """
    levels = []
    level = 0
    for rain in v:
        # A missing reading counts as no new rain. Without this guard,
        # min(capacity, nan) evaluates to `capacity`, so a single NaN would
        # silently fill the tub to the brim.
        if pd.isna(rain):
            rain = 0
        level = max(0, min(capacity, level + rain) - drainage)
        levels.append(level)
    # Explicit dtype keeps an empty input from producing an object-typed series.
    return pd.Series(levels, index=v.index, dtype=float)

# Per-segment bathtub level with the default capacity and drainage.
# transform() hands each segment's rain_mm series to bathtub() and returns a
# result aligned to the original row index; groupby.apply() on pandas >= 2.0
# would prepend the group key to the index and break the column assignment.
data['soil_moisture'] = data.groupby('segment_id')['rain_mm'].transform(bathtub)


In [None]:
def bathtub_set(capacity, drainage):
    """Recompute ``data['soil_moisture']`` for the given bathtub parameters.

    Runs ``bathtub`` over ``rain_mm`` separately for every ``segment_id`` and
    overwrites the ``soil_moisture`` column of the module-level ``data`` frame
    in place. Returns nothing.

    Parameters
    ----------
    capacity : number
        Forwarded to ``bathtub`` as ``capacity``.
    drainage : number
        Forwarded to ``bathtub`` as ``drainage``.
    """
    # transform() keeps the result aligned with data's row index; apply() on
    # pandas >= 2.0 adds the group key as an index level, which makes the
    # column assignment fail.
    data['soil_moisture'] = data.groupby('segment_id')['rain_mm'].transform(
        bathtub, capacity=capacity, drainage=drainage
    )

In [None]:
# Grid search over the bathtub parameters: for every (capacity, drainage)
# pair, rebuild data['soil_moisture'] and record its Spearman rank
# correlation with relative_usage (NaN pairs are omitted by spearmanr).
out = []
grid = [(cap, drain) for cap in range(0, 80, 4) for drain in range(0, 25)]

for cap, drain in grid:
    bathtub_set(cap, drain)
    usage = data['relative_usage'].values
    moisture = data['soil_moisture'].values
    rho, _ = spearmanr(usage, moisture, nan_policy='omit')
    out.append([cap, drain, rho])

# One row per parameter pair.
cdf = pd.DataFrame(out, columns=['capacity', 'drainage', 'corr'])
        

In [None]:
# Derived columns for plotting: magnitude of the correlation (used for hue)
# and its negation (used for marker size).
cdf['abscorr'] = cdf['corr'].abs()
cdf['negcorr'] = -cdf['corr']

# Set the style BEFORE creating the figure: seaborn style settings only
# apply to axes created afterwards.
sns.set_style('ticks')
fig, ax = plt.subplots(figsize=(10,10))
sns.scatterplot(data=cdf, x='capacity', y='drainage', size='negcorr', hue='abscorr', ax=ax)
ax.set_title('Spearman correlation: relative_usage vs. soil_moisture');


In [None]:
# Hand-picked [capacity, drainage] pairs to inspect, one subplot each.
v = [[20,10], [30,13], [10, 2], [20, 0]]

fig, ax = plt.subplots(figsize=(10,40), nrows=len(v), ncols=1)

# Pair every subplot with its parameter set instead of indexing by position.
for axis, (capacity, drainage) in zip(ax, v):
    # Overwrites data['soil_moisture'] for this parameter pair before plotting.
    bathtub_set(capacity, drainage)
    sns.scatterplot(data=data,
                    y='relative_usage', x='soil_moisture',
                    hue='segment_id', marker='o',
                    ax=axis).set_title(f'c={capacity} d={drainage}')
