In [None]:
# Visualizations of mtb ridelogs
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import utils
from datetime import date, timedelta

In [None]:
# Load the segment metadata and keep only the rows whose `active_html` flag is set
md = utils.get_segment_metadata()
md = md.loc[md['active_html']]

# Ride logs, as returned by the project helper
rl_ = utils.get_ridelogs()

# Independent copy of the descriptive metadata columns, used for the merges below
meta_columns = ['id', 'name', 'distance', 'region_name', 'region_url', 'closest_ims']
md_meta = md[meta_columns].copy()

In [None]:
d5 = rl_.copy()

# Attach the segment metadata (including the closest IMS station) to every ride row;
# the right join keeps every metadata row, even one without matching ride logs
d6 = d5.merge(md_meta, how='right', left_on='segment_id', right_on='id')
weather_days = utils.get_weather_days(d6)
# Join the weather measurements onto the rides by station and day
station_day = ['closest_ims', 'date']
d7 = d6.merge(weather_days, how='left', on=station_day)

In [None]:
# Working copy of the merged rides + weather table; later cells read and modify `mydf`
mydf = d7.copy()


In [None]:
# Show the identifying columns of the segments whose `active_modeling` flag is True
modeled_segments = md.query("active_modeling == True")
modeled_segments[['name', 'id', 'closest_ims']]

In [None]:
# Two stacked panels covering the last 12 weeks: `nrides_raw` on top, `rides` below.
today = date.today()
epoch = today - timedelta(weeks=12)
fig, ax = plt.subplots(2, 1, figsize=(8,10))

for panel, column in zip(ax, ['nrides_raw', 'rides']):
    # wide-form input: the date index is the x axis and the single column is the line
    recent = mydf[['date', column]].query("date >= @epoch").set_index('date')
    sns.lineplot(data=recent, ci='sd', ax=panel)
    panel.tick_params(labelrotation=-45)

fig.tight_layout(pad=1)

In [None]:
# Rows dated after two days ago, ordered by region
epoch = today - timedelta(days=2)
recent_columns = ['date', 'rides', 'nrides_raw', 'region_name', 'name']
mydf.query("date > @epoch")[recent_columns].sort_values('region_name')

In [None]:
def rtl(x):
    """Return `x` reversed (used to flip right-to-left text before plotting).

    Any value that supports slicing (str, list, tuple, ...) is reversed with
    `x[::-1]`. Values that cannot be sliced -- notably the NaN/None that
    pandas uses for missing entries -- are returned unchanged, so the
    function can be mapped over a column that contains gaps.
    """
    try:
        return x[::-1]
    except TypeError:
        # not subscriptable (e.g. float NaN, None): nothing to reverse
        return x

# Rides per region over the last 40 days, one line per region.
a4_dims = (11.7, 8.27)
fig, ax = plt.subplots(figsize=a4_dims)
epoch = today - timedelta(days=40)
sns.set_context('poster', font_scale=0.7,  rc={"lines.linewidth": 1})
# Reverse the region names for display. The untouched names are kept in a helper
# column and the reversal always starts from it, so re-running this cell does not
# flip the names back to their original order.
if 'region_name_orig' not in mydf.columns:
    mydf['region_name_orig'] = mydf['region_name']
mydf['region_name'] = mydf['region_name_orig'].map(rtl)

mymydf = mydf.query("date > @epoch")
sns.lineplot(data=mymydf, x='date', y='nrides_raw', style='region_name', hue='region_name', markers=True, dashes=False, ax=ax)
# place the legend outside the plot area, to the right of the axes
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

In [None]:
#sns.scatterplot(data = mydf, y ='nrides', x='rain_mm')
# Display the current state of the working table
mydf

In [None]:
# Rides over the last 45 days for one region, one line style per segment name.
a4_dims = (11.7, 8.27)
fig, ax = plt.subplots(figsize=a4_dims)
epoch = today - timedelta(days=45)
sns.set_context('poster', font_scale=0.7,  rc={"lines.linewidth": 1})
# the filter compares against the reversed form of the region name
name = rtl('מישגב')
region_rides = mydf.query("date >= @epoch and region_name == @name")
sns.lineplot(
    data=region_rides,
    x='date',
    y='nrides_raw',
    style='name',
    hue='region_name',
    markers=True,
    dashes=False,
    ax=ax,
)
# legend goes outside the axes, on the right
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xticks(rotation=-45);

In [None]:
def days_since_last_rain(vv, thresh=5):
    """Count, for each position, how many entries have passed since the last rain.

    Parameters
    ----------
    vv : sequence of numbers
        Rain amounts, one per entry, in the order they should be walked.
    thresh : number, default 5
        An entry with a value >= `thresh` counts as rain and resets the counter.

    Returns
    -------
    list of int
        Same length as `vv`. An entry is 0 where the value reached `thresh`,
        otherwise one more than the previous entry. Counting starts at the
        beginning of the input, so entries before the first rain count up from 1.

    The counts are plain Python ints regardless of the dtype of `vv`; the
    counter is not stored in an array shaped after the input, so boolean or
    small-integer inputs cannot truncate or overflow it.
    """
    counts = []
    since = 0
    for amount in vv:
        # NaN compares False against the threshold and is therefore counted as "no rain"
        since = 0 if amount >= thresh else since + 1
        counts.append(since)
    return counts

In [None]:
# Display the working table again before the correlation analysis
mydf

In [None]:
# correlate with days since last rain
#
# For every segment name, build "days since the last rain of at least N mm" columns
# for several thresholds and compute their Spearman correlation with `nrides`.
# Result: `all_corrs`, a list of one-column frames (one per segment).

segments = mydf['name'].dropna().unique()
all_corrs = []
for seg in segments:
    # days_since_last_rain walks the rows in order, so they must be chronological;
    # the row order coming out of the merges is not guaranteed to be by date
    d_ = mydf.query("name == @seg").sort_values('date').copy()
    # missing rain measurements are treated as dry days; only this column is filled
    # NOTE(review): assumes each segment name maps to a single segment_id -- confirm
    rain = d_['rain_mm'].fillna(0).to_numpy()
    cnames = ['nrides']
    for i in [1, 5, 10, 15, 20]:
        cname = 'since_%dmm' % i
        cnames.append(cname)
        d_[cname] = days_since_last_rain(rain, thresh=i)

    d_rains = d_[cnames]
    # keep only the correlations of each rain column against nrides
    x = d_rains.corr(method='spearman')['nrides']
    x1 = pd.DataFrame(x)
    segr = seg[::-1]  # RTL
    x1.rename(columns={'nrides' : segr}, inplace=True)
    all_corrs.append(x1)
#sns.lineplot(data=d_.set_index('nrides'), dashes=False, markers=True, linewidth=2)

In [None]:
# Disabled experiment: the `if False:` guard means this body never runs.
# If enabled it would rebuild `all_corrs` from rolling rain totals over 1-3 rows
# instead of the "days since last rain" columns.
if False:
    segments = mydf['name'].dropna().unique()
    all_corrs = []
    for seg in segments:
        d_ = mydf.query("name == @seg").copy()
        cnames = ['nrides']
        for i in range(1,4):
            cname = 'rain_%dd' % i
            cnames.append(cname)
            # rolling sum of rain_mm over i rows within each segment_id, floored at 0
            d_[cname] = d_.fillna(0).groupby('segment_id')['rain_mm'].apply(lambda x : x.rolling(i).sum().clip(lower=0))
        
        d_rains = d_[cnames]
        # Spearman correlation of every selected column against nrides
        x = d_rains.corr(method='spearman')['nrides']
        x1 = pd.DataFrame(x)
        # reversed segment name, used as the column label
        segr = seg[::-1]
        x1.rename(columns={'nrides' : segr}, inplace=True)
        all_corrs.append(x1)
#sns.lineplot(data=d_.set_index('nrides'), dashes=False, markers=True, linewidth=2)

In [None]:
# One column per segment, one row per rain column; the `nrides` row (the
# correlation of nrides with itself) is removed before plotting.
a = pd.concat(all_corrs, axis=1).drop(index='nrides')
a4_dims = (10, 10)
fig, ax = plt.subplots(1, 1, figsize=a4_dims)
sns.lineplot(data=a, dashes=False, markers=True, linewidth=2)
# legend goes outside the axes, on the right
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

In [None]:
# Rides (top panel) against the weather measurements (bottom panel) for the segments
# whose closest IMS station is 205, over a fixed window (2020-11-30 .. 2021-01-04).
a4_dims = (10, 6)
fig, ax = plt.subplots(2, 1, figsize=a4_dims)
sns.set_context('poster', font_scale=0.7,  rc={"lines.linewidth": 1})
# the same filter is applied to the rides table and to the weather table
query = "closest_ims == 205 and date >= '2020-11-30' and date <= '2021-01-04'"
sns.lineplot(data=mydf.query(query), x='date', y='nrides', style='name', hue='region_name', markers=True, dashes=False, ax=ax[0])
ax[0].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# NOTE(review): plt.xticks acts on the current axes, which may not be ax[0] -- confirm
# which panel is meant to get the rotated labels
plt.xticks(rotation=-45);
w = pd.read_csv('data/climate/weather_days.csv')
w['date'] = pd.to_datetime(w['date'])
w2 = w.query(query).drop(columns=['closest_ims', 'wind_ms'])
# Cap the rain columns at 15 so large values do not flatten the other lines.
# New columns are assigned instead of calling clip(inplace=True) on a column
# selection: that chained in-place form does not update the frame under pandas
# Copy-on-Write.
w2 = w2.assign(
    rain_mm=w2['rain_mm'].clip(upper=15),
    rain_morning=w2['rain_morning'].clip(upper=15),
)
sns.lineplot(ax=ax[1], data=w2.set_index('date'), markers=True, dashes=False)
ax[1].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


In [None]:
# Plot whatever weather columns remain once the station id, wind, rain and
# temperature measurements are removed.
excluded_columns = ['closest_ims', 'wind_ms', 'rain_morning', 'rain_mm', 'temp_deg', 'temp_morning']
w2 = w.drop(columns=excluded_columns)
#w2['rain_mm'].clip(upper=15, inplace=True)
remaining_by_date = w2.set_index('date')
sns.lineplot(data=remaining_by_date)

In [None]:
# Reload the daily weather table and plot every measurement of station 205
# from `epoch` (as set by an earlier cell) onwards.
w = pd.read_csv('data/climate/weather_days.csv')
w['date'] = pd.to_datetime(w['date'])
w2 = w.query('closest_ims == 205').drop(columns='closest_ims')
station_recent = w2.query("date >= @epoch").set_index('date')
sns.lineplot(data=station_recent)
plt.xticks(rotation=-45);
# legend goes outside the axes, on the right
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


#w2



In [None]:
#print(mydf.dtypes)
#print(w.dtypes)
# Join the weather table onto the rides by station and day; columns of `w` whose
# names already exist in `mydf` get a "_y" suffix, the existing ones keep their name.
weather_keys = ['closest_ims', 'date']
mydf = mydf.merge(w, how='left', on=weather_keys, suffixes=('', "_y"))
#sns.scatterplot(data = mydf.query('segment_id == "5230474"'), y ='nrides', x='rain_7d')
mydf.info()
#help(mydf.merge)

In [None]:
# Rows dated after yesterday, ordered by raw ride count
epoch = today - timedelta(days=1)
latest = mydf.query("date > @epoch")
latest[['nrides_raw', 'name', 'date']].sort_values('nrides_raw')

In [None]:
# One bar plot per segment in `md`: `rides` by `weekday`, stacked vertically.
names = md['name'].values
ids = md['id'].values
# squeeze=False always returns a 2-D array of axes, so indexing also works when
# there is only one segment (plt.subplots otherwise returns a bare Axes object)
fig, ax = plt.subplots(figsize=(10,80), nrows=len(names), ncols=1, squeeze=False)
ax = ax[:, 0]

for vi, (segid, segname) in enumerate(zip(ids, names)):
    mydata = mydf.query("segment_id == @segid").copy()
    # segments without any rows keep an empty panel
    if len(mydata) > 0:
        # reverse the name for display, consistent with the RTL handling of the
        # other plots in this notebook; non-string names are left untouched
        title = segname[::-1] if isinstance(segname, str) else segname
        sns.barplot(data=mydata, x='weekday', y='rides', ax=ax[vi]).set_title(title)

fig.tight_layout(pad=3)