In [13]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import chart_studio.plotly as py
import cufflinks as cf
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
%matplotlib inline
# Make "plotly_white" the default template for plotly figures
pio.templates.default = "plotly_white"

# Switch plotly and cufflinks to offline notebook mode
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected = True)
cf.go_offline()

In [14]:
# Read data
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
data = pd.read_csv("/Documents/Personnel/Entraînement/poids.csv")

# Column names & type
data.rename({"Moyenne mobile": "ma", "Calories": "cals"}, axis = 1, inplace = True)
data.columns = data.columns.str.lower()

data["date"] = pd.to_datetime(data["date"], format = "%Y-%m-%d")

# Missing values
# NOTE(review): these two expressions are evaluated but not rendered, because a
# notebook only displays the last expression of a cell.
data.isnull().sum()
data.describe()

# Keep from 2021-08-02 for model.
# Take an explicit copy: `df` gets new columns below and in later cells, and
# writing into a bare `.loc` slice of `data` raises SettingWithCopyWarning.
df = data.loc[data["date"] >= "2021-08-02"].copy().reset_index(drop = True)

# Add column for phase change: 1 on every row whose phase differs from the
# previous row's phase, 0 otherwise. The first row is compared against
# "Maintenance".
previous_phase = df["phase"].shift(1, fill_value = "Maintenance")
df["phase_var"] = (df["phase"] != previous_phase).astype("int64")

In [31]:
# Plot all data: the full weight history as a single dash-dotted black line
weight_trace = go.Scatter(
    x=data.date,
    y=data.poids,
    mode='lines',
    line={"color": 'black', "width": 2, "dash": 'dashdot'},
)

fig = go.Figure()
fig.add_trace(weight_trace)
fig.update_layout(yaxis_title="Poids")

In [16]:
# Plot current bulk
# Dates on which the phase changes, followed by the last observed date so that
# a phase still running at the end of the data has a closing boundary.
dates = df.loc[df.phase_var == 1, "date"].tolist()
dates.append(df["date"].iloc[-1])

fig = go.Figure()

# Raw daily weight
fig.add_trace(go.Scatter(x=df.date, y=df.poids,
                        mode='lines', name='Poids', line=dict(color='black', width = 1.5)))

# Moving average
fig.add_trace(go.Scatter(x=df.date, y=df.ma,
                        mode='lines', name='Moyenne mobile', line=dict(color='firebrick', width = 1.5)))

# Shade every (start, end) pair of consecutive boundary dates instead of
# hard-coding dates[0..3]: identical output for four dates, no IndexError for
# fewer, and additional spans are picked up automatically.
# NOTE(review): like the original, this assumes boundaries alternate
# "bulk starts" / "bulk ends" -- confirm against the `phase` column.
for bulk_start, bulk_end in zip(dates[0::2], dates[1::2]):
    fig.add_vrect(x0=str(bulk_start), x1=str(bulk_end),
                  annotation_text="Bulk", annotation_position="top left",
                  fillcolor="green", opacity=0.1, line_width=0)

fig.update_layout(yaxis_title="Poids")

In [17]:
def compute_trend(df, n_day, switch_dates=None):
    """Add one piecewise-linear trend regressor per switch date to ``df``.

    For each switch date a column is created whose name is the first ten
    characters of ``str(switch_date)`` (``YYYY-MM-DD`` for timestamps). The
    column is 0 on rows dated before the switch date and counts 1, 2, 3, ...
    in row order on rows dated on or after it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column. It is modified in place.
    n_day : int
        Row step used to choose default switch dates (the date of every
        ``n_day``-th row). Only used when ``switch_dates`` is empty.
    switch_dates : sequence, optional
        Explicit switch dates. ``None`` or an empty sequence selects the
        default described above. Dates need not be present in ``df``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the trend columns added.
    """
    if switch_dates is None or len(switch_dates) == 0:
        switch_dates = df["date"][::n_day].to_list()

    for switch_date in switch_dates:
        col_name = str(switch_date)[0:10]
        on_or_after = df["date"] >= switch_date
        # Running count of the selected rows. Unlike arithmetic on index
        # labels, this works for any index and for switch dates that are not
        # themselves observed, and needs no array-to-int conversion.
        df[col_name] = on_or_after.cumsum().where(on_or_after, 0)

    return df

In [18]:
# Fit model
# Piecewise-linear trend: one slope-change regressor every 14 rows.
df = compute_trend(df, n_day = 14)

# Select the trend regressors by name (compute_trend labels them "YYYY-MM-DD")
# rather than by column position. A positional slice would also pick up any
# column appended to `df` afterwards (e.g. "fit") when this cell is re-run,
# feeding the previous fit back in as a regressor.
X = df.filter(regex = r"^\d{4}-\d{2}-\d{2}$")
X = sm.add_constant(X)
y = df.poids

# Ordinary least squares of weight on the intercept and trend regressors
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                  poids   R-squared:                       0.939
Model:                            OLS   Adj. R-squared:                  0.934
Method:                 Least Squares   F-statistic:                     190.3
Date:                Sat, 12 Feb 2022   Prob (F-statistic):           1.69e-97
Time:                        09:24:17   Log-Likelihood:                -229.60
No. Observations:                 189   AIC:                             489.2
Df Residuals:                     174   BIC:                             537.8
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        168.6701      0.450    374.610      0.0

In [32]:
# Plot model
# Store the fitted values next to the observations
df["fit"] = results.predict(X)

fig = go.Figure()

# Observed weights as markers
fig.add_trace(go.Scatter(x=df.date, y=df.poids,
                        mode='markers', name='Poids', line=dict(color='black', width = 1.5)))

# Fitted piecewise-linear trend
fig.add_trace(go.Scatter(x=df.date, y=df.fit,
                        mode='lines', name='Fit', line=dict(color='firebrick', width = 1.5)))

# Shade every (start, end) pair of consecutive boundary dates instead of
# hard-coding dates[0..3]: identical output for four dates, no IndexError for
# fewer, and additional spans are picked up automatically.
# NOTE(review): `dates` comes from the "Plot current bulk" cell and is assumed
# to alternate "bulk starts" / "bulk ends" -- confirm against the `phase` column.
for bulk_start, bulk_end in zip(dates[0::2], dates[1::2]):
    fig.add_vrect(x0=str(bulk_start), x1=str(bulk_end),
                  annotation_text="Bulk", annotation_position="top left",
                  fillcolor="green", opacity=0.1, line_width=0)

fig.update_layout(yaxis_title="Poids")

In [20]:
# Tabulate the fitted coefficients, skipping the first entry (the constant):
#   betas      - coefficient of each trend regressor
#   betas_cum  - running sum of the coefficients
#   betas_cum7 - running sum multiplied by 7
slopes = results.params.iloc[1:]
slope_values = slopes.to_numpy()

fit_dic = pd.DataFrame({
    "index": slopes.index.tolist(),
    "betas": slope_values.tolist(),
    "betas_cum": slope_values.cumsum().tolist(),
})
fit_dic["betas_cum7"] = 7 * fit_dic["betas_cum"]
print(fit_dic)

         index     betas  betas_cum  betas_cum7
0   2021-08-02 -0.004784  -0.004784   -0.033485
1   2021-08-19  0.004818   0.000034    0.000239
2   2021-09-03  0.125051   0.125085    0.875597
3   2021-09-17 -0.060079   0.065006    0.455045
4   2021-10-01  0.090740   0.155746    1.090222
5   2021-10-15  0.021362   0.177108    1.239754
6   2021-10-31 -0.232668  -0.055560   -0.388921
7   2021-11-14  0.162183   0.106623    0.746358
8   2021-11-28 -0.161713  -0.055090   -0.385631
9   2021-12-12  0.028545  -0.026545   -0.185817
10  2021-12-26  0.142040   0.115494    0.808461
11  2022-01-09 -0.132063  -0.016568   -0.115978
12  2022-01-23  0.086099   0.069531    0.486714
13  2022-02-06 -0.243425  -0.173895   -1.217263


In [21]:
# R-squared of the fitted OLS model at full precision
print(results.rsquared)

0.9387008640520286
