In [1]:
from tqdm import tqdm
import sys
import datetime

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML

In [2]:
# Load the working dataset from its pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
data_path = "data/data_work.pkl"
data = pd.read_pickle(data_path)

In [3]:
# Store the weekday column with pandas' categorical dtype.
# NOTE(review): presumably so the formula interface treats it as a factor
# (dummy-coded) in the regressions that include `weekday` — confirm.
weekday_categorical = data["weekday"].astype("category")
data["weekday"] = weekday_categorical

In [9]:
# Nested logit specifications for the binary outcome `price_increase`.
# Specification 1 is the baseline; specification 2 adds the
# days_since x loading_factor interaction; each later specification appends
# exactly one extra regressor to the previous one.
formula_1 = "price_increase ~ loading_factor + days_since + route"
formula_2 = "price_increase ~ loading_factor + days_since * loading_factor + route"
formula_3 = formula_2 + " + weekday"
formula_4 = formula_3 + " + peak_hour"
formula_5 = formula_4 + " + sales_prev_day"

# Order matters: it determines the column order (1)-(5) of the results table.
formulas = [formula_1, formula_2, formula_3, formula_4, formula_5]

In [21]:
def logit_estimate(formulas, data, days=30):
    """Fit one logit model per formula with standard errors clustered by train.

    Parameters
    ----------
    formulas : iterable of str
        Model formulas passed to ``smf.logit``. Each formula is also used as
        the key of its fitted result, so repeated formulas collapse into one
        entry.
    data : pandas.DataFrame
        Must contain the columns ``days_till_dep`` and ``train_id`` plus every
        variable referenced by the formulas.
    days : int, default 30
        Size of the estimation window: only rows whose ``days_till_dep`` is
        one of ``0, 1, ..., days - 1`` are used.

    Returns
    -------
    dict
        Maps each formula string to the result of ``model.fit(...)`` with
        ``cov_type="cluster"`` and groups taken from ``train_id``. Insertion
        order follows ``formulas``.

    Notes
    -----
    The cluster groups are looked up by row label, so index labels should be
    unique within the estimation window.
    """
    # The estimation window does not depend on the formula: build it once
    # instead of re-filtering the full frame on every iteration.
    in_window = data["days_till_dep"].isin(range(days))
    sample = data.loc[in_window]

    output_dict = {}
    for formula in formulas:
        model = smf.logit(formula, data=sample)
        # Rows can be dropped while the design matrices are built, so the
        # groups must follow the row labels the model actually kept. Look them
        # up in the filtered sample (not the full frame) so that repeated
        # index labels outside the window cannot leak into the groups vector.
        groups = sample.loc[model.data.row_labels, "train_id"]
        output_dict[formula] = model.fit(
            cov_type="cluster",
            cov_kwds={"groups": groups},
        )

    return output_dict

## Last 30 days (`days_till_dep` in 0–29)

In [22]:
# Fit every specification on the rows with days_till_dep in 0..29.
out = logit_estimate(formulas=formulas, data=data, days=30)

Optimization terminated successfully.
         Current function value: 0.336835
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.336703
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.336211
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.334603
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.332756
         Iterations 7


In [33]:
# Build the regression table for the 30-day models, one column per formula.
stargazer = Stargazer(list(out.values()))
stargazer.rename_covariates({"Intercept": "Constant"})

# Render the table once and reuse the markup for both the file and the display.
html = stargazer.render_html()
with open("logit_output_30days.html", "w") as f:
    f.write(html)

# Last expression of the cell: shows the table inline in the notebook.
output_1 = HTML(html)
output_1

0,1,2,3,4,5
,,,,,
,Dependent variable:price_increase,Dependent variable:price_increase,Dependent variable:price_increase,Dependent variable:price_increase,Dependent variable:price_increase
,,,,,
,(1),(2),(3),(4),(5)
,,,,,
Constant,-4.167***,-3.811***,-3.718***,-3.620***,-3.889***
,(0.098),(0.180),(0.189),(0.188),(0.190)
days_since,0.037***,0.020**,0.019**,0.018**,0.012
,(0.004),(0.008),(0.008),(0.008),(0.008)
days_since:loading_factor,,0.044**,0.047**,0.049**,0.048**


## Last 5 days (`days_till_dep` in 0–4)

In [34]:
# Check how the sales_prev_day indicator is distributed among the rows with
# days_till_dep below 5 before estimating on that subsample.
last_days = data.query("days_till_dep<5")
last_days["sales_prev_day"].value_counts()

True     4754
False     448
Name: sales_prev_day, dtype: int64

In [35]:
# Re-fit every specification on the rows with days_till_dep in 0..4.
# Note that this rebinds `out`, replacing the 30-day results.
out = logit_estimate(formulas=formulas, data=data, days=5)

Optimization terminated successfully.
         Current function value: 0.545135
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.544982
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.541076
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.539536
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.538468
         Iterations 6


In [36]:
# Build the regression table for the 5-day models, one column per formula.
stargazer = Stargazer(list(out.values()))
stargazer.rename_covariates({"Intercept": "Constant"})

# Render the table once and reuse the markup for both the file and the display.
html = stargazer.render_html()

# Write to a dedicated file: the previous target, "logit_output_30days.html",
# silently overwrote the 30-day table saved earlier in the notebook.
with open("logit_output_5days.html", "w") as f:
    f.write(html)

# Last expression of the cell: shows the table inline in the notebook.
output_1 = HTML(html)
output_1

0,1,2,3,4,5
,,,,,
,Dependent variable:price_increase,Dependent variable:price_increase,Dependent variable:price_increase,Dependent variable:price_increase,Dependent variable:price_increase
,,,,,
,(1),(2),(3),(4),(5)
,,,,,
Constant,-4.910***,-8.271***,-8.430***,-8.478***,-8.736***
,(0.802),(3.041),(3.049),(3.058),(3.065)
days_since,0.101***,0.221**,0.223**,0.226**,0.220**
,(0.030),(0.108),(0.109),(0.109),(0.109)
days_since:loading_factor,,-0.199,-0.202,-0.211,-0.204
