In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from itertools import product
import multiprocessing as mp
import os
import warnings
warnings.filterwarnings("ignore")
import sys
# Extra module search directory; `model_new` is imported right after this,
# so presumably that module lives here — confirm.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
path='C:/Users/annie/temporal/src'  
# Guard against duplicate entries when this cell is executed more than once.
if path not in sys.path:
    sys.path.append(path)
from model_new import VCM

### simulation settings ###

# Input file name (city A by default; the city B alternative is kept below).
file = 'V2_cityA_serial_order_dispatch_AA.csv'
# file = 'V2_cityB_serial_order_dispatch_AA.csv'

# Column roles: outcome, regressors, lagged regressors, action indicator.
ycol = 'gmv'
xcols = ['cnt_call', 'sum_online_time']
scols = ['cnt_call_lag', 'sum_online_time_lag']
acol = 'is_exp'

# Regressor lists with the intercept column prepended.
main_regcols = ['const', *xcols]
state_regcols = ['const', *scols]

# Inference switches, plus their 0/1 integer encodings.
two_sided = False
wild_bootstrap = False
wbi = int(wild_bootstrap)
tsi = int(two_sided)
ini = 0
hc = 0.01

### AA test

In [2]:
# Load the AA panel and index every row by (date, time).
# NOTE(review): absolute, machine-specific data path.
file = 'V2_S1_serial_order_dispatch_AA.csv'
df = pd.read_csv('C:/Users/annie/temporal/data/'+file)
df['date'] = df['date']-1  # shift date labels down by one (presumably to 0-based — confirm)
df = df.set_index(['date','time'])

df['const'] = 1
M = len(df.index.get_level_values(1).unique())  # number of distinct `time` values
N = len(df.index.get_level_values(0).unique())  # number of distinct `date` values

# Overwrite the action column with a deterministic alternating +/-1 design.
# `ti` looks like the switch interval in hours, since mt = 24/ti — confirm.
ti = 1
mt = int(24/ti)

# The tiling below only fills a day exactly when mt is even and divides M,
# and only lines up with the frame when every date has all M time slots.
# Fail early with a clear message instead of a pandas length-mismatch error.
if mt % 2 != 0 or M % mt != 0:
    raise ValueError(
        'cannot build the switchback pattern: need an even mt that divides M '
        '(got M={}, mt={})'.format(M, mt)
    )
if len(df) != N * M:
    raise ValueError(
        'expected a balanced panel with N*M = {} rows, found {}'.format(N * M, len(df))
    )

# One day's worth of assignments: `abv` starts at -1 and flips sign every
# M//mt rows; `bav` is the mirrored pattern starting at +1.
abv = np.tile(np.repeat([-1,1], M//mt), mt//2)
bav = np.tile(np.repeat([1,-1], M//mt), mt//2)
vec = np.hstack([abv, bav])  # two consecutive days: abv day, then bav day

# Tile enough day pairs to cover N dates and trim the surplus day, so an odd
# number of dates works too (for even N this equals np.tile(vec, N//2)).
# Assumes rows are ordered by date, then time — TODO confirm against the CSV.
df[acol] = np.tile(vec, (N + 1)//2)[:N*M]

# Keep only the modelling columns; .copy() gives an independent frame so
# that later column assignments do not write into a flagged slice.
xyscols = [ycol] + main_regcols + scols + [acol]
df = df[xyscols].copy()

In [None]:
# Build the VCM object (imported from the project-local `model_new` module)
# on the prepared frame, using the column roles and switches from the
# settings cell, then run its inference routine.
# NOTE(review): `nb` is presumably the number of bootstrap replications, and
# no random seed is set in the visible cells — confirm reproducibility.
model = VCM(
    df, ycol, xcols, acol, scols,
    two_sided=two_sided,
    wild_bootstrap=wild_bootstrap,
    center_x=True,
    scale_x=True,
    hc=hc,
)
model.inference(nb=500)

In [None]:
# Copy fitted values and residuals out of `model.holder` into `df`.
# Each pair is (key in model.holder, destination column in df); `.values`
# drops the holder's index so the assignment is purely positional.
holder_to_column = (
    ('fitted_DE_1', 'fitted_DE'),
    ('resid_DE_1', 'resid_DE'),
    ('fitted_IE1_1', 'fitted_IE1'),
    ('resid_IE1_1', 'resid_IE1'),
    ('fitted_IE2_1', 'fitted_IE2'),
    ('resid_IE2_1', 'resid_IE2'),
)
for holder_key, column in holder_to_column:
    df[column] = model.holder[holder_key].values

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid", palette="pastel", color_codes=True)

# Explicit figure/axes handles instead of the implicit pyplot "current axes".
fig, (ax_resid, ax_fit) = plt.subplots(1, 2, figsize=(14, 4))

# Left panel: one residual trace per date; use_index=False plots each trace
# against its row position within the date rather than the index labels.
df.groupby('date')['resid_DE'].plot(ax=ax_resid, use_index=False)
ax_resid.set(
    title='DE residuals by date (AA test)',
    xlabel='row position within date',
    ylabel='resid_DE',
)

# Right panel: observed outcome against the DE fitted values. The outcome
# column comes from the `ycol` setting rather than a hard-coded name.
ax_fit.scatter(df[ycol], df['fitted_DE'])
ax_fit.set(
    title='Observed vs fitted (AA test)',
    xlabel=ycol,
    ylabel='fitted_DE',
)

plt.show()

In [None]:
# Display the two p-values stored in model.holder (DE first, then IE).
[model.holder[key] for key in ('pvalue_DE', 'pvalue_IE')]

### AB test

In [4]:
# Load the AB panel, shift the date labels down by one, add the intercept
# column, and index every row by (date, time).
# NOTE(review): absolute, machine-specific data path.
file = 'V2_S1_serial_order_dispatch_AB.csv'
df = (
    pd.read_csv('C:/Users/annie/temporal/data/' + file)
    .assign(date=lambda raw: raw['date'] - 1, const=1)
    .set_index(['date', 'time'])
)

# Panel dimensions: distinct time values (level 1) and distinct dates (level 0).
M = len(df.index.get_level_values('time').unique())
N = len(df.index.get_level_values('date').unique())

# Keep only the modelling columns: outcome, regressors, lagged regressors, action.
xyscols = [ycol, *main_regcols, *scols, acol]
df = df[xyscols]

In [None]:
# Build the VCM object (imported from the project-local `model_new` module)
# on the AB frame, using the column roles and switches from the settings
# cell, then run its inference routine.
# NOTE(review): `nb` is presumably the number of bootstrap replications, and
# no random seed is set in the visible cells — confirm reproducibility.
model = VCM(
    df, ycol, xcols, acol, scols,
    two_sided=two_sided,
    wild_bootstrap=wild_bootstrap,
    center_x=True,
    scale_x=True,
    hc=hc,
)
model.inference(nb=500)

In [None]:
# Copy fitted values and residuals out of `model.holder` into `df`.
# Each pair is (key in model.holder, destination column in df); `.values`
# drops the holder's index so the assignment is purely positional.
holder_to_column = (
    ('fitted_DE_1', 'fitted_DE'),
    ('resid_DE_1', 'resid_DE'),
    ('fitted_IE1_1', 'fitted_IE1'),
    ('resid_IE1_1', 'resid_IE1'),
    ('fitted_IE2_1', 'fitted_IE2'),
    ('resid_IE2_1', 'resid_IE2'),
)
for holder_key, column in holder_to_column:
    df[column] = model.holder[holder_key].values

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid", palette="pastel", color_codes=True)

# Explicit figure/axes handles instead of the implicit pyplot "current axes".
fig, (ax_resid, ax_fit) = plt.subplots(1, 2, figsize=(14, 4))

# Left panel: one residual trace per date; use_index=False plots each trace
# against its row position within the date rather than the index labels.
df.groupby('date')['resid_DE'].plot(ax=ax_resid, use_index=False)
ax_resid.set(
    title='DE residuals by date (AB test)',
    xlabel='row position within date',
    ylabel='resid_DE',
)

# Right panel: observed outcome against the DE fitted values. The outcome
# column comes from the `ycol` setting rather than a hard-coded name.
ax_fit.scatter(df[ycol], df['fitted_DE'])
ax_fit.set(
    title='Observed vs fitted (AB test)',
    xlabel=ycol,
    ylabel='fitted_DE',
)

plt.show()

In [None]:
# Display the two p-values stored in model.holder (DE first, then IE).
[model.holder[key] for key in ('pvalue_DE', 'pvalue_IE')]