In [1]:
import pandas as pd
import preprocess as pp
import datetime
import numpy as np
import eda
import sklearn
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [2]:
# Read the January 2015 feature and target .npy files with the project reader.
features_jan_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_jan_2015.npy',
    features=True,
)
targets_jan_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_jan_2015.npy',
    targets=True,
)

In [None]:
# Keep the leading block of columns ('datetime' through 'z1') and then add z2..z12.
# The explicit .copy() makes this an independent frame, so the column assignments
# below do not write into a slice of features_jan_2015 (SettingWithCopyWarning).
new_features_jan_2015 = features_jan_2015.loc[:, 'datetime':'z1'].copy()
z_list = ['z%d' % i for i in range(2, 13)]
for z in z_list:
    new_features_jan_2015[z] = features_jan_2015[z]

# Restrict features and targets to the same intraday window:
# 09:35 (inclusive) up to 10:40 (exclusive). A date-range restriction can be
# added by and-ing further `.dt.date` conditions onto the masks below.
window_start = datetime.time(9, 35)
window_end = datetime.time(10, 40)

feature_times = new_features_jan_2015['datetime'].dt.time
features_hour = new_features_jan_2015.loc[(feature_times >= window_start)
                                          & (feature_times < window_end)]

target_times = targets_jan_2015['datetime'].dt.time
targets_hour = targets_jan_2015.loc[(target_times >= window_start)
                                    & (target_times < window_end)]

In [None]:
def ols_results_wald(feature_df, target_df, x=None, z=None, y='y2'):
    """Fit a z-quartile interaction OLS and Wald-test the interaction terms.

    The fitted model is

        y = b0 + b1*x + b2*x*z*1[z in Q2] + b3*x*z*1[z in Q3] + b4*x*z*1[z in Q4]

    where the bins are built from the 25%, 50% and 75% quantiles of
    ``feature_df[z]``: Q2 = [q25, q50), Q3 = [q50, q75), Q4 = [q75, max].
    Rows in the lowest bin (z < q25) only enter through the intercept and
    the main effect of ``x``.

    Every column of the design matrix and the response are kept in the same
    (original) row order, so each observation's main effect, interaction
    terms and target value stay on one row.

    Parameters
    ----------
    feature_df : pandas.DataFrame
        Frame holding the ``x`` and ``z`` columns.
    target_df : pandas.DataFrame
        Frame holding the ``y`` column. Rows are matched to ``feature_df``
        by position, so both frames must have the same length and order.
    x : column label
        Regressor column in ``feature_df``. Required (the ``None`` default
        only exists for keyword-style calls).
    z : column label
        Column in ``feature_df`` whose quartiles define the interaction bins.
        Required. Rows where it is missing are dropped before fitting.
    y : column label, default 'y2'
        Response column in ``target_df``.

    Returns
    -------
    tuple
        ``(f0, p0, f_equal, p_equal)`` where ``f0``/``p0`` are the
        ``fvalue``/``pvalue`` of the Wald test that all three interaction
        coefficients are zero, and ``f_equal``/``p_equal`` are those of the
        Wald test that the three interaction coefficients are equal.

    Raises
    ------
    ValueError
        If ``x`` or ``z`` is not given, or the two frames differ in length.
    """
    if x is None or z is None:
        raise ValueError("ols_results_wald needs both an `x` and a `z` column name")
    if len(feature_df) != len(target_df):
        raise ValueError("feature_df and target_df must have the same number of rows")

    z_series = feature_df[z]
    # Series.quantile skips missing values.
    q1, q2, q3 = (z_series.quantile(q) for q in (0.25, 0.5, 0.75))

    # A row with a missing z belongs to no quartile bin; leave it out of the fit.
    has_z = np.asarray(z_series.notna())
    x_vals = np.asarray(feature_df[x], dtype=float)[has_z]
    z_vals = np.asarray(z_series, dtype=float)[has_z]
    y_vals = np.asarray(target_df[y], dtype=float)[has_z]

    # Quartile membership, evaluated row by row so nothing gets reordered.
    in_q2 = (z_vals >= q1) & (z_vals < q2)
    in_q3 = (z_vals >= q2) & (z_vals < q3)
    in_q4 = z_vals >= q3

    x_times_z = x_vals * z_vals
    design = np.column_stack((
        np.ones_like(x_vals),             # intercept
        x_vals,                           # main effect of x
        np.where(in_q2, x_times_z, 0.0),  # x*z, active only inside Q2
        np.where(in_q3, x_times_z, 0.0),  # x*z, active only inside Q3
        np.where(in_q4, x_times_z, 0.0),  # x*z, active only inside Q4
    ))

    results = sm.OLS(y_vals, design).fit()

    # Restriction matrices (R @ params = 0) over the three interaction
    # coefficients, i.e. design columns 2, 3 and 4.
    n_params = design.shape[1]
    all_zero = np.eye(n_params)[2:]            # b2 = 0, b3 = 0, b4 = 0
    all_equal = all_zero[:-1] - all_zero[1:]   # b2 - b3 = 0, b3 - b4 = 0

    zero_test = results.wald_test(all_zero)
    equal_test = results.wald_test(all_equal)

    return zero_test.fvalue, zero_test.pvalue, equal_test.fvalue, equal_test.pvalue

In [None]:
# Single sanity run on the unfiltered January frames: x1 against the z1 quartiles.
f0, p0, f_equal, p_equal = ols_results_wald(
    features_jan_2015,
    targets_jan_2015,
    x='x1',
    z='z1',
    y='y2',
)
p_equal

In [None]:
z_list = new_features_jan_2015.loc[:, 'z1':'z12'].columns
x_list = features_hour.loc[:, 'x1':'x34'].columns

# Run the "all interaction coefficients are zero" Wald test for every (z, x)
# pair: one row of results per z column, one entry per x column.
f_list = []
p_list = []
for z in z_list:
    f_row = []
    p_row = []
    for feat in x_list:
        f0, p0, f_equal, p_equal = ols_results_wald(features_hour, targets_hour, x=feat, z=z)
        # Depending on the statsmodels version the statistics can come back as
        # 1x1 / 0-d arrays; store plain floats so the frames are numeric.
        f_row.append(float(np.squeeze(f0)))
        p_row.append(float(np.squeeze(p0)))

    f_list.append(f_row)
    p_list.append(p_row)

# Key the rows by the z labels actually iterated over, so the tables stay
# correct if the z range above is changed.
f_dict = dict(zip(z_list, f_list))
p_dict = dict(zip(z_list, p_list))

f_df = pd.DataFrame.from_dict(f_dict, orient='index', columns=x_list)
p_df = pd.DataFrame.from_dict(p_dict, orient='index', columns=x_list)


In [None]:
# Flatten p_df into ((x column, z row), p-value) pairs, column by column.
p_list = [
    ((col, idx), p_df.loc[idx, col])
    for col in p_df.columns
    for idx in p_df.index
]

In [None]:
# Order the ((x, z), p-value) pairs with the project helper; the sort direction
# is defined inside eda.sort_scores1, which is not visible in this notebook.
ranked_p = eda.sort_scores1(p_list)

In [None]:
# Show the ordered ((x, z), p-value) entries.
ranked_p

In [None]:
# Sum, per z, the 1-based positions its (x, z) pairs occupy in ranked_p.
rank_dict = {}
for rank, entry in enumerate(ranked_p, start=1):
    z_name = entry[0][1]
    rank_dict[z_name] = rank_dict.get(z_name, 0) + rank

In [None]:
# Show the summed rank positions per z.
rank_dict

In [2]:
import waldtest

In [11]:
# All of January 2015.
# NOTE(review): the trailing arguments (2015, 1, 1, 9, 30) presumably encode
# year, month, start day, hour and span; confirm against waldtest.hours.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 1, 9, 30)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_0 = waldtest.wald_0(new_features_jan, features_hour_jan, targets_hour_jan)
ranked_jan0 = jan_0[3]
print(ranked_jan0)

{'z8': 6536, 'z9': 7034, 'z4': 6471, 'z10': 6710, 'z5': 6803, 'z1': 7104, 'z6': 7149, 'z11': 6627, 'z3': 6726, 'z7': 6748, 'z12': 7020, 'z2': 8508}


In [15]:
# Rich display of the z-keyed January result (same dict as printed by the cell that assigns it).
ranked_jan0

{'z8': 6536,
 'z9': 7034,
 'z4': 6471,
 'z10': 6710,
 'z5': 6803,
 'z1': 7104,
 'z6': 7149,
 'z11': 6627,
 'z3': 6726,
 'z7': 6748,
 'z12': 7020,
 'z2': 8508}

In [None]:
# NOTE(review): `ranked_jan_equal` is never assigned in the visible cells, so this
# raises NameError on a fresh kernel. Presumably a leftover from a removed
# wald_equal cell; restore the assignment or delete this cell.
ranked_jan_equal

In [6]:
# January, week 1: Wald test that the coefficients are zero.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 5, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_0(new_features_jan, features_hour_jan, targets_hour_jan)

In [9]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_jan1_x = jan_pvals[2]
ranked_jan1_x

{'x8': 349,
 'x7': 730,
 'x6': 861,
 'x23': 255,
 'x15': 672,
 'x19': 836,
 'x20': 1036,
 'x14': 887,
 'x5': 1993,
 'x34': 2247,
 'x3': 2549,
 'x17': 1866,
 'x25': 2421,
 'x16': 2036,
 'x26': 2275,
 'x33': 3056,
 'x11': 3046,
 'x18': 2748,
 'x31': 3519,
 'x2': 2969,
 'x1': 3099,
 'x32': 3210,
 'x10': 3135,
 'x13': 3183,
 'x22': 3242,
 'x21': 3642,
 'x4': 3355,
 'x12': 2642,
 'x28': 2938,
 'x27': 3757,
 'x9': 3441,
 'x29': 4064,
 'x24': 3626,
 'x30': 3751}

In [10]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_jan1_z = jan_pvals[3]
ranked_jan1_z

{'z8': 6304,
 'z5': 6456,
 'z9': 6933,
 'z10': 7132,
 'z7': 8084,
 'z3': 6068,
 'z6': 8378,
 'z11': 5639,
 'z1': 7072,
 'z4': 6565,
 'z12': 7100,
 'z2': 7705}

In [None]:
# jan wk 1 new way double check results same
# wald test for coeffs = 0 

# NOTE(review): ols_results_wald is declared as
# (feature_df, target_df, x=None, z=None, y='y2'), so this call passes
# features_hour as target_df and targets_hour as x, and leaves z as None.
# It cannot succeed as written. The argument order matches the
# waldtest.wald_0(...) calls used elsewhere; presumably that was intended (confirm).
f0, p0, f_equal, p_equal = ols_results_wald(new_features_jan_2015, features_hour, targets_hour)
# ranked_jan1 is read from jan_pvals (set by an earlier waldtest cell), not from the call above.
ranked_jan1 = jan_pvals[2]
ranked_jan1

In [12]:
# January, week 2: Wald test that the coefficients are zero.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 12, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_0(new_features_jan, features_hour_jan, targets_hour_jan)
# Index 3 is the z-keyed result shown below.
ranked_jan2 = jan_pvals[3]
ranked_jan2

{'z8': 5954,
 'z9': 6766,
 'z4': 6311,
 'z10': 7365,
 'z6': 6285,
 'z3': 6367,
 'z7': 5345,
 'z1': 7216,
 'z11': 8112,
 'z5': 6767,
 'z12': 8544,
 'z2': 8404}

In [13]:
# January, week 3: Wald test that the coefficients are zero.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 19, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_0(new_features_jan, features_hour_jan, targets_hour_jan)
# Index 3 is the z-keyed result shown below.
ranked_jan3 = jan_pvals[3]
ranked_jan3

{'z8': 6853,
 'z9': 6816,
 'z5': 6826,
 'z4': 4264,
 'z11': 4747,
 'z10': 7771,
 'z7': 8035,
 'z1': 7860,
 'z6': 7564,
 'z12': 7205,
 'z3': 6999,
 'z2': 8496}

In [14]:
# January, week 4: Wald test that the coefficients are zero.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 26, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_0(new_features_jan, features_hour_jan, targets_hour_jan)
# Index 3 is the z-keyed result shown below.
ranked_jan4 = jan_pvals[3]
ranked_jan4

{'z8': 5886,
 'z4': 6661,
 'z9': 6985,
 'z5': 6612,
 'z10': 5907,
 'z3': 6608,
 'z6': 7333,
 'z11': 5966,
 'z7': 6009,
 'z1': 7718,
 'z12': 8901,
 'z2': 8850}

In [None]:
# January, week 1: Wald test that the coefficients are equal.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 5, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_equal(new_features_jan, features_hour_jan, targets_hour_jan)
# NOTE(review): this reads index 1, while the week 2-4 wald_equal cells read
# index 2; confirm which element of the wald_equal result is intended.
ranked_jan1 = jan_pvals[1]
ranked_jan1

In [None]:
# January, week 2: Wald test that the coefficients are equal.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 12, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_equal(new_features_jan, features_hour_jan, targets_hour_jan)
ranked_jan2 = jan_pvals[2]
ranked_jan2

In [None]:
# January, week 3: Wald test that the coefficients are equal.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 19, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_equal(new_features_jan, features_hour_jan, targets_hour_jan)
ranked_jan3 = jan_pvals[2]
ranked_jan3

In [None]:
# January, week 4: Wald test that the coefficients are equal.
jan = waldtest.hours(features_jan_2015, targets_jan_2015, 2015, 1, 26, 9, 5)
new_features_jan, features_hour_jan, targets_hour_jan = jan[0], jan[1], jan[2]
jan_pvals = waldtest.wald_equal(new_features_jan, features_hour_jan, targets_hour_jan)
ranked_jan4 = jan_pvals[2]
ranked_jan4

# February

In [16]:
# Read the February 2015 feature and target .npy files with the project reader.
features_feb_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_feb_2015.npy',
    features=True,
)
targets_feb_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_feb_2015.npy',
    targets=True,
)

In [18]:
# All of February 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
feb = waldtest.hours(features_feb_2015, targets_feb_2015, 2015, 2, 2, 9, 26)
new_features_feb, features_hour_feb, targets_hour_feb = feb[0], feb[1], feb[2]
feb_0 = waldtest.wald_0(new_features_feb, features_hour_feb, targets_hour_feb)

In [19]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_feb0_x = feb_0[2]
ranked_feb0_x

{'x8': 231,
 'x7': 289,
 'x6': 324,
 'x5': 632,
 'x3': 1839,
 'x2': 1150,
 'x33': 1030,
 'x34': 1710,
 'x1': 1283,
 'x23': 1657,
 'x32': 1174,
 'x10': 1392,
 'x13': 1441,
 'x11': 1627,
 'x27': 1568,
 'x24': 2110,
 'x15': 2808,
 'x12': 3389,
 'x14': 3372,
 'x29': 3236,
 'x18': 3204,
 'x22': 3115,
 'x4': 2761,
 'x26': 3488,
 'x19': 3330,
 'x21': 3416,
 'x16': 3987,
 'x17': 4018,
 'x25': 3400,
 'x20': 3743,
 'x30': 3944,
 'x28': 4134,
 'x31': 4331,
 'x9': 4303}

In [20]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_feb0_z = feb_0[3]
ranked_feb0_z

{'z8': 6349,
 'z4': 6091,
 'z9': 7384,
 'z5': 6751,
 'z10': 7438,
 'z3': 6138,
 'z6': 7133,
 'z7': 7241,
 'z1': 6675,
 'z11': 6612,
 'z12': 8026,
 'z2': 7598}

# March

In [21]:
# Read the March 2015 feature and target .npy files with the project reader.
features_mar_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_mar_2015.npy',
    features=True,
)
targets_mar_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_mar_2015.npy',
    targets=True,
)

In [27]:
# All of March 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
mar = waldtest.hours(features_mar_2015, targets_mar_2015, 2015, 3, 2, 9, 28)
new_features_mar, features_hour_mar, targets_hour_mar = mar[0], mar[1], mar[2]
mar_0 = waldtest.wald_0(new_features_mar, features_hour_mar, targets_hour_mar)

In [28]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_mar0_x = mar_0[2]
ranked_mar0_x

{'x8': 267,
 'x7': 327,
 'x6': 373,
 'x5': 863,
 'x3': 1285,
 'x2': 1104,
 'x33': 992,
 'x1': 1269,
 'x32': 1176,
 'x10': 1384,
 'x13': 1401,
 'x34': 1479,
 'x11': 1376,
 'x27': 1714,
 'x23': 1893,
 'x25': 3455,
 'x26': 3542,
 'x20': 2724,
 'x19': 3156,
 'x15': 2555,
 'x24': 2793,
 'x18': 3202,
 'x14': 3119,
 'x4': 2856,
 'x17': 3794,
 'x16': 3675,
 'x12': 3060,
 'x21': 3654,
 'x28': 4105,
 'x9': 4143,
 'x29': 4016,
 'x30': 4245,
 'x22': 3907,
 'x31': 4532}

In [29]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_mar0_z = mar_0[3]
ranked_mar0_z

{'z8': 6106,
 'z4': 6569,
 'z5': 5973,
 'z10': 7153,
 'z9': 7556,
 'z3': 6659,
 'z7': 6800,
 'z6': 6914,
 'z12': 7186,
 'z1': 7927,
 'z11': 6043,
 'z2': 8550}

# April

In [25]:
# Read the April 2015 feature and target .npy files with the project reader.
features_apr_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_apr_2015.npy',
    features=True,
)
targets_apr_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_apr_2015.npy',
    targets=True,
)

In [31]:
# All of April 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
apr = waldtest.hours(features_apr_2015, targets_apr_2015, 2015, 4, 1, 9, 29)
new_features_apr, features_hour_apr, targets_hour_apr = apr[0], apr[1], apr[2]
apr_0 = waldtest.wald_0(new_features_apr, features_hour_apr, targets_hour_apr)

In [32]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_apr0_x = apr_0[2]
ranked_apr0_x

{'x8': 291,
 'x7': 429,
 'x6': 466,
 'x2': 892,
 'x1': 948,
 'x10': 1013,
 'x13': 1045,
 'x24': 1474,
 'x33': 1439,
 'x11': 1273,
 'x32': 1535,
 'x27': 2004,
 'x3': 1198,
 'x5': 1291,
 'x34': 1708,
 'x21': 2454,
 'x23': 2713,
 'x20': 2434,
 'x22': 2828,
 'x15': 2811,
 'x18': 2963,
 'x4': 2905,
 'x19': 3020,
 'x16': 3699,
 'x14': 3903,
 'x17': 3713,
 'x9': 3756,
 'x25': 3684,
 'x12': 3950,
 'x30': 4192,
 'x26': 4104,
 'x28': 4362,
 'x31': 4430,
 'x29': 4509}

In [33]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_apr0_z = apr_0[3]
ranked_apr0_z

{'z8': 6214,
 'z9': 6820,
 'z5': 6112,
 'z10': 6870,
 'z4': 7107,
 'z3': 6992,
 'z6': 6449,
 'z11': 7400,
 'z7': 5959,
 'z1': 7274,
 'z12': 8116,
 'z2': 8123}

# May

In [34]:
# Read the May 2015 feature and target .npy files with the project reader.
features_may_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_may_2015.npy',
    features=True,
)
targets_may_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_may_2015.npy',
    targets=True,
)

In [35]:
# All of May 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
may = waldtest.hours(features_may_2015, targets_may_2015, 2015, 5, 4, 9, 25)
new_features_may, features_hour_may, targets_hour_may = may[0], may[1], may[2]
may_0 = waldtest.wald_0(new_features_may, features_hour_may, targets_hour_may)

In [36]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_may0_x = may_0[2]
ranked_may0_x

{'x8': 206,
 'x7': 351,
 'x6': 391,
 'x5': 937,
 'x3': 1239,
 'x33': 1150,
 'x32': 1286,
 'x2': 1158,
 'x23': 1296,
 'x34': 1401,
 'x1': 1240,
 'x10': 1289,
 'x13': 1355,
 'x27': 1814,
 'x11': 1548,
 'x24': 2353,
 'x15': 2513,
 'x14': 2957,
 'x17': 2641,
 'x19': 3097,
 'x20': 3131,
 'x4': 2940,
 'x18': 3664,
 'x16': 3093,
 'x31': 3673,
 'x29': 3717,
 'x21': 3827,
 'x26': 3889,
 'x22': 4038,
 'x12': 4037,
 'x25': 3733,
 'x28': 4400,
 'x9': 4486,
 'x30': 4586}

In [37]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_may0_z = may_0[3]
ranked_may0_z

{'z9': 6720,
 'z8': 6341,
 'z4': 7071,
 'z5': 6229,
 'z10': 6639,
 'z3': 7298,
 'z7': 6156,
 'z6': 6404,
 'z1': 7605,
 'z12': 8001,
 'z11': 7140,
 'z2': 7832}

# June

In [38]:
# Read the June 2015 feature and target .npy files with the project reader.
features_jun_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_jun_2015.npy',
    features=True,
)
targets_jun_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_jun_2015.npy',
    targets=True,
)

In [39]:
# All of June 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
jun = waldtest.hours(features_jun_2015, targets_jun_2015, 2015, 6, 1, 9, 26)
new_features_jun, features_hour_jun, targets_hour_jun = jun[0], jun[1], jun[2]
jun_0 = waldtest.wald_0(new_features_jun, features_hour_jun, targets_hour_jun)

In [40]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_jun0_x = jun_0[2]
ranked_jun0_x

{'x8': 163,
 'x7': 325,
 'x6': 394,
 'x5': 909,
 'x3': 825,
 'x2': 1499,
 'x34': 1063,
 'x1': 1851,
 'x33': 1400,
 'x10': 1997,
 'x13': 2060,
 'x32': 1717,
 'x11': 2063,
 'x23': 1337,
 'x19': 1131,
 'x20': 1905,
 'x27': 2806,
 'x24': 2720,
 'x18': 2488,
 'x14': 2455,
 'x4': 2447,
 'x15': 2732,
 'x22': 3094,
 'x25': 3235,
 'x17': 3955,
 'x21': 3515,
 'x26': 3684,
 'x12': 4079,
 'x9': 3973,
 'x16': 4009,
 'x31': 4082,
 'x30': 4320,
 'x28': 4639,
 'x29': 4564}

In [41]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_jun0_z = jun_0[3]
ranked_jun0_z

{'z8': 5849,
 'z9': 6132,
 'z5': 6390,
 'z3': 6356,
 'z4': 6529,
 'z10': 7400,
 'z7': 6041,
 'z6': 6938,
 'z11': 7111,
 'z1': 7831,
 'z12': 8244,
 'z2': 8615}

# July

In [3]:
# Read the July 2015 feature and target .npy files with the project reader.
features_jul_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_jul_2015.npy',
    features=True,
)
targets_jul_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_jul_2015.npy',
    targets=True,
)

In [4]:
# All of July 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
jul = waldtest.hours(features_jul_2015, targets_jul_2015, 2015, 7, 1, 9, 30)
new_features_jul, features_hour_jul, targets_hour_jul = jul[0], jul[1], jul[2]
jul_0 = waldtest.wald_0(new_features_jul, features_hour_jul, targets_hour_jul)

In [5]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_jul0_x = jul_0[2]
ranked_jul0_x

{'x8': 142,
 'x7': 332,
 'x6': 386,
 'x5': 723,
 'x3': 673,
 'x34': 1056,
 'x2': 1070,
 'x1': 1259,
 'x23': 1539,
 'x10': 1374,
 'x13': 1361,
 'x33': 1405,
 'x11': 1670,
 'x32': 1624,
 'x24': 2194,
 'x22': 2899,
 'x12': 3025,
 'x27': 2339,
 'x21': 2867,
 'x17': 3320,
 'x20': 2972,
 'x15': 3115,
 'x25': 3391,
 'x4': 3225,
 'x9': 3265,
 'x26': 3749,
 'x14': 3896,
 'x19': 3612,
 'x16': 3518,
 'x18': 3832,
 'x30': 4377,
 'x29': 4286,
 'x31': 4471,
 'x28': 4469}

In [6]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_jul0_z = jul_0[3]
ranked_jul0_z

{'z9': 6941,
 'z8': 5885,
 'z10': 6663,
 'z5': 6600,
 'z4': 6894,
 'z3': 6604,
 'z7': 6950,
 'z1': 6971,
 'z11': 5941,
 'z6': 7299,
 'z12': 8597,
 'z2': 8091}

# August

In [7]:
# Read the August 2015 feature and target .npy files with the project reader.
features_aug_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_aug_2015.npy',
    features=True,
)
targets_aug_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_aug_2015.npy',
    targets=True,
)

In [8]:
# All of August 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
aug = waldtest.hours(features_aug_2015, targets_aug_2015, 2015, 8, 3, 9, 25)
new_features_aug, features_hour_aug, targets_hour_aug = aug[0], aug[1], aug[2]
aug_0 = waldtest.wald_0(new_features_aug, features_hour_aug, targets_hour_aug)

In [9]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_aug0_x = aug_0[2]
ranked_aug0_x

{'x8': 375,
 'x7': 488,
 'x6': 536,
 'x34': 1102,
 'x33': 1177,
 'x5': 1090,
 'x32': 1386,
 'x2': 1001,
 'x3': 888,
 'x1': 1214,
 'x10': 1349,
 'x23': 1274,
 'x13': 1378,
 'x11': 1634,
 'x24': 2231,
 'x27': 1977,
 'x15': 2433,
 'x25': 2875,
 'x26': 3187,
 'x20': 2805,
 'x14': 3404,
 'x17': 3160,
 'x19': 3224,
 'x18': 3668,
 'x16': 3512,
 'x9': 3479,
 'x12': 3704,
 'x21': 3795,
 'x22': 4023,
 'x4': 3691,
 'x31': 3885,
 'x30': 4476,
 'x29': 4420,
 'x28': 4595}

In [10]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_aug0_z = aug_0[3]
ranked_aug0_z

{'z9': 6824,
 'z8': 6406,
 'z3': 6351,
 'z4': 6559,
 'z5': 6110,
 'z10': 6933,
 'z7': 5552,
 'z11': 6736,
 'z1': 7439,
 'z6': 6516,
 'z12': 8616,
 'z2': 9394}

# September

In [11]:
# Read the September 2015 feature and target .npy files with the project reader.
features_sep_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_sep_2015.npy',
    features=True,
)
targets_sep_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_sep_2015.npy',
    targets=True,
)

In [12]:
# All of September 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
sep = waldtest.hours(features_sep_2015, targets_sep_2015, 2015, 9, 1, 9, 25)
new_features_sep, features_hour_sep, targets_hour_sep = sep[0], sep[1], sep[2]
sep_0 = waldtest.wald_0(new_features_sep, features_hour_sep, targets_hour_sep)

In [13]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_sep0_x = sep_0[2]
ranked_sep0_x

{'x8': 177,
 'x7': 317,
 'x6': 369,
 'x34': 947,
 'x5': 818,
 'x3': 689,
 'x33': 1161,
 'x2': 978,
 'x32': 1480,
 'x1': 1289,
 'x10': 1443,
 'x13': 1471,
 'x27': 2133,
 'x24': 2101,
 'x11': 1823,
 'x23': 2144,
 'x12': 2808,
 'x15': 2428,
 'x20': 2591,
 'x19': 2928,
 'x25': 3829,
 'x14': 2946,
 'x16': 3425,
 'x17': 3169,
 'x4': 3392,
 'x18': 3794,
 'x21': 3657,
 'x26': 3919,
 'x22': 3508,
 'x31': 4087,
 'x28': 4228,
 'x9': 4509,
 'x30': 4467,
 'x29': 4411}

In [14]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_sep0_z = sep_0[3]
ranked_sep0_z

{'z8': 5926,
 'z9': 6405,
 'z7': 6305,
 'z10': 6833,
 'z5': 6295,
 'z4': 7309,
 'z6': 6730,
 'z11': 6591,
 'z1': 7714,
 'z3': 7506,
 'z12': 7546,
 'z2': 8276}

# October

In [15]:
# Read the October 2015 feature and target .npy files with the project reader.
features_oct_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_oct_2015.npy',
    features=True,
)
targets_oct_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_oct_2015.npy',
    targets=True,
)

In [16]:
# All of October 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
october = waldtest.hours(features_oct_2015, targets_oct_2015, 2015, 10, 5, 9, 25)
new_features_oct, features_hour_oct, targets_hour_oct = october[0], october[1], october[2]
oct_0 = waldtest.wald_0(new_features_oct, features_hour_oct, targets_hour_oct)

In [17]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_oct0_x = oct_0[2]
ranked_oct0_x

{'x8': 267,
 'x7': 421,
 'x6': 444,
 'x32': 1290,
 'x33': 1220,
 'x5': 1125,
 'x34': 1481,
 'x2': 1063,
 'x1': 1171,
 'x10': 1272,
 'x24': 1803,
 'x13': 1335,
 'x3': 1068,
 'x23': 2380,
 'x27': 1943,
 'x22': 2209,
 'x20': 3508,
 'x19': 3077,
 'x11': 2229,
 'x21': 2364,
 'x18': 3110,
 'x25': 2547,
 'x16': 3706,
 'x14': 3048,
 'x26': 3227,
 'x4': 3993,
 'x17': 4035,
 'x28': 3669,
 'x31': 3852,
 'x12': 3789,
 'x15': 3969,
 'x30': 4020,
 'x29': 4384,
 'x9': 4417}

In [18]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_oct0_z = oct_0[3]
ranked_oct0_z

{'z9': 7046,
 'z8': 6612,
 'z4': 6173,
 'z10': 6813,
 'z3': 5953,
 'z5': 6563,
 'z1': 7355,
 'z7': 6662,
 'z6': 6709,
 'z11': 7014,
 'z12': 8027,
 'z2': 8509}

# November

In [19]:
# Read the November 2015 feature and target .npy files with the project reader.
features_nov_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_nov_2015.npy',
    features=True,
)
targets_nov_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_nov_2015.npy',
    targets=True,
)

In [21]:
# All of November 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
nov = waldtest.hours(features_nov_2015, targets_nov_2015, 2015, 11, 2, 9, 25)
new_features_nov, features_hour_nov, targets_hour_nov = nov[0], nov[1], nov[2]
nov_0 = waldtest.wald_0(new_features_nov, features_hour_nov, targets_hour_nov)

In [22]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_nov0_x = nov_0[2]
ranked_nov0_x

{'x8': 222,
 'x7': 446,
 'x6': 514,
 'x5': 1155,
 'x23': 1073,
 'x34': 937,
 'x33': 1437,
 'x3': 1173,
 'x24': 2125,
 'x32': 1923,
 'x2': 1733,
 'x1': 2241,
 'x16': 2769,
 'x10': 2480,
 'x27': 2906,
 'x20': 1998,
 'x14': 3298,
 'x13': 2628,
 'x22': 2285,
 'x19': 1626,
 'x21': 1908,
 'x18': 2334,
 'x11': 3536,
 'x12': 2619,
 'x26': 3429,
 'x15': 3104,
 'x25': 3749,
 'x29': 3808,
 'x17': 3175,
 'x9': 3764,
 'x28': 4019,
 'x4': 4126,
 'x31': 4429,
 'x30': 4467}

In [23]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_nov0_z = nov_0[3]
ranked_nov0_z

{'z8': 5593,
 'z9': 7092,
 'z5': 6279,
 'z10': 6239,
 'z4': 7131,
 'z3': 6327,
 'z6': 7029,
 'z1': 6671,
 'z7': 6630,
 'z12': 9044,
 'z11': 5703,
 'z2': 9698}

# December

In [24]:
# Read the December 2015 feature and target .npy files with the project reader.
features_dec_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/features_dec_2015.npy',
    features=True,
)
targets_dec_2015 = pp.read_npy1(
    '/u/project/cratsch/tescala/targets_dec_2015.npy',
    targets=True,
)

In [25]:
# All of December 2015: Wald test that the coefficients are zero;
# x's and z's are ranked globally.
dec = waldtest.hours(features_dec_2015, targets_dec_2015, 2015, 12, 1, 9, 24)
new_features_dec, features_hour_dec, targets_hour_dec = dec[0], dec[1], dec[2]
dec_0 = waldtest.wald_0(new_features_dec, features_hour_dec, targets_hour_dec)

In [26]:
# Index 2 of the wald_0 result: dict keyed by x-feature name (see the output below).
ranked_dec0_x = dec_0[2]
ranked_dec0_x

{'x8': 146,
 'x7': 387,
 'x6': 464,
 'x3': 995,
 'x34': 1157,
 'x5': 1394,
 'x20': 1265,
 'x18': 1856,
 'x23': 1212,
 'x33': 1931,
 'x2': 1965,
 'x32': 2095,
 'x21': 2390,
 'x1': 2272,
 'x10': 2387,
 'x22': 2383,
 'x13': 2509,
 'x19': 1854,
 'x24': 2310,
 'x17': 2479,
 'x16': 2510,
 'x15': 2813,
 'x12': 2434,
 'x4': 3220,
 'x27': 3487,
 'x11': 3575,
 'x14': 3710,
 'x9': 3882,
 'x29': 3770,
 'x26': 3824,
 'x25': 3871,
 'x30': 4155,
 'x28': 4275,
 'x31': 4459}

In [27]:
# Index 3 of the wald_0 result: dict keyed by z name (see the output below).
ranked_dec0_z = dec_0[3]
ranked_dec0_z

{'z8': 4843,
 'z9': 6442,
 'z10': 6683,
 'z5': 6610,
 'z7': 6008,
 'z6': 6734,
 'z4': 7267,
 'z11': 7557,
 'z1': 8003,
 'z3': 5954,
 'z12': 9956,
 'z2': 7379}