In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from collections import Counter

### Load Data

In [3]:
df = pd.read_csv("15min_dateset.csv")
df.drop('Unnamed: 0', inplace = True, axis = 1)
print(len(df['EXP'].unique()))
df.head()

32


Unnamed: 0,Date,Type,from,EXP,jul_date,hour_angle,max_wind_speed,avg_wind_speed,R1,R2,...,HYBL_one,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700
0,2017-01-07 06:00:00,ANDE,MESONET,1/7/17,7,-1.570795,2.332608,1.295893,2.630564,2.322274,...,0.62923,0.903721,3.610055,5.01167,6.054984,0.739,-5000.0,40.293,23.757,16.787335
1,2017-01-07 06:15:00,ANDE,MESONET,1/7/17,7,-1.505345,1.555072,0.647947,2.055383,1.813005,...,0.748575,1.493288,3.643217,5.232982,6.394255,1.05,-5000.0,40.293,23.959,16.195689
2,2017-01-07 06:30:00,ANDE,MESONET,1/7/17,7,-1.439895,2.332608,1.360688,1.792587,1.671478,...,1.154196,1.645343,3.346854,5.054551,6.395084,1.211,-5000.0,40.293,24.118,15.636587
3,2017-01-07 06:45:00,ANDE,MESONET,1/7/17,7,-1.374446,1.555072,0.453563,1.442889,1.352497,...,2.183654,2.266223,3.550607,5.363623,6.869857,0.943,-5000.0,40.293,24.22,16.767452
4,2017-01-07 07:00:00,ANDE,MESONET,1/7/17,7,-1.308996,2.526992,1.360688,2.283417,1.677404,...,2.40729,2.633109,3.692489,5.309871,6.783159,0.751,-5000.0,40.293,24.149,17.249375


### Functions Used

In [4]:
def model_info1(x, y, test_date, data_tested):
    cm = confusion_matrix(x,y)
    acc = accuracy_score(x,y)
    cm = confusion_matrix(x,y)
    if len(cm) > 1:
        hit = cm[1][1]
        false_alarms = cm[0][1]
        misses = cm[1][0]
        no_events = cm[0][0]
        csi = hit / (false_alarms + misses + hit)
        hit_rate = hit / (hit + misses)
        false_alarm_rate = false_alarms / (hit + false_alarms)
    else:
        hit = 0
        false_alarms = 0
        misses = 0
        no_events = cm[0][0]
        csi = 0
        hit_rate =0
        false_alarm_rate = 0
    data = [{"Accuracy":acc,"Hits":hit, "False Alarms":false_alarms, 'Misses':misses,'No Events': no_events, 'CSI':csi, 'Hit Rate':hit_rate, 'False Alarm Rate':false_alarm_rate}]
    df = pd.DataFrame(data)
    df['test_date'] = test_date
    df['data_tested'] = data_tested
    if len(Counter(x)) == 1:
        l30 = Counter(x)[0]
        df['greter_than_30_knots'] = '0'
        df['less_than_30_knots'] = l30
    else:
        g30 = Counter(x)[1]
        l30 = Counter(x)[0]
        df['greter_than_30_knots'] = g30
        df['less_than_30_knots'] = l30
    return (df)

def model_info2(x, y,test_date,data_tested):
    mae = mean_absolute_error(x,y)
    mse = mean_squared_error(x,y)
    r2 = r2_score(x,y)
    residual_0 = x - y
    res_0 = np.array(residual_0)
    res_mean = round(res_0.mean(),4)
    data = [{"MAE":mae ,"MSE":mse, 'Residual Mean':res_mean}]
    df = pd.DataFrame(data)
    df['test_date'] = test_date
    df['data_tested'] = data_tested
    return (df)

In [13]:
df2 = df[['Date','Type', 'from', 'EXP',
       'max_wind_speed', 'R1', 'R2',
       'Pressure_reduced_to_MSL_.Pa..0.MSL',
       'Derived_radar_reflectivity_.dB..1.HYBL',
       'u.component_of_wind_.m.s..85000.ISBL',
       'v.component_of_wind_.m.s..85000.ISBL', 'wind_speed_85000',
       'wind_shear_85000', 'Wind_speed_.gust._.m.s..0.SFC', 'HYBL_one',
       'HYBL_two', 'HYBL_three', 'HYBL_four', 'HYBL_five', 'diff_temp',
       'geo_cbl', 'veg_sfc', 'best_4_layer', 'ws_700']]
le = LabelEncoder()
df2['from'] = le.fit_transform(df2['from'])
df2.head()

Unnamed: 0,Date,Type,from,EXP,max_wind_speed,R1,R2,Pressure_reduced_to_MSL_.Pa..0.MSL,Derived_radar_reflectivity_.dB..1.HYBL,u.component_of_wind_.m.s..85000.ISBL,...,HYBL_one,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700
0,2017-01-07 06:00:00,ANDE,1,1/7/17,2.332608,2.630564,2.322274,102879.7,0.0,5.447,...,0.62923,0.903721,3.610055,5.01167,6.054984,0.739,-5000.0,40.293,23.757,16.787335
1,2017-01-07 06:15:00,ANDE,1,1/7/17,1.555072,2.055383,1.813005,102951.0,0.0,5.629,...,0.748575,1.493288,3.643217,5.232982,6.394255,1.05,-5000.0,40.293,23.959,16.195689
2,2017-01-07 06:30:00,ANDE,1,1/7/17,2.332608,1.792587,1.671478,102890.6,0.0,5.653,...,1.154196,1.645343,3.346854,5.054551,6.395084,1.211,-5000.0,40.293,24.118,15.636587
3,2017-01-07 06:45:00,ANDE,1,1/7/17,1.555072,1.442889,1.352497,102931.0,0.0,5.692,...,2.183654,2.266223,3.550607,5.363623,6.869857,0.943,-5000.0,40.293,24.22,16.767452
4,2017-01-07 07:00:00,ANDE,1,1/7/17,2.526992,2.283417,1.677404,102882.5,0.0,5.226,...,2.40729,2.633109,3.692489,5.309871,6.783159,0.751,-5000.0,40.293,24.149,17.249375


### Grid search for best parameters

In [14]:
#X = df2.drop(['max_wind_speed', 'Type', 'EXP'], 1)
#Y = df2.max_wind_speed.values

In [15]:
#from sklearn.model_selection import GridSearchCV
#pram = {'subsample': [0.6, 0.8, 1.0],
        #'colsample_bytree': [0.6, 0.8, 1.0],
        #'max_depth': [3, 4, 5],
        #'learning_rate': [.01,0.1,0.3],
        #'n_estimators': [100,250,500]}
#clf = GridSearchCV(XGBRegressor(objective= 'reg:squarederror' , random_state=19), pram, cv=3, n_jobs=-1, verbose=2, scoring='neg_mean_absolute_error')
#clf.fit(X,Y)

In [16]:
#clf.best_score_, clf.best_params_

In [17]:
def rf_model(x):

    test_1 = df2[df2.EXP == x]
    train_1 = df2[df2.EXP != x]

    train_1 = train_1[['Date','Type', 'from',
           'max_wind_speed', 'R1', 'R2',
           'Pressure_reduced_to_MSL_.Pa..0.MSL',
           'Derived_radar_reflectivity_.dB..1.HYBL',
           'u.component_of_wind_.m.s..85000.ISBL',
           'v.component_of_wind_.m.s..85000.ISBL', 'wind_speed_85000',
           'wind_shear_85000', 'Wind_speed_.gust._.m.s..0.SFC', 'HYBL_one',
           'HYBL_two', 'HYBL_three', 'HYBL_four', 'HYBL_five', 'diff_temp',
           'geo_cbl', 'veg_sfc', 'best_4_layer', 'ws_700']]


    test_1 = test_1[['Date','Type', 'from',
           'max_wind_speed', 'R1', 'R2',
           'Pressure_reduced_to_MSL_.Pa..0.MSL',
           'Derived_radar_reflectivity_.dB..1.HYBL',
           'u.component_of_wind_.m.s..85000.ISBL',
           'v.component_of_wind_.m.s..85000.ISBL', 'wind_speed_85000',
           'wind_shear_85000', 'Wind_speed_.gust._.m.s..0.SFC', 'HYBL_one',
           'HYBL_two', 'HYBL_three', 'HYBL_four', 'HYBL_five', 'diff_temp',
           'geo_cbl', 'veg_sfc', 'best_4_layer', 'ws_700']]

    x_train = train_1.drop('max_wind_speed', 1)
    x_train.drop(['Date','Type'], axis=1, inplace=True)
    y_train = train_1.max_wind_speed
    x_test = test_1.drop('max_wind_speed', 1)
    x_test_copy = x_test.copy()
    x_test.drop(['Date','Type'], axis=1, inplace=True)
    y_test = test_1.max_wind_speed


    model = XGBRegressor(booster='gbtree', objective='reg:squarederror', colsample_bytree = 0.6,
                        learning_rate = 0.1, max_depth=5, n_estimators=500, subsample = 0.6,
                         random_state=16,n_jobs=-1)
    model.fit(x_train, y_train)

    pred_train = model.predict(x_train)
    train_info = model_info2(y_train, pred_train, x , "train")

    y_train30 = np.where(y_train > 30, 1, 0 )
    pred_train30 = np.where(pred_train > 30, 1, 0 )
    train_info2 = model_info1(y_train30, pred_train30, x,'train')


    pred_test = model.predict(x_test)
    test_info = model_info2(y_test, pred_test, x , "test")

    y_test30 = np.where(y_test > 30, 1, 0 )
    pred_test30 = np.where(pred_test > 30, 1, 0 )
    test_info2 = model_info1(y_test30, pred_test30, x,'test')

    table = train_info.append(test_info)
    table2 = train_info2.append(test_info2)
    
    print(x)
    print(table)
    print("==" *10)
    print(table2)

    df4 = pd.DataFrame(y_test)
    df4 = df4.rename(columns={'max_wind_speed': 'actual'})
    df4 = df4.reset_index()
    df4.drop('index', axis=1, inplace=True)
    x_test_copy = x_test_copy.reset_index()
    x_test_copy.drop("index", axis=1, inplace=True)
    df5 = pd.concat([x_test_copy, df4], axis=1)
    pred_df = pd.DataFrame(pred_test, columns=["pred"])
    df6 = pd.concat([df5, pred_df], axis=1)
    df6['From'] = np.where(df6['from'] == 1, 'Mesonet', 'ASOS')
    df6.drop('from', inplace=True, axis=1)
    df6['test_date'] = x
    return(table, table2, df6)

In [18]:
table = pd.DataFrame()
table_class = pd.DataFrame()
data = pd.DataFrame()
for i in df['EXP'].unique():
    t,tc,d = rf_model(i)
    table = table.append(t)
    table_class = table_class.append(tc)
    data = data.append(d)

1/7/17
        MAE        MSE  Residual Mean test_date data_tested
0  2.467257  10.516900         0.0002    1/7/17       train
0  2.971370  13.926024         1.5958    1/7/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.973504  2189           432    1441      66628  0.538897   0.60303   
0  1.000000     0             0       0       1959  0.000000   0.00000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.164823    1/7/17       train                 3630   
0          0.000000    1/7/17        test                    0   

   less_than_30_knots  
0               67060  
0                1959  
2/9/17
        MAE        MSE  Residual Mean test_date data_tested
0  2.444517  10.423489         0.0007    2/9/17       train
0  3.653666  21.017256        -1.0929    2/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.973707  2118           397    1459      66614  0.5329

1/4/18
        MAE        MSE  Residual Mean test_date data_tested
0  2.470077  10.557065        -0.0051    1/4/18       train
0  4.092643  30.543353         1.7808    1/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.974548  1982           381    1405      66404  0.526008  0.585179   
0  0.913201    29             1     214       2233  0.118852  0.119342   

   False Alarm Rate test_date data_tested  greter_than_30_knots  \
0          0.161236    1/4/18       train                  3387   
0          0.033333    1/4/18        test                   243   

   less_than_30_knots  
0               66785  
0                2234  
3/2/18
        MAE        MSE  Residual Mean test_date data_tested
0  2.427127  10.221432         0.0010    3/2/18       train
0  4.728076  37.583447        -0.8946    3/2/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.974337  2049           398    1404      66368  0.5

9/10/18
        MAE        MSE  Residual Mean test_date data_tested
0  2.452463  10.439702         0.0002   9/10/18       train
0  3.561545  22.232356         1.7692   9/10/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.973404  2171           418    1454      66344  0.536977  0.598897   
0  0.997790     0             0       5       2257  0.000000  0.000000   

   False Alarm Rate test_date data_tested  greter_than_30_knots  \
0          0.161452   9/10/18       train                  3625   
0               NaN   9/10/18        test                     5   

   less_than_30_knots  
0               66762  
0                2257  
9/26/18
        MAE        MSE  Residual Mean test_date data_tested
0  2.432577  10.252488         0.0042   9/26/18       train
0  3.859869  23.814683         1.9681   9/26/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.973491  2171           410    1459      66464  0

In [20]:
#table.to_csv('XGB_stats.csv')
table.head()

Unnamed: 0,MAE,MSE,Residual Mean,test_date,data_tested
0,2.467257,10.5169,0.0002,1/7/17,train
0,2.97137,13.926024,1.5958,1/7/17,test
0,2.444517,10.423489,0.0007,2/9/17,train
0,3.653666,21.017256,-1.0929,2/9/17,test
0,2.417145,10.132724,-0.0034,2/13/17,train


In [22]:
table_class.to_csv('XGB_stats_class.csv')
table_class.head()

Unnamed: 0,Accuracy,Hits,False Alarms,Misses,No Events,CSI,Hit Rate,False Alarm Rate,test_date,data_tested,greter_than_30_knots,less_than_30_knots
0,0.973504,2189,432,1441,66628,0.538897,0.60303,0.164823,1/7/17,train,3630,67060
0,1.0,0,0,0,1959,0.0,0.0,0.0,1/7/17,test,0,1959
0,0.973707,2118,397,1459,66614,0.532964,0.592116,0.157853,2/9/17,train,3577,67011
0,0.965551,23,41,30,1967,0.244681,0.433962,0.640625,2/9/17,test,53,2008
0,0.976391,1746,326,1343,67277,0.511274,0.565231,0.157336,2/13/17,train,3089,67603


### 10 Samples 

In [8]:
def rf_model2(x,sample):

    test_1 = df2[df2.EXP == x]
    train_1 = df2[df2.EXP != x]
    exp = train_1['EXP'].unique()
    smp = np.random.choice(exp,sample,replace=False)
    train2 = train_1[train_1['EXP'].isin(smp)]
    print(len(train2['EXP'].unique()))

    train2 = train2[['Type', 'from',
       'max_wind_speed', 'R1', 'R2',
       'Pressure_reduced_to_MSL_.Pa..0.MSL',
       'Derived_radar_reflectivity_.dB..1.HYBL',
       'u.component_of_wind_.m.s..85000.ISBL',
       'v.component_of_wind_.m.s..85000.ISBL', 'wind_speed_85000',
       'wind_shear_85000', 'Wind_speed_.gust._.m.s..0.SFC', 'HYBL_one',
       'HYBL_two', 'HYBL_three', 'HYBL_four', 'HYBL_five', 'diff_temp',
       'geo_cbl', 'veg_sfc', 'best_4_layer', 'ws_700']]


    test_1 = test_1[['Type', 'from',
       'max_wind_speed', 'R1', 'R2',
       'Pressure_reduced_to_MSL_.Pa..0.MSL',
       'Derived_radar_reflectivity_.dB..1.HYBL',
       'u.component_of_wind_.m.s..85000.ISBL',
       'v.component_of_wind_.m.s..85000.ISBL', 'wind_speed_85000',
       'wind_shear_85000', 'Wind_speed_.gust._.m.s..0.SFC', 'HYBL_one',
       'HYBL_two', 'HYBL_three', 'HYBL_four', 'HYBL_five', 'diff_temp',
       'geo_cbl', 'veg_sfc', 'best_4_layer', 'ws_700']]

    x_train = train2.drop('max_wind_speed', 1)
    x_train.drop('Type', axis=1, inplace=True)
    y_train = train2.max_wind_speed
    x_test = test_1.drop('max_wind_speed', 1)
    x_test_copy = x_test.copy()
    x_test.drop('Type', axis=1, inplace=True)
    y_test = test_1.max_wind_speed
    
    ll = list(smp)
    dates = " ".join(str(x) for x in ll)

    model = XGBRegressor(booster='gbtree', objective='reg:squarederror', colsample_bytree = 0.6,
                        learning_rate = 0.1, max_depth=5, n_estimators=500, subsample = 0.6,
                         random_state=16,n_jobs=-1)
    model.fit(x_train, y_train)

    pred_train = model.predict(x_train)
    train_info = model_info(y_train, pred_train, x , "train",)

    pred_test = model.predict(x_test)
    test_info = model_info(y_test, pred_test, x , "test",)

    table = train_info.append(test_info)
    table['train_dates'] = dates
    table['number_sample'] = sample
    print(x)
    print(table)
    print("--" * 40)

    df4 = pd.DataFrame(y_test)
    df4 = df4.rename(columns={'max_wind_speed': 'actual'})
    df4 = df4.reset_index()
    df4.drop('index', axis=1, inplace=True)
    x_test_copy = x_test_copy.reset_index()
    x_test_copy.drop("index", axis=1, inplace=True)
    df5 = pd.concat([x_test_copy, df4], axis=1)
    pred_df = pd.DataFrame(pred_test, columns=["pred"])
    df6 = pd.concat([df5, pred_df], axis=1)
    df6['From'] = np.where(df6['from'] == 1, 'Mesonet', 'ASOS')
    df6.drop('from', inplace=True, axis=1)
    df6['test_date'] = x
    df6['number_sample'] = sample
    return (table, df6)

In [9]:
table = pd.DataFrame()
data = pd.DataFrame()
count = 0
for i in df['EXP'].unique():
    t,d = rf_model2(i,10)
    table = table.append(t)
    data = data.append(d)
    count += 1
    print(count,'/32')

10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/7/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.713781   5.127535  0.918707         0.0029    1/7/17       train   
0  3.146377  16.660902  0.435652         0.0467    1/7/17        test   

                                         train_dates  number_sample  
0  6/19/17 10/2/18 5/26/18 12/9/17 3/22/17 9/10/1...             10  
0  6/19/17 10/2/18 5/26/18 12/9/17 3/22/17 9/10/1...             10  
--------------------------------------------------------------------------------
1 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.999566   6.747597  0.915365        -0.0049    2/9/17       train   
0  3.703234  22.168337  0.530080        -0.0786    2/9/17        test   

                                         train_dates  number_sample  
0  10/12/18 6/13/18 2/24/19 12/17/18 9/10/18 7/13...             10  
0  10/12/18 6/13/18 2/24/19 12/17/18 9/10/18 7/13...             10  
--------------------------------------------------------------------------------
2 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.611160   4.378991  0.918819        -0.0015   2/13/17       train   
0  7.116489  81.358290 -0.007255         4.9574   2/13/17        test   

                                         train_dates  number_sample  
0  3/21/18 10/27/18 3/7/18 7/13/17 12/22/18 9/10/...             10  
0  3/21/18 10/27/18 3/7/18 7/13/17 12/22/18 9/10/...             10  
--------------------------------------------------------------------------------
3 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/25/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.745016   5.121212  0.927579         0.0034   2/25/17       train   
0  4.773732  36.610564  0.031154         2.0357   2/25/17        test   

                                         train_dates  number_sample  
0  2/24/19 7/17/18 1/4/18 9/10/18 10/12/18 9/26/1...             10  
0  2/24/19 7/17/18 1/4/18 9/10/18 10/12/18 9/26/1...             10  
--------------------------------------------------------------------------------
4 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.573855   4.265230  0.920698        -0.0029    3/2/17       train   
0  8.087270  94.859418 -0.760464         7.4874    3/2/17        test   

                                         train_dates  number_sample  
0  3/21/18 10/27/18 3/14/17 5/26/18 9/10/18 7/13/...             10  
0  3/21/18 10/27/18 3/14/17 5/26/18 9/10/18 7/13/...             10  
--------------------------------------------------------------------------------
5 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/14/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.761831   5.274957  0.901778        -0.0019   3/14/17       train   
0  4.051122  25.925197  0.716276        -0.4550   3/14/17        test   

                                         train_dates  number_sample  
0  3/7/18 9/4/18 12/22/18 3/21/18 5/15/18 9/10/18...             10  
0  3/7/18 9/4/18 12/22/18 3/21/18 5/15/18 9/10/18...             10  
--------------------------------------------------------------------------------
6 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/22/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.788935   5.550937  0.911428         0.0018   3/22/17       train   
0  6.245754  59.313846 -0.029301         4.2105   3/22/17        test   

                                         train_dates  number_sample  
0  3/2/18 10/2/18 10/27/18 7/13/17 9/6/18 12/22/1...             10  
0  3/2/18 10/2/18 10/27/18 7/13/17 9/6/18 12/22/1...             10  
--------------------------------------------------------------------------------
7 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.680963   4.820675  0.901078         0.0011   6/19/17       train   
0  4.357741  31.381434  0.380924        -0.2637   6/19/17        test   

                                         train_dates  number_sample  
0  5/26/18 10/29/17 9/10/18 12/17/18 5/15/18 9/26...             10  
0  5/26/18 10/29/17 9/10/18 12/17/18 5/15/18 9/26...             10  
--------------------------------------------------------------------------------
8 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.831786   5.742954  0.934177        -0.0009   7/13/17       train   
0  3.729357  21.207677  0.181608        -1.3228   7/13/17        test   

                                         train_dates  number_sample  
0  12/9/17 2/25/17 2/9/17 12/17/18 8/17/18 6/13/1...             10  
0  12/9/17 2/25/17 2/9/17 12/17/18 8/17/18 6/13/1...             10  
--------------------------------------------------------------------------------
9 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/29/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.766014   5.279024  0.930748         0.0013  10/29/17       train   
0  6.633243  69.277239 -0.066989        -1.3831  10/29/17        test   

                                         train_dates  number_sample  
0  12/17/18 12/22/18 10/12/18 2/24/19 1/4/18 2/25...             10  
0  12/17/18 12/22/18 10/12/18 2/24/19 1/4/18 2/25...             10  
--------------------------------------------------------------------------------
10 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


11/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.634432   4.469089  0.929657         0.0012  11/19/17       train   
0  6.170681  59.014308  0.178221        -0.2057  11/19/17        test   

                                         train_dates  number_sample  
0  6/19/17 2/13/17 3/2/18 9/10/18 1/7/17 12/9/17 ...             10  
0  6/19/17 2/13/17 3/2/18 9/10/18 1/7/17 12/9/17 ...             10  
--------------------------------------------------------------------------------
11 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.764674   5.343959  0.938567        -0.0041   12/9/17       train   
0  4.577334  27.920650 -1.160579        -4.1147   12/9/17        test   

                                         train_dates  number_sample  
0  8/17/18 11/19/17 7/13/17 3/22/17 3/14/17 3/2/1...             10  
0  8/17/18 11/19/17 7/13/17 3/22/17 3/14/17 3/2/1...             10  
--------------------------------------------------------------------------------
12 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.084307   7.353896  0.896921        -0.0053    1/4/18       train   
0  5.882082  48.334323  0.586645        -0.8888    1/4/18        test   

                                         train_dates  number_sample  
0  10/29/17 6/19/17 9/6/18 2/24/19 12/22/18 12/17...             10  
0  10/29/17 6/19/17 9/6/18 2/24/19 12/22/18 12/17...             10  
--------------------------------------------------------------------------------
13 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.705044   4.988680  0.922595        -0.0012    3/2/18       train   
0  5.552204  49.828113  0.279748        -2.1157    3/2/18        test   

                                         train_dates  number_sample  
0  7/13/17 9/26/18 7/21/18 7/17/18 2/25/17 3/22/1...             10  
0  7/13/17 9/26/18 7/21/18 7/17/18 2/25/17 3/22/1...             10  
--------------------------------------------------------------------------------
14 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/7/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.626195   4.458627  0.924622        -0.0015    3/7/18       train   
0  3.379033  17.354891  0.315938        -0.4205    3/7/18        test   

                                         train_dates  number_sample  
0  1/4/18 6/13/18 10/27/18 12/9/17 7/17/18 2/9/17...             10  
0  1/4/18 6/13/18 10/27/18 12/9/17 7/17/18 2/9/17...             10  
--------------------------------------------------------------------------------
15 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.860984   5.934995  0.920047         0.0001   3/21/18       train   
0  4.502395  30.633489 -0.017375        -3.0242   3/21/18        test   

                                         train_dates  number_sample  
0  7/21/18 12/22/18 7/17/18 1/7/17 3/2/17 5/15/18...             10  
0  7/21/18 12/22/18 7/17/18 1/7/17 3/2/17 5/15/18...             10  
--------------------------------------------------------------------------------
16 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/15/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.710283   4.980963  0.921730         0.0025   5/15/18       train   
0  4.654409  42.222164  0.235175        -0.2900   5/15/18        test   

                                         train_dates  number_sample  
0  3/2/18 9/4/18 5/26/18 12/22/18 9/10/18 10/29/1...             10  
0  3/2/18 9/4/18 5/26/18 12/22/18 9/10/18 10/29/1...             10  
--------------------------------------------------------------------------------
17 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.703161   4.975141  0.925806        -0.0013   5/26/18       train   
0  3.309150  17.196468  0.107229        -1.4796   5/26/18        test   

                                         train_dates  number_sample  
0  10/29/17 3/14/17 10/27/18 1/7/17 5/15/18 3/21/...             10  
0  10/29/17 3/14/17 10/27/18 1/7/17 5/15/18 3/21/...             10  
--------------------------------------------------------------------------------
18 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/13/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.550437   4.110681  0.934924         0.0018   6/13/18       train   
0  5.009992  46.371133 -2.097004        -2.5864   6/13/18        test   

                                         train_dates  number_sample  
0  9/4/18 12/9/17 1/7/17 5/15/18 8/17/18 1/4/18 9...             10  
0  9/4/18 12/9/17 1/7/17 5/15/18 8/17/18 1/4/18 9...             10  
--------------------------------------------------------------------------------
19 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.836363   5.678442  0.925255         0.0049   7/17/18       train   
0  3.213211  19.679221  0.353842         0.4248   7/17/18        test   

                                         train_dates  number_sample  
0  5/15/18 3/7/18 3/2/17 3/2/18 10/12/18 2/9/17 1...             10  
0  5/15/18 3/7/18 3/2/17 3/2/18 10/12/18 2/9/17 1...             10  
--------------------------------------------------------------------------------
20 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.788141   5.506706  0.909350         0.0047   7/21/18       train   
0  4.071255  27.142860  0.133267         1.1770   7/21/18        test   

                                         train_dates  number_sample  
0  8/17/18 10/2/18 9/10/18 7/17/18 9/6/18 6/13/18...             10  
0  8/17/18 10/2/18 9/10/18 7/17/18 9/6/18 6/13/18...             10  
--------------------------------------------------------------------------------
21 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


8/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.741892   5.100538  0.927566         0.0023   8/17/18       train   
0  4.515576  32.185008  0.077710        -1.2634   8/17/18        test   

                                         train_dates  number_sample  
0  9/6/18 3/2/17 3/21/18 3/14/17 9/10/18 5/15/18 ...             10  
0  9/6/18 3/2/17 3/21/18 3/14/17 9/10/18 5/15/18 ...             10  
--------------------------------------------------------------------------------
22 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.776659   5.346043  0.917544        -0.0008    9/4/18       train   
0  2.510519  10.286684  0.066608         0.5824    9/4/18        test   

                                         train_dates  number_sample  
0  1/4/18 6/13/18 3/21/18 7/21/18 12/22/18 10/27/...             10  
0  1/4/18 6/13/18 3/21/18 7/21/18 12/22/18 10/27/...             10  
--------------------------------------------------------------------------------
23 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/6/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.960729   6.455015  0.920950         0.0037    9/6/18       train   
0  3.233780  19.166716  0.247097        -0.1676    9/6/18        test   

                                         train_dates  number_sample  
0  3/14/17 7/21/18 3/22/17 2/25/17 12/22/18 3/2/1...             10  
0  3/14/17 7/21/18 3/22/17 2/25/17 12/22/18 3/2/1...             10  
--------------------------------------------------------------------------------
24 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/10/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.836082   5.862499  0.927745        -0.0053   9/10/18       train   
0  3.875938  25.351670  0.043359         1.6713   9/10/18        test   

                                         train_dates  number_sample  
0  8/17/18 12/9/17 10/12/18 2/9/17 2/24/19 5/26/1...             10  
0  8/17/18 12/9/17 10/12/18 2/9/17 2/24/19 5/26/1...             10  
--------------------------------------------------------------------------------
25 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.693969   4.940906  0.942878         0.0015   9/26/18       train   
0  4.167878  27.660448 -0.224431         2.7545   9/26/18        test   

                                         train_dates  number_sample  
0  7/17/18 2/9/17 3/2/17 9/6/18 9/10/18 10/2/18 1...             10  
0  7/17/18 2/9/17 3/2/17 9/6/18 9/10/18 10/2/18 1...             10  
--------------------------------------------------------------------------------
26 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.621090   4.447171  0.938779        -0.0008   10/2/18       train   
0  4.444659  30.826307 -0.082367        -2.0540   10/2/18        test   

                                         train_dates  number_sample  
0  8/17/18 12/9/17 2/9/17 9/10/18 5/26/18 3/21/18...             10  
0  8/17/18 12/9/17 2/9/17 9/10/18 5/26/18 3/21/18...             10  
--------------------------------------------------------------------------------
27 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/12/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.000994   6.767298  0.910401        -0.0063  10/12/18       train   
0  5.382613  43.323504  0.341527        -2.7974  10/12/18        test   

                                         train_dates  number_sample  
0  7/13/17 9/26/18 8/17/18 6/19/17 10/27/18 2/13/...             10  
0  7/13/17 9/26/18 8/17/18 6/19/17 10/27/18 2/13/...             10  
--------------------------------------------------------------------------------
28 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/27/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.800679   5.612996  0.893202        -0.0013  10/27/18       train   
0  4.203714  29.769834  0.552307         0.8258  10/27/18        test   

                                         train_dates  number_sample  
0  6/19/17 3/7/18 10/29/17 7/21/18 9/10/18 9/4/18...             10  
0  6/19/17 3/7/18 10/29/17 7/21/18 9/10/18 9/4/18...             10  
--------------------------------------------------------------------------------
29 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.885076   6.092700  0.924356        -0.0005  12/17/18       train   
0  5.185347  45.084056  0.274486         1.9284  12/17/18        test   

                                         train_dates  number_sample  
0  10/27/18 10/12/18 3/22/17 8/17/18 10/2/18 7/17...             10  
0  10/27/18 10/12/18 3/22/17 8/17/18 10/2/18 7/17...             10  
--------------------------------------------------------------------------------
30 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/22/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.795957   5.467937  0.932406        -0.0029  12/22/18       train   
0  5.617977  49.378404  0.100235        -1.9234  12/22/18        test   

                                         train_dates  number_sample  
0  3/21/18 10/27/18 3/14/17 9/6/18 1/4/18 3/2/17 ...             10  
0  3/21/18 10/27/18 3/14/17 9/6/18 1/4/18 3/2/17 ...             10  
--------------------------------------------------------------------------------
31 /32
10


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/24/19
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.816909   5.662513  0.930226        -0.0016   2/24/19       train   
0  5.761192  57.271697  0.422972         0.8290   2/24/19        test   

                                         train_dates  number_sample  
0  10/29/17 9/26/18 12/9/17 3/14/17 6/19/17 2/13/...             10  
0  10/29/17 9/26/18 12/9/17 3/14/17 6/19/17 2/13/...             10  
--------------------------------------------------------------------------------
32 /32


In [10]:
#table.to_csv('10_sample_XGB_stats.csv')
#data.to_csv('10_sample_XGB_data.csv')

In [11]:
table.groupby('data_tested').agg({'MAE':'mean', 'R2':'mean', 'Residual Mean':'mean'})

Unnamed: 0_level_0,MAE,R2,Residual Mean
data_tested,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test,4.720789,0.067072,0.065509
train,1.770843,0.921747,-0.000369


In [12]:
res = data['actual'] - data['pred']
res.mean()

-0.025091928456275867

### 15 Samples

In [13]:
table = pd.DataFrame()
data = pd.DataFrame()
count = 0
for i in df['EXP'].unique():
    t,d = rf_model2(i,15)
    table = table.append(t)
    data = data.append(d)
    count += 1
    print(count,'/32')

15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/7/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.009856   6.900102  0.892384         0.0025    1/7/17       train   
0  2.784048  12.291409  0.583658         0.8713    1/7/17        test   

                                         train_dates  number_sample  
0  9/26/18 9/4/18 9/6/18 11/19/17 10/12/18 7/13/1...             15  
0  9/26/18 9/4/18 9/6/18 11/19/17 10/12/18 7/13/1...             15  
--------------------------------------------------------------------------------
1 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.054708   7.189559  0.911929        -0.0049    2/9/17       train   
0  3.705897  21.571960  0.542722        -0.2446    2/9/17        test   

                                         train_dates  number_sample  
0  9/10/18 10/29/17 5/26/18 7/21/18 9/26/18 10/12...             15  
0  9/10/18 10/29/17 5/26/18 7/21/18 9/26/18 10/12...             15  
--------------------------------------------------------------------------------
2 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.046649   7.218671  0.897335        -0.0040   2/13/17       train   
0  6.686653  72.197428  0.106161         4.1634   2/13/17        test   

                                         train_dates  number_sample  
0  2/24/19 9/10/18 2/9/17 12/17/18 8/17/18 7/17/1...             15  
0  2/24/19 9/10/18 2/9/17 12/17/18 8/17/18 7/17/1...             15  
--------------------------------------------------------------------------------
3 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/25/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.926017   6.518436  0.916073         0.0002   2/25/17       train   
0  4.270483  29.050807  0.231212        -0.4987   2/25/17        test   

                                         train_dates  number_sample  
0  9/4/18 6/13/18 3/22/17 11/19/17 5/26/18 5/15/1...             15  
0  9/4/18 6/13/18 3/22/17 11/19/17 5/26/18 5/15/1...             15  
--------------------------------------------------------------------------------
4 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.119402   7.778896  0.892326        -0.0026    3/2/17       train   
0  5.286485  44.223927  0.179263         3.5555    3/2/17        test   

                                         train_dates  number_sample  
0  7/17/18 9/4/18 3/22/17 10/27/18 9/6/18 8/17/18...             15  
0  7/17/18 9/4/18 3/22/17 10/27/18 9/6/18 8/17/18...             15  
--------------------------------------------------------------------------------
5 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/14/17
       MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.91644   6.286220  0.880585        -0.0029   3/14/17       train   
0  3.81935  24.593994  0.730845        -1.1570   3/14/17        test   

                                         train_dates  number_sample  
0  6/19/17 6/13/18 3/2/18 5/26/18 9/26/18 2/9/17 ...             15  
0  6/19/17 6/13/18 3/2/18 5/26/18 9/26/18 2/9/17 ...             15  
--------------------------------------------------------------------------------
6 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/22/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.066543   7.448170  0.887438        -0.0032   3/22/17       train   
0  5.401261  44.912714  0.220609        -0.9348   3/22/17        test   

                                         train_dates  number_sample  
0  2/9/17 10/29/17 9/6/18 10/12/18 10/2/18 12/17/...             15  
0  2/9/17 10/29/17 9/6/18 10/12/18 10/2/18 12/17/...             15  
--------------------------------------------------------------------------------
7 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.134676   7.750087  0.895817         0.0019   6/19/17       train   
0  3.791557  25.872211  0.489607         1.0692   6/19/17        test   

                                         train_dates  number_sample  
0  3/21/18 7/13/17 2/13/17 12/22/18 11/19/17 3/2/...             15  
0  3/21/18 7/13/17 2/13/17 12/22/18 11/19/17 3/2/...             15  
--------------------------------------------------------------------------------
8 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.097230   7.614114  0.904852        -0.0022   7/13/17       train   
0  3.385353  17.849289  0.311206        -0.8779   7/13/17        test   

                                         train_dates  number_sample  
0  2/25/17 12/17/18 3/2/18 1/4/18 3/21/18 2/9/17 ...             15  
0  2/25/17 12/17/18 3/2/18 1/4/18 3/21/18 2/9/17 ...             15  
--------------------------------------------------------------------------------
9 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/29/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.067841   7.340909  0.892711        -0.0010  10/29/17       train   
0  6.825242  71.646749 -0.103483        -2.7593  10/29/17        test   

                                         train_dates  number_sample  
0  5/15/18 5/26/18 6/19/17 6/13/18 12/22/18 1/4/1...             15  
0  5/15/18 5/26/18 6/19/17 6/13/18 12/22/18 1/4/1...             15  
--------------------------------------------------------------------------------
10 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


11/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.981926   6.750899  0.875401        -0.0038  11/19/17       train   
0  6.839199  72.457989 -0.008984         1.8040  11/19/17        test   

                                         train_dates  number_sample  
0  9/26/18 3/21/18 2/25/17 1/7/17 3/7/18 1/4/18 3...             15  
0  9/26/18 3/21/18 2/25/17 1/7/17 3/7/18 1/4/18 3...             15  
--------------------------------------------------------------------------------
11 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.086405   7.559644  0.880627         0.0020   12/9/17       train   
0  4.181605  23.072101 -0.785385        -3.8758   12/9/17        test   

                                         train_dates  number_sample  
0  10/2/18 10/12/18 12/22/18 2/25/17 7/13/17 8/17...             15  
0  10/2/18 10/12/18 12/22/18 2/25/17 7/13/17 8/17...             15  
--------------------------------------------------------------------------------
12 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.017306   6.921084  0.909743        -0.0016    1/4/18       train   
0  4.178622  31.360710  0.731803         1.5420    1/4/18        test   

                                         train_dates  number_sample  
0  12/22/18 12/9/17 12/17/18 3/14/17 3/2/18 3/2/1...             15  
0  12/22/18 12/9/17 12/17/18 3/14/17 3/2/18 3/2/1...             15  
--------------------------------------------------------------------------------
13 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.141122   7.886871  0.888000         0.0016    3/2/18       train   
0  4.974351  41.877422  0.394673        -0.2576    3/2/18        test   

                                         train_dates  number_sample  
0  3/7/18 10/12/18 3/2/17 11/19/17 9/10/18 5/15/1...             15  
0  3/7/18 10/12/18 3/2/17 11/19/17 9/10/18 5/15/1...             15  
--------------------------------------------------------------------------------
14 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/7/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.979003   6.821300  0.892649         0.0034    3/7/18       train   
0  3.487891  18.064055  0.287985        -1.5805    3/7/18        test   

                                         train_dates  number_sample  
0  7/13/17 10/27/18 10/29/17 5/26/18 9/4/18 7/21/...             15  
0  7/13/17 10/27/18 10/29/17 5/26/18 9/4/18 7/21/...             15  
--------------------------------------------------------------------------------
15 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.119886   7.688368  0.900020        -0.0025   3/21/18       train   
0  5.115955  39.977214 -0.327691        -3.9457   3/21/18        test   

                                         train_dates  number_sample  
0  5/15/18 12/22/18 7/17/18 10/2/18 9/26/18 7/21/...             15  
0  5/15/18 12/22/18 7/17/18 10/2/18 9/26/18 7/21/...             15  
--------------------------------------------------------------------------------
16 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/15/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.057121   7.163940  0.904193        -0.0045   5/15/18       train   
0  4.266916  36.192154  0.344405        -0.2098   5/15/18        test   

                                         train_dates  number_sample  
0  3/22/17 7/17/18 1/4/18 3/21/18 10/2/18 6/13/18...             15  
0  3/22/17 7/17/18 1/4/18 3/21/18 10/2/18 6/13/18...             15  
--------------------------------------------------------------------------------
17 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.982970   6.847669  0.917529        -0.0038   5/26/18       train   
0  3.682058  20.613587 -0.070175        -2.6756   5/26/18        test   

                                         train_dates  number_sample  
0  3/21/18 7/13/17 6/19/17 3/22/17 10/2/18 7/17/1...             15  
0  3/21/18 7/13/17 6/19/17 3/22/17 10/2/18 7/17/1...             15  
--------------------------------------------------------------------------------
18 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/13/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.115190   7.629766  0.903647         0.0038   6/13/18       train   
0  4.178395  29.378224 -0.962093        -1.5289   6/13/18        test   

                                         train_dates  number_sample  
0  3/7/18 10/12/18 10/27/18 1/4/18 8/17/18 12/9/1...             15  
0  3/7/18 10/12/18 10/27/18 1/4/18 8/17/18 12/9/1...             15  
--------------------------------------------------------------------------------
19 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.980748   6.690137  0.904443         0.0028   7/17/18       train   
0  3.151244  19.514204  0.359260         0.2990   7/17/18        test   

                                         train_dates  number_sample  
0  3/14/17 9/26/18 1/7/17 10/27/18 12/17/18 12/9/...             15  
0  3/14/17 9/26/18 1/7/17 10/27/18 12/17/18 12/9/...             15  
--------------------------------------------------------------------------------
20 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.987065   6.772591  0.919091        -0.0026   7/21/18       train   
0  4.534010  31.893665 -0.018437         1.7843   7/21/18        test   

                                         train_dates  number_sample  
0  3/7/18 12/9/17 5/15/18 9/10/18 9/26/18 10/27/1...             15  
0  3/7/18 12/9/17 5/15/18 9/10/18 9/26/18 10/27/1...             15  
--------------------------------------------------------------------------------
21 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


8/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.935339   6.384796  0.891067         0.0017   8/17/18       train   
0  3.930161  25.112009  0.280393        -1.4052   8/17/18        test   

                                         train_dates  number_sample  
0  3/21/18 3/14/17 9/6/18 2/24/19 1/4/18 2/9/17 7...             15  
0  3/21/18 3/14/17 9/6/18 2/24/19 1/4/18 2/9/17 7...             15  
--------------------------------------------------------------------------------
22 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/4/18
        MAE       MSE        R2  Residual Mean test_date data_tested  \
0  2.168573  8.124453  0.896975         0.0012    9/4/18       train   
0  2.245756  7.939561  0.279581         0.3574    9/4/18        test   

                                         train_dates  number_sample  
0  2/13/17 10/29/17 11/19/17 10/2/18 6/19/17 10/2...             15  
0  2/13/17 10/29/17 11/19/17 10/2/18 6/19/17 10/2...             15  
--------------------------------------------------------------------------------
23 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/6/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.320735   9.230928  0.876459        -0.0062    9/6/18       train   
0  2.900094  16.090641  0.367931        -0.1758    9/6/18        test   

                                         train_dates  number_sample  
0  2/9/17 2/25/17 8/17/18 6/19/17 2/13/17 10/2/18...             15  
0  2/9/17 2/25/17 8/17/18 6/19/17 2/13/17 10/2/18...             15  
--------------------------------------------------------------------------------
24 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/10/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.041724   7.132771  0.896593         0.0054   9/10/18       train   
0  4.003422  25.512194  0.037301         1.6137   9/10/18        test   

                                         train_dates  number_sample  
0  3/21/18 9/4/18 7/13/17 3/7/18 10/2/18 1/4/18 2...             15  
0  3/21/18 9/4/18 7/13/17 3/7/18 10/2/18 1/4/18 2...             15  
--------------------------------------------------------------------------------
25 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.995659   6.859758  0.894524        -0.0003   9/26/18       train   
0  3.832578  23.546190 -0.042307         2.2263   9/26/18        test   

                                         train_dates  number_sample  
0  5/26/18 2/24/19 6/19/17 9/6/18 10/29/17 12/17/...             15  
0  5/26/18 2/24/19 6/19/17 9/6/18 10/29/17 12/17/...             15  
--------------------------------------------------------------------------------
26 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.008592   6.949204  0.916433         0.0009   10/2/18       train   
0  3.972043  24.588183  0.136665        -0.6984   10/2/18        test   

                                         train_dates  number_sample  
0  10/29/17 3/2/17 3/14/17 12/22/18 12/9/17 6/13/...             15  
0  10/29/17 3/2/17 3/14/17 12/22/18 12/9/17 6/13/...             15  
--------------------------------------------------------------------------------
27 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/12/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.091482   7.481627  0.914239        -0.0012  10/12/18       train   
0  4.109089  27.263784  0.585618         0.7283  10/12/18        test   

                                         train_dates  number_sample  
0  6/13/18 3/2/17 10/27/18 9/26/18 2/24/19 3/14/1...             15  
0  6/13/18 3/2/17 10/27/18 9/26/18 2/24/19 3/14/1...             15  
--------------------------------------------------------------------------------
28 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/27/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.152852   7.860702  0.889843         0.0026  10/27/18       train   
0  4.238877  30.809154  0.536677         0.4407  10/27/18        test   

                                         train_dates  number_sample  
0  6/13/18 2/24/19 10/29/17 2/25/17 6/19/17 3/7/1...             15  
0  6/13/18 2/24/19 10/29/17 2/25/17 6/19/17 3/7/1...             15  
--------------------------------------------------------------------------------
29 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.133942   7.720601  0.893356        -0.0019  12/17/18       train   
0  4.598832  35.996113  0.420734         1.7425  12/17/18        test   

                                         train_dates  number_sample  
0  12/22/18 7/21/18 2/24/19 9/26/18 9/10/18 10/12...             15  
0  12/22/18 7/21/18 2/24/19 9/26/18 9/10/18 10/12...             15  
--------------------------------------------------------------------------------
30 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/22/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  1.893636   6.270097  0.892797        -0.0001  12/22/18       train   
0  5.488111  47.532818  0.133865        -0.3397  12/22/18        test   

                                         train_dates  number_sample  
0  2/25/17 3/21/18 6/19/17 7/21/18 1/7/17 5/26/18...             15  
0  2/25/17 3/21/18 6/19/17 7/21/18 1/7/17 5/26/18...             15  
--------------------------------------------------------------------------------
31 /32
15


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/24/19
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.078888   7.342101  0.889274         0.0018   2/24/19       train   
0  5.621470  49.246335  0.503830        -1.4916   2/24/19        test   

                                         train_dates  number_sample  
0  9/26/18 10/2/18 1/4/18 3/7/18 2/9/17 2/13/17 6...             15  
0  9/26/18 10/2/18 1/4/18 3/7/18 2/9/17 2/13/17 6...             15  
--------------------------------------------------------------------------------
32 /32


In [14]:
#table.to_csv('15_sample_XGB_stats.csv')
#data.to_csv('15_sample_XGB_data.csv')

In [15]:
table.groupby('data_tested').agg({'MAE':'mean', 'R2':'mean', 'Residual Mean':'mean'})

Unnamed: 0_level_0,MAE,R2,Residual Mean
data_tested,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test,4.358969,0.20242,-0.076853
train,2.053423,0.897449,-0.000547


In [16]:
res = data['actual'] - data['pred']
res.mean()

-0.12989475670624498

### 20 Samples

In [17]:
table = pd.DataFrame()
data = pd.DataFrame()
count = 0
for i in df['EXP'].unique():
    t,d = rf_model2(i,20)
    table = table.append(t)
    data = data.append(d)
    count += 1
    print(count,'/32')

20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/7/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.209770   8.495803  0.898983        -0.0033    1/7/17       train   
0  3.066536  14.677407  0.502838         1.2692    1/7/17        test   

                                         train_dates  number_sample  
0  9/4/18 9/26/18 3/2/17 1/4/18 12/9/17 10/2/18 2...             20  
0  9/4/18 9/26/18 3/2/17 1/4/18 12/9/17 10/2/18 2...             20  
--------------------------------------------------------------------------------
1 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.190780   8.282150  0.879614        -0.0028    2/9/17       train   
0  3.458505  19.375227  0.589288         0.2983    2/9/17        test   

                                         train_dates  number_sample  
0  10/29/17 12/22/18 9/10/18 3/7/18 9/4/18 6/19/1...             20  
0  10/29/17 12/22/18 9/10/18 3/7/18 9/4/18 6/19/1...             20  
--------------------------------------------------------------------------------
2 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.266042   8.780509  0.873436        -0.0046   2/13/17       train   
0  7.566511  91.425935 -0.131897         6.2615   2/13/17        test   

                                         train_dates  number_sample  
0  8/17/18 5/15/18 7/21/18 3/2/18 1/4/18 1/7/17 3...             20  
0  8/17/18 5/15/18 7/21/18 3/2/18 1/4/18 1/7/17 3...             20  
--------------------------------------------------------------------------------
3 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/25/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.190606   8.249188  0.888540        -0.0003   2/25/17       train   
0  4.269177  29.491472  0.219551         0.3235   2/25/17        test   

                                         train_dates  number_sample  
0  9/10/18 5/15/18 1/4/18 6/13/18 7/13/17 7/21/18...             20  
0  9/10/18 5/15/18 1/4/18 6/13/18 7/13/17 7/21/18...             20  
--------------------------------------------------------------------------------
4 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.321097   9.282166  0.877766         0.0005    3/2/17       train   
0  5.544078  48.965934  0.091258         4.1415    3/2/17        test   

                                         train_dates  number_sample  
0  1/7/17 9/6/18 10/29/17 10/12/18 10/2/18 10/27/...             20  
0  1/7/17 9/6/18 10/29/17 10/12/18 10/2/18 10/27/...             20  
--------------------------------------------------------------------------------
5 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/14/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.306940   9.165183  0.880273        -0.0046   3/14/17       train   
0  3.762169  24.942319  0.727033        -0.6374   3/14/17        test   

                                         train_dates  number_sample  
0  3/7/18 9/4/18 7/13/17 9/10/18 3/22/17 3/2/18 3...             20  
0  3/7/18 9/4/18 7/13/17 9/10/18 3/22/17 3/2/18 3...             20  
--------------------------------------------------------------------------------
6 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/22/17
        MAE        MSE       R2  Residual Mean test_date data_tested  \
0  2.185476   8.228962  0.88766        -0.0012   3/22/17       train   
0  4.968365  39.858901  0.30831         0.3315   3/22/17        test   

                                         train_dates  number_sample  
0  11/19/17 3/7/18 9/10/18 9/6/18 10/2/18 3/2/18 ...             20  
0  11/19/17 3/7/18 9/10/18 9/6/18 10/2/18 3/2/18 ...             20  
--------------------------------------------------------------------------------
7 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/19/17
        MAE       MSE        R2  Residual Mean test_date data_tested  \
0  2.094921   7.54168  0.896429         0.0017   6/19/17       train   
0  3.794411  25.61187  0.494743         0.1021   6/19/17        test   

                                         train_dates  number_sample  
0  3/7/18 8/17/18 3/2/17 12/9/17 2/9/17 1/4/18 10...             20  
0  3/7/18 8/17/18 3/2/17 12/9/17 2/9/17 1/4/18 10...             20  
--------------------------------------------------------------------------------
8 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.299484   9.019284  0.864433         0.0001   7/13/17       train   
0  3.279014  16.683963  0.356175        -0.8737   7/13/17        test   

                                         train_dates  number_sample  
0  10/29/17 3/7/18 2/24/19 6/19/17 9/26/18 2/25/1...             20  
0  10/29/17 3/7/18 2/24/19 6/19/17 9/26/18 2/25/1...             20  
--------------------------------------------------------------------------------
9 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/29/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.176519   8.140807  0.887163        -0.0018  10/29/17       train   
0  6.445315  65.723335 -0.012252        -2.6353  10/29/17        test   

                                         train_dates  number_sample  
0  12/22/18 2/9/17 7/21/18 3/2/17 9/10/18 6/13/18...             20  
0  12/22/18 2/9/17 7/21/18 3/2/17 9/10/18 6/13/18...             20  
--------------------------------------------------------------------------------
10 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


11/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.101860   7.642644  0.896610        -0.0002  11/19/17       train   
0  4.990906  37.716648  0.474792        -0.1521  11/19/17        test   

                                         train_dates  number_sample  
0  2/24/19 9/4/18 3/22/17 12/22/18 6/13/18 10/12/...             20  
0  2/24/19 9/4/18 3/22/17 12/22/18 6/13/18 10/12/...             20  
--------------------------------------------------------------------------------
11 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.107208   7.612145  0.878733        -0.0014   12/9/17       train   
0  3.561301  17.014886 -0.316660        -3.1492   12/9/17        test   

                                         train_dates  number_sample  
0  12/17/18 6/19/17 8/17/18 1/7/17 5/26/18 10/2/1...             20  
0  12/17/18 6/19/17 8/17/18 1/7/17 5/26/18 10/2/1...             20  
--------------------------------------------------------------------------------
12 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.166533   8.121675  0.892772        -0.0019    1/4/18       train   
0  4.233820  31.753592  0.728443         1.9770    1/4/18        test   

                                         train_dates  number_sample  
0  11/19/17 12/9/17 7/13/17 10/29/17 1/7/17 2/13/...             20  
0  11/19/17 12/9/17 7/13/17 10/29/17 1/7/17 2/13/...             20  
--------------------------------------------------------------------------------
13 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.276838   8.869573  0.870139        -0.0002    3/2/18       train   
0  4.795338  38.106568  0.449180        -1.2609    3/2/18        test   

                                         train_dates  number_sample  
0  8/17/18 5/15/18 9/6/18 9/26/18 2/25/17 3/7/18 ...             20  
0  8/17/18 5/15/18 9/6/18 9/26/18 2/25/17 3/7/18 ...             20  
--------------------------------------------------------------------------------
14 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/7/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.313806   9.322665  0.883140         0.0000    3/7/18       train   
0  3.354152  17.240470  0.320448        -1.0231    3/7/18        test   

                                         train_dates  number_sample  
0  7/21/18 10/2/18 6/13/18 9/10/18 9/4/18 1/4/18 ...             20  
0  7/21/18 10/2/18 6/13/18 9/10/18 9/4/18 1/4/18 ...             20  
--------------------------------------------------------------------------------
15 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.336163   9.456910  0.890756        -0.0018   3/21/18       train   
0  4.915946  36.047677 -0.197187        -3.7816   3/21/18        test   

                                         train_dates  number_sample  
0  9/10/18 1/7/17 7/13/17 2/13/17 3/14/17 11/19/1...             20  
0  9/10/18 1/7/17 7/13/17 2/13/17 3/14/17 11/19/1...             20  
--------------------------------------------------------------------------------
16 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/15/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.183932   8.242662  0.885162         0.0061   5/15/18       train   
0  4.080775  33.405218  0.394888        -1.1312   5/15/18        test   

                                         train_dates  number_sample  
0  1/4/18 12/17/18 9/26/18 3/22/17 6/13/18 3/21/1...             20  
0  1/4/18 12/17/18 9/26/18 3/22/17 6/13/18 3/21/1...             20  
--------------------------------------------------------------------------------
17 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.183124   8.363078  0.887915         0.0031   5/26/18       train   
0  3.555345  20.282599 -0.052991        -2.0082   5/26/18        test   

                                         train_dates  number_sample  
0  3/22/17 2/25/17 2/9/17 7/17/18 12/22/18 10/27/...             20  
0  3/22/17 2/25/17 2/9/17 7/17/18 12/22/18 10/27/...             20  
--------------------------------------------------------------------------------
18 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/13/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.240186   8.737058  0.884161         0.0004   6/13/18       train   
0  3.492419  20.470995 -0.367203        -0.8808   6/13/18        test   

                                         train_dates  number_sample  
0  9/10/18 10/27/18 3/21/18 10/29/17 3/14/17 9/4/...             20  
0  9/10/18 10/27/18 3/21/18 10/29/17 3/14/17 9/4/...             20  
--------------------------------------------------------------------------------
19 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.302118   9.097755  0.877046        -0.0012   7/17/18       train   
0  2.967198  18.128163  0.404770         0.3276   7/17/18        test   

                                         train_dates  number_sample  
0  9/10/18 6/13/18 3/7/18 12/17/18 8/17/18 10/27/...             20  
0  9/10/18 6/13/18 3/7/18 12/17/18 8/17/18 10/27/...             20  
--------------------------------------------------------------------------------
20 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/21/18
        MAE       MSE        R2  Residual Mean test_date data_tested  \
0  2.137423   7.85444  0.894722         0.0029   7/21/18       train   
0  4.771200  35.65056 -0.138403         3.0961   7/21/18        test   

                                         train_dates  number_sample  
0  10/29/17 1/7/17 3/14/17 3/22/17 2/24/19 3/21/1...             20  
0  10/29/17 1/7/17 3/14/17 3/22/17 2/24/19 3/21/1...             20  
--------------------------------------------------------------------------------
21 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


8/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.311936   9.170051  0.878135         0.0006   8/17/18       train   
0  3.637360  23.472067  0.327387         0.4638   8/17/18        test   

                                         train_dates  number_sample  
0  9/26/18 9/10/18 3/2/17 12/17/18 2/13/17 10/29/...             20  
0  9/26/18 9/10/18 3/2/17 12/17/18 2/13/17 10/29/...             20  
--------------------------------------------------------------------------------
22 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/4/18
       MAE       MSE        R2  Residual Mean test_date data_tested  \
0  2.24559  8.847867  0.881435        -0.0002    9/4/18       train   
0  1.96726  6.101027  0.446406         0.3707    9/4/18        test   

                                         train_dates  number_sample  
0  10/2/18 9/10/18 1/4/18 7/17/18 7/13/17 8/17/18...             20  
0  10/2/18 9/10/18 1/4/18 7/17/18 7/13/17 8/17/18...             20  
--------------------------------------------------------------------------------
23 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/6/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.251093   8.570192  0.890672         0.0017    9/6/18       train   
0  2.764239  15.673366  0.384322         0.1224    9/6/18        test   

                                         train_dates  number_sample  
0  7/21/18 3/14/17 10/29/17 12/17/18 7/17/18 8/17...             20  
0  7/21/18 3/14/17 10/29/17 12/17/18 7/17/18 8/17...             20  
--------------------------------------------------------------------------------
24 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/10/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.237265   8.620916  0.858880         0.0020   9/10/18       train   
0  3.709814  23.596656  0.109584         1.6378   9/10/18        test   

                                         train_dates  number_sample  
0  10/27/18 7/17/18 10/12/18 1/4/18 7/21/18 2/9/1...             20  
0  10/27/18 7/17/18 10/12/18 1/4/18 7/21/18 2/9/1...             20  
--------------------------------------------------------------------------------
25 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.183685   8.288014  0.885349         0.0015   9/26/18       train   
0  3.858855  23.893019 -0.057660         1.8347   9/26/18        test   

                                         train_dates  number_sample  
0  9/6/18 3/7/18 10/29/17 10/27/18 5/15/18 3/2/17...             20  
0  9/6/18 3/7/18 10/29/17 10/27/18 5/15/18 3/2/17...             20  
--------------------------------------------------------------------------------
26 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.097775   7.662965  0.886562        -0.0005   10/2/18       train   
0  4.314473  29.031595 -0.019352        -1.6577   10/2/18        test   

                                         train_dates  number_sample  
0  7/21/18 5/15/18 9/26/18 3/14/17 1/4/18 10/27/1...             20  
0  7/21/18 5/15/18 9/26/18 3/14/17 1/4/18 10/27/1...             20  
--------------------------------------------------------------------------------
27 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/12/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.190480   8.210838  0.881694         0.0018  10/12/18       train   
0  4.211346  28.474795  0.567212         0.9827  10/12/18        test   

                                         train_dates  number_sample  
0  3/14/17 2/25/17 1/7/17 9/26/18 3/21/18 3/22/17...             20  
0  3/14/17 2/25/17 1/7/17 9/26/18 3/21/18 3/22/17...             20  
--------------------------------------------------------------------------------
28 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/27/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.104381   7.670853  0.905623         0.0060  10/27/18       train   
0  4.162479  28.698048  0.568425        -0.0555  10/27/18        test   

                                         train_dates  number_sample  
0  2/9/17 1/7/17 9/26/18 12/9/17 7/17/18 9/4/18 9...             20  
0  2/9/17 1/7/17 9/26/18 12/9/17 7/17/18 9/4/18 9...             20  
--------------------------------------------------------------------------------
29 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.095600   7.594650  0.895838         0.0002  12/17/18       train   
0  4.534687  33.925307  0.454058         0.8745  12/17/18        test   

                                         train_dates  number_sample  
0  1/4/18 5/26/18 3/14/17 3/21/18 2/13/17 12/22/1...             20  
0  1/4/18 5/26/18 3/14/17 3/21/18 2/13/17 12/22/1...             20  
--------------------------------------------------------------------------------
30 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/22/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.115453   7.786222  0.892574        -0.0004  12/22/18       train   
0  4.620923  32.809627  0.402148         0.6716  12/22/18        test   

                                         train_dates  number_sample  
0  1/7/17 3/14/17 10/12/18 9/4/18 7/17/18 3/7/18 ...             20  
0  1/7/17 3/14/17 10/12/18 9/4/18 7/17/18 3/7/18 ...             20  
--------------------------------------------------------------------------------
31 /32
20


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/24/19
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.158136   7.969725  0.895840        -0.0009   2/24/19       train   
0  5.954072  56.316296  0.432598        -2.2944   2/24/19        test   

                                         train_dates  number_sample  
0  2/13/17 3/2/17 6/19/17 3/2/18 9/4/18 3/14/17 6...             20  
0  2/13/17 3/2/17 6/19/17 3/2/18 9/4/18 3/14/17 6...             20  
--------------------------------------------------------------------------------
32 /32


In [18]:
#table.to_csv('20_sample_XGB_stats.csv')
#data.to_csv('20_sample_XGB_data.csv')

In [19]:
table.groupby('data_tested').agg({'MAE':'mean', 'R2':'mean', 'Residual Mean':'mean'})

Unnamed: 0_level_0,MAE,R2,Residual Mean
data_tested,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test,4.2065,0.264383,0.110794
train,2.205694,0.885252,4.1e-05


In [20]:
res = data['actual'] - data['pred']
res.mean()

0.041527097089895174

### Samples 25

In [21]:
table = pd.DataFrame()
data = pd.DataFrame()
count = 0
for i in df['EXP'].unique():
    t,d = rf_model2(i,25)
    table = table.append(t)
    data = data.append(d)
    count += 1
    print(count,'/32')

25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/7/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.320189   9.363363  0.882486         0.0062    1/7/17       train   
0  2.818212  12.478459  0.577322         1.1294    1/7/17        test   

                                         train_dates  number_sample  
0  3/22/17 9/4/18 6/19/17 5/26/18 5/15/18 7/13/17...             25  
0  3/22/17 9/4/18 6/19/17 5/26/18 5/15/18 7/13/17...             25  
--------------------------------------------------------------------------------
1 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.196972   8.295900  0.882447         0.0009    2/9/17       train   
0  3.599829  20.434654  0.566830        -1.0828    2/9/17        test   

                                         train_dates  number_sample  
0  10/2/18 9/6/18 3/21/18 3/22/17 9/10/18 9/26/18...             25  
0  10/2/18 9/6/18 3/21/18 3/22/17 9/10/18 9/26/18...             25  
--------------------------------------------------------------------------------
2 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.335664   9.484839  0.868156         0.0002   2/13/17       train   
0  6.605805  72.224162  0.105830         4.5476   2/13/17        test   

                                         train_dates  number_sample  
0  10/2/18 7/21/18 3/22/17 6/19/17 12/22/18 2/9/1...             25  
0  10/2/18 7/21/18 3/22/17 6/19/17 12/22/18 2/9/1...             25  
--------------------------------------------------------------------------------
3 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/25/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.368040   9.711512  0.879858         0.0060   2/25/17       train   
0  4.227856  29.177349  0.227863         0.5872   2/25/17        test   

                                         train_dates  number_sample  
0  7/21/18 8/17/18 2/9/17 7/13/17 6/13/18 9/6/18 ...             25  
0  7/21/18 8/17/18 2/9/17 7/13/17 6/13/18 9/6/18 ...             25  
--------------------------------------------------------------------------------
4 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.274163   8.977970  0.860638        -0.0052    3/2/17       train   
0  6.171192  58.918652 -0.093451         4.7289    3/2/17        test   

                                         train_dates  number_sample  
0  9/26/18 7/13/17 6/19/17 6/13/18 3/2/18 8/17/18...             25  
0  9/26/18 7/13/17 6/19/17 6/13/18 3/2/18 8/17/18...             25  
--------------------------------------------------------------------------------
5 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/14/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.444635  10.295303  0.857246         0.0007   3/14/17       train   
0  3.767657  26.206206  0.713201        -0.7411   3/14/17        test   

                                         train_dates  number_sample  
0  1/4/18 10/2/18 12/22/18 2/25/17 9/6/18 11/19/1...             25  
0  1/4/18 10/2/18 12/22/18 2/25/17 9/6/18 11/19/1...             25  
--------------------------------------------------------------------------------
6 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/22/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.306536   9.203578  0.877581        -0.0014   3/22/17       train   
0  5.009395  39.532892  0.313967        -1.1476   3/22/17        test   

                                         train_dates  number_sample  
0  12/9/17 11/19/17 5/15/18 9/6/18 12/17/18 2/24/...             25  
0  12/9/17 11/19/17 5/15/18 9/6/18 12/17/18 2/24/...             25  
--------------------------------------------------------------------------------
7 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.342120   9.596661  0.882371        -0.0007   6/19/17       train   
0  3.675268  25.680888  0.493382         0.6921   6/19/17        test   

                                         train_dates  number_sample  
0  7/13/17 1/4/18 3/2/17 2/25/17 12/9/17 1/7/17 3...             25  
0  7/13/17 1/4/18 3/2/17 2/25/17 12/9/17 1/7/17 3...             25  
--------------------------------------------------------------------------------
8 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.351563   9.539167  0.863291         0.0013   7/13/17       train   
0  3.209036  16.194878  0.375049        -0.7517   7/13/17        test   

                                         train_dates  number_sample  
0  3/21/18 12/17/18 5/26/18 3/22/17 10/12/18 9/6/...             25  
0  3/21/18 12/17/18 5/26/18 3/22/17 10/12/18 9/6/...             25  
--------------------------------------------------------------------------------
9 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/29/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.299566   9.169092  0.881745         0.0002  10/29/17       train   
0  7.108269  77.201199 -0.189031        -3.5403  10/29/17        test   

                                         train_dates  number_sample  
0  5/15/18 2/9/17 12/17/18 1/7/17 3/2/18 3/2/17 1...             25  
0  5/15/18 2/9/17 12/17/18 1/7/17 3/2/18 3/2/17 1...             25  
--------------------------------------------------------------------------------
10 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


11/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.339334   9.409825  0.876067         0.0022  11/19/17       train   
0  5.114302  40.663804  0.433753         0.4897  11/19/17        test   

                                         train_dates  number_sample  
0  2/25/17 10/27/18 1/7/17 10/29/17 12/17/18 3/14...             25  
0  2/25/17 10/27/18 1/7/17 10/29/17 12/17/18 3/14...             25  
--------------------------------------------------------------------------------
11 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.348208   9.565874  0.874890        -0.0007   12/9/17       train   
0  2.686949  10.578325  0.181419        -1.7788   12/9/17        test   

                                         train_dates  number_sample  
0  3/21/18 1/4/18 12/22/18 3/2/18 6/19/17 2/25/17...             25  
0  3/21/18 1/4/18 12/22/18 3/2/18 6/19/17 2/25/17...             25  
--------------------------------------------------------------------------------
12 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.322900   9.362131  0.856998         0.0017    1/4/18       train   
0  4.113994  31.793058  0.728106         2.1024    1/4/18        test   

                                         train_dates  number_sample  
0  6/13/18 7/13/17 2/24/19 8/17/18 9/10/18 9/26/1...             25  
0  6/13/18 7/13/17 2/24/19 8/17/18 9/10/18 9/26/1...             25  
--------------------------------------------------------------------------------
13 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.362946   9.621062  0.876224         0.0018    3/2/18       train   
0  4.929176  39.306255  0.431839        -2.1042    3/2/18        test   

                                         train_dates  number_sample  
0  1/4/18 6/13/18 11/19/17 9/6/18 2/25/17 5/15/18...             25  
0  1/4/18 6/13/18 11/19/17 9/6/18 2/25/17 5/15/18...             25  
--------------------------------------------------------------------------------
14 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/7/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.361424   9.688202  0.876613         0.0023    3/7/18       train   
0  3.138628  15.046852  0.406912        -0.6416    3/7/18        test   

                                         train_dates  number_sample  
0  3/2/18 11/19/17 8/17/18 12/17/18 3/14/17 10/2/...             25  
0  3/2/18 11/19/17 8/17/18 12/17/18 3/14/17 10/2/...             25  
--------------------------------------------------------------------------------
15 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.325954   9.381444  0.878580         0.0015   3/21/18       train   
0  4.721490  34.786591 -0.155305        -3.3110   3/21/18        test   

                                         train_dates  number_sample  
0  3/7/18 7/21/18 6/13/18 6/19/17 12/17/18 3/14/1...             25  
0  3/7/18 7/21/18 6/13/18 6/19/17 12/17/18 3/14/1...             25  
--------------------------------------------------------------------------------
16 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/15/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.320837   9.256482  0.873872        -0.0002   5/15/18       train   
0  4.315804  36.541904  0.338069        -0.5812   5/15/18        test   

                                         train_dates  number_sample  
0  9/10/18 3/21/18 9/4/18 2/9/17 12/22/18 5/26/18...             25  
0  9/10/18 3/21/18 9/4/18 2/9/17 12/22/18 5/26/18...             25  
--------------------------------------------------------------------------------
17 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.393971   9.962459  0.876091         0.0039   5/26/18       train   
0  3.334593  17.610959  0.085710        -1.8188   5/26/18        test   

                                         train_dates  number_sample  
0  1/4/18 3/14/17 10/29/17 2/25/17 6/13/18 2/24/1...             25  
0  1/4/18 3/14/17 10/29/17 2/25/17 6/13/18 2/24/1...             25  
--------------------------------------------------------------------------------
18 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/13/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.208591   8.449134  0.878951         0.0024   6/13/18       train   
0  3.256008  17.557686 -0.172631        -0.8043   6/13/18        test   

                                         train_dates  number_sample  
0  3/14/17 12/9/17 2/25/17 7/17/18 9/26/18 7/21/1...             25  
0  3/14/17 12/9/17 2/25/17 7/17/18 9/26/18 7/21/1...             25  
--------------------------------------------------------------------------------
19 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.305516   9.217079  0.883097         0.0051   7/17/18       train   
0  2.906577  18.180916  0.403038         1.0322   7/17/18        test   

                                         train_dates  number_sample  
0  2/24/19 5/15/18 9/4/18 1/4/18 3/21/18 7/21/18 ...             25  
0  2/24/19 5/15/18 9/4/18 1/4/18 3/21/18 7/21/18 ...             25  
--------------------------------------------------------------------------------
20 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.319495   9.225419  0.874392         0.0013   7/21/18       train   
0  4.274660  29.017269  0.073413         1.8081   7/21/18        test   

                                         train_dates  number_sample  
0  3/22/17 3/14/17 9/6/18 10/29/17 10/12/18 2/9/1...             25  
0  3/22/17 3/14/17 9/6/18 10/29/17 10/12/18 2/9/1...             25  
--------------------------------------------------------------------------------
21 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


8/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.273518   8.907006  0.872497         0.0002   8/17/18       train   
0  3.818466  25.291766  0.275242        -0.0520   8/17/18        test   

                                         train_dates  number_sample  
0  10/29/17 3/22/17 3/7/18 10/12/18 1/4/18 9/6/18...             25  
0  10/29/17 3/22/17 3/7/18 10/12/18 1/4/18 9/6/18...             25  
--------------------------------------------------------------------------------
22 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.420736  10.153275  0.870605         0.0005    9/4/18       train   
0  2.437704   9.288638  0.157168         1.2057    9/4/18        test   

                                         train_dates  number_sample  
0  3/2/17 3/14/17 2/25/17 10/27/18 3/2/18 5/26/18...             25  
0  3/2/17 3/14/17 2/25/17 10/27/18 3/2/18 5/26/18...             25  
--------------------------------------------------------------------------------
23 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/6/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.325891   9.344825  0.866281        -0.0022    9/6/18       train   
0  2.774667  15.367413  0.396340        -0.0623    9/6/18        test   

                                         train_dates  number_sample  
0  10/29/17 1/7/17 10/2/18 9/26/18 2/9/17 12/17/1...             25  
0  10/29/17 1/7/17 10/2/18 9/26/18 2/9/17 12/17/1...             25  
--------------------------------------------------------------------------------
24 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/10/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.320602   9.276785  0.874952         0.0027   9/10/18       train   
0  3.590766  21.430253  0.191333         1.4886   9/10/18        test   

                                         train_dates  number_sample  
0  1/4/18 2/24/19 6/13/18 3/22/17 9/26/18 3/14/17...             25  
0  1/4/18 2/24/19 6/13/18 3/22/17 9/26/18 3/14/17...             25  
--------------------------------------------------------------------------------
25 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.230055   8.623094  0.887535         0.0017   9/26/18       train   
0  3.746348  22.500579  0.003978         1.5636   9/26/18        test   

                                         train_dates  number_sample  
0  11/19/17 9/10/18 12/9/17 1/7/17 6/13/18 7/13/1...             25  
0  11/19/17 9/10/18 12/9/17 1/7/17 6/13/18 7/13/1...             25  
--------------------------------------------------------------------------------
26 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.274057   9.038704  0.881085         0.0012   10/2/18       train   
0  4.165513  27.481585  0.035072        -1.6636   10/2/18        test   

                                         train_dates  number_sample  
0  12/9/17 6/19/17 6/13/18 9/4/18 10/27/18 3/2/18...             25  
0  12/9/17 6/19/17 6/13/18 9/4/18 10/27/18 3/2/18...             25  
--------------------------------------------------------------------------------
27 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/12/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.314706   9.318622  0.880165         0.0038  10/12/18       train   
0  4.262476  28.631362  0.564832         0.8303  10/12/18        test   

                                         train_dates  number_sample  
0  3/22/17 9/10/18 2/24/19 10/2/18 7/17/18 2/13/1...             25  
0  3/22/17 9/10/18 2/24/19 10/2/18 7/17/18 2/13/1...             25  
--------------------------------------------------------------------------------
28 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/27/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.334180   9.458716  0.872547         0.0015  10/27/18       train   
0  4.346842  30.882947  0.535567         0.9740  10/27/18        test   

                                         train_dates  number_sample  
0  9/6/18 5/15/18 3/2/17 9/10/18 2/9/17 7/13/17 3...             25  
0  9/6/18 5/15/18 3/2/17 9/10/18 2/9/17 7/13/17 3...             25  
--------------------------------------------------------------------------------
29 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.357504   9.537863  0.856733         0.0001  12/17/18       train   
0  4.570402  34.231102  0.449137         1.1634  12/17/18        test   

                                         train_dates  number_sample  
0  3/14/17 6/19/17 11/19/17 5/15/18 12/22/18 1/7/...             25  
0  3/14/17 6/19/17 11/19/17 5/15/18 12/22/18 1/7/...             25  
--------------------------------------------------------------------------------
30 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/22/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.185330   8.279074  0.875905         0.0025  12/22/18       train   
0  4.415142  30.178561  0.450091         0.5210  12/22/18        test   

                                         train_dates  number_sample  
0  10/27/18 1/4/18 9/10/18 3/7/18 12/9/17 9/4/18 ...             25  
0  10/27/18 1/4/18 9/10/18 3/7/18 12/9/17 9/4/18 ...             25  
--------------------------------------------------------------------------------
31 /32
25


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/24/19
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.364502   9.595022  0.867885        -0.0007   2/24/19       train   
0  6.154267  60.792304  0.387501        -2.3719   2/24/19        test   

                                         train_dates  number_sample  
0  9/10/18 8/17/18 3/21/18 3/2/18 9/26/18 7/21/18...             25  
0  9/10/18 8/17/18 3/21/18 3/2/18 9/26/18 7/21/18...             25  
--------------------------------------------------------------------------------
32 /32


In [22]:
#table.to_csv('25_sample_XGB_stats.csv')
#data.to_csv('25_sample_XGB_data.csv')

In [23]:
table.groupby('data_tested').agg({'MAE':'mean', 'R2':'mean', 'Residual Mean':'mean'})

Unnamed: 0_level_0,MAE,R2,Residual Mean
data_tested,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test,4.164603,0.290361,0.075344
train,2.320303,0.873993,0.001275


In [24]:
res = data['actual'] - data['pred']
res.mean()

0.022984543286979436

### 30 Samples 

In [25]:
table = pd.DataFrame()
data = pd.DataFrame()
count = 0
for i in df['EXP'].unique():
    t,d = rf_model2(i,30)
    table = table.append(t)
    data = data.append(d)
    count += 1
    print(count,'/32')

30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/7/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.457562  10.469138  0.860387         0.0038    1/7/17       train   
0  2.844239  12.728281  0.568860         1.1870    1/7/17        test   

                                         train_dates  number_sample  
0  10/2/18 12/22/18 6/13/18 5/15/18 1/4/18 2/9/17...             30  
0  10/2/18 12/22/18 6/13/18 5/15/18 1/4/18 2/9/17...             30  
--------------------------------------------------------------------------------
1 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.442085  10.306441  0.863906         0.0059    2/9/17       train   
0  3.637721  20.856422  0.557890        -1.1957    2/9/17        test   

                                         train_dates  number_sample  
0  2/13/17 9/10/18 12/17/18 10/2/18 2/25/17 5/26/...             30  
0  2/13/17 9/10/18 12/17/18 10/2/18 2/25/17 5/26/...             30  
--------------------------------------------------------------------------------
2 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.398297   9.924875  0.860237        -0.0011   2/13/17       train   
0  6.540937  69.481479  0.139786         4.5715   2/13/17        test   

                                         train_dates  number_sample  
0  3/22/17 7/13/17 3/2/18 1/4/18 10/12/18 3/21/18...             30  
0  3/22/17 7/13/17 3/2/18 1/4/18 10/12/18 3/21/18...             30  
--------------------------------------------------------------------------------
3 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/25/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.422804  10.187373  0.865467         0.0068   2/25/17       train   
0  4.318611  29.713362  0.213679         0.0311   2/25/17        test   

                                         train_dates  number_sample  
0  3/7/18 9/10/18 9/26/18 7/17/18 12/17/18 9/6/18...             30  
0  3/7/18 9/10/18 9/26/18 7/17/18 12/17/18 9/6/18...             30  
--------------------------------------------------------------------------------
4 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.414825  10.104077  0.853934        -0.0035    3/2/17       train   
0  4.945769  38.778010  0.280332         3.2427    3/2/17        test   

                                         train_dates  number_sample  
0  9/26/18 5/26/18 8/17/18 12/22/18 12/9/17 7/21/...             30  
0  9/26/18 5/26/18 8/17/18 12/22/18 12/9/17 7/21/...             30  
--------------------------------------------------------------------------------
5 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/14/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.415425  10.136161  0.861019        -0.0020   3/14/17       train   
0  3.899302  27.397668  0.700162        -0.9655   3/14/17        test   

                                         train_dates  number_sample  
0  10/2/18 11/19/17 7/17/18 6/13/18 10/27/18 10/2...             30  
0  10/2/18 11/19/17 7/17/18 6/13/18 10/27/18 10/2...             30  
--------------------------------------------------------------------------------
6 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/22/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.426875  10.186688  0.859050         0.0045   3/22/17       train   
0  4.946494  38.541755  0.331167        -1.3018   3/22/17        test   

                                         train_dates  number_sample  
0  3/14/17 2/24/19 7/13/17 3/2/18 2/13/17 12/22/1...             30  
0  3/14/17 2/24/19 7/13/17 3/2/18 2/13/17 12/22/1...             30  
--------------------------------------------------------------------------------
7 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.423962  10.173627  0.866779         0.0041   6/19/17       train   
0  3.672123  24.486577  0.516942         0.2027   6/19/17        test   

                                         train_dates  number_sample  
0  9/10/18 1/7/17 9/4/18 7/13/17 7/21/18 10/29/17...             30  
0  9/10/18 1/7/17 9/4/18 7/13/17 7/21/18 10/29/17...             30  
--------------------------------------------------------------------------------
8 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/13/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.441385  10.286616  0.863423         0.0017   7/13/17       train   
0  3.244499  16.860658  0.349357        -0.5755   7/13/17        test   

                                         train_dates  number_sample  
0  3/21/18 2/9/17 9/26/18 2/13/17 1/7/17 3/2/18 1...             30  
0  3/21/18 2/9/17 9/26/18 2/13/17 1/7/17 3/2/18 1...             30  
--------------------------------------------------------------------------------
9 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/29/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.363447   9.657512  0.861821         0.0023  10/29/17       train   
0  6.725327  71.651395 -0.103555        -2.3036  10/29/17        test   

                                         train_dates  number_sample  
0  1/7/17 6/19/17 12/9/17 10/12/18 5/15/18 9/26/1...             30  
0  1/7/17 6/19/17 12/9/17 10/12/18 5/15/18 9/26/1...             30  
--------------------------------------------------------------------------------
10 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


11/19/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.401267  10.030907  0.865066        -0.0018  11/19/17       train   
0  4.975282  37.458482  0.478387        -0.6285  11/19/17        test   

                                         train_dates  number_sample  
0  9/26/18 1/7/17 7/13/17 12/17/18 6/13/18 3/22/1...             30  
0  9/26/18 1/7/17 7/13/17 12/17/18 6/13/18 3/22/1...             30  
--------------------------------------------------------------------------------
11 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/9/17
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.488345  10.660723  0.858661         0.0006   12/9/17       train   
0  2.418755   8.653146  0.330395        -1.3137   12/9/17        test   

                                         train_dates  number_sample  
0  2/9/17 12/22/18 7/17/18 5/15/18 2/25/17 1/4/18...             30  
0  2/9/17 12/22/18 7/17/18 5/15/18 2/25/17 1/4/18...             30  
--------------------------------------------------------------------------------
12 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


1/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.425619  10.191443  0.858980         0.0008    1/4/18       train   
0  4.140861  30.317358  0.740726         1.6335    1/4/18        test   

                                         train_dates  number_sample  
0  3/2/18 12/17/18 2/13/17 6/19/17 3/7/18 2/24/19...             30  
0  3/2/18 12/17/18 2/13/17 6/19/17 3/7/18 2/24/19...             30  
--------------------------------------------------------------------------------
13 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.429427  10.237933  0.858476         0.0048    3/2/18       train   
0  4.821383  39.604531  0.427527        -0.3490    3/2/18        test   

                                         train_dates  number_sample  
0  9/10/18 10/2/18 2/24/19 12/22/18 10/27/18 12/9...             30  
0  9/10/18 10/2/18 2/24/19 12/22/18 10/27/18 12/9...             30  
--------------------------------------------------------------------------------
14 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/7/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.497892  10.779654  0.857539         0.0037    3/7/18       train   
0  3.188201  15.770599  0.378384        -0.2580    3/7/18        test   

                                         train_dates  number_sample  
0  10/2/18 11/19/17 5/15/18 5/26/18 10/12/18 3/2/...             30  
0  10/2/18 11/19/17 5/15/18 5/26/18 10/12/18 3/2/...             30  
--------------------------------------------------------------------------------
15 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


3/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.466339  10.518487  0.860661         0.0031   3/21/18       train   
0  4.571947  32.839391 -0.090636        -2.9059   3/21/18        test   

                                         train_dates  number_sample  
0  10/27/18 2/9/17 9/26/18 3/22/17 7/13/17 6/13/1...             30  
0  10/27/18 2/9/17 9/26/18 3/22/17 7/13/17 6/13/1...             30  
--------------------------------------------------------------------------------
16 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/15/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.424284  10.090349  0.865511        -0.0000   5/15/18       train   
0  4.012492  33.067579  0.401004        -0.4696   5/15/18        test   

                                         train_dates  number_sample  
0  10/29/17 10/2/18 2/25/17 12/9/17 5/26/18 3/22/...             30  
0  10/29/17 10/2/18 2/25/17 12/9/17 5/26/18 3/22/...             30  
--------------------------------------------------------------------------------
17 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


5/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.402001  10.009907  0.858734         0.0067   5/26/18       train   
0  3.428978  18.439700  0.042685        -2.0000   5/26/18        test   

                                         train_dates  number_sample  
0  3/2/18 9/10/18 9/4/18 7/17/18 12/22/18 12/9/17...             30  
0  3/2/18 9/10/18 9/4/18 7/17/18 12/22/18 12/9/17...             30  
--------------------------------------------------------------------------------
18 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


6/13/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.399576   9.977465  0.865859        -0.0007   6/13/18       train   
0  3.169865  16.403106 -0.095520        -0.0501   6/13/18        test   

                                         train_dates  number_sample  
0  3/14/17 12/17/18 3/2/18 10/29/17 10/27/18 6/19...             30  
0  3/14/17 12/17/18 3/2/18 10/29/17 10/27/18 6/19...             30  
--------------------------------------------------------------------------------
19 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.454488  10.391528  0.857766         0.0013   7/17/18       train   
0  2.881805  17.245302  0.433758         0.3949   7/17/18        test   

                                         train_dates  number_sample  
0  1/7/17 9/26/18 3/22/17 9/4/18 2/24/19 6/19/17 ...             30  
0  1/7/17 9/26/18 3/22/17 9/4/18 2/24/19 6/19/17 ...             30  
--------------------------------------------------------------------------------
20 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


7/21/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.417416  10.124623  0.865628        -0.0020   7/21/18       train   
0  3.857491  24.683534  0.211799         0.9755   7/21/18        test   

                                         train_dates  number_sample  
0  9/6/18 7/17/18 5/15/18 12/9/17 7/13/17 9/4/18 ...             30  
0  9/6/18 7/17/18 5/15/18 12/9/17 7/13/17 9/4/18 ...             30  
--------------------------------------------------------------------------------
21 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


8/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.473106  10.482726  0.859485         0.0016   8/17/18       train   
0  3.667696  23.634301  0.322738        -0.1771   8/17/18        test   

                                         train_dates  number_sample  
0  2/13/17 3/2/18 3/21/18 5/15/18 7/13/17 9/4/18 ...             30  
0  2/13/17 3/2/18 3/21/18 5/15/18 7/13/17 9/4/18 ...             30  
--------------------------------------------------------------------------------
22 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/4/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.464113  10.443648  0.859856         0.0005    9/4/18       train   
0  1.931189   5.774347  0.476048        -0.4357    9/4/18        test   

                                         train_dates  number_sample  
0  12/22/18 6/13/18 3/7/18 7/13/17 3/22/17 5/26/1...             30  
0  12/22/18 6/13/18 3/7/18 7/13/17 3/22/17 5/26/1...             30  
--------------------------------------------------------------------------------
23 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/6/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.445169  10.295078  0.861187        -0.0029    9/6/18       train   
0  2.658880  14.792584  0.418921         0.2669    9/6/18        test   

                                         train_dates  number_sample  
0  3/14/17 8/17/18 6/19/17 3/7/18 1/4/18 2/13/17 ...             30  
0  3/14/17 8/17/18 6/19/17 3/7/18 1/4/18 2/13/17 ...             30  
--------------------------------------------------------------------------------
24 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/10/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.413458  10.094371  0.858642         0.0012   9/10/18       train   
0  3.516595  21.284580  0.196830         1.0637   9/10/18        test   

                                         train_dates  number_sample  
0  12/22/18 7/17/18 1/4/18 7/21/18 7/13/17 2/9/17...             30  
0  12/22/18 7/17/18 1/4/18 7/21/18 7/13/17 2/9/17...             30  
--------------------------------------------------------------------------------
25 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


9/26/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.412261  10.092509  0.867987         0.0036   9/26/18       train   
0  3.891788  24.124230 -0.067895         1.9524   9/26/18        test   

                                         train_dates  number_sample  
0  10/2/18 9/4/18 10/27/18 6/19/17 3/14/17 12/17/...             30  
0  10/2/18 9/4/18 10/27/18 6/19/17 3/14/17 12/17/...             30  
--------------------------------------------------------------------------------
26 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/2/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.370976   9.730884  0.870397        -0.0014   10/2/18       train   
0  4.258645  27.659716  0.028817        -1.9558   10/2/18        test   

                                         train_dates  number_sample  
0  6/19/17 9/10/18 11/19/17 2/13/17 1/4/18 3/14/1...             30  
0  6/19/17 9/10/18 11/19/17 2/13/17 1/4/18 3/14/1...             30  
--------------------------------------------------------------------------------
27 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/12/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.416937  10.100361  0.865559        -0.0005  10/12/18       train   
0  4.472513  30.638564  0.534325        -0.2394  10/12/18        test   

                                         train_dates  number_sample  
0  3/2/18 9/10/18 3/22/17 12/17/18 10/27/18 10/29...             30  
0  3/2/18 9/10/18 3/22/17 12/17/18 10/27/18 10/29...             30  
--------------------------------------------------------------------------------
28 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


10/27/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.443757  10.375522  0.861171         0.0017  10/27/18       train   
0  3.910022  25.689679  0.613666         0.7555  10/27/18        test   

                                         train_dates  number_sample  
0  10/29/17 1/4/18 5/15/18 9/10/18 8/17/18 3/14/1...             30  
0  10/29/17 1/4/18 5/15/18 9/10/18 8/17/18 3/14/1...             30  
--------------------------------------------------------------------------------
29 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/17/18
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.432083  10.260085  0.862073         0.0032  12/17/18       train   
0  4.255255  29.477987  0.525627         0.6585  12/17/18        test   

                                         train_dates  number_sample  
0  10/2/18 10/29/17 3/22/17 2/13/17 2/24/19 3/2/1...             30  
0  10/2/18 10/29/17 3/22/17 2/13/17 2/24/19 3/2/1...             30  
--------------------------------------------------------------------------------
30 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


12/22/18
        MAE       MSE        R2  Residual Mean test_date data_tested  \
0  2.425626  10.15709  0.863526         0.0005  12/22/18       train   
0  4.169786  27.67812  0.495654        -0.2654  12/22/18        test   

                                         train_dates  number_sample  
0  3/7/18 7/21/18 5/15/18 3/2/17 10/29/17 9/26/18...             30  
0  3/7/18 7/21/18 5/15/18 3/2/17 10/29/17 9/26/18...             30  
--------------------------------------------------------------------------------
31 /32
30


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


2/24/19
        MAE        MSE        R2  Residual Mean test_date data_tested  \
0  2.378014   9.803600  0.857731        -0.0024   2/24/19       train   
0  5.840580  53.741277  0.458542        -1.5570   2/24/19        test   

                                         train_dates  number_sample  
0  1/7/17 1/4/18 3/22/17 10/12/18 9/6/18 5/15/18 ...             30  
0  1/7/17 1/4/18 3/22/17 10/12/18 9/6/18 5/15/18 ...             30  
--------------------------------------------------------------------------------
32 /32


In [26]:
#table.to_csv('30_sample_XGB_stats.csv')
#data.to_csv('30_sample_XGB_data.csv')

In [27]:
table.groupby('data_tested').agg({'MAE':'mean', 'R2':'mean', 'Residual Mean':'mean'})

Unnamed: 0_level_0,MAE,R2,Residual Mean
data_tested,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test,4.02547,0.338013,-0.062856
train,2.427776,0.861766,0.001378


In [28]:
res = data['actual'] - data['pred']
res.mean()

-0.0997783277667618