In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor, DMatrix
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from collections import Counter

### Laod Data

In [2]:
df = pd.read_csv("15min_dateset.csv")
df.drop('Unnamed: 0', inplace = True, axis = 1)
print(len(df['EXP'].unique()))
df.head()

32


Unnamed: 0,Date,Type,from,EXP,jul_date,hour_angle,max_wind_speed,avg_wind_speed,R1,R2,...,HYBL_one,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700
0,2017-01-07 06:00:00,ANDE,MESONET,1/7/17,7,-1.570795,2.332608,1.295893,2.630564,2.322274,...,0.62923,0.903721,3.610055,5.01167,6.054984,0.739,-5000.0,40.293,23.757,16.787335
1,2017-01-07 06:15:00,ANDE,MESONET,1/7/17,7,-1.505345,1.555072,0.647947,2.055383,1.813005,...,0.748575,1.493288,3.643217,5.232982,6.394255,1.05,-5000.0,40.293,23.959,16.195689
2,2017-01-07 06:30:00,ANDE,MESONET,1/7/17,7,-1.439895,2.332608,1.360688,1.792587,1.671478,...,1.154196,1.645343,3.346854,5.054551,6.395084,1.211,-5000.0,40.293,24.118,15.636587
3,2017-01-07 06:45:00,ANDE,MESONET,1/7/17,7,-1.374446,1.555072,0.453563,1.442889,1.352497,...,2.183654,2.266223,3.550607,5.363623,6.869857,0.943,-5000.0,40.293,24.22,16.767452
4,2017-01-07 07:00:00,ANDE,MESONET,1/7/17,7,-1.308996,2.526992,1.360688,2.283417,1.677404,...,2.40729,2.633109,3.692489,5.309871,6.783159,0.751,-5000.0,40.293,24.149,17.249375


### Functions Used

In [3]:
#### Clasification results
def model_info1(x, y, test_date, data_tested):
    cm = confusion_matrix(x,y)
    acc = accuracy_score(x,y)
    cm = confusion_matrix(x,y)
    if len(cm) > 1:
        hit = cm[1][1]
        false_alarms = cm[0][1]
        misses = cm[1][0]
        no_events = cm[0][0]
        csi = hit / (false_alarms + misses + hit)
        hit_rate = hit / (hit + misses)
        false_alarm_rate = false_alarms / (hit + false_alarms)
    else:
        hit = 0
        false_alarms = 0
        misses = 0
        no_events = cm[0][0]
        csi = 0
        hit_rate =0
        false_alarm_rate = 0
    data = [{"Accuracy":acc,"Hits":hit, "False Alarms":false_alarms, 'Misses':misses,'No Events': no_events, 'CSI':csi, 'Hit Rate':hit_rate, 'False Alarm Rate':false_alarm_rate}]
    df = pd.DataFrame(data)
    df['test_date'] = test_date
    df['data_tested'] = data_tested
    if len(Counter(x)) == 1:
        l30 = Counter(x)[0]
        df['greter_than_30_knots'] = '0'
        df['less_than_30_knots'] = l30
    else:
        g30 = Counter(x)[1]
        l30 = Counter(x)[0]
        df['greter_than_30_knots'] = g30
        df['less_than_30_knots'] = l30
    return (df)



### Regression results
def model_info2(x, y,test_date,data_tested):
    mae = mean_absolute_error(x,y)
    mse = mean_squared_error(x,y)
    r2 = r2_score(x,y)
    residual_0 = x - y
    res_0 = np.array(residual_0)
    res_mean = round(res_0.mean(),4)
    data = [{"MAE":mae ,"MSE":mse, 'Residual Mean':res_mean}]
    df = pd.DataFrame(data)
    df['test_date'] = test_date
    df['data_tested'] = data_tested
    return (df)

In [5]:
le = LabelEncoder()
df['from'] = le.fit_transform(df['from'])
df.tail(2)

Unnamed: 0,Date,Type,from,EXP,jul_date,hour_angle,max_wind_speed,avg_wind_speed,R1,R2,...,HYBL_one,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700
72647,2018-07-22 05:45:00,WBOU,1,7/21/18,203,-1.636245,17.105792,8.552896,0.279731,0.693772,...,9.63431,16.364893,18.456569,21.401767,24.263238,3.607,554.661,81.379,0.706,8.964475
72648,2018-07-22 06:00:00,WBOU,1,7/21/18,203,-1.570795,17.105792,10.043173,0.231301,0.605321,...,8.939975,16.514877,18.701148,22.068564,24.935163,3.349,555.042,81.379,0.171,7.752404


In [6]:
df["max_th"] = np.where(df["max_wind_speed"] > 30, 2, np.where(df["max_wind_speed"] < 15, 0, 1))
df['max_th'].value_counts()

0    44574
1    24445
2     3630
Name: max_th, dtype: int64

### XGB basic model no grid search 

In [9]:
def xgb_model_basic(x): 
    test_1 = df[df.EXP == x]
    train_1 = df[df.EXP != x]

    train_1 = train_1.drop(['EXP', 'jul_date' ,'hour_angle', 'avg_wind_speed'], axis = 1)
    test_1 = test_1.drop(['EXP', 'jul_date' ,'hour_angle', 'avg_wind_speed'], axis = 1)

    df_0 = train_1[train_1['max_th'] == 0]
    df_1 = train_1[train_1['max_th'] == 1]
    df_2 = train_1[train_1['max_th'] == 2]

    df0_sample = df_0.sample(9000 ,random_state=19)
    df1_sample = df_1.sample(9000 ,random_state=19)
    df2_sample = df_2.sample(3000 ,random_state=19)

    df_new = pd.concat([df0_sample, df1_sample, df2_sample], axis = 0)

    x_train = df_new.drop('max_wind_speed', 1)
    x_train.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_train = df_new.max_wind_speed
    x_test = test_1.drop('max_wind_speed', 1)
    x_test_copy = x_test.copy()
    x_test.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_test = test_1.max_wind_speed


    model = XGBRegressor(booster='gbtree', objective='reg:squarederror',random_state = 19)
    model.fit(x_train, y_train)

    pred_train = model.predict(x_train)
    train_info = model_info2(y_train, pred_train, x , "train")

    y_train30 = np.where(y_train > 30, 1, 0 )
    pred_train30 = np.where(pred_train > 30, 1, 0 )
    train_info2 = model_info1(y_train30, pred_train30, x,'train')

    pred_test = model.predict(x_test)
    test_info = model_info2(y_test, pred_test, x , "test")

    y_test30 = np.where(y_test > 30, 1, 0 )
    pred_test30 = np.where(pred_test > 30, 1, 0 )
    test_info2 = model_info1(y_test30, pred_test30, x,'test')

    table = train_info.append(test_info)
    table2 = train_info2.append(test_info2)
    print(x)
    print(table)
    print("==" *10)
    print(table2)
    print('--' * 40)

    df_class = pd.DataFrame(pred_test30, columns=['Class_results'])
    df_class['Class_results'] = np.where(df_class['Class_results'] == 1, 'Greater than 30 knots', 'Less than 30 knots')
    df_class = df_class.reset_index()
    df_class.drop('index', axis=1, inplace=True)
    df4 = pd.DataFrame(y_test)
    df4 = df4.rename(columns={'max_wind_speed': 'actual'})
    df4 = df4.reset_index()
    df4.drop('index', axis=1, inplace=True)
    x_test_copy = x_test_copy.reset_index()
    x_test_copy.drop("index", axis=1, inplace=True)
    df5 = pd.concat([x_test_copy, df4, df_class], axis=1)
    pred_df = pd.DataFrame(pred_test, columns=["pred"])
    df6 = pd.concat([df5, pred_df], axis=1)
    df6['From'] = np.where(df6['from'] == 1, 'Mesonet', 'ASOS')
    df6.drop(['from', 'max_th'], inplace=True, axis=1)
    df6['test_date'] = x
    return(table, table2, df6)

In [10]:
table = pd.DataFrame()
table_class = pd.DataFrame()
data = pd.DataFrame()
for i in df['EXP'].unique():
    t,tc,d = xgb_model_basic(i)
    table = table.append(t)
    table_class = table_class.append(tc)
    data = data.append(d)

1/7/17
        MAE        MSE  Residual Mean test_date data_tested
0  3.837926  25.355370         0.0006    1/7/17       train
0  2.671488  11.408716         0.7114    1/7/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.911762  1438           291    1562      17709  0.436949  0.479333   
0  1.000000     0             0       0       1959  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.168305    1/7/17       train                 3000   
0          0.000000    1/7/17        test                    0   

   less_than_30_knots  
0               18000  
0                1959  
--------------------------------------------------------------------------------
2/9/17
        MAE        MSE  Residual Mean test_date data_tested
0  3.833782  25.200074         0.0007    2/9/17       train
0  3.841905  23.043714        -1.7483    2/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events 

12/9/17
        MAE        MSE  Residual Mean test_date data_tested
0  3.836094  25.262697         0.0008   12/9/17       train
0  2.332064   7.812781        -1.2583   12/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.910714  1471           346    1529      17654  0.439629  0.490333   
0  1.000000     0             0       0       2531  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.190424   12/9/17       train                 3000   
0          0.000000   12/9/17        test                    0   

   less_than_30_knots  
0               18000  
0                2531  
--------------------------------------------------------------------------------
1/4/18
        MAE        MSE  Residual Mean test_date data_tested
0  3.799329  24.919283         0.0007    1/4/18       train
0  4.147786  28.536782         1.1610    1/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events

9/4/18
        MAE        MSE  Residual Mean test_date data_tested
0  3.836855  25.160043         0.0008    9/4/18       train
0  2.234856   7.470884        -0.9723    9/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0   0.91281  1481           312    1519      17688  0.447162  0.493667   
0   1.00000     0             0       0       2133  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0           0.17401    9/4/18       train                 3000   
0           0.00000    9/4/18        test                    0   

   less_than_30_knots  
0               18000  
0                2133  
--------------------------------------------------------------------------------
9/6/18
        MAE        MSE  Residual Mean test_date data_tested
0  3.815609  24.843172         0.0007    9/6/18       train
0  2.923043  17.180520         0.6967    9/6/18        test
   Accuracy  Hits  False Alarms  Misses  No Events 

In [11]:
#table.to_csv('XGB_basic_regression_results.csv')
table[table['data_tested'] == 'test']

Unnamed: 0,MAE,MSE,Residual Mean,test_date,data_tested
0,2.671488,11.408716,0.7114,1/7/17,test
0,3.841905,23.043714,-1.7483,2/9/17,test
0,5.510579,47.90574,1.6116,2/13/17,test
0,4.536429,32.268627,-1.1813,2/25/17,test
0,4.659851,34.211659,1.2587,3/2/17,test
0,3.711668,23.408129,-1.3191,3/14/17,test
0,5.539398,45.752869,-2.6382,3/22/17,test
0,3.677698,24.446202,-0.2969,6/19/17,test
0,3.399815,18.084509,-1.399,7/13/17,test
0,6.965487,75.055267,-3.5314,10/29/17,test


In [12]:
#table_class.to_csv('XGB_basic_classfication_results.csv')
table_class[table_class['data_tested'] == 'test']

Unnamed: 0,Accuracy,Hits,False Alarms,Misses,No Events,CSI,Hit Rate,False Alarm Rate,test_date,data_tested,greter_than_30_knots,less_than_30_knots
0,1.0,0,0,0,1959,0.0,0.0,0.0,1/7/17,test,0,1959
0,0.968947,18,29,35,1979,0.219512,0.339623,0.617021,2/9/17,test,53,2008
0,0.78743,158,33,383,1383,0.275261,0.292052,0.172775,2/13/17,test,541,1416
0,0.98257,0,4,31,1973,0.0,0.0,1.0,2/25/17,test,31,1977
0,0.790762,229,53,400,1483,0.335777,0.36407,0.187943,3/2/17,test,629,1536
0,0.918894,127,119,54,1833,0.423333,0.701657,0.48374,3/14/17,test,181,1952
0,0.77403,258,326,163,1417,0.345382,0.612827,0.558219,3/22/17,test,421,1743
0,0.980247,1,3,45,2381,0.020408,0.021739,0.75,6/19/17,test,46,2384
0,0.999154,0,0,2,2361,0.0,0.0,,7/13/17,test,2,2361
0,0.945171,23,42,88,2218,0.150327,0.207207,0.646154,10/29/17,test,111,2260


In [13]:
#data.to_csv('XGB_basic_data_results.csv')
data.head(2)

Unnamed: 0,Date,Type,R1,R2,Pressure_reduced_to_MSL_.Pa..0.MSL,Derived_radar_reflectivity_.dB..1.HYBL,u.component_of_wind_.m.s..85000.ISBL,v.component_of_wind_.m.s..85000.ISBL,wind_speed_85000,wind_shear_85000,...,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700,actual,Class_results,pred,From,test_date
0,2017-01-07 06:00:00,ANDE,2.630564,2.322274,102879.7,0.0,5.447,-1.457,5.638498,5.269992,...,0.739,-5000.0,40.293,23.757,16.787335,2.332608,Less than 30 knots,5.746608,Mesonet,1/7/17
1,2017-01-07 06:15:00,ANDE,2.055383,1.813005,102951.0,0.0,5.629,-1.464,5.816265,5.773324,...,1.05,-5000.0,40.293,23.959,16.195689,1.555072,Less than 30 knots,5.746608,Mesonet,1/7/17


### Grid Search Model

In [20]:
# df_0 = df2[df2['max_th'] == 0]
# df_1 = df2[df2['max_th'] == 1]
# df_2 = df2[df2['max_th'] == 2]

# df0_sample = df_0.sample(9000 ,random_state=19)
# df1_sample = df_1.sample(9000 ,random_state=19)
# df2_sample = df_2.sample(3000 ,random_state=19)

# df_gs = pd.concat([df0_sample, df1_sample, df2_sample], axis = 0)
# df_gs['max_th'].value_counts()

1    9000
0    9000
2    3000
Name: max_th, dtype: int64

In [21]:
# X = df_gs.drop(['Date','max_wind_speed', 'Type', 'EXP', 'max_th'], 1)
# Y = df_gs.max_wind_speed.values
# X

Unnamed: 0,from,R1,R2,Pressure_reduced_to_MSL_.Pa..0.MSL,Derived_radar_reflectivity_.dB..1.HYBL,u.component_of_wind_.m.s..85000.ISBL,v.component_of_wind_.m.s..85000.ISBL,wind_speed_85000,wind_shear_85000,Wind_speed_.gust._.m.s..0.SFC,HYBL_one,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700
10628,1,3.261219,6.132343,102305.945,0.000,7.229,-2.143,7.539953,11.683057,13.157,4.774199,5.474310,7.426896,8.629481,7.586525,0.079,9613.756,87.831,-0.913,2.643295
9344,1,0.152229,0.247941,100969.600,33.099,-18.893,-8.557,20.740485,12.970719,35.733,8.706624,13.847472,17.020268,19.371065,21.503364,-1.993,2451.432,36.072,9.649,13.108330
56227,1,0.318725,0.382697,100941.227,0.000,15.297,4.152,15.850467,11.993877,18.805,5.420390,7.235121,8.691705,10.875789,13.237866,-3.119,-5000.000,86.181,-0.045,2.921928
70523,1,7.627781,3.370534,100430.800,27.962,-8.825,-11.532,14.521283,13.937777,23.745,1.787987,4.414027,10.467418,13.408971,14.921248,0.387,496.275,38.977,17.013,14.884441
48479,1,168.072532,118.652089,101041.789,0.000,6.342,-0.381,6.353434,3.322680,6.390,3.299620,3.346181,4.309025,6.146063,7.883806,0.485,5767.350,44.053,18.313,16.078714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68215,1,1.037844,1.485693,99718.380,27.139,-13.345,-17.285,21.837130,15.755341,35.936,8.856065,17.014168,18.759977,20.652048,22.376745,-3.191,474.611,43.724,18.400,20.115060
48516,1,0.256680,0.386990,99734.828,17.453,-4.688,10.497,11.496276,18.281077,35.772,9.059464,15.681797,18.568731,20.858800,22.640065,3.231,266.416,41.801,-0.042,11.009412
5065,1,0.662720,0.743379,98702.180,33.386,-30.792,-20.424,36.949791,31.360209,47.098,9.966333,21.252886,23.848851,26.730762,29.941884,-0.986,559.418,41.206,8.700,2.228930
41001,0,0.012512,0.019735,100375.700,0.000,25.212,0.160,25.212508,16.084006,31.691,9.129168,19.528904,22.060336,23.619324,24.663153,-5.191,-5000.000,32.040,19.561,13.250822


In [22]:
# from sklearn.model_selection import GridSearchCV
# pram = {'subsample': [0.6, 0.8, 1.0],
#         'colsample_bytree': [0.6, 0.8, 1.0],
#         'max_depth': [3, 4, 5],
#         'learning_rate': [.01,0.1,0.3],
#         'n_estimators': [100,250,500],
#        'gamma': [0,0.3,0.7,1]}
# clf = GridSearchCV(XGBRegressor(booster='gbtree',objective= 'reg:squarederror' , random_state=19), pram, cv=3, n_jobs=-1, verbose=2, scoring='neg_mean_absolute_error')
# clf.fit(X,Y)

Fitting 3 folds for each of 972 candidates, totalling 2916 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  7.8min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed: 13.9min
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed: 22.1min
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed: 33.5min
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed: 47.4min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed: 65.7min
[Parallel(n_jobs=-1)]: Done 2916 out of 2916 | elapsed: 76.5min finished


GridSearchCV(cv=3, error_score=nan,
             estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                    colsample_bylevel=1, colsample_bynode=1,
                                    colsample_bytree=1, gamma=0,
                                    importance_type='gain', learning_rate=0.1,
                                    max_delta_step=0, max_depth=3,
                                    min_child_weight=1, missing=None,
                                    n_estimators=100, n_jobs=1, nthread=None,
                                    objective='reg:squarederror',
                                    random_state=19, reg_alpha...
                                    scale_pos_weight=1, seed=None, silent=None,
                                    subsample=1, verbosity=1),
             iid='deprecated', n_jobs=-1,
             param_grid={'colsample_bytree': [0.6, 0.8, 1.0],
                         'gamma': [0, 0.3, 0.7, 1],
                         'l

In [23]:
# clf.best_score_, clf.best_params_

(-4.4978801590795445,
 {'colsample_bytree': 1.0,
  'gamma': 0.3,
  'learning_rate': 0.1,
  'max_depth': 5,
  'n_estimators': 500,
  'subsample': 0.6})

In [14]:
def xgb_model(x): 
    test_1 = df[df.EXP == x]
    train_1 = df[df.EXP != x]

    train_1 = train_1.drop(['EXP', 'jul_date' ,'hour_angle', 'avg_wind_speed'], axis = 1)
    test_1 = test_1.drop(['EXP', 'jul_date' ,'hour_angle', 'avg_wind_speed'], axis = 1)

    df_0 = train_1[train_1['max_th'] == 0]
    df_1 = train_1[train_1['max_th'] == 1]
    df_2 = train_1[train_1['max_th'] == 2]

    df0_sample = df_0.sample(9000 ,random_state=19)
    df1_sample = df_1.sample(9000 ,random_state=19)
    df2_sample = df_2.sample(3000 ,random_state=19)

    df_new = pd.concat([df0_sample, df1_sample, df2_sample], axis = 0)

    x_train = df_new.drop('max_wind_speed', 1)
    x_train.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_train = df_new.max_wind_speed
    x_test = test_1.drop('max_wind_speed', 1)
    x_test_copy = x_test.copy()
    x_test.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_test = test_1.max_wind_speed

    model = XGBRegressor(booster='gbtree', objective='reg:squarederror', n_estimators = 500 ,
                         colsample_bytree = 1.0,  gamma = 0.3, max_depth = 5, 
                         subsample = 0.6, random_state = 19)
    model.fit(x_train, y_train)

    pred_train = model.predict(x_train)
    train_info = model_info2(y_train, pred_train, x , "train")

    y_train30 = np.where(y_train > 30, 1, 0 )
    pred_train30 = np.where(pred_train > 30, 1, 0 )
    train_info2 = model_info1(y_train30, pred_train30, x,'train')

    pred_test = model.predict(x_test)
    test_info = model_info2(y_test, pred_test, x , "test")

    y_test30 = np.where(y_test > 30, 1, 0 )
    pred_test30 = np.where(pred_test > 30, 1, 0 )
    test_info2 = model_info1(y_test30, pred_test30, x,'test')

    table = train_info.append(test_info)
    table2 = train_info2.append(test_info2)
    print(x)
    print(table)
    print("==" *10)
    print(table2)
    print('--' * 40)

    df_class = pd.DataFrame(pred_test30, columns=['Class_results'])
    df_class['Class_results'] = np.where(df_class['Class_results'] == 1, 'Greater than 30 knots', 'Less than 30 knots')
    df_class = df_class.reset_index()
    df_class.drop('index', axis=1, inplace=True)
    df4 = pd.DataFrame(y_test)
    df4 = df4.rename(columns={'max_wind_speed': 'actual'})
    df4 = df4.reset_index()
    df4.drop('index', axis=1, inplace=True)
    x_test_copy = x_test_copy.reset_index()
    x_test_copy.drop("index", axis=1, inplace=True)
    df5 = pd.concat([x_test_copy, df4, df_class], axis=1)
    pred_df = pd.DataFrame(pred_test, columns=["pred"])
    df6 = pd.concat([df5, pred_df], axis=1)
    df6['From'] = np.where(df6['from'] == 1, 'Mesonet', 'ASOS')
    df6.drop(['from', 'max_th'], inplace=True, axis=1)
    df6['test_date'] = x
    return(table, table2, df6)

In [15]:
table2 = pd.DataFrame()
table_class2 = pd.DataFrame()
data2 = pd.DataFrame()
for i in df['EXP'].unique():
    t,tc,d = xgb_model(i)
    table2 = table2.append(t)
    table_class2 = table_class2.append(tc)
    data2 = data2.append(d)

1/7/17
        MAE        MSE  Residual Mean test_date data_tested
0  2.243412   8.503966         0.0009    1/7/17       train
0  2.790713  12.718934         0.4276    1/7/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.953381  2239           218     761      17782  0.695774  0.746333   
0  0.999490     0             1       0       1958  0.000000       NaN   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.088726    1/7/17       train                 3000   
0          1.000000    1/7/17        test                    0   

   less_than_30_knots  
0               18000  
0                1959  
--------------------------------------------------------------------------------
2/9/17
        MAE        MSE  Residual Mean test_date data_tested
0  2.239793   8.466018         0.0036    2/9/17       train
0  3.905667  24.057773        -1.8967    2/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events 

12/9/17
        MAE       MSE  Residual Mean test_date data_tested
0  2.254458  8.583737        -0.0004   12/9/17       train
0  2.289846  8.130250        -0.7872   12/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events      CSI  Hit Rate  \
0  0.952667  2238           232     762      17768  0.69245     0.746   
0  1.000000     0             0       0       2531  0.00000     0.000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.093927   12/9/17       train                 3000   
0          0.000000   12/9/17        test                    0   

   less_than_30_knots  
0               18000  
0                2531  
--------------------------------------------------------------------------------
1/4/18
        MAE        MSE  Residual Mean test_date data_tested
0  2.237022   8.500076        -0.0017    1/4/18       train
0  3.856672  25.918970         1.2393    1/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events      

9/4/18
        MAE       MSE  Residual Mean test_date data_tested
0  2.253980  8.610888          0.000    9/4/18       train
0  1.984375  6.238177         -0.017    9/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.952238  2229           232     771      17768  0.689666     0.743   
0  1.000000     0             0       0       2133  0.000000     0.000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.094271    9/4/18       train                 3000   
0          0.000000    9/4/18        test                    0   

   less_than_30_knots  
0               18000  
0                2133  
--------------------------------------------------------------------------------
9/6/18
        MAE        MSE  Residual Mean test_date data_tested
0  2.223098   8.350257         0.0007    9/6/18       train
0  2.903912  16.401755         0.6879    9/6/18        test
   Accuracy  Hits  False Alarms  Misses  No Events    

In [16]:
#table2.to_csv('XGB_search_regression_results.csv')
table2[table2['data_tested'] == 'test']

Unnamed: 0,MAE,MSE,Residual Mean,test_date,data_tested
0,2.790713,12.718934,0.4276,1/7/17,test
0,3.905667,24.057773,-1.8967,2/9/17,test
0,6.230148,63.353309,4.0128,2/13/17,test
0,4.434021,31.5299,-1.0737,2/25/17,test
0,4.901993,38.299254,2.5105,3/2/17,test
0,4.025611,28.229662,-1.3765,3/14/17,test
0,5.041289,39.653989,-1.6675,3/22/17,test
0,3.863309,27.318058,-0.5419,6/19/17,test
0,3.335722,17.409224,-1.0595,7/13/17,test
0,6.657792,68.754617,-3.0417,10/29/17,test


In [17]:
#table_class2.to_csv('XGB_search_classification_results.csv')
table_class2[table_class2['data_tested'] == 'test']

Unnamed: 0,Accuracy,Hits,False Alarms,Misses,No Events,CSI,Hit Rate,False Alarm Rate,test_date,data_tested,greter_than_30_knots,less_than_30_knots
0,0.99949,0,1,0,1958,0.0,,1.0,1/7/17,test,0,1959
0,0.967492,30,44,23,1964,0.309278,0.566038,0.594595,2/9/17,test,53,2008
0,0.769034,105,16,436,1400,0.18851,0.194085,0.132231,2/13/17,test,541,1416
0,0.98257,0,4,31,1973,0.0,0.0,1.0,2/25/17,test,31,1977
0,0.78291,192,33,437,1503,0.29003,0.305246,0.146667,3/2/17,test,629,1536
0,0.905767,121,141,60,1811,0.375776,0.668508,0.538168,3/14/17,test,181,1952
0,0.803604,229,233,192,1510,0.350153,0.543943,0.504329,3/22/17,test,421,1743
0,0.978189,5,12,41,2372,0.086207,0.108696,0.705882,6/19/17,test,46,2384
0,0.999154,0,0,2,2361,0.0,0.0,,7/13/17,test,2,2361
0,0.943906,24,46,87,2214,0.152866,0.216216,0.657143,10/29/17,test,111,2260


In [18]:
#data2.to_csv('XGB_search_data.csv')
data2.head()

Unnamed: 0,Date,Type,R1,R2,Pressure_reduced_to_MSL_.Pa..0.MSL,Derived_radar_reflectivity_.dB..1.HYBL,u.component_of_wind_.m.s..85000.ISBL,v.component_of_wind_.m.s..85000.ISBL,wind_speed_85000,wind_shear_85000,...,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700,actual,Class_results,pred,From,test_date
0,2017-01-07 06:00:00,ANDE,2.630564,2.322274,102879.7,0.0,5.447,-1.457,5.638498,5.269992,...,0.739,-5000.0,40.293,23.757,16.787335,2.332608,Less than 30 knots,2.168547,Mesonet,1/7/17
1,2017-01-07 06:15:00,ANDE,2.055383,1.813005,102951.0,0.0,5.629,-1.464,5.816265,5.773324,...,1.05,-5000.0,40.293,23.959,16.195689,1.555072,Less than 30 knots,2.439883,Mesonet,1/7/17
2,2017-01-07 06:30:00,ANDE,1.792587,1.671478,102890.6,0.0,5.653,-1.56,5.8643,6.23214,...,1.211,-5000.0,40.293,24.118,15.636587,2.332608,Less than 30 knots,2.84316,Mesonet,1/7/17
3,2017-01-07 06:45:00,ANDE,1.442889,1.352497,102931.0,0.0,5.692,-2.445,6.194908,6.806583,...,0.943,-5000.0,40.293,24.22,16.767452,1.555072,Less than 30 knots,3.277068,Mesonet,1/7/17
4,2017-01-07 07:00:00,ANDE,2.283417,1.677404,102882.5,0.0,5.226,-3.06,6.055962,5.554556,...,0.751,-5000.0,40.293,24.149,17.249375,2.526992,Less than 30 knots,3.706889,Mesonet,1/7/17
