In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from collections import Counter

### Load Data

In [2]:
df = pd.read_csv("15min_dateset.csv")
df.drop('Unnamed: 0', inplace = True, axis = 1)
print(len(df['EXP'].unique()))
df.head()

32


Unnamed: 0,Date,Type,from,EXP,jul_date,hour_angle,max_wind_speed,avg_wind_speed,R1,R2,...,HYBL_one,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700
0,2017-01-07 06:00:00,ANDE,MESONET,1/7/17,7,-1.570795,2.332608,1.295893,2.630564,2.322274,...,0.62923,0.903721,3.610055,5.01167,6.054984,0.739,-5000.0,40.293,23.757,16.787335
1,2017-01-07 06:15:00,ANDE,MESONET,1/7/17,7,-1.505345,1.555072,0.647947,2.055383,1.813005,...,0.748575,1.493288,3.643217,5.232982,6.394255,1.05,-5000.0,40.293,23.959,16.195689
2,2017-01-07 06:30:00,ANDE,MESONET,1/7/17,7,-1.439895,2.332608,1.360688,1.792587,1.671478,...,1.154196,1.645343,3.346854,5.054551,6.395084,1.211,-5000.0,40.293,24.118,15.636587
3,2017-01-07 06:45:00,ANDE,MESONET,1/7/17,7,-1.374446,1.555072,0.453563,1.442889,1.352497,...,2.183654,2.266223,3.550607,5.363623,6.869857,0.943,-5000.0,40.293,24.22,16.767452
4,2017-01-07 07:00:00,ANDE,MESONET,1/7/17,7,-1.308996,2.526992,1.360688,2.283417,1.677404,...,2.40729,2.633109,3.692489,5.309871,6.783159,0.751,-5000.0,40.293,24.149,17.249375


### Function used

In [3]:
#### Clasification results
def model_info1(x, y, test_date, data_tested):
    cm = confusion_matrix(x,y)
    acc = accuracy_score(x,y)
    cm = confusion_matrix(x,y)
    if len(cm) > 1:
        hit = cm[1][1]
        false_alarms = cm[0][1]
        misses = cm[1][0]
        no_events = cm[0][0]
        csi = hit / (false_alarms + misses + hit)
        hit_rate = hit / (hit + misses)
        false_alarm_rate = false_alarms / (hit + false_alarms)
    else:
        hit = 0
        false_alarms = 0
        misses = 0
        no_events = cm[0][0]
        csi = 0
        hit_rate =0
        false_alarm_rate = 0
    data = [{"Accuracy":acc,"Hits":hit, "False Alarms":false_alarms, 'Misses':misses,'No Events': no_events, 'CSI':csi, 'Hit Rate':hit_rate, 'False Alarm Rate':false_alarm_rate}]
    df = pd.DataFrame(data)
    df['test_date'] = test_date
    df['data_tested'] = data_tested
    if len(Counter(x)) == 1:
        l30 = Counter(x)[0]
        df['greter_than_30_knots'] = '0'
        df['less_than_30_knots'] = l30
    else:
        g30 = Counter(x)[1]
        l30 = Counter(x)[0]
        df['greter_than_30_knots'] = g30
        df['less_than_30_knots'] = l30
    return (df)



### Regression results
def model_info2(x, y,test_date,data_tested):
    mae = mean_absolute_error(x,y)
    mse = mean_squared_error(x,y)
    r2 = r2_score(x,y)
    residual_0 = x - y
    res_0 = np.array(residual_0)
    res_mean = round(res_0.mean(),4)
    data = [{"MAE":mae ,"MSE":mse, 'Residual Mean':res_mean}]
    df = pd.DataFrame(data)
    df['test_date'] = test_date
    df['data_tested'] = data_tested
    return (df)

In [4]:
le = LabelEncoder()
df['from'] = le.fit_transform(df['from'])
df.tail(2)

Unnamed: 0,Date,Type,from,EXP,jul_date,hour_angle,max_wind_speed,avg_wind_speed,R1,R2,...,HYBL_one,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700
72647,2018-07-22 05:45:00,WBOU,1,7/21/18,203,-1.636245,17.105792,8.552896,0.279731,0.693772,...,9.63431,16.364893,18.456569,21.401767,24.263238,3.607,554.661,81.379,0.706,8.964475
72648,2018-07-22 06:00:00,WBOU,1,7/21/18,203,-1.570795,17.105792,10.043173,0.231301,0.605321,...,8.939975,16.514877,18.701148,22.068564,24.935163,3.349,555.042,81.379,0.171,7.752404


In [5]:
df["max_th"] = np.where(df["max_wind_speed"] > 30, 2, np.where(df["max_wind_speed"] < 15, 0, 1))
df['max_th'].value_counts()

0    44574
1    24445
2     3630
Name: max_th, dtype: int64

### basic model no grid search

In [6]:
def rf_model_basic(x): 
    test_1 = df[df.EXP == x]
    train_1 = df[df.EXP != x]

    train_1 = train_1.drop(['EXP','jul_date','hour_angle','avg_wind_speed'], axis = 1)

    test_1 = test_1.drop(['EXP','jul_date','hour_angle','avg_wind_speed'], axis = 1)

    df_0 = train_1[train_1['max_th'] == 0]
    df_1 = train_1[train_1['max_th'] == 1]
    df_2 = train_1[train_1['max_th'] == 2]

    df0_sample = df_0.sample(9000 ,random_state=19)
    df1_sample = df_1.sample(9000 ,random_state=19)
    df2_sample = df_2.sample(3000 ,random_state=19)

    df_new = pd.concat([df0_sample, df1_sample, df2_sample], axis = 0)

    x_train = df_new.drop('max_wind_speed', 1)
    x_train.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_train = df_new.max_wind_speed
    x_test = test_1.drop('max_wind_speed', 1)
    x_test_copy = x_test.copy()
    x_test.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_test = test_1.max_wind_speed


    model = RandomForestRegressor(random_state = 19)
    model.fit(x_train, y_train)

    pred_train = model.predict(x_train)
    train_info = model_info2(y_train, pred_train, x , "train")

    y_train30 = np.where(y_train > 30, 1, 0 )
    pred_train30 = np.where(pred_train > 30, 1, 0 )
    train_info2 = model_info1(y_train30, pred_train30, x,'train')

    pred_test = model.predict(x_test)
    test_info = model_info2(y_test, pred_test, x , "test")

    y_test30 = np.where(y_test > 30, 1, 0 )
    pred_test30 = np.where(pred_test > 30, 1, 0 )
    test_info2 = model_info1(y_test30, pred_test30, x,'test')

    table = train_info.append(test_info)
    table2 = train_info2.append(test_info2)
    print(x)
    print(table)
    print("==" *10)
    print(table2)
    print('--' * 40)

    df_class = pd.DataFrame(pred_test30, columns=['Class_results'])
    df_class['Class_results'] = np.where(df_class['Class_results'] == 1, 'Greater than 30 knots', 'Less than 30 knots')
    df_class = df_class.reset_index()
    df_class.drop('index', axis=1, inplace=True)
    df4 = pd.DataFrame(y_test)
    df4 = df4.rename(columns={'max_wind_speed': 'actual'})
    df4 = df4.reset_index()
    df4.drop('index', axis=1, inplace=True)
    x_test_copy = x_test_copy.reset_index()
    x_test_copy.drop("index", axis=1, inplace=True)
    df5 = pd.concat([x_test_copy, df4, df_class], axis=1)
    pred_df = pd.DataFrame(pred_test, columns=["pred"])
    df6 = pd.concat([df5, pred_df], axis=1)
    df6['From'] = np.where(df6['from'] == 1, 'Mesonet', 'ASOS')
    df6.drop(['from', 'max_th'], inplace=True, axis=1)
    df6['test_date'] = x
    return(table, table2, df6)

In [7]:
table = pd.DataFrame()
table_class = pd.DataFrame()
data = pd.DataFrame()
for i in df['EXP'].unique():
    t,tc,d = rf_model_basic(i)
    table = table.append(t)
    table_class = table_class.append(tc)
    data = data.append(d)

1/7/17
        MAE        MSE  Residual Mean test_date data_tested
0  1.171080   2.594524        -0.0305    1/7/17       train
0  2.628188  10.909912         0.1850    1/7/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.972905  2545           114     455      17886  0.817277  0.848333   
0  1.000000     0             0       0       1959  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.042873    1/7/17       train                 3000   
0          0.000000    1/7/17        test                    0   

   less_than_30_knots  
0               18000  
0                1959  
--------------------------------------------------------------------------------
2/9/17
        MAE        MSE  Residual Mean test_date data_tested
0  1.173478   2.611868        -0.0343    2/9/17       train
0  3.814669  22.877609        -1.9157    2/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events 

12/9/17
        MAE       MSE  Residual Mean test_date data_tested
0  1.160522  2.546377        -0.0324   12/9/17       train
0  2.391919  8.554695        -1.3047   12/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.973333  2560           120     440      17880  0.820513  0.853333   
0  1.000000     0             0       0       2531  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.044776   12/9/17       train                 3000   
0          0.000000   12/9/17        test                    0   

   less_than_30_knots  
0               18000  
0                2531  
--------------------------------------------------------------------------------
1/4/18
        MAE        MSE  Residual Mean test_date data_tested
0  1.170249   2.622783        -0.0304    1/4/18       train
0  4.007853  27.875091         1.5334    1/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events   

9/4/18
        MAE       MSE  Residual Mean test_date data_tested
0  1.161735  2.529966        -0.0366    9/4/18       train
0  2.065784  6.206311        -0.6805    9/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.974238  2569           110     431      17890  0.826045  0.856333   
0  1.000000     0             0       0       2133  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0           0.04106    9/4/18       train                 3000   
0           0.00000    9/4/18        test                    0   

   less_than_30_knots  
0               18000  
0                2133  
--------------------------------------------------------------------------------
9/6/18
        MAE        MSE  Residual Mean test_date data_tested
0  1.149886   2.477512        -0.0267    9/6/18       train
0  2.782932  15.404184         0.1646    9/6/18        test
   Accuracy  Hits  False Alarms  Misses  No Events    

In [8]:
#table.to_csv('RF_basic_regression_results.csv')
table[table['data_tested'] == 'test']

Unnamed: 0,MAE,MSE,Residual Mean,test_date,data_tested
0,2.628188,10.909912,0.185,1/7/17,test
0,3.814669,22.877609,-1.9157,2/9/17,test
0,5.606519,50.979304,1.9197,2/13/17,test
0,4.624652,34.385473,-1.2595,2/25/17,test
0,4.715084,35.216118,1.5366,3/2/17,test
0,3.624915,22.777126,-0.8829,3/14/17,test
0,4.817011,36.179686,-0.8901,3/22/17,test
0,3.98114,27.480078,-0.2523,6/19/17,test
0,3.51482,18.979531,-1.521,7/13/17,test
0,6.764024,70.11462,-3.5388,10/29/17,test


In [9]:
#table_class.to_csv('RF_basic_classfication_results.csv')
table_class[table_class['data_tested'] == 'test']

Unnamed: 0,Accuracy,Hits,False Alarms,Misses,No Events,CSI,Hit Rate,False Alarm Rate,test_date,data_tested,greter_than_30_knots,less_than_30_knots
0,1.0,0,0,0,1959,0.0,0.0,0.0,1/7/17,test,0,1959
0,0.973799,35,36,18,1972,0.393258,0.660377,0.507042,2/9/17,test,53,2008
0,0.784875,152,32,389,1384,0.265271,0.280961,0.173913,2/13/17,test,541,1416
0,0.98257,0,4,31,1973,0.0,0.0,1.0,2/25/17,test,31,1977
0,0.797229,251,61,378,1475,0.363768,0.399046,0.195513,3/2/17,test,629,1536
0,0.930614,82,49,99,1903,0.356522,0.453039,0.374046,3/14/17,test,181,1952
0,0.840573,150,74,271,1669,0.30303,0.356295,0.330357,3/22/17,test,421,1743
0,0.979835,1,4,45,2380,0.02,0.021739,0.8,6/19/17,test,46,2384
0,0.999154,0,0,2,2361,0.0,0.0,,7/13/17,test,2,2361
0,0.936736,10,49,101,2211,0.0625,0.09009,0.830508,10/29/17,test,111,2260


In [18]:
#data.to_csv('RF_basic_data_results.csv')
data.head(2)

Unnamed: 0,Date,Type,R1,R2,Pressure_reduced_to_MSL_.Pa..0.MSL,Derived_radar_reflectivity_.dB..1.HYBL,u.component_of_wind_.m.s..85000.ISBL,v.component_of_wind_.m.s..85000.ISBL,wind_speed_85000,wind_shear_85000,...,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700,actual,Class_results,pred,From,test_date
0,2017-01-07 06:00:00,ANDE,2.630564,2.322274,102879.7,0.0,5.447,-1.457,5.638498,5.269992,...,0.739,-5000.0,40.293,23.757,16.787335,2.332608,Less than 30 knots,3.284963,Mesonet,1/7/17
1,2017-01-07 06:15:00,ANDE,2.055383,1.813005,102951.0,0.0,5.629,-1.464,5.816265,5.773324,...,1.05,-5000.0,40.293,23.959,16.195689,1.555072,Less than 30 knots,3.52438,Mesonet,1/7/17


### Random Forest Grid Searched Model

In [26]:
# df_0 = df2[df2['max_th'] == 0]
# df_1 = df2[df2['max_th'] == 1]
# df_2 = df2[df2['max_th'] == 2]

# df0_sample = df_0.sample(9000 ,random_state=19)
# df1_sample = df_1.sample(9000 ,random_state=19)
# df2_sample = df_2.sample(3000 ,random_state=19)

# df_gs = pd.concat([df0_sample, df1_sample, df2_sample], axis = 0)
# df_gs['max_th'].value_counts()

1    9000
0    9000
2    3000
Name: max_th, dtype: int64

In [33]:
# X = df_gs.drop(['Date','max_wind_speed', 'Type', 'EXP', 'max_th'], 1)
# Y = df_gs.max_wind_speed.values
# X

Unnamed: 0,from,R1,R2,Pressure_reduced_to_MSL_.Pa..0.MSL,Derived_radar_reflectivity_.dB..1.HYBL,u.component_of_wind_.m.s..85000.ISBL,v.component_of_wind_.m.s..85000.ISBL,wind_speed_85000,wind_shear_85000,Wind_speed_.gust._.m.s..0.SFC,...,HYBL_two,HYBL_three,HYBL_four,HYBL_five,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700,max_th
10628,1,3.261219,6.132343,102305.945,0.000,7.229,-2.143,7.539953,11.683057,13.157,...,5.474310,7.426896,8.629481,7.586525,0.079,9613.756,87.831,-0.913,2.643295,0
9344,1,0.152229,0.247941,100969.600,33.099,-18.893,-8.557,20.740485,12.970719,35.733,...,13.847472,17.020268,19.371065,21.503364,-1.993,2451.432,36.072,9.649,13.108330,0
56227,1,0.318725,0.382697,100941.227,0.000,15.297,4.152,15.850467,11.993877,18.805,...,7.235121,8.691705,10.875789,13.237866,-3.119,-5000.000,86.181,-0.045,2.921928,0
70523,1,7.627781,3.370534,100430.800,27.962,-8.825,-11.532,14.521283,13.937777,23.745,...,4.414027,10.467418,13.408971,14.921248,0.387,496.275,38.977,17.013,14.884441,0
48479,1,168.072532,118.652089,101041.789,0.000,6.342,-0.381,6.353434,3.322680,6.390,...,3.346181,4.309025,6.146063,7.883806,0.485,5767.350,44.053,18.313,16.078714,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68215,1,1.037844,1.485693,99718.380,27.139,-13.345,-17.285,21.837130,15.755341,35.936,...,17.014168,18.759977,20.652048,22.376745,-3.191,474.611,43.724,18.400,20.115060,2
48516,1,0.256680,0.386990,99734.828,17.453,-4.688,10.497,11.496276,18.281077,35.772,...,15.681797,18.568731,20.858800,22.640065,3.231,266.416,41.801,-0.042,11.009412,2
5065,1,0.662720,0.743379,98702.180,33.386,-30.792,-20.424,36.949791,31.360209,47.098,...,21.252886,23.848851,26.730762,29.941884,-0.986,559.418,41.206,8.700,2.228930,2
41001,0,0.012512,0.019735,100375.700,0.000,25.212,0.160,25.212508,16.084006,31.691,...,19.528904,22.060336,23.619324,24.663153,-5.191,-5000.000,32.040,19.561,13.250822,2


In [40]:
# from sklearn.model_selection import GridSearchCV
# pram = {'max_depth' : [3,5,7], 'min_samples_split': [3, 5, 7],'n_estimators':[100,350 ,500] , 'max_features' : [0.2,0.3,0.4]}
# clf = GridSearchCV(RandomForestRegressor(random_state=19), pram, cv=3, n_jobs=-1, verbose=3, scoring = 'neg_mean_absolute_error')
# clf.fit(X,Y)

Fitting 3 folds for each of 81 candidates, totalling 243 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   12.3s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 243 out of 243 | elapsed:  5.6min finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=19,
                                             verbose=0, warm_start=False),
             iid='deprecated', n_jobs

In [41]:
# clf.best_score_, clf.best_params_

(-3.9179784140825844,
 {'max_depth': 7,
  'max_features': 0.4,
  'min_samples_split': 3,
  'n_estimators': 100})

### Model

In [10]:
def rf_model(x): 
    test_1 = df[df.EXP == x]
    train_1 = df[df.EXP != x]

    train_1 = train_1.drop(['EXP','jul_date','hour_angle','avg_wind_speed'], axis = 1)
    test_1 = test_1.drop(['EXP','jul_date','hour_angle','avg_wind_speed'], axis = 1)

    df_0 = train_1[train_1['max_th'] == 0]
    df_1 = train_1[train_1['max_th'] == 1]
    df_2 = train_1[train_1['max_th'] == 2]

    df0_sample = df_0.sample(9000 ,random_state=19)
    df1_sample = df_1.sample(9000 ,random_state=19)
    df2_sample = df_2.sample(3000 ,random_state=19)

    df_new = pd.concat([df0_sample, df1_sample, df2_sample], axis = 0)

    x_train = df_new.drop('max_wind_speed', 1)
    x_train.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_train = df_new.max_wind_speed
    x_test = test_1.drop('max_wind_speed', 1)
    x_test_copy = x_test.copy()
    x_test.drop(['Date','Type','max_th'], axis=1, inplace=True)
    y_test = test_1.max_wind_speed

    
    model = RandomForestRegressor(n_estimators=100, min_samples_split = 3, max_features = 0.4, max_depth = 7 ,
                                  random_state = 19)
    model.fit(x_train, y_train)

    pred_train = model.predict(x_train)
    train_info = model_info2(y_train, pred_train, x , "train")

    y_train30 = np.where(y_train > 30, 1, 0 )
    pred_train30 = np.where(pred_train > 30, 1, 0 )
    train_info2 = model_info1(y_train30, pred_train30, x,'train')

    pred_test = model.predict(x_test)
    test_info = model_info2(y_test, pred_test, x , "test")

    y_test30 = np.where(y_test > 30, 1, 0 )
    pred_test30 = np.where(pred_test > 30, 1, 0 )
    test_info2 = model_info1(y_test30, pred_test30, x,'test')

    table = train_info.append(test_info)
    table2 = train_info2.append(test_info2)
    print(x)
    print(table)
    print("==" *10)
    print(table2)
    print('--' * 40)

    df_class = pd.DataFrame(pred_test30, columns=['Class_results'])
    df_class['Class_results'] = np.where(df_class['Class_results'] == 1, 'Greater than 30 knots', 'Less than 30 knots')
    df_class = df_class.reset_index()
    df_class.drop('index', axis=1, inplace=True)
    df4 = pd.DataFrame(y_test)
    df4 = df4.rename(columns={'max_wind_speed': 'actual'})
    df4 = df4.reset_index()
    df4.drop('index', axis=1, inplace=True)
    x_test_copy = x_test_copy.reset_index()
    x_test_copy.drop("index", axis=1, inplace=True)
    df5 = pd.concat([x_test_copy, df4, df_class], axis=1)
    pred_df = pd.DataFrame(pred_test, columns=["pred"])
    df6 = pd.concat([df5, pred_df], axis=1)
    df6['From'] = np.where(df6['from'] == 1, 'Mesonet', 'ASOS')
    df6.drop(['from', 'max_th'], inplace=True, axis=1)
    df6['test_date'] = x
    return(table, table2, df6)

In [11]:
table2 = pd.DataFrame()
table_class2 = pd.DataFrame()
data2 = pd.DataFrame()
for i in df['EXP'].unique():
    t,tc,d = rf_model(i)
    table2 = table2.append(t)
    table_class2 = table_class2.append(tc)
    data2 = data2.append(d)

1/7/17
        MAE        MSE  Residual Mean test_date data_tested
0  3.923876  25.878245        -0.0014    1/7/17       train
0  2.570526  10.497127        -0.0257    1/7/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.908238  1270           197    1730      17803  0.397247  0.423333   
0  1.000000     0             0       0       1959  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.134288    1/7/17       train                 3000   
0          0.000000    1/7/17        test                    0   

   less_than_30_knots  
0               18000  
0                1959  
--------------------------------------------------------------------------------
2/9/17
        MAE        MSE  Residual Mean test_date data_tested
0  3.912492  25.648377          0.004    2/9/17       train
0  4.034421  24.838817         -2.177    2/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events 

12/9/17
        MAE        MSE  Residual Mean test_date data_tested
0  3.892812  25.417662         0.0020   12/9/17       train
0  2.428728   8.403593        -1.4089   12/9/17        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.909524  1330           230    1670      17770  0.411765  0.443333   
0  1.000000     0             0       0       2531  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.147436   12/9/17       train                 3000   
0          0.000000   12/9/17        test                    0   

   less_than_30_knots  
0               18000  
0                2531  
--------------------------------------------------------------------------------
1/4/18
        MAE        MSE  Residual Mean test_date data_tested
0  3.878193  25.378016         0.0022    1/4/18       train
0  4.067514  28.147643         1.1112    1/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events

9/4/18
        MAE        MSE  Residual Mean test_date data_tested
0  3.908053  25.499743         0.0002    9/4/18       train
0  2.390097   8.188313        -1.0054    9/4/18        test
   Accuracy  Hits  False Alarms  Misses  No Events       CSI  Hit Rate  \
0  0.908095  1306           236    1694      17764  0.403585  0.435333   
0  1.000000     0             0       0       2133  0.000000  0.000000   

   False Alarm Rate test_date data_tested greter_than_30_knots  \
0          0.153048    9/4/18       train                 3000   
0          0.000000    9/4/18        test                    0   

   less_than_30_knots  
0               18000  
0                2133  
--------------------------------------------------------------------------------
9/6/18
        MAE        MSE  Residual Mean test_date data_tested
0  3.892020  25.298299         0.0060    9/6/18       train
0  3.136706  18.244626        -0.0847    9/6/18        test
   Accuracy  Hits  False Alarms  Misses  No Events 

In [12]:
#table2.to_csv('RF_search_regression_results.csv')
table2[table2['data_tested'] == 'test']

Unnamed: 0,MAE,MSE,Residual Mean,test_date,data_tested
0,2.570526,10.497127,-0.0257,1/7/17,test
0,4.034421,24.838817,-2.177,2/9/17,test
0,5.550683,48.004103,1.1328,2/13/17,test
0,4.670351,34.338927,-1.1483,2/25/17,test
0,4.726951,34.769552,1.0439,3/2/17,test
0,3.811096,24.05753,-0.7612,3/14/17,test
0,4.981614,37.833952,-1.1908,3/22/17,test
0,3.850474,25.267243,-0.4566,6/19/17,test
0,3.862459,22.308292,-2.1456,7/13/17,test
0,7.037079,76.738074,-3.9863,10/29/17,test


In [13]:
#table_class2.to_csv('RF_search_classification_results.csv')
table_class2[table_class2['data_tested'] == 'test']

Unnamed: 0,Accuracy,Hits,False Alarms,Misses,No Events,CSI,Hit Rate,False Alarm Rate,test_date,data_tested,greter_than_30_knots,less_than_30_knots
0,1.0,0,0,0,1959,0.0,0.0,0.0,1/7/17,test,0,1959
0,0.969918,20,29,33,1979,0.243902,0.377358,0.591837,2/9/17,test,53,2008
0,0.784364,146,27,395,1389,0.257042,0.269871,0.156069,2/13/17,test,541,1416
0,0.984064,0,1,31,1976,0.0,0.0,1.0,2/25/17,test,31,1977
0,0.788453,213,42,416,1494,0.317437,0.338633,0.164706,3/2/17,test,629,1536
0,0.94421,75,13,106,1939,0.386598,0.414365,0.147727,3/14/17,test,181,1952
0,0.830869,158,103,263,1640,0.301527,0.375297,0.394636,3/22/17,test,421,1743
0,0.98107,0,0,46,2384,0.0,0.0,,6/19/17,test,46,2384
0,0.999154,0,0,2,2361,0.0,0.0,,7/13/17,test,2,2361
0,0.941375,9,37,102,2223,0.060811,0.081081,0.804348,10/29/17,test,111,2260


In [14]:
#data2.to_csv('RF_search_data.csv')
data2.head()

Unnamed: 0,Date,Type,R1,R2,Pressure_reduced_to_MSL_.Pa..0.MSL,Derived_radar_reflectivity_.dB..1.HYBL,u.component_of_wind_.m.s..85000.ISBL,v.component_of_wind_.m.s..85000.ISBL,wind_speed_85000,wind_shear_85000,...,diff_temp,geo_cbl,veg_sfc,best_4_layer,ws_700,actual,Class_results,pred,From,test_date
0,2017-01-07 06:00:00,ANDE,2.630564,2.322274,102879.7,0.0,5.447,-1.457,5.638498,5.269992,...,0.739,-5000.0,40.293,23.757,16.787335,2.332608,Less than 30 knots,4.930173,Mesonet,1/7/17
1,2017-01-07 06:15:00,ANDE,2.055383,1.813005,102951.0,0.0,5.629,-1.464,5.816265,5.773324,...,1.05,-5000.0,40.293,23.959,16.195689,1.555072,Less than 30 knots,4.9954,Mesonet,1/7/17
2,2017-01-07 06:30:00,ANDE,1.792587,1.671478,102890.6,0.0,5.653,-1.56,5.8643,6.23214,...,1.211,-5000.0,40.293,24.118,15.636587,2.332608,Less than 30 knots,5.24487,Mesonet,1/7/17
3,2017-01-07 06:45:00,ANDE,1.442889,1.352497,102931.0,0.0,5.692,-2.445,6.194908,6.806583,...,0.943,-5000.0,40.293,24.22,16.767452,1.555072,Less than 30 knots,5.958163,Mesonet,1/7/17
4,2017-01-07 07:00:00,ANDE,2.283417,1.677404,102882.5,0.0,5.226,-3.06,6.055962,5.554556,...,0.751,-5000.0,40.293,24.149,17.249375,2.526992,Less than 30 knots,5.468028,Mesonet,1/7/17
