In [0]:
from sklearn.metrics import precision_recall_fscore_support,confusion_matrix,accuracy_score
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,cross_val_score,cross_validate,GridSearchCV
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier,AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import RandomizedSearchCV
from sklearn.naive_bayes import GaussianNB

In [0]:
# Colab-only step: mount Google Drive at /content/drive so the data folders
# referenced by the path variables below are readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
def evaluate(y_test,y_pred):
  """Print a confusion matrix and precision/recall summary for the wind-speed classes.

  Parameters
  ----------
  y_test : array-like of str
      True class labels.
  y_pred : array-like of str
      Predicted class labels, same length as ``y_test``.

  Returns
  -------
  None
      Everything is printed: the confusion matrix (rows = true class,
      columns = predicted class), accuracy, unweighted mean and
      support-weighted precision/recall, and the per-class table.

  Notes
  -----
  The trailing "// Model ..." percentages are hard-coded reference figures
  printed next to each score for comparison; they are not computed here.
  """
  index=["less than 2 m/s","medium","more than 10 m/s"]
  # Pin the class list explicitly: without labels= sklearn only reports the
  # classes that actually occur, so a split missing one class would yield a
  # smaller matrix / shorter score arrays that no longer fit the three names.
  print(pd.DataFrame(confusion_matrix(y_test, y_pred, labels=index), index=index, columns=index))
  print("****************")
  print("Accuracy=","{:.2%}".format(accuracy_score(y_test, y_pred)),"// Model Accuracy=45%")
  # average=None -> one precision/recall/F1 value per class, in `index` order.
  results= precision_recall_fscore_support(y_test, y_pred, labels=index, average=None)
  df=pd.DataFrame({"Precision":results[0],"Recall":results[1],"F1":results[2],"W_SPD":index})
  df=df.set_index("W_SPD")
  # Unweighted mean over the three classes (macro average).
  print("Average precision =","{:.2%}".format(df["Precision"].mean()),"// Model precision=28%")
  print("Average recall =","{:.2%}".format(df["Recall"].mean()),"// Model recall=28%")
  # Support-weighted averages: each class counts in proportion to its true samples.
  results= precision_recall_fscore_support(y_test, y_pred, labels=index, average='weighted')
  print("Precision weighted=","{:.2%}".format(results[0]),"//Model weighted=56%")
  print("Recall weighted =","{:.2%}".format(results[1]),"//Model weighted=53%")
  print("****************")
  print(df)

In [0]:
# Both model runs live under the same Drive folder; only the last path
# component differs between the 4 km and the 1 km data sets.
model_vs_data_root = "/content/drive/My Drive/Colab Notebooks/model_vs_data/"
drive_4km = model_vs_data_root + "Coron_4km_h24toh48_dir/"
drive_1km = model_vs_data_root + "Coron_Mars_H24to48_dir/"
# dr[0] -> 4 km directory, dr[1] -> 1 km directory.
dr = [drive_4km, drive_1km]

In [0]:
# Read the target (Excel) and the feature table (CSV) from the first data
# directory; both use their first column as the index.
y_data = pd.read_excel(dr[0] + "y_coron_spd.xlsx", index_col=0)
# Keep only feature columns at positions 9..17.
x_data = pd.read_csv(dr[0] + "x_coron.csv", index_col=0).iloc[:, 9:18]
# Non-positive readings are station errors: mask them to NaN so that the
# dropna() after the join discards those rows.
y_data = y_data.where(y_data > 0)
# Align features and target on the index, keeping only complete rows.
result = x_data.join(y_data, how="outer").dropna()
x_data = result.iloc[:, :9]
# Target is the single column after the nine features, converted km/h -> m/s.
y_data = result.iloc[:, 9:10] / 3.6

In [0]:
# Bin the wind speed ("value" column) into three classes:
#   value < 2        -> "less than 2 m/s"
#   2 <= value <= 10 -> "medium"
#   value > 10       -> "more than 10 m/s"
# .assign() returns a new frame, so the label column is never written onto a
# filtered slice of y_data (which is what triggers pandas' SettingWithCopyWarning).
L = y_data[y_data["value"] < 2].assign(label="less than 2 m/s")
M = y_data[(y_data["value"] >= 2) & (y_data["value"] <= 10)].assign(label="medium")
H = y_data[y_data["value"] > 10].assign(label="more than 10 m/s")
spds = [L, M, H]
# Stack the three groups and restore index order.
y_data = pd.concat(spds).sort_index()

In [0]:
# Summary statistics of the target frame.
y_data.describe()

In [0]:
# Summary statistics of the feature frame.
x_data.describe()

**Dummy baseline (always predicts the most frequent class)**

In [0]:
# 70/30 hold-out split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data["label"], test_size=0.3, random_state=5
)
# Baseline that ignores the features and always predicts the most frequent
# training class.
baseline = DummyClassifier(strategy="most_frequent")
baseline.fit(x_train, y_train)
y_pred = baseline.predict(x_test)
evaluate(y_test, y_pred)

                  less than 2 m/s  medium  more than 10 m/s
less than 2 m/s                 0     592                 0
medium                          0    1802                 0
more than 10 m/s                0     156                 0
****************
Accuracy= 70.67% // Model Accuracy=45%
Average precision = 23.56% // Model precision=28%
Average recall = 33.33% // Model recall=28%
Precision weighted= 49.94% //Model weighted=56%
Recall weighted = 70.67% //Model weighted=53%
****************
                  Precision  Recall        F1
W_SPD                                        
less than 2 m/s    0.000000     0.0  0.000000
medium             0.706667     1.0  0.828125
more than 10 m/s   0.000000     0.0  0.000000


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [0]:
# Fixed seed so the randomised ensembles give the same scores on every
# Restart & Run All; 5 is the value already used for the train/test split.
RANDOM_STATE = 5
clas = [RandomForestClassifier(n_estimators= 200, max_depth= 100, bootstrap= True,
                               random_state=RANDOM_STATE),
        ExtraTreesClassifier(random_state=RANDOM_STATE),
        AdaBoostClassifier(random_state=RANDOM_STATE),
        GaussianNB()]  # GaussianNB takes no random_state


# 70/30 hold-out split, then fit and evaluate the first candidate (random forest).
x_train, x_test, y_train, y_test = train_test_split(x_data,y_data["label"], test_size=0.3, random_state=RANDOM_STATE)
y_pred = clas[0].fit(x_train, y_train).predict(x_test)
evaluate(y_test,y_pred)

                  less than 2 m/s  medium  more than 10 m/s
less than 2 m/s               290     300                 2
medium                        170    1610                22
more than 10 m/s                2     116                38
****************
Accuracy= 76.00% // Model Accuracy=45%
Average precision = 67.84% // Model precision=28%
Average recall = 54.23% // Model recall=28%
Precision weighted= 74.48% //Model weighted=56%
Recall weighted = 76.00% //Model weighted=53%
****************
                  Precision    Recall        F1
W_SPD                                          
less than 2 m/s    0.627706  0.489865  0.550285
medium             0.794669  0.893452  0.841170
more than 10 m/s   0.612903  0.243590  0.348624


**K-fold cross-validation (5 folds)**

In [0]:
# Metrics collected on every fold: macro and support-weighted variants plus accuracy.
scoring = ['precision_macro', 'recall_macro','f1_macro',"accuracy","precision_weighted","recall_weighted","f1_weighted"]
# 5-fold cross-validation of the first classifier on the full data set;
# only test-fold scores are kept.
scores = cross_validate(clas[0], x_data, y_data["label"], scoring=scoring,
                        cv=5, return_train_score=False)

# (output template, key in `scores`) in print order.
report = [
    ("Accuracy: {:.2%} (+/- {:.2%})", "test_accuracy"),
    ("Recall: {:.2%} (+/- {:.2%})", "test_recall_macro"),
    ("Precision: {:.2%} (+/- {:.2%})", "test_precision_macro"),
    ("f1 : {:.2%} (+/-{:.2%} )", "test_f1_macro"),
    ("Recall weighted: {:.2%} (+/- {:.2%})", "test_recall_weighted"),
    ("Precision weighted: {:.2%} (+/- {:.2%})", "test_precision_weighted"),
    ("f1 weighted: {:.2%} (+/-{:.2%} )", "test_f1_weighted"),
]
for template, key in report:
    fold_scores = scores[key]
    # Report the mean across folds with a +/- two-standard-deviation band.
    print(template.format(fold_scores.mean(), fold_scores.std() * 2))

Accuracy: 74.28% (+/- 1.56%)
Recall: 50.14% (+/- 2.76%)
Precision: 61.69% (+/- 7.32%)
f1 : 52.34% (+/-2.76% )
Recall weighted: 74.28% (+/- 1.56%)
Precision weighted: 72.21% (+/- 1.76%)
f1 weighted: 72.16% (+/-1.34% )
