In [1]:
# Python code to build Machine Learning model for hurricane intensity forecast  
import pandas as pd # For data manipulation and analysis
pd.set_option('display.max_columns', 500)
import numpy as np # For scientific computing

#!pip3 install sklearn # Install machine learning library
import sklearn # For machine learning library
from sklearn.ensemble import RandomForestClassifier  # Random forest classifier
#from sklearn.ensemble import ExtraTreesClassifier    # Extra tree classifier
from sklearn.metrics import confusion_matrix # Compute confusion matrix to evaluate the accuracy of a classification.
from sklearn.metrics import brier_score_loss  # Compute the Brier score

import matplotlib.pyplot as plt  #plotting library

In [2]:
#====================================
#Read SHIPS spread-sheet data
#====================================
# Set up the location of the SHIPS data
fname='C:\\Users\\HP\\Desktop\\Hurricane\\Dataset_SHIPS_RII_ATL.csv'
#fname='Dataset_SHIPS_RII_EPAC.csv'

# Read SHIPS data
ships = pd.read_csv(fname)
ships.head()

Unnamed: 0,NAME,DATE,HOUR,VMX0,LAT,LON,MSLP,ID,DELV12,DELV24,DELV36,DELV48,PER,SHRD,D200,RHLO,PX30,SDBT,POT,OHC,TPW,PC2,U200,TPWC,AVBT,RSST
0,ALEX,980727,12,25,11.3,-25.4,1009,AL011998,0,5,10,10,9999,6.3,103,68,72,13.8,-101,12,0,-58,-7.9,55.7,-473,27.4
1,ALEX,980727,18,25,11.7,-27.2,1009,AL011998,0,5,10,10,9999,11.2,118,69,55,12.6,-102,17,0,-10,-6.4,55.7,-360,27.4
2,ALEX,980728,0,25,12.2,-29.2,1009,AL011998,5,10,10,10,0,8.6,116,71,70,12.8,-105,21,0,-3,-8.8,56.9,-381,27.4
3,ALEX,980728,6,25,12.6,-31.3,1008,AL011998,5,10,10,15,0,12.2,91,71,57,12.2,-100,29,0,-44,-6.0,52.7,-481,27.2
4,ALEX,980728,12,30,12.9,-33.3,1007,AL011998,5,5,5,10,5,10.5,88,71,83,10.1,-89,15,190,-46,-6.5,55.3,-516,27.1


In [3]:
#================================
# Set up parameters
#================================
# Year range for training and validating
year_train=['1998','2008']

# Year range for forecast
year_fcst=['2009','2017']

# Variable names for predictors
PredictorName=['PER','SHRD','D200','TPW','PC2','SDBT','POT','OHC']  # delete 'VMX0'

# Variable name for predictand
TargetName='DELV24'

# Threshold of Rapid Intensification 
RIValue=30

# Climatology of RI (30 kt) frequency at Atlantic basin (Kaplan et al. 2015)
clim=0.125   #ATL 30 kt
#clim=0.084   #EPAC 30 kt

In [4]:
#================================
# Data pre-processing
#================================
# Set all 9999s as NaNs
ships = ships.replace(9999,np.NaN)

# drop NaNs
ships=ships.dropna()

# Pad the date columns with 00 for the year 2000
ships['DATE'] = ships['DATE'].apply(lambda x: str(x).zfill(6))

# Extract month from date
ships['MONTH'] = ships['DATE'].apply(lambda x: str(x)[2:4])

# Extract year from date
ships['YEAR'] = ships['DATE'].apply(lambda x: ('19' + str(x)[0:2]) if (str(x)[0:1]!= '0' and str(x)[0:1]!= '1') else ('20' + str(x)[0:2]))
ships.head()

# Set the target column
ships['TAR'] = ships[TargetName].apply(lambda x: 1 if x >= RIValue else 0)
ships.head()

Unnamed: 0,NAME,DATE,HOUR,VMX0,LAT,LON,MSLP,ID,DELV12,DELV24,DELV36,DELV48,PER,SHRD,D200,RHLO,PX30,SDBT,POT,OHC,TPW,PC2,U200,TPWC,AVBT,RSST,MONTH,YEAR,TAR
2,ALEX,980728,0,25,12.2,-29.2,1009,AL011998,5.0,10.0,10.0,10.0,0.0,8.6,116,71,70.0,12.8,-105,21.0,0,-3.0,-8.8,56.9,-381.0,27.4,7,1998,0
3,ALEX,980728,6,25,12.6,-31.3,1008,AL011998,5.0,10.0,10.0,15.0,0.0,12.2,91,71,57.0,12.2,-100,29.0,0,-44.0,-6.0,52.7,-481.0,27.2,7,1998,0
4,ALEX,980728,12,30,12.9,-33.3,1007,AL011998,5.0,5.0,5.0,10.0,5.0,10.5,88,71,83.0,10.1,-89,15.0,190,-46.0,-6.5,55.3,-516.0,27.1,7,1998,0
5,ALEX,980728,18,30,13.1,-35.1,1006,AL011998,5.0,5.0,10.0,15.0,5.0,9.7,44,72,35.0,15.9,-86,22.0,15,36.0,-7.2,56.6,-270.0,27.1,7,1998,0
6,ALEX,980729,0,35,13.3,-36.8,1005,AL011998,0.0,0.0,5.0,10.0,5.0,9.9,37,74,56.0,14.5,-80,24.0,0,-6.0,-10.3,57.7,-443.0,27.1,7,1998,0


In [5]:
ships.YEAR.value_counts().sort_index()

1998    246
1999    254
2000    119
2001    176
2002    167
2003    236
2004    281
2005    349
2006    160
2007     72
2008    219
2009     47
2010    228
2011    193
2012    262
2013     59
2014     80
2015    119
2016    239
2017    237
Name: YEAR, dtype: int64

In [6]:
# Data within training and validating years
data_train = ships[(ships['YEAR']>=year_train[0]) & (ships['YEAR']<=year_train[1])]

# All predictors for training and validating
XData = data_train[PredictorName]

# All predictand for training and validating
YData = data_train['TAR']

In [7]:
corrMatrix = XData.corr()
corrMatrix

Unnamed: 0,PER,SHRD,D200,TPW,PC2,SDBT,POT,OHC
PER,1.0,-0.248301,0.134089,-0.151524,-0.262457,-0.165744,0.035958,0.092771
SHRD,-0.248301,1.0,-0.227194,0.391774,0.369694,0.292508,-0.05157,-0.185855
D200,0.134089,-0.227194,1.0,-0.462381,-0.512974,-0.2214,0.041412,0.339883
TPW,-0.151524,0.391774,-0.462381,1.0,0.550803,0.212208,0.092249,-0.482222
PC2,-0.262457,0.369694,-0.512974,0.550803,1.0,0.590434,-0.327496,-0.428193
SDBT,-0.165744,0.292508,-0.2214,0.212208,0.590434,1.0,-0.499691,-0.188057
POT,0.035958,-0.05157,0.041412,0.092249,-0.327496,-0.499691,1.0,-0.107507
OHC,0.092771,-0.185855,0.339883,-0.482222,-0.428193,-0.188057,-0.107507,1.0


### Random Forest, deleting variables

In [8]:
#===============================================
# Forecast model : Random Forest
#===============================================
# Forecast model set up. 
#    Note: n_estimators is based on hyperparameter selection of (50, 100, 250) with maximum PSS
model_fcst = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=66, max_depth=6, min_samples_leaf=2, class_weight='balanced')
model_fcst

RandomForestClassifier(class_weight='balanced', max_depth=6, min_samples_leaf=2,
                       n_jobs=-1, random_state=66)

In [9]:
# Train model using all the training and validating data
model_fcst.fit(XData,YData)
model_fcst

RandomForestClassifier(class_weight='balanced', max_depth=6, min_samples_leaf=2,
                       n_jobs=-1, random_state=66)

In [10]:
# Data within forecast years
data_fcst = ships[(ships['YEAR']>=year_fcst[0]) & (ships['YEAR']<=year_fcst[1])]
#data_fcst

In [11]:
# All predictors for forecast
XData_fcst = data_fcst[PredictorName]
#XData_fcst

In [12]:
# All truth of predictand for forecast
YData_fcst = data_fcst['TAR']
#YData_fcst

In [13]:
# Predict class for x
y_pred_fcst = model_fcst.predict(XData_fcst)
#y_pred_fcst

In [14]:
# Confusion matrix for one year
cmatrix_fcst = confusion_matrix(YData_fcst, y_pred_fcst)
cmatrix_fcst

array([[1049,  287],
       [  57,   71]], dtype=int64)

In [15]:
# Pierce Skill Score
pss=((cmatrix_fcst[0,0] * cmatrix_fcst[1,1]) - (cmatrix_fcst[0,1] * cmatrix_fcst[1,0])) * 1.0 / ((cmatrix_fcst[1,1] + cmatrix_fcst[1,0]) * (cmatrix_fcst[0,1] + cmatrix_fcst[0,0]))
print('Forecast PSS = ',pss)

Forecast PSS =  0.3398671407185629


In [16]:
# False Alarm Ratio
far=(cmatrix_fcst[0,1] * 1.0) / (cmatrix_fcst[0,1] + cmatrix_fcst[1,1])
print('Forecast FAR = ',far)

Forecast FAR =  0.8016759776536313


In [17]:
# Probability of Detection
pod=(cmatrix_fcst[1,1] * 1.0) / (cmatrix_fcst[1,0] + cmatrix_fcst[1,1]) 
print('Forecast POD = ',pod)

Forecast POD =  0.5546875


### Random Forest, with adding variables

In [22]:
# Variable names for predictors
PredictorName=['PER','SHRD','D200','TPW','PC2','SDBT','POT','OHC','LAT','LON','U200','TPWC','RSST']
# Data within training and validating years
data_train = ships[(ships['YEAR']>=year_train[0]) & (ships['YEAR']<=year_train[1])]

# All predictors for training and validating
XData = data_train[PredictorName]

# All predictand for training and validating
YData = data_train['TAR']

In [23]:
corrMatrix = XData.corr()
corrMatrix

Unnamed: 0,PER,SHRD,D200,TPW,PC2,SDBT,POT,OHC,LAT,LON,U200,TPWC,RSST
PER,1.0,-0.248301,0.134089,-0.151524,-0.262457,-0.165744,0.035958,0.092771,-0.176813,-0.009734,-0.174605,0.099404,0.109632
SHRD,-0.248301,1.0,-0.227194,0.391774,0.369694,0.292508,-0.05157,-0.185855,0.214858,0.096674,0.52882,-0.400569,-0.25706
D200,0.134089,-0.227194,1.0,-0.462381,-0.512974,-0.2214,0.041412,0.339883,-0.401338,-0.123234,-0.108139,0.544051,0.335547
TPW,-0.151524,0.391774,-0.462381,1.0,0.550803,0.212208,0.092249,-0.482222,0.522342,0.20742,0.485303,-0.781839,-0.686743
PC2,-0.262457,0.369694,-0.512974,0.550803,1.0,0.590434,-0.327496,-0.428193,0.420037,0.110497,0.291031,-0.641654,-0.425456
SDBT,-0.165744,0.292508,-0.2214,0.212208,0.590434,1.0,-0.499691,-0.188057,0.103422,0.08526,0.185567,-0.327706,-0.149162
POT,0.035958,-0.05157,0.041412,0.092249,-0.327496,-0.499691,1.0,-0.107507,0.213008,0.068556,0.078836,0.087663,-0.318678
OHC,0.092771,-0.185855,0.339883,-0.482222,-0.428193,-0.188057,-0.107507,1.0,-0.347463,-0.50027,-0.206389,0.549741,0.636883
LAT,-0.176813,0.214858,-0.401338,0.522342,0.420037,0.103422,0.213008,-0.347463,1.0,-0.259553,0.501097,-0.326232,-0.316134
LON,-0.009734,0.096674,-0.123234,0.20742,0.110497,0.08526,0.068556,-0.50027,-0.259553,1.0,-0.064369,-0.428148,-0.532911


In [24]:
model_fcst = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=66, max_depth=6, min_samples_leaf=2, class_weight='balanced')
model_fcst

RandomForestClassifier(class_weight='balanced', max_depth=6, min_samples_leaf=2,
                       n_jobs=-1, random_state=66)

In [25]:
# Train model using all the training and validating data
model_fcst.fit(XData,YData)
model_fcst

RandomForestClassifier(class_weight='balanced', max_depth=6, min_samples_leaf=2,
                       n_jobs=-1, random_state=66)

In [26]:
# Data within forecast years
data_fcst = ships[(ships['YEAR']>=year_fcst[0]) & (ships['YEAR']<=year_fcst[1])]
#data_fcst

# All predictors for forecast
XData_fcst = data_fcst[PredictorName]
#XData_fcst

# All truth of predictand for forecast
YData_fcst = data_fcst['TAR']
#YData_fcst

# Predict class for x
y_pred_fcst = model_fcst.predict(XData_fcst)
#y_pred_fcst

In [27]:
# Confusion matrix for one year
cmatrix_fcst = confusion_matrix(YData_fcst, y_pred_fcst)
cmatrix_fcst

array([[1063,  273],
       [  63,   65]], dtype=int64)

In [28]:
# Pierce Skill Score
pss=((cmatrix_fcst[0,0] * cmatrix_fcst[1,1]) - (cmatrix_fcst[0,1] * cmatrix_fcst[1,0])) * 1.0 / ((cmatrix_fcst[1,1] + cmatrix_fcst[1,0]) * (cmatrix_fcst[0,1] + cmatrix_fcst[0,0]))
print('Forecast PSS = ',pss)

Forecast PSS =  0.30347118263473055


In [29]:
# False Alarm Ratio
far=(cmatrix_fcst[0,1] * 1.0) / (cmatrix_fcst[0,1] + cmatrix_fcst[1,1])
print('Forecast FAR = ',far)

Forecast FAR =  0.8076923076923077


In [30]:
# Probability of Detection
pod=(cmatrix_fcst[1,1] * 1.0) / (cmatrix_fcst[1,0] + cmatrix_fcst[1,1]) 
print('Forecast POD = ',pod)

Forecast POD =  0.5078125


### Gradient Boosting Classifier

In [31]:
# Variable names for predictors
PredictorName=['PER','SHRD','D200','TPW','PC2','SDBT','POT','OHC','VMX0']
# Data within training and validating years
data_train = ships[(ships['YEAR']>=year_train[0]) & (ships['YEAR']<=year_train[1])]

# All predictors for training and validating
XData = data_train[PredictorName]

# All predictand for training and validating
YData = data_train['TAR']

In [33]:
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier(n_estimators=100)
gbc.fit(XData,YData)

GradientBoostingClassifier()

##### tuning parameters

In [52]:
from sklearn.model_selection import GridSearchCV
#params={'n_estimators':[x for x in range(200,300,10)], 'max_depth':[x for x in range(1,10,1)],'min_samples_leaf':[x for x in range(1,10,1)],'class_weight':list('balanced')}
params={'max_depth':[x for x in range(6,11,2)], 'min_samples_leaf':[x for x in range(0,6,2)]}
gbc= GradientBoostingClassifier(n_estimators=100,learning_rate=1.0)
grid = GridSearchCV(gbc, params, cv=10, scoring="f1")
grid.fit(XData,YData)

Traceback (most recent call last):
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 500, in fit
    sample_weight_val, begin_at_stage, monitor, X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 557, in _fit_stages
    random_state, X_idx_sorted, X_csc, X_csr)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 212, in _fit_stage
    check_input=False, X_idx_sorted=X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\tree\_classes.py", line 1246, in fit
    X_idx_sorted=X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\tree\_classes.py", line 217, in fit
    % self.min_samples_leaf)
Value

Traceback (most recent call last):
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 500, in fit
    sample_weight_val, begin_at_stage, monitor, X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 557, in _fit_stages
    random_state, X_idx_sorted, X_csc, X_csr)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 212, in _fit_stage
    check_input=False, X_idx_sorted=X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\tree\_classes.py", line 1246, in fit
    X_idx_sorted=X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\tree\_classes.py", line 217, in fit
    % self.min_samples_leaf)
Value

Traceback (most recent call last):
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 500, in fit
    sample_weight_val, begin_at_stage, monitor, X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 557, in _fit_stages
    random_state, X_idx_sorted, X_csc, X_csr)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\ensemble\_gb.py", line 212, in _fit_stage
    check_input=False, X_idx_sorted=X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\tree\_classes.py", line 1246, in fit
    X_idx_sorted=X_idx_sorted)
  File "c:\users\hp\pycharmprojects\test1\venv\lib\site-packages\sklearn\tree\_classes.py", line 217, in fit
    % self.min_samples_leaf)
Value

GridSearchCV(cv=10, estimator=GradientBoostingClassifier(learning_rate=1.0),
             param_grid={'max_depth': [6, 8, 10],
                         'min_samples_leaf': [0, 2, 4]},
             scoring='f1')

In [53]:
grid.best_score_ 

0.24866282428691294

In [54]:
grid.best_params_ 

{'max_depth': 6, 'min_samples_leaf': 2}

In [55]:
grid.best_estimator_

GradientBoostingClassifier(learning_rate=1.0, max_depth=6, min_samples_leaf=2)

In [56]:
gbcbest_y_pred_fcst = grid.best_estimator_.predict(XData_fcst)

In [57]:
# Confusion matrix for one year; after tuning paramters
cmatrix_fcst = confusion_matrix(YData_fcst, gbcbest_y_pred_fcst)
cmatrix_fcst

array([[1265,   71],
       [ 108,   20]], dtype=int64)

In [58]:
# Pierce Skill Score
pss=((cmatrix_fcst[0,0] * cmatrix_fcst[1,1]) - (cmatrix_fcst[0,1] * cmatrix_fcst[1,0])) * 1.0 / ((cmatrix_fcst[1,1] + cmatrix_fcst[1,0]) * (cmatrix_fcst[0,1] + cmatrix_fcst[0,0]))
print('Forecast PSS = ',pss)

Forecast PSS =  0.1031062874251497


In [59]:
# False Alarm Ratio
far=(cmatrix_fcst[0,1] * 1.0) / (cmatrix_fcst[0,1] + cmatrix_fcst[1,1])
print('Forecast FAR = ',far)

Forecast FAR =  0.7802197802197802


In [60]:
# Probability of Detection
pod=(cmatrix_fcst[1,1] * 1.0) / (cmatrix_fcst[1,0] + cmatrix_fcst[1,1]) 
print('Forecast POD = ',pod)

Forecast POD =  0.15625


In [34]:
# Data within forecast years
data_fcst = ships[(ships['YEAR']>=year_fcst[0]) & (ships['YEAR']<=year_fcst[1])]
#data_fcst

# All predictors for forecast
XData_fcst = data_fcst[PredictorName]
#XData_fcst

# All truth of predictand for forecast
YData_fcst = data_fcst['TAR']
#YData_fcst

# Predict class for x
gbc_y_pred_fcst = gbc.predict(XData_fcst)
#y_pred_fcst

In [35]:
# Confusion matrix for one year
cmatrix_fcst = confusion_matrix(YData_fcst, gbc_y_pred_fcst)
cmatrix_fcst

array([[1307,   29],
       [ 114,   14]], dtype=int64)

In [36]:
# Pierce Skill Score
pss=((cmatrix_fcst[0,0] * cmatrix_fcst[1,1]) - (cmatrix_fcst[0,1] * cmatrix_fcst[1,0])) * 1.0 / ((cmatrix_fcst[1,1] + cmatrix_fcst[1,0]) * (cmatrix_fcst[0,1] + cmatrix_fcst[0,0]))
print('Forecast PSS = ',pss)

Forecast PSS =  0.08766841317365269


In [37]:
# False Alarm Ratio
far=(cmatrix_fcst[0,1] * 1.0) / (cmatrix_fcst[0,1] + cmatrix_fcst[1,1])
print('Forecast FAR = ',far)

Forecast FAR =  0.6744186046511628


In [38]:
# Probability of Detection
pod=(cmatrix_fcst[1,1] * 1.0) / (cmatrix_fcst[1,0] + cmatrix_fcst[1,1]) 
print('Forecast POD = ',pod)

Forecast POD =  0.109375


### Logistic regression

In [45]:
# Variable names for predictors
PredictorName=['PER','SHRD','D200','TPW','PC2','SDBT','POT','OHC']
# Data within training and validating years
data_train = ships[(ships['YEAR']>=year_train[0]) & (ships['YEAR']<=year_train[1])]

# All predictors for training and validating
XData = data_train[PredictorName]

# All predictand for training and validating
YData = data_train['TAR']

In [46]:
# logistic regression
import sklearn.linear_model as sk_linear
#logReg = sk_linear.LogisticRegression(penalty='l2',dual=False,C=1.0,n_jobs=1,random_state=20,fit_intercept=True)
logReg = sk_linear.LogisticRegression(class_weight='balanced')
logReg.fit(XData,YData)

LogisticRegression(class_weight='balanced')

In [47]:
# Data within forecast years
data_fcst = ships[(ships['YEAR']>=year_fcst[0]) & (ships['YEAR']<=year_fcst[1])]
#data_fcst

# All predictors for forecast
XData_fcst = data_fcst[PredictorName]
#XData_fcst

# All truth of predictand for forecast
YData_fcst = data_fcst['TAR']
#YData_fcst

# Predict class for x
logReg_y_pred_fcst = logReg.predict(XData_fcst)
#y_pred_fcst

In [48]:
# Confusion matrix for one year
cmatrix_fcst = confusion_matrix(YData_fcst, logReg_y_pred_fcst)
cmatrix_fcst

array([[933, 403],
       [ 43,  85]], dtype=int64)

In [49]:
# Pierce Skill Score
pss=((cmatrix_fcst[0,0] * cmatrix_fcst[1,1]) - (cmatrix_fcst[0,1] * cmatrix_fcst[1,0])) * 1.0 / ((cmatrix_fcst[1,1] + cmatrix_fcst[1,0]) * (cmatrix_fcst[0,1] + cmatrix_fcst[0,0]))
print('Forecast PSS = ',pss)

Forecast PSS =  0.36241579341317365


In [50]:
# False Alarm Ratio
far=(cmatrix_fcst[0,1] * 1.0) / (cmatrix_fcst[0,1] + cmatrix_fcst[1,1])
print('Forecast FAR = ',far)

Forecast FAR =  0.8258196721311475


In [51]:
# Probability of Detection
pod=(cmatrix_fcst[1,1] * 1.0) / (cmatrix_fcst[1,0] + cmatrix_fcst[1,1]) 
print('Forecast POD = ',pod)

Forecast POD =  0.6640625


### SVM

In [61]:
# Variable names for predictors
PredictorName=['PER','SHRD','D200','TPW','PC2','SDBT','POT','OHC','VMX0']
# Data within training and validating years
data_train = ships[(ships['YEAR']>=year_train[0]) & (ships['YEAR']<=year_train[1])]

# All predictors for training and validating
XData = data_train[PredictorName]

# All predictand for training and validating
YData = data_train['TAR']

In [62]:
import sklearn.svm as sk_svm
svm = sk_svm.SVC(C=1.0,kernel='rbf',gamma='auto')
svm.fit(XData,YData)
#acc=model.score(X_test,y_test) #根据给定数据与标签返回正确率的均值
#print('SVM模型评价:',acc)

SVC(gamma='auto')

In [63]:
# Data within forecast years
data_fcst = ships[(ships['YEAR']>=year_fcst[0]) & (ships['YEAR']<=year_fcst[1])]
#data_fcst

# All predictors for forecast
XData_fcst = data_fcst[PredictorName]
#XData_fcst

# All truth of predictand for forecast
YData_fcst = data_fcst['TAR']
#YData_fcst

# Predict class for x
svm_y_pred_fcst = svm.predict(XData_fcst)
#y_pred_fcst

In [64]:
# Confusion matrix for one year
cmatrix_fcst = confusion_matrix(YData_fcst, svm_y_pred_fcst)
cmatrix_fcst

array([[1336,    0],
       [ 128,    0]], dtype=int64)

In [65]:
# Pierce Skill Score
pss=((cmatrix_fcst[0,0] * cmatrix_fcst[1,1]) - (cmatrix_fcst[0,1] * cmatrix_fcst[1,0])) * 1.0 / ((cmatrix_fcst[1,1] + cmatrix_fcst[1,0]) * (cmatrix_fcst[0,1] + cmatrix_fcst[0,0]))
print('Forecast PSS = ',pss)

Forecast PSS =  0.0


In [66]:
# False Alarm Ratio
far=(cmatrix_fcst[0,1] * 1.0) / (cmatrix_fcst[0,1] + cmatrix_fcst[1,1])
print('Forecast FAR = ',far)

Forecast FAR =  nan


  


In [67]:
# Probability of Detection
pod=(cmatrix_fcst[1,1] * 1.0) / (cmatrix_fcst[1,0] + cmatrix_fcst[1,1]) 
print('Forecast POD = ',pod)

Forecast POD =  0.0
