In [1]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import VotingClassifier


In [2]:
# load from csvs
df_features = pd.read_csv('../csvs/features.csv',index_col=0)
df_regional = pd.read_csv('../csvs/regional_features.csv',index_col=0)

# concatenate the two df's together
df_regional.drop(['file','label'],axis=1,inplace=True)
df = pd.concat([df_features, df_regional], axis=1)

# drop values with null labels (blind testing data)
df_dropna = df.dropna()

In [3]:
X = df_dropna.drop(['file','label'],axis=1).values
y = df_dropna.label.values

print('X (data)',X.shape,type(X))
print('y (target)',y.shape,type(y),np.unique(y))

X (data) (1005, 119) <class 'numpy.ndarray'>
y (target) (1005,) <class 'numpy.ndarray'> [0. 1. 2. 3.]


In [4]:
def getAcc(y_true,y_pred):
    return accuracy_score(y_true,y_pred)

def getPD(y_true,y_pred):
    '''get percent detected (# targets detected / # targets)'''
    return recall_score(y_true,y_pred)

def getPFA(y_true,y_pred):
    '''get percent false alarms (# false alarms / # non-targets)'''
    return 1-accuracy_score(1-y_true,1-y_pred)

In [5]:
X = preprocessing.scale(X)

In [6]:
clf = AdaBoostClassifier(n_estimators=100)
kf = KFold(n_splits=5)
PD,PD1,PD2,PD3 = [],[],[],[]
PFA = []
acc1,acc2,acc3=[],[],[]
acc_mult,acc_mult_to_bin,acc_bin,acc_comb = [],[],[],[]

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    
    predict_mult = clf.fit(X_train, y_train).predict(X_test)
    
    predict_mult_to_bin = predict_mult!=0
    
    predict_bin = clf.fit(X_train, y_train!=0).predict(X_test)
    
    predict1 = clf.fit(X_train, y_train==1).predict(X_test)
    predict2 = clf.fit(X_train, y_train==2).predict(X_test)
    predict3 = clf.fit(X_train, y_train==3).predict(X_test)
    
    predict_target = np.logical_or.reduce((predict1, predict2, predict3))
    
    acc_mult.append(getAcc(y_test, predict_mult))
    acc_mult_to_bin.append(getAcc(y_test!=0, predict_mult_to_bin))
    acc_bin.append(getAcc(y_test!=0, predict_bin))
    acc_comb.append(getAcc(y_test!=0, predict_target))
    acc1.append(getAcc(y_test==1, predict1))
    acc2.append(getAcc(y_test==2, predict2))
    acc3.append(getAcc(y_test==3, predict3))
    PD.append(getPD(y_test!=0, predict_bin))
    PD1.append(getPD(y_test==1, predict_bin))
    PD2.append(getPD(y_test==2, predict_bin))
    PD3.append(getPD(y_test==3, predict_bin))
    PFA.append(getPFA(y_test!=0, predict_bin))

In [7]:
print('accuracy mult          ',np.mean(acc_mult))
print('accuracy mult to bin   ',np.mean(predict_mult_to_bin))
print('accuracy bin           ',np.mean(acc_bin))
print('accuracy comb          ',np.mean(acc_comb))
print('accuracy saline only   ',np.mean(acc1))
print('accuracy rubber only   ',np.mean(acc2))
print('accuracy clay only     ',np.mean(acc3))
print('percent detected       ',np.mean(PD))
print('percent detected saline',np.mean(PD1))
print('percent detected rubber',np.mean(PD2))
print('percent detected clay ' ,np.mean(PD3))
print('percent false alarm    ',np.mean(PFA))

accuracy mult           0.709452736318408
accuracy mult to bin    0.25870646766169153
accuracy bin            0.8477611940298507
accuracy comb           0.8497512437810946
accuracy saline only    0.9343283582089551
accuracy rubber only    0.9462686567164178
accuracy clay only      0.9592039800995025
percent detected        0.741113102076752
percent detected saline 0.706047619047619
percent detected rubber 0.7672772191023465
percent detected clay  0.7472588522588524
percent false alarm     0.15223880597014922
