# About

This notebook computes the accuracy metrics obtained by models trained with different feature sets when tested on the initial 2,500-point test dataset. The output is saved as a CSV file in the data/accuracy_data directory.

In [None]:
import os
import time
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

from accuracy_info_df import accuracy_info_df

In [None]:
# Assume repository's parent directory is the home directory.
# NOTE: this changes the process working directory; every path below
# (and the output path used when saving results) is built from os.getcwd().
home = os.path.expanduser("~")
os.chdir(os.path.join(home, 'iceplant-detection-santa-barbara'))

# **************************************************************
root = os.path.join(os.getcwd(),
                    'data',
                    'iceplant_data',
                    'initial_dataset')
train_file_path = os.path.join(root, 'train_2500.csv')
test_file_path = os.path.join(root, 'test_2500.csv')

# Name of the column holding the class label in both CSV files.
label_name = 'iceplant'

# ------------------------------
# IMPORT TRAIN DATA
# Each CSV is parsed once; the label vector is taken from the loaded frame
# instead of reading the same file from disk a second time.
# X_train keeps every column of the file (label included); the feature
# columns for each model are selected by name later on.
X_train = pd.read_csv(train_file_path)
y_train = X_train[label_name].to_numpy()

# ------------------------------
# IMPORT TEST DATA
X_test = pd.read_csv(test_file_path)
y_test = X_test[label_name].to_numpy()

# ------------------------------
# Values used as the numeric suffix of the window (texture) feature column
# names, e.g. 'r_avg3', 'r_entr3'.
box_sides = [3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27]

In [None]:
# ****************************************
# TRAIN MODELS AND COMPUTE ACCURACIES
#
# One random forest is trained per feature set and scored on the test data.
# Every experiment is described by a (label, columns) pair so that the label
# written to the output can never drift out of step with the model it names.

bands = ['r', 'g', 'b', 'nir', 'ndvi']
spectral_bands = ['r', 'g', 'b', 'nir']
date_cols = ['month', 'day_in_year']


def _evaluate_feature_set(cols):
    """Fit a random forest on the columns `cols` and score it on the test set.

    Parameters
    ----------
    cols : list of str
        Column names selected from both X_train and X_test. Column order is
        kept exactly as given.

    Returns
    -------
    The value returned by accuracy_info_df(y_test, preds) for this model.
    """
    # Fixed seed so that repeated runs give the same forests.
    rfc = RandomForestClassifier(n_estimators=100, random_state=42)
    rfc.fit(X_train[cols].to_numpy(), y_train)
    preds = rfc.predict(X_test[cols].to_numpy())
    return accuracy_info_df(y_test, preds)


# ---------------------------------------------------
# spectral bands, optionally with ndvi and/or date features
experiments = [
    ('spectral', spectral_bands),
    ('spectral+date', spectral_bands + date_cols),
    ('spectral+ndvi', bands),
    ('spectral+ndvi+date', bands + date_cols),
]

# ---------------------------------------------------
# window (texture) feature families, one model per box side.
# Each entry is (label prefix, bands that carry the feature, name suffixes);
# a window column is named <band><suffix><box side>.
texture_families = [
    # average + entropy textures
    ('avg_ent_', bands, ['_avg', '_entr']),
    # glcm correlation + contrast features (only for r, g, b, nir)
    ('glcm_', spectral_bands, ['_contN_', '_corrN_', '_contE_', '_corrE_']),
    # entropy textures only
    ('entr_', bands, ['_entr']),
    # average textures only
    ('avg_', bands, ['_avg']),
]

for prefix, source_bands, suffixes in texture_families:
    for box_s in box_sides:
        window_features = [band + suffix + str(box_s)
                           for band in source_bands
                           for suffix in suffixes]
        # Texture models always include the five base bands and the dates.
        experiments.append((prefix + str(box_s),
                            bands + window_features + date_cols))

# feats holds the label of each model; results holds its accuracy info,
# in the same order.
feats = [label for label, _ in experiments]
results = [_evaluate_feature_set(cols) for _, cols in experiments]

# ---------------------------------------------------
# concatenate results and save
R = pd.concat(results).reset_index(drop=True)
R.insert(loc=0,
         column='features',
         value=feats)

out_file_path = os.path.join(os.getcwd(),
                             'data',
                             'accuracy_data',
                             'accuracies_feature_experiments.csv')
# Create the output directory if it is missing, so a long training run is
# not lost to a failed write at the very end.
os.makedirs(os.path.dirname(out_file_path), exist_ok=True)
R.to_csv(out_file_path, index=False)