In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and use mode 2, which reloads imported modules
# before each cell runs so edits to local modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from mpl_toolkits.mplot3d import Axes3D
import sklearn
from datetime import datetime, date
import warnings
import timeit
from collections import defaultdict, OrderedDict
import time
from skimage import io, transform
import glob


# Override timeit's code template so the generated `inner` function keeps the
# value of the timed statement (`retval`) and returns it together with the
# elapsed time, i.e. a `(seconds, retval)` tuple instead of the time alone.
timeit.template = """
def inner(_it, _timer{init}):
    {setup}
    _t0 = _timer()
    for _i in _it:
        retval = {stmt}
    _t1 = _timer()
    return _t1 - _t0, retval
"""

# Apply the 'ggplot' style sheet to matplotlib figures.
matplotlib.style.use('ggplot')

# Seed passed as `random_state` to the train/test split further down.
RANDOM_SEED = 33

In [None]:
from iterative_lda import IterativeLDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

## Load iceberg data

In [None]:
# Read the training records from the JSON file into a DataFrame.
iceberg = pandas.read_json('./data/iterative_lda/iceberg/train.json')

In [None]:
# (rows, columns) of the frame as loaded.
iceberg.shape

In [None]:
# Preview the first 20 rows of the raw frame.
iceberg.head(n=20)

In [None]:
# Stack the per-row band_1 lists into a 2-D float array: one row per sample,
# one column per band value.
band1 = np.asarray(iceberg['band_1'].tolist(), dtype=float)
band1_columns = ['band_1_{i}'.format(i=index) for index in range(band1.shape[1])]
# Build every new column in one frame and attach them with a single concat.
# Inserting the columns one at a time fragments the DataFrame (recent pandas
# emits a PerformanceWarning for it) and is considerably slower.
band1_frame = pandas.DataFrame(band1, index=iceberg.index, columns=band1_columns)
# Dropping any earlier copy of these columns first keeps the cell safe to
# re-run without creating duplicate column names.
iceberg = pandas.concat(
    [iceberg.drop(band1_columns, axis=1, errors='ignore'), band1_frame],
    axis=1,
)

In [None]:
# Stack the per-row band_2 lists into a 2-D float array: one row per sample,
# one column per band value.
band2 = np.asarray(iceberg['band_2'].tolist(), dtype=float)
band2_columns = ['band_2_{i}'.format(i=index) for index in range(band2.shape[1])]
# Build every new column in one frame and attach them with a single concat.
# Inserting the columns one at a time fragments the DataFrame (recent pandas
# emits a PerformanceWarning for it) and is considerably slower.
band2_frame = pandas.DataFrame(band2, index=iceberg.index, columns=band2_columns)
# Dropping any earlier copy of these columns first keeps the cell safe to
# re-run without creating duplicate column names.
iceberg = pandas.concat(
    [iceberg.drop(band2_columns, axis=1, errors='ignore'), band2_frame],
    axis=1,
)

In [None]:
# 'na' marks a missing incidence angle. `Series.replace('na', None)` is
# version-dependent: older pandas treats value=None as "forward-fill", newer
# pandas inserts None and leaves an object-dtype column. Make the handling
# explicit instead: coerce the column to float (non-numeric markers become NaN)
# and impute the gaps with the median so downstream estimators, which reject
# NaN, still receive a fully numeric column.
inc_angle = pandas.to_numeric(iceberg['inc_angle'], errors='coerce')
iceberg['inc_angle'] = inc_angle.fillna(inc_angle.median())

In [None]:
# Remove the raw list-valued band columns and the record id from the frame,
# in place.
for raw_column in ('band_1', 'band_2', 'id'):
    del iceberg[raw_column]

In [None]:
# Preview the frame after the raw band_1 / band_2 / id columns were removed.
iceberg.head()

In [None]:
# Place 'inc_angle' in the second position so that the other leading column
# (read as the label `y` from column 0 further down) comes first.
# The previous positional swap of columns 0 and 1 undid itself whenever the
# cell was executed a second time, silently turning the incidence angle into
# the label. Positioning the column by name gives the same layout on the first
# run and is stable on re-runs.
cols = [name for name in iceberg.columns if name != 'inc_angle']
cols.insert(1, 'inc_angle')
iceberg = iceberg[cols]

In [None]:
# `DataFrame.as_matrix()` was deprecated and then removed in pandas 1.0;
# `.values` returns the same NumPy array on both old and new pandas releases.
iceberg_matrix = iceberg.values
# Column 0 holds the class label; every remaining column is a feature.
y = iceberg_matrix[:, 0].astype(int)
X = iceberg_matrix[:, 1:]
X.shape, y.shape

## Fit a plain LDA model on all features (stratified 50% train / 10% test split)

In [None]:
# Stratified split: 50% of the samples for training and 10% held out for
# testing; the remaining samples are not used.
split_kwargs = dict(
    train_size=0.5,
    test_size=0.1,
    random_state=RANDOM_SEED,
    stratify=y,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

tuple(part.shape for part in (X_train, y_train, X_test, y_test))

In [None]:
# Baseline: LDA fitted directly on the training split.
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)

# Print the accuracy on the training split first, then on the test split.
for features, labels in ((X_train, y_train), (X_test, y_test)):
    hits = lda.predict(features) == labels
    print(hits.sum() / labels.shape[0])

## And an Iterative LDA model

In [None]:
# raise ValueError('Dont run again')
# NOTE(review): the disabled guard above suggests the author did not want this
# cell re-run casually — presumably because the fit is slow; confirm.
# Fit IterativeLDA on the training split, then transform both splits with the
# fitted model.
# NOTE(review): the meaning of the positional argument `2` is defined in the
# local `iterative_lda` module, which is not visible here — confirm before
# changing it.
ilda = IterativeLDA(2, verbose=True)
transformed_X_train = ilda.fit_transform(X_train, y_train)
transformed_X_test = ilda.transform(X_test)

In [None]:
# Random forest on the IterativeLDA-transformed features.
# The forest is a randomized estimator; seed it with the notebook-wide
# RANDOM_SEED (already used for the train/test split) so the reported
# accuracies are reproducible across runs.
rfc = RandomForestClassifier(verbose=True, random_state=RANDOM_SEED)
rfc.fit(transformed_X_train, y_train)

# Accuracy on the training split, then on the test split.
print(np.sum(rfc.predict(transformed_X_train) == y_train) / y_train.shape[0])
print(np.sum(rfc.predict(transformed_X_test) == y_test) / y_test.shape[0])

In [None]:
# Support-vector classifier on the IterativeLDA-transformed features.
svm = SVC(verbose=True)
svm.fit(transformed_X_train, y_train)

# Print the accuracy on the training split first, then on the test split.
for features, labels in ((transformed_X_train, y_train), (transformed_X_test, y_test)):
    hits = svm.predict(features) == labels
    print(hits.sum() / labels.shape[0])