## import libraries

In [None]:
import cv2, sys, os, random, json
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
from sklearn.svm import SVC, SVR, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

## read in training/testing data

In [None]:
# read in contrasted data and tags
%cd '/home/uzumochi/eigenjuno/data/train/contrast_256'
vec_size = 256 * 256 * 3;
data = np.empty((78 * 2, vec_size))

# ensure dataset class distribution is even
storm_indices = []
no_storm_indices = []
tags = genfromtxt('/home/uzumochi/eigenjuno/data/train/tags.csv', delimiter=',')
for t in range(len(tags)):
    if tags[t] == 1:
        storm_indices.append(t)
    else:
        no_storm_indices.append(t)
selected_imgs = random.sample(storm_indices, 78) + no_storm_indices
curr = 0
for i in range(1, 281):
    if i in selected_imgs:
        img = mpimg.imread(str(i) + '.png')
        img = img[:, :, :3]
        img_1d = np.reshape(img, vec_size)
        data[curr, :] = img_1d;
        curr += 1
tags = [tags[i] for i in selected_imgs]

# set loop parameters
iterations = 5
test_size = 40
num_tests = 1
accuracy = np.zeros(num_tests)
total_predictions = iterations * test_size

## display contrasted dataset

In [None]:
# Show the dataset six images per row. Panel k displays row k of `data`,
# reshaped from its flat vector back to a 256x256 RGB image.
n_cols = 6
fig, axes = plt.subplots(data.shape[0] // n_cols, n_cols, figsize = (15, 50))
for ax, flat_img in zip(axes.flat, data):
    ax.imshow(flat_img.reshape(256, 256, 3))
    ax.axis('off')
plt.show()

## perform training w/ pca and eigenimaging

In [None]:
# Repeat a random train/test split `iterations` times, fitting PCA and an SVC
# on each split. The number of correct SVC predictions is summed over all
# splits in accuracy[0]; the variables left after the loop (x_test, y_test,
# y_pred_SVC, eigenfaces) belong to the final split.
accuracy = np.zeros(num_tests)  # reset so re-running the cell does not double count
for j in range(iterations):
    # split data into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(data, tags, test_size = test_size)

    # fit pca on the training images only, then project them
    pca_x = PCA().fit(x_train)
    x_train_pca = pca_x.transform(x_train)

    # compute eigenfaces: one 256x256 RGB image per principal component
    eigenfaces = pca_x.components_.reshape((-1, 256, 256, 3))

    # apply the same pca transform to x_test
    x_test_pca = pca_x.transform(x_test)
    classifier_svc = SVC().fit(x_train_pca, y_train)
#     classifier_linsvc = LinearSVC().fit(x_train_pca, y_train)
#     classifier_mlp = MLPClassifier(activation='logistic').fit(x_train_pca, y_train)
#     classifier_nearestneighbor = KNeighborsClassifier(weights = 'distance').fit(x_train_pca, y_train)
#     classifier_decisiontree = DecisionTreeClassifier().fit(x_train_pca, y_train)
#     classifier_randomforest = RandomForestClassifier().fit(x_train_pca, y_train)
#     classifier_adaboost = AdaBoostClassifier().fit(x_train_pca, y_train)
#     classifier_naivebayes = GaussianNB().fit(x_train_pca, y_train)
#     classifier_lda = LinearDiscriminantAnalysis().fit(x_train_pca, y_train)
#     classifier_qda = QuadraticDiscriminantAnalysis().fit(x_train_pca, y_train)

    # generate predictions
    y_pred_SVC = classifier_svc.predict(x_test_pca)
#     y_pred_LSVC = classifier_linsvc.predict(x_test_pca)
#     y_pred_MLP = classifier_mlp.predict(x_test_pca)
#     y_pred_NN = classifier_nearestneighbor.predict(x_test_pca)
#     y_pred_DT = classifier_decisiontree.predict(x_test_pca)
#     y_pred_RF = classifier_randomforest.predict(x_test_pca)
#     y_pred_AB = classifier_adaboost.predict(x_test_pca)
#     y_pred_NB = classifier_naivebayes.predict(x_test_pca)
#     y_pred_LDA = classifier_lda.predict(x_test_pca)
#     y_pred_QDA = classifier_qda.predict(x_test_pca)

    # log accuracy: count the test samples the SVC labelled correctly
    accuracy[0] += np.sum(y_pred_SVC == np.asarray(y_test))

## print results and analysis

In [None]:
# Per-class precision / recall / f1 for the support vector classifier.
# y_test and y_pred_SVC hold the values left by the most recent train/test split.
svc_report = classification_report(y_test, y_pred_SVC)
print(svc_report)

# Disabled reporting for the overall accuracy and the alternative classifiers:
# accuracy = accuracy / total_predictions
# print('Support Vector Classifier: ' + str("{:.3f}".format(accuracy[0])))
# print('Linear Support Vector Classifier: ' + str("{:.2f}".format(accuracy[1])))
# print(classification_report(y_test, y_pred_LSVC, target_names = ['No storms', 'Storms']))
# print('Multilayer Perceptron Classifier: ' + str("{:.2f}".format(accuracy[2])))
# print(classification_report(y_test, y_pred_MLP))
# print('K-Nearest Neighbor: ' + str("{:.2f}".format(accuracy[3])))
# print(classification_report(y_test, y_pred_NN))
# print('Decision Tree: ' + str("{:.2f}".format(accuracy[4])))
# print(classification_report(y_test, y_pred_DT))
# print('Random Forest: ' + str("{:.2f}".format(accuracy[5])))
# print(classification_report(y_test, y_pred_RF))
# print('Adaptive Boosting: ' + str("{:.2f}".format(accuracy[6])))
# print(classification_report(y_test, y_pred_AB))
# print('Gaussian Naive Bayes: ' + str("{:.2f}".format(accuracy[7])))
# print(classification_report(y_test, y_pred_NB))
# print('Linear Discriminant Analysis: ' + str("{:.2f}".format(accuracy[8])))
# print(classification_report(y_test, y_pred_LDA))
# print('Quadratic Discriminant Analysis: ' + str("{:.2f}".format(accuracy[9])))
# print(classification_report(y_test, y_pred_QDA))

## plot eigenfaces

In [None]:
# Principal components are unit-length vectors, so their entries are small
# signed values; casting them to uint8 would truncate every pixel to 0.
# Instead, stretch each eigenface independently to [0, 1] for display and
# leave `eigenfaces` itself untouched.
n_rows, n_cols = 4, 6
shown = eigenfaces[:n_rows * n_cols].astype('float32')
flat = shown.reshape(len(shown), -1)
lowest = flat.min(axis = 1, keepdims = True)
span = flat.max(axis = 1, keepdims = True) - lowest
span[span == 0] = 1  # a constant component would otherwise divide by zero
eigen_display = ((flat - lowest) / span).reshape(shown.shape)

fig, axes = plt.subplots(n_rows, n_cols, figsize = (15, 10))
for ax in axes.flat:
    ax.axis('off')
for ax, face in zip(axes.flat, eigen_display):
    ax.imshow(face)
plt.show()

## visualize svc results w/ images

In [None]:
# Show the first 36 test images in test-set order, each titled with its true
# label ('val') and the label predicted by the SVC ('pred').
fig, axes = plt.subplots(6, 6, figsize = (15, 20))
for ax in axes.flat:
    ax.axis('off')
for ax, sample, actual, predicted in zip(axes.flat, x_test, y_test, y_pred_SVC):
    ax.imshow(sample.reshape(256, 256, 3))
    ax.title.set_text('val: ' + str(int(actual)) + " / pred: " + str(int(predicted)))
plt.show()

## contrast single test image

In [None]:
# Contrast-enhance one test image with CLAHE on the lightness channel, resize
# it to 2048x2048 and save it next to the source as '<name>_contrast.png'.
name = ''  # base file name (without '.png') of the image to process
src_path = '/home/uzumochi/eigenjuno/data/test/' + name + '.png'
img = cv2.imread(src_path)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None
    raise FileNotFoundError('could not read image: ' + src_path)

# equalise only the L channel in LAB space so the colours are not shifted
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)
clahe = cv2.createCLAHE(clipLimit = 3.0, tileGridSize = (8, 8))
img_l = cv2.merge((clahe.apply(l), a, b))

# back to BGR, then to RGB, which is the channel order plt.imsave expects
final = cv2.cvtColor(img_l, cv2.COLOR_LAB2BGR)
final = cv2.cvtColor(final, cv2.COLOR_BGR2RGB)
img = cv2.resize(final, (2048, 2048), interpolation = cv2.INTER_NEAREST)
plt.imsave('/home/uzumochi/eigenjuno/data/test/' + name + '_contrast.png', img)

## divide-and-conquer pipeline for testing full images

In [None]:
%cd '/home/uzumochi/eigenjuno/data/test'

# calculate pca/eigenfaces of full dataset
pca_of_data = PCA().fit(data)
transform_data = pca_of_data.transform(data)
eigenfaces = pca_of_data.components_.reshape((data.shape[0], 256, 256, 3))
classifier_svc = SVC().fit(transform_data, tags)

# chop up image
test_name = 'test3_contrast'
vec_size = 256 * 256 * 3
test_predict = np.empty(64)
img = mpimg.imread(test_name + '.png')
img = img[:, :, :3]
split_img = np.empty((64, vec_size))
start_row, end_row, start_col, end_col = 0, 256, 0, 256
row_idx = np.array([])
NUM = 0
for i in range(64):
    block = img[start_row : end_row, start_col : end_col]
    if end_col != 2048:
        start_col += 256
        end_col += 256
    else:
        start_col, end_col = 0, 256
        start_row += 256
        end_row += 256
    split_img[i, :] = np.reshape(block, vec_size)
    if np.mean(split_img[i, :]) >= 0.35:
        row_idx = np.append(row_idx, i)
        NUM += 1

# generate predictions for individual blocks
testing_blocks = split_img[row_idx.astype(int), :]
transform_test = pca_of_data.transform(testing_blocks)
split_pred = classifier_svc.predict(transform_test)

# graph results
fig, axes = plt.subplots(8, 8, figsize = (18, 18));
index = 0
for i, ax in enumerate(axes.flat):
    ax.imshow(split_img[i - 1, :].reshape(256, 256, 3))
    if (i - 1) in row_idx:
        ax.title.set_text(str("{:.2f}".format(split_pred[index])))
        index += 1
    ax.axis('off')

## contrast images, resize to 256x256, and save

In [None]:
%cd '/home/uzumochi/eigenjuno/data/train/cropped'
for i in range(1, 281):
    img = cv2.imread(str(i) + '.png')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
    l, a, b = cv2.split(img);
    clahe = cv2.createCLAHE(clipLimit = 3.0, tileGridSize = (8, 8));
    img_l = clahe.apply(l);
    img_l = cv2.merge((img_l, a, b));
    final = cv2.cvtColor(img_l, cv2.COLOR_LAB2BGR);
    final = cv2.cvtColor(final, cv2.COLOR_BGR2RGB);
    img = cv2.resize(final, (256, 256), interpolation = cv2.INTER_NEAREST)
    plt.imsave('/home/uzumochi/eigenjuno/data/train/contrast_256/' + str(i) + '.png', img)

## pca with contrasted data - minimum components

In [None]:
# Cumulative explained variance of the leading principal components.
# The x axis is 1-based so that x = k reads "variance explained by the first
# k components", and the title is built from the same count that is fitted.
num_components = 6
pca = PCA(n_components = num_components).fit(data)
cumulative_ratio = pca.explained_variance_ratio_.cumsum()
plt.figure(figsize = (15, 5))
plt.plot(np.arange(1, len(cumulative_ratio) + 1), cumulative_ratio)
plt.xlabel('Number of Principal Components')
plt.ylabel('Explained Variance Ratio')
plt.title('Explained Variance Ratio of Principal Components in a Contrasted Dataset: First '
          + str(num_components) + ' Components')

## pca with contrasted data - all components

In [None]:
# Cumulative explained variance over every principal component of `data`.
# The x axis is 1-based so that x = k reads "variance explained by the first
# k components".
pca = PCA().fit(data)
cumulative_ratio = pca.explained_variance_ratio_.cumsum()
plt.figure(figsize = (15, 5))
plt.plot(np.arange(1, len(cumulative_ratio) + 1), cumulative_ratio)
plt.xlabel('Number of Principal Components')
plt.ylabel('Explained Variance Ratio')
plt.title('Explained Variance Ratio of Principal Components in a Contrasted Dataset')

## pca scatter plot - first two components

In [None]:
# Project the dataset onto its first two principal components and colour each
# point by its tag. A colormap only takes effect when `c` is supplied, and the
# colorbar then maps the two colours back to the tag values.
proj = PCA().fit_transform(data)
plt.scatter(proj[:, 0], proj[:, 1], c = tags, cmap = plt.get_cmap('rainbow', 2))
plt.xlabel('component 1')
plt.ylabel('component 2')
plt.colorbar()

## read meta data

In [None]:
# Load the metadata file that sits next to the test image and pull out the
# fields used below. The parsed JSON is kept in `metadata` so that it does not
# overwrite the image dataset stored in `data`.
with open(test_name + '.json', 'r') as f:
    metadata = json.load(f)
solar_distance = metadata['SOLAR_DISTANCE']
craft_altitude = metadata['SPACECRAFT_ALTITUDE']
sub_latitude = metadata['SUB_SPACECRAFT_LATITUDE']
sub_longitude = metadata['SUB_SPACECRAFT_LONGITUDE']
    
# find lat/long of center of identified image
# zoom in/out to focus on desired feature
# track features with centerpoint
# save in log file w/ coordinates, perijove, and feature id

In [None]:
# end of notebook