## import libraries

In [None]:
# import libraries and navigate to directory
%cd '/home/uzumochi/training_set_small'
import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import os
from sklearn.svm import SVC, SVR, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

## pca with contrasted data

tag system:  
0 - no storms  
1 - small storms  
2 - big storms

In [None]:
# read in contrasted data and tags, set loop parameters (1)
# every image is flattened to one row of 256 * 256 * 3 values
vec_size = 256 * 256 * 3
data = np.empty((186, vec_size))
for i in range(1, 187):
    # files are named 1.png .. 186.png; row i - 1 holds image i
    img = mpimg.imread(str(i) + '.png')
    # keep the first three channels only (drops a fourth channel if the file has one)
    img = img[:, :, :3]
    img_1d = np.reshape(img, vec_size)
    data[i - 1, :] = img_1d
tags = genfromtxt('/home/uzumochi/tags.csv', delimiter=',')
# merge tag 2 into tag 1 with a single boolean mask instead of an
# element-by-element Python loop over the array
tags[tags == 2] = 1
# number of random train/test splits to evaluate
iterations = 5
# number of samples held out for testing in each split
test_size = 40
# number of classifiers compared; one accuracy slot per classifier
num_tests = 10
accuracy = np.zeros(num_tests)
# total predictions each classifier makes over all splits
total_predictions = iterations * test_size

In [None]:
# start from a clean tally so that re-running this cell does not add to the
# counts (or to already-normalized values) left behind by an earlier run
accuracy = np.zeros(num_tests)
for j in range(iterations):
    # split data into training and test sets (2)
    x_train, x_test, y_train, y_test = train_test_split(data, tags, test_size = test_size)

    # perform original transform again for classification purposes (3)
    # the PCA is fitted on the training rows only
    pca_x = PCA().fit(x_train)
    x_train_pca = pca_x.transform(x_train)

    # compute eigenfaces (4)
    # reshape the components into x_train.shape[0] images of 256 x 256 x 3
    eigenfaces = pca_x.components_.reshape((x_train.shape[0], 256, 256, 3))

    # apply pca transform to x_test (5)
    x_test_pca = pca_x.transform(x_test)

    # fit every classifier on the PCA-projected training rows
    classifier_linsvc = LinearSVC().fit(x_train_pca, y_train)
    classifier_svc = SVC().fit(x_train_pca, y_train)
    classifier_mlp = MLPClassifier(activation='logistic').fit(x_train_pca, y_train)
    classifier_nearestneighbor = KNeighborsClassifier(weights = 'distance').fit(x_train_pca, y_train)
    classifier_decisiontree = DecisionTreeClassifier().fit(x_train_pca, y_train)
    classifier_randomforest = RandomForestClassifier().fit(x_train_pca, y_train)
    classifier_adaboost = AdaBoostClassifier().fit(x_train_pca, y_train)
    classifier_naivebayes = GaussianNB().fit(x_train_pca, y_train)
    classifier_lda = LinearDiscriminantAnalysis().fit(x_train_pca, y_train)
    classifier_qda = QuadraticDiscriminantAnalysis().fit(x_train_pca, y_train)

    # predict the held-out rows with every classifier
    y_pred_LSVC = classifier_linsvc.predict(x_test_pca)
    y_pred_SVC = classifier_svc.predict(x_test_pca)
    y_pred_MLP = classifier_mlp.predict(x_test_pca)
    y_pred_NN = classifier_nearestneighbor.predict(x_test_pca)
    y_pred_DT = classifier_decisiontree.predict(x_test_pca)
    y_pred_RF = classifier_randomforest.predict(x_test_pca)
    y_pred_AB = classifier_adaboost.predict(x_test_pca)
    y_pred_NB = classifier_naivebayes.predict(x_test_pca)
    y_pred_LDA = classifier_lda.predict(x_test_pca)
    y_pred_QDA = classifier_qda.predict(x_test_pca)
    # the position in this list is the classifier's slot in `accuracy`
    # (SVC first, LinearSVC second)
    predictions = [y_pred_SVC, y_pred_LSVC, y_pred_MLP, y_pred_NN, y_pred_DT, y_pred_RF, y_pred_AB, y_pred_NB, y_pred_LDA, y_pred_QDA]

    # log accuracy (6)
    # add this split's number of correct predictions to each classifier's tally
    for i in range(num_tests):
        accuracy[i] += np.count_nonzero(predictions[i] == y_test)

In [None]:
# print results from testing (7)
# normalize into a separate array and leave the raw tallies in `accuracy`
# untouched, so re-running this cell does not divide the totals a second time
mean_accuracy = accuracy / total_predictions
# NOTE(review): this first line prints three decimals while the others print
# two -- confirm whether that is intentional
print('Support Vector Classifier: ' + "{:.3f}".format(mean_accuracy[0]))
print('Linear Support Vector Classifier: ' + "{:.2f}".format(mean_accuracy[1]))
print('Multilayer Perceptron Classifier: ' + "{:.2f}".format(mean_accuracy[2]))
print('K-Nearest Neighbor: ' + "{:.2f}".format(mean_accuracy[3]))
print('Decision Tree: ' + "{:.2f}".format(mean_accuracy[4]))
print('Random Forest: ' + "{:.2f}".format(mean_accuracy[5]))
print('Adaptive Boosting: ' + "{:.2f}".format(mean_accuracy[6]))
print('Gaussian Naive Bayes: ' + "{:.2f}".format(mean_accuracy[7]))
print('Linear Discriminant Analysis: ' + "{:.2f}".format(mean_accuracy[8]))
print('Quadratic Discriminant Analysis: ' + "{:.2f}".format(mean_accuracy[9]))

In [None]:
# fit a PCA on the complete data set (no train/test split) and project the data
pca_data = PCA()
pca_data.fit(data)
data_pca = pca_data.transform(data)
# reshape the components into len(data) images of 256 x 256 x 3
eigenfaces = pca_data.components_.reshape((len(data), 256, 256, 3))
# train the support vector classifier on the projected data and its tags
classifier_svc = SVC()
classifier_svc.fit(data_pca, tags)

In [None]:
# read in new data and predict
%cd '/home/uzumochi/test_imgs'
vec_size = 256 * 256 * 3
test_predict = np.empty(64)
# img = cv2.imread('1.png')
# img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
# l, a, b = cv2.split(img);
# clahe = cv2.createCLAHE(clipLimit = 3.0, tileGridSize = (8, 8));
# img_l = clahe.apply(l);
# img_l = cv2.merge((img_l, a, b));
# final = cv2.cvtColor(img_l, cv2.COLOR_LAB2BGR);
# final = cv2.cvtColor(final, cv2.COLOR_BGR2RGB);
# img = cv2.resize(final, (2048, 2048), interpolation = cv2.INTER_NEAREST)
# plt.imsave('/home/uzumochi/test_imgs/1_contrast.png', img)
img = mpimg.imread('1_contrast.png')
split_img = np.empty((64, vec_size))
start_row = 0
start_col = 0
end_row = 256
end_col = 256
row_idx = np.array([])
for i in range(64):
    block = img[start_row : end_row, start_col : end_col]
    block = block[:, :, :3]
    if end_col != 2048:
        start_col += 256
        end_col += 256
    else:
        start_col = 0
        end_col = 256
        start_row += 256
        end_row += 256
    split_img[i, :] = np.reshape(block, vec_size)
    if np.mean(split_img[i, :]) >= 0.35:
        row_idx = np.append(row_idx, i)

split_test_pca = pca_data.transform(split_img[row_idx.astype(int), :])
# print(split_test_pca)
plt.plot(pca_data.explained_variance_ratio_.cumsum());
split_pred = classifier_svc.predict(split_test_pca)
# print(split_pred)
# for pred in split_pred:
#     if pred != 0:
#         cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,0), 2)
fig, axes = plt.subplots(8, 8, figsize = (18, 18));
index = 0
for i, ax in enumerate(axes.flat):
    ax.imshow(split_img[i - 1, :].reshape(256, 256, 3))
    if (i - 1) in row_idx:
        ax.title.set_text(str("{:.2f}".format(split_pred[index])))
        index += 1
    ax.axis('off')
# plt.show()

# -----------------------------------------------------------------------------------------------------------------

## adjust images to uniform 400x400 size
*only run me if uniform images are not already present*

In [None]:
# navigate to directory and adjust data to uniform size
%cd 'training_set_crop'
for i in range(1,187):
    img = mpimg.imread(str(i) + '.png')
    img = img[:,:,:3]
    img = cv2.resize(img, (400, 400), interpolation = cv2.INTER_NEAREST)
    plt.imsave('/home/uzumochi/training_set_crop_uniform/' + str(i) + '.png', img)

## contrast images and save
*only run me if contrasted images are not already present*

In [None]:
# navigate to directory and contrast images using CLAHE
%cd 'training_set_crop_uniform'
for i in range(1,187):
    img = cv2.imread(str(i) + '.png')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
    l, a, b = cv2.split(img);
    clahe = cv2.createCLAHE(clipLimit = 3.0, tileGridSize = (8, 8));
    img_l = clahe.apply(l);
    img_l = cv2.merge((img_l, a, b));
    final = cv2.cvtColor(img_l, cv2.COLOR_LAB2BGR);
    final = cv2.cvtColor(final, cv2.COLOR_BGR2RGB);
    img = cv2.resize(final, (400, 400), interpolation = cv2.INTER_NEAREST)
    plt.imsave('/home/uzumochi/training_set_crop_uniform_contrast/' + str(i) + '.png', img)

## pca with uncontrasted data
*only keeping for archival purposes*

In [None]:
# read in data
# every image is flattened to one row of 400 * 400 * 3 values, scaled by 255
vec_size = 400 * 400 * 3
train = np.empty((186, vec_size))
for i in range(1, 187):
    # files are named 1.png .. 186.png; row i - 1 holds image i
    img = mpimg.imread(str(i) + '.png')
    img = img[:, :, :3]
    img_1d = np.reshape(img, vec_size)
    train[i - 1, :] = img_1d * 255

# plot images
fig, axes = plt.subplots(31, 6, figsize = (15, 50))
for i, ax in enumerate(axes.flat):
    # enumerate() is zero-based, so panel i shows row i; indexing with i - 1
    # would wrap around and put the last image in the first panel
    # divide by 255 to undo the scaling applied when loading
    ax.imshow(train[i, :].reshape(400, 400, 3) / 255)
    ax.axis('off')
plt.show()

# calculate and plot pca
pca = PCA().fit(train)
plt.figure(figsize = (15, 5))
plt.plot(pca.explained_variance_ratio_.cumsum())

# transform data with pca
data_pca = pca.transform(train)
# indices of the components at which the cumulative explained variance exceeds 0.95
print(np.where(pca.explained_variance_ratio_.cumsum() > 0.95))

# transform data with minimum required principal components
# NOTE(review): 6 is hard-coded here; confirm it matches the threshold printed above
pca = PCA(n_components = 6).fit(train)
plt.figure(figsize = (15, 5))
plt.plot(pca.explained_variance_ratio_.cumsum());

## random code 

In [None]:
# visualize results
# show the first 16 test images; each title reads "<true tag> / <SVC prediction>"
fig, axes = plt.subplots(4, 4, figsize = (15, 15))
for i, ax in enumerate(axes.flat):
    # enumerate() is zero-based, so panel i shows test sample i; indexing with
    # i - 1 would wrap around and put the last test sample in the first panel
    ax.imshow(x_test[i, :].reshape(256, 256, 3))
    ax.title.set_text(str(int(y_test[i])) + " / " + str(int(y_pred_SVC[i])))
    ax.axis('off')
plt.show()

In [None]:
# plot contrasted images
# one panel per row of `data`, laid out on a 31 x 6 grid
fig, axes = plt.subplots(31, 6, figsize = (15, 50))
for i, ax in enumerate(axes.flat):
    # enumerate() is zero-based, so panel i shows row i; indexing with i - 1
    # would wrap around and put the last image in the first panel
    ax.imshow(data[i, :].reshape(256, 256, 3))
    ax.axis('off')
plt.show()

In [None]:
# fit a full PCA on the training split and plot its cumulative explained variance
pca_x = PCA()
pca_x.fit(x_train)
plt.figure(figsize = (15, 5))
plt.plot(np.cumsum(pca_x.explained_variance_ratio_));

In [None]:
# project the training split with the fitted PCA
x_train_pca = pca_x.transform(x_train)
# print the component indices whose cumulative explained variance exceeds 0.95
above_threshold = np.cumsum(pca_x.explained_variance_ratio_) > 0.95
print(above_threshold.nonzero())

In [None]:
# fit a 6-component PCA on the training split and plot its cumulative explained variance
pca_x = PCA(n_components = 6)
pca_x.fit(x_train)
plt.figure(figsize = (15, 5))
plt.plot(np.cumsum(pca_x.explained_variance_ratio_));

In [None]:
# fit a 75-component PCA on the training split and plot its cumulative explained variance
# NOTE(review): 75 is presumably the minimum component count chosen from the
# variance curve -- confirm
pca_x = PCA(n_components = 75)
pca_x.fit(x_train)
plt.figure(figsize = (15, 5))
plt.plot(np.cumsum(pca_x.explained_variance_ratio_));

In [None]:
# show the dimensions of the current eigenfaces array
print(np.shape(eigenfaces))

In [None]:
# plot eigenfaces
# the grid has 93 * 2 = 186 panels
fig, axes = plt.subplots(93, 2, figsize = (4, 200))
# pair panels with eigenfaces via zip() so the loop stops at the shorter of the
# two; `eigenfaces` built from a training split has fewer than 186 entries and
# indexing it by panel number would raise IndexError
for ax, face in zip(axes.flat, eigenfaces):
    # display the magnitude of each component, scaled by 255
    ax.imshow(abs(face) * 255)
# hide the axes of every panel, including any left without an eigenface
for ax in axes.flat:
    ax.axis('off')
plt.show()

In [None]:
# transform data to 2 dimensions
x_pca = PCA(2).fit(x_train)
x_train_pca = x_pca.transform(x_train)
# colour every point by its tag: without `c` the colormap is ignored and the
# colorbar carries no information
# plt.get_cmap is used instead of plt.cm.get_cmap, which newer matplotlib
# releases no longer provide
plt.scatter(x_train_pca[:, 0], x_train_pca[:, 1], c=y_train, edgecolor='none', alpha=0.5, cmap=plt.get_cmap('rainbow', 2))
plt.xlabel('component 1')
plt.ylabel('component 2')
plt.colorbar();

In [None]:
# map the 2-component projection back into the original feature space
x_train_new = x_pca.inverse_transform(x_train_pca)
# scatter the first two features of the original rows (faint) and of their
# reconstructions (stronger) on the same axes
for points, opacity in ((x_train, 0.2), (x_train_new, 0.8)):
    plt.scatter(points[:, 0], points[:, 1], alpha=opacity)
plt.axis('equal');

In [None]:
# read in new data and predict
%cd '/home/uzumochi/test_imgs'
img = mpimg.imread(str(1) + '.png')
img = img[:, :, :3]
print(img.shape)

In [None]:
# Contrast the image held in `img` with CLAHE, resize it to 400 x 400, flatten
# it and pass it to classifier_svc.
# NOTE(review): `img` is presumably the array loaded with mpimg.imread in the
# previous cell, whereas the training contrast cell feeds cv2.imread output
# into this same BGR2LAB / CLAHE sequence -- confirm the channel order and
# dtype are what cv2 expects here.
img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
l, a, b = cv2.split(img);
clahe = cv2.createCLAHE(clipLimit = 3.0, tileGridSize = (8, 8));
# equalize only the first channel, then reassemble the three channels
img_l = clahe.apply(l);
img_l = cv2.merge((img_l, a, b));
final = cv2.cvtColor(img_l, cv2.COLOR_LAB2BGR);
final = cv2.cvtColor(final, cv2.COLOR_BGR2RGB);
img = cv2.resize(final, (400, 400), interpolation = cv2.INTER_NEAREST)
# NOTE(review): different cells set vec_size to 256 * 256 * 3 or 400 * 400 * 3;
# this reshape only succeeds while vec_size is 400 * 400 * 3.
img_1d = np.reshape(img, vec_size)
# NOTE(review): elsewhere in this notebook classifier_svc is fitted on
# PCA-transformed rows; here it receives a raw 1-D pixel vector with no PCA
# transform applied -- verify this call against the intended pipeline.
test_predict = classifier_svc.predict(img_1d)
print(test_predict)

In [None]:
# print('LinearSVC')
# print(classification_report(y_test, y_pred_LSVC, target_names = ['No storms', 'Storms']))
# print('\nSVC')
# print(classification_report(y_test, y_pred_SVC))
# print('\nMLPClassifier')
# print(classification_report(y_test, y_pred_MLP))
# print('\nNearest Neighbor')
# print(classification_report(y_test, y_pred_NN))
# print('\nDecision Tree')
# print(classification_report(y_test, y_pred_DT))
# print('\nRandom Forest')
# print(classification_report(y_test, y_pred_RF))
# print('\nAda Boost')
# print(classification_report(y_test, y_pred_AB))
# print('\nNaive Bayes')
# print(classification_report(y_test, y_pred_NB))
# print('\nLDA')
# print(classification_report(y_test, y_pred_LDA))
# print('\nQDA')
# print(classification_report(y_test, y_pred_QDA))

In [None]:
# drop every sample tagged 1, keeping the remaining rows of `data` aligned with `tags`
# a boolean mask selects the rows to keep, so the block no longer relies on
# hard-coded counts (186 samples in, 114 samples out) or the current vec_size
keep = tags != 1
data_new = data[keep]
tags_new = tags[keep]
data = data_new
tags = tags_new