In [1]:
## import custom functions
from eigenjuno import *

In [2]:
## figure readability: serif font, 16 pt text, and the "too many open figures" warning threshold set to 0
plt.rcParams.update({
    'font.family': 'Serif',
    'font.size': 16,
    'figure.max_open_warning': 0,
})

In [3]:
## useful globals
## directories (all under the user's home folder, "~" expanded once here)
TRAIN_DIR, TEST_DIR, KERNEL_DIR = (
    os.path.expanduser(location)
    for location in (
        "~/eigenjuno/DATA/TRAIN/",
        "~/eigenjuno/DATA/TEST/",
        "~/eigenjuno/STITCHING/KERNELS/",
    )
)

## second argument handed to read_data when the training set is loaded
TRAIN_SIZE = 400

## identifiers of the images pushed through the test pipeline
TEST_IMGS = [
    5192, 6582, 6587, 6743, 6745, 6746, 6749,
    6750, 6881, 6882, 6883, 6884, 6923, 6978,
    6980, 6983, 6984, 6991, 6993, 6994, 10348,
]

In [None]:
## read in data and tags
## read_data is presumably provided by the eigenjuno star-import (confirm); it is
## called with the training directory and TRAIN_SIZE, and its two return values
## are kept at notebook scope as `data` and `tags` for the cells that follow.
data, tags = read_data(TRAIN_DIR, TRAIN_SIZE)

In [None]:
## train and return svm and pca models
## train is called with the loaded `data` and `tags`; its two return values are
## bound to the notebook-level names `svm` and `pca`.
## NOTE(review): several later cells rebind `svm` / `pca` (load_models(), the
## parameter sweeps), so these names do not always refer to this result.
svm, pca = train(data, tags)

In [None]:
## test series of images, log detections, save some useful figures
## For every identifier in TEST_IMGS: print a progress line, call test() with the
## test directory and the identifier, then pass the result to log_detections().
## NOTE(review): test() is given neither `svm` nor `pca`; presumably it loads the
## trained models itself -- confirm in eigenjuno.
## TEST_NAME is a notebook-level name, so it stays bound to the last identifier
## processed once the loop ends.
for TEST_NAME in TEST_IMGS:
    print('Running image', TEST_NAME, 'through test pipeline')
    detections = test(TEST_DIR, TEST_NAME)
    log_detections(TEST_DIR, TEST_NAME, detections)

Running image 5192 through test pipeline
Running image 6582 through test pipeline
Running image 6587 through test pipeline
Running image 6743 through test pipeline


In [None]:
## choose the image to geolocate explicitly rather than relying on whatever value
## the test loop's variable happened to be left holding; the last entry of
## TEST_IMGS is the value a complete run of that loop leaves behind
TEST_NAME = TEST_IMGS[-1]

## load kernels
load_kernels(KERNEL_DIR)

## read image metadata
et = get_etime(TEST_DIR, TEST_NAME)

## get positional data
long, lat, orient = get_position(et)

## convert surface raster to lat/long
coords = raster_to_planeto(TEST_DIR, TEST_NAME)

In [None]:
## contrast and resize new training images
## Every numerically named .png found under TRAIN_DIR/PROCESS/ is passed through
## contrast_resize at 256 x 256 and saved to TRAIN_DIR/ONE/<number>.png.
process_dir = os.path.join(TRAIN_DIR, 'PROCESS/')
output_dir = os.path.join(TRAIN_DIR, 'ONE/')

## make sure the destination exists before the first save
os.makedirs(output_dir, exist_ok = True)

for root, subdirs, files in os.walk(process_dir):
    ## sorted so the processing order does not depend on the filesystem
    for file in sorted(files):
        stem, ext = os.path.splitext(file)
        ## ignore anything that is not a png (hidden files, notes, ...)
        if ext.lower() != '.png':
            continue
        try:
            num = int(stem)
        except ValueError:
            print('Skipping', file, '(file name is not an image number)')
            continue
        ## read the file where os.walk actually found it, so images inside
        ## sub-folders and zero-padded names resolve to the real path
        img = contrast_resize(os.path.join(root, file), (256, 256))
        plt.imsave(os.path.join(output_dir, str(num) + '.png'), img)

In [None]:
## display contrasted dataset
## one panel per sample: the first 49 rows of `data` fill a 7 x 7 grid in
## row-major order, each titled with its integer tag
fig, axes = plt.subplots(nrows = 7, ncols = 7, figsize = (15, 20))
for idx, panel in enumerate(axes.flat):
    panel.imshow(data[idx, :].reshape(VEC_DIM))
    heading = panel.title
    heading.set_text('VALUE ' + str(int(tags[idx])))
    heading.set_fontsize(14)
    panel.axis('off')
fig.savefig('FIGURES/display_dataset.png')

In [None]:
## visualize results with dataset test images
## NOTE(review): X_test, y_test and y_pred are not assigned in any visible cell of
## this notebook; presumably they come from the eigenjuno star-import or from
## train() -- confirm they exist on a fresh kernel.
fig, axes = plt.subplots(6, 6, figsize = (15, 20))
for i, ax in enumerate(axes.flat):
    ## enumerate starts at 0, so panel i shows sample i (an `i - 1` index would
    ## wrap around to the last sample for the first panel)
    ax.imshow(X_test[i, :].reshape(VEC_DIM))
    ax.title.set_text('VALUE ' + str(int(y_test[i])) + "\n PREDICTED " + str(int(y_pred[i])))
    ax.title.set_fontsize(14)
    ax.axis('off')
## save through the figure handle, like the other grid figures in this notebook
fig.savefig('FIGURES/sample_outputs.png')

In [None]:
## pca with minimal components
## fit a fresh PCA on `data` using the same number of components that `pca`
## holds, then plot the running total of its explained-variance ratios
pca_MIN = PCA(n_components = pca.components_.shape[0]).fit(data)
cumulative_ratio = pca_MIN.explained_variance_ratio_.cumsum()

fig, ax = plt.subplots(figsize = (10, 7))
ax.plot(cumulative_ratio, color = 'seagreen')
ax.set_xlabel('Number of Principal Components')
ax.set_ylabel('Explained Variance Ratio')
ax.set_title('Explained Variance Ratio of\nPrincipal Components in a Contrasted Dataset')
fig.savefig('FIGURES/pca_variance.png')

In [None]:
## visualize support vectors
svm, pca = load_models()

sv_indices = svm.support_

## 5 x 4 grid: at most the first 20 support vectors are drawn
fig, axes = plt.subplots(nrows = 5, ncols = 4, figsize = (18, 22))
for slot, panel in enumerate(axes.flat):
    ## remove grid cells that have no support vector to show
    if slot >= sv_indices.size:
        fig.delaxes(panel)
        continue
    ## undo the PCA projection, then reshape to VEC_DIM and clip to [0, 1] for display
    restored = pca.inverse_transform(svm.support_vectors_[slot, :])
    panel.imshow(restored.reshape((VEC_DIM)).astype(float).clip(0, 1))
    panel.axis('off')

fig.savefig('FIGURES/support_vectors.png')

In [None]:
## visualize eigenfaces
_, pca = load_models()

## 5 x 4 grid: at most the first 20 principal components are drawn
fig, axes = plt.subplots(nrows = 5, ncols = 4, figsize = (18, 25))
for slot, panel in enumerate(axes.flat):
    ## remove grid cells beyond the number of available components
    if slot >= pca.components_.shape[0]:
        fig.delaxes(panel)
        continue
    ## title each panel with the component's singular value
    panel.title.set_text(str(pca.singular_values_[slot]))
    ## NOTE(review): the component is scaled by 255 and then clipped to [0, 1],
    ## so every value of at least 1/255 is drawn at full intensity -- confirm this is intended
    component = pca.components_[slot, :].reshape((VEC_DIM)) * 255
    panel.imshow(component.astype(float).clip(0, 1))
    panel.axis('off')

fig.savefig('FIGURES/eigenfaces.png')

In [None]:
## retrain model with new data


In [None]:
## variance of training space parameter vs num support vectors
## Sweeps the RBF `gamma` value over a log-spaced grid, refits an SVC on the
## PCA-projected data for each value and records how many support vectors it keeps.
## NOTE: the loop rebinds the notebook-level name `svm` to the last sweep model.
_, pca = load_models()

G_RANGE = np.logspace(-9, 1, 20)

NUM_SVS = []

data_PCA = pca.transform(data)

for G in G_RANGE:
    svm = SVC(C = 1, kernel = 'rbf', gamma = G, class_weight = 'balanced')
    svm.fit(data_PCA, tags)
    NUM_SVS.append(svm.support_vectors_.shape[0])

plt.figure(figsize = (10, 7));
## the x axis must be the swept gamma grid; C_RANGE belongs to the cost sweep
## and does not exist until that cell has been run
plt.plot(G_RANGE, NUM_SVS, color = 'lightcoral');
plt.xscale('log');
plt.xlabel('G');
plt.ylabel('Support Vectors');
plt.title('Gamma Value (Variance of Training Space)\nvs. Number of Support Vectors')
plt.savefig('FIGURES/gamma_sv_tradeoff.png')

In [None]:
## visualize cost parameter C vs number of support vectors
## Sweeps C over a log-spaced grid with gamma left at 'scale', refitting an SVC
## on the PCA-projected data each time and counting its support vectors.
_, pca = load_models()

C_RANGE = np.logspace(-2, 4, 20)
data_PCA = pca.transform(data)

NUM_SVS = []
for C in C_RANGE:
    ## fit() returns the estimator itself, so build and fit in one expression
    svm = SVC(C = C, kernel = 'rbf', gamma = 'scale', class_weight = 'balanced').fit(data_PCA, tags)
    NUM_SVS.append(len(svm.support_vectors_))

fig, ax = plt.subplots(figsize = (10, 7))
ax.plot(C_RANGE, NUM_SVS, color = 'darkorchid')
ax.set_xscale('log')
ax.set_xlabel('C')
ax.set_ylabel('Support Vectors')
ax.set_title('Cost Value (L2 Regularization Parameter)\nvs. Number of Support Vectors')
fig.savefig('FIGURES/cost_sv_tradeoff.png')

In [None]:
## simple pca example with random 3D data
## Left panel: n synthetic 3D points split into two classes by their y value.
## Right panel: the same points after a 3D -> 2D PCA projection, same class markers.
fig = plt.figure(figsize = (15, 6))
ax1 = fig.add_subplot(1, 2, 1, projection = '3d')
ax2 = fig.add_subplot(1, 2, 2)

n = 100

## private, seeded generator: the figure is reproducible and numpy's global
## random state is left untouched
rng = np.random.RandomState(0)

xyz = np.empty((n, 3))

## generate random 3D data (every column is sized by n, so n can be changed freely)
xyz[:, 0] = np.linspace(0, 1, n)
xyz[:, 1] = np.sin(9 * xyz[:, 0]) + np.sqrt(1 / 3.0) * rng.randn(n)
xyz[:, 2] = rng.rand(n)

## define binary classes - green triangle, pink circle
## the class rule is evaluated once and reused for both panels
low_y = xyz[:, 1] < 0.5
classes = [(low_y, '^', 'green'), (~low_y, 'o', 'hotpink')]

## one scatter call per class; depthshade is off so every marker is drawn at full opacity
for mask, marker, color in classes:
    ax1.scatter(xyz[mask, 0], xyz[mask, 1], xyz[mask, 2], marker = marker, color = color, depthshade = False)

ax1.set_xlabel('X')
ax1.set_ylabel('Y')
ax1.set_zlabel('Z')
ax1.set_title('Data before PCA (3D)')

for label in (ax1.get_xticklabels() + ax1.get_yticklabels() + ax1.get_zticklabels()):
    label.set_fontsize(12)

## reduce dimension from 3D to 2D
pca_xyz = PCA(n_components = 2).fit(xyz)
xyz_d = pca_xyz.transform(xyz)

for mask, marker, color in classes:
    ax2.scatter(xyz_d[mask, 0], xyz_d[mask, 1], marker = marker, color = color)

ax2.set_xlabel('PC 1')
ax2.set_ylabel('PC 2')
ax2.set_title('Data after PCA (2D)')

for label in (ax2.get_xticklabels() + ax2.get_yticklabels()):
    label.set_fontsize(12)

fig.savefig('FIGURES/pca_example.png')

In [None]:
## reduce junocam data to two dimensions and visualize
fig, ax = plt.subplots(figsize = (10, 7))

## reduce dimension from ND to 2D
pca_xy = PCA(n_components = 2).fit(data)
xy_d = pca_xy.transform(data)

## one marker per sample: green triangle when its tag is 0, pink circle otherwise
for row in range(data.shape[0]):
    marker, color = ('^', 'green') if tags[row] == 0 else ('o', 'hotpink')
    ax.scatter(xy_d[row, 0], xy_d[row, 1], marker = marker, color = color)

ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_title('Reduced Dimensionality JunoCam Data (2D)')

## shrink the tick labels on both axes
tick_labels = ax.get_xticklabels() + ax.get_yticklabels()
for tick_label in tick_labels:
    tick_label.set_fontsize(12)

fig.savefig('FIGURES/pca_juno_2d.png')

In [None]:
# svm, pca = load_models()
# img = cv2.imread(os.path.join(TEST_DIR, '6582-Contrasted.png'))
# b, g, r = cv2.split(img)
# img = cv2.merge((r, g, b))
# img = (img / 255)

# split_img = np.empty((64, VEC_SIZE))
# dist = np.zeros((64, 2))
# start_row, end_row, start_col, end_col = 0, 256, 0, 256
# valid_indices = []
# index = 0
# for i in range(64):
#     block = img[start_row : end_row, start_col : end_col]
#     if end_col != 2048:
#         start_col += 256
#         end_col += 256
#     else:
#         start_col, end_col = 0, 256
#         start_row += 256
#         end_row += 256
#     block = np.reshape(block, VEC_SIZE)
#     split_img[i, :] = block
#     zero_locs = np.where(block < 0.035)[0]
#     if (zero_locs.size < block.size / 3):
#         dist[i] = np.linalg.norm(split_img[i, :] - pca.components_[2, :])
#         for j in range(3, pca.components_.shape[0]):
#             if dist[i, 0] > np.linalg.norm(split_img[i, :] - pca.components_[j, :]):
#                 dist[i, 0] = np.linalg.norm(split_img[i, :] - pca.components_[j, :])
#                 dist[i, 1] = j
#     else:
#         dist[i, 1] = -1

# # print(dist)

# fig, axes = plt.subplots(8, 8, figsize = (18, 18));
# for i, ax in enumerate(axes.flat):
#     ax.imshow(split_img[i, :].reshape(256, 256, 3))
#     ax.title.set_text(str(dist[i, 1]))
#     ax.axis('off')

# fig, axes = plt.subplots(8, 8, figsize = (18, 18));
# for i, ax in enumerate(axes.flat):
#     if dist[i, 1] >= 0:
#         ax.imshow((pca.components_[int(dist[i, 1]), :].reshape((VEC_DIM)) * 255).astype(float).clip(0, 1))
#     else:
#         ax.imshow(split_img[i, :].reshape(256, 256, 3))
#     ax.axis('off')

In [None]:
# end of notebook