# Imports

In [36]:
import MySQLdb as sql
import h5py
from earth.utils import generateFilePathStr
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import tensorflow as tf

import plotly.offline as plotly
import plotly.graph_objs as go
plotly.init_notebook_mode(connected=True)

# Generate Datasets

In [2]:
# Names of the per-band arrays that are looked up inside each scene group of the HDF5
# database when the training patches are extracted.
# NOTE(review): these look like Landsat 8 band identifiers plus the quality band (BQA),
# with B8 deliberately left out — confirm against the data source.
bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B9', 'BQA']

In [105]:
# Draw a random sample of up to 20000 labelled pixel locations from the k-means
# cloud-detection table. Each row is (lid, x_coord, y_coord, label).
# NOTE(review): the host address and the passwordless root login are hard-coded here;
# consider loading connection settings from configuration or environment variables.
db = sql.connect(db='earthdat', host='104.199.118.158', user='root')
try:
    cur = db.cursor()
    try:
        cur.execute('SELECT lid, x_coord, y_coord, label FROM cloud_detection_kmeans2 ORDER BY RAND() LIMIT 20000;')
        results = cur.fetchall()
    finally:
        cur.close()
finally:
    # Release the connection even when the query fails, so a failed cell does not
    # leave a dangling connection on the server.
    db.close()

In [107]:
# Build one flattened feature vector per sampled pixel: for every band, cut a
# grid_size x grid_size window around (x, y) out of the scene `lid`, flatten it, and
# concatenate the bands in the order given by `bands`.
#
# The array sizes are derived from the query result and the band list instead of being
# hard-coded, so a query that returns fewer rows than its LIMIT does not leave
# uninitialised (np.empty) rows in `dataset` / `labels`.
# NOTE(review): a window that extends past the edge of a scene will not contain
# grid_size**2 pixels and the row assignment below will fail — presumably the table
# only holds interior pixels; confirm.
grid_size = 16
half_window = grid_size // 2
n_samples = len(results)
n_features = len(bands) * grid_size**2

dataset = np.empty((n_samples, n_features))
labels = np.empty((n_samples,))
with h5py.File(generateFilePathStr(kind='database'), 'r') as h:
    for i, (lid, x, y, status) in enumerate(results):
        scene = h[lid]  # look the scene group up once instead of once per band
        subimage = np.empty((len(bands), grid_size**2))
        for j, b in enumerate(bands):
            subimage[j] = scene[b][
                x - half_window : x + half_window,
                y - half_window : y + half_window
            ].flatten()
        dataset[i] = subimage.flatten()
        labels[i] = status

# 3D Scatter Plots

In [108]:
# Project the flattened patches down to three dimensions with t-SNE; the result feeds
# the 3D scatter plot.
tsne = TSNE(n_components=3)
ld_dataset = tsne.fit_transform(dataset)

In [109]:
# One 3D marker trace per label value (0, 1, 2), each showing the embedded points that
# carry that label.
embedded_by_label = [ld_dataset[labels == value] for value in (0, 1, 2)]

fig0, fig1, fig2 = [
    go.Scatter3d(
        x=points[:, 0],
        y=points[:, 1],
        z=points[:, 2],
        marker={'size': 1},
        mode="markers"
    )
    for points in embedded_by_label
]

plotly.iplot([fig0, fig1, fig2])

# 2D Scatter Plots

In [110]:
# Project the flattened patches down to two dimensions with t-SNE; the result feeds
# the 2D scatter plot.
tsne = TSNE(n_components=2)
ld_dataset = tsne.fit_transform(dataset)

In [115]:
# One 2D marker trace per label value (0, 1, 2). The trace for label 2 is drawn with a
# low opacity; the other two use the default opacity.
marker_by_label = [
    (0, {'size': 2}),
    (1, {'size': 2}),
    (2, {'size': 2, 'opacity': 0.1}),
]

fig0, fig1, fig2 = [
    go.Scatter(
        x=ld_dataset[labels == value][:, 0],
        y=ld_dataset[labels == value][:, 1],
        marker=marker_style,
        mode="markers"
    )
    for value, marker_style in marker_by_label
]

plotly.iplot([fig0, fig1, fig2])

# Fitting 

In [81]:
class FFNN(object):
    def __init__(self, layers, activation=tf.tanh, input_vector=None, session=None):
        """ An implementation of a simple feed-forward neural network using the low-level
            tensorflow API.

        Inputs
            layers <list(ints)>: A list of integers giving the number of nodes in each layer. The
                first item in the list gives the input dimension and the last item gives output
                dimension.
            activation <function(tf.Tensor -> tf.Tensor)>: A function mapping tensors to tensors
                which will be used as the activation function. It is applied after every layer,
                including the output layer.
            input_vector <tf.Tensor>: An (None, dim_in) shaped tensor. If not given, a tensorflow
                placeholder is used instead.
            session <tf.Session>: The session used to initialise, train and evaluate the network.
                If not given, a new tf.InteractiveSession is created.

        Attributes
            session
            activation
            layers
            input
            output
            train_targets
            weights
            biases
        """
        self.activation = activation
        self.layers = layers

        if input_vector is None:
            self.input = tf.placeholder(tf.float32, [None, layers[0]])
        else:
            self.input = input_vector
        if session is None:
            self.session = tf.InteractiveSession()
        else:
            self.session = session
        self.train_targets = tf.placeholder(tf.float32, [None, layers[-1]])

        # One weight matrix and one bias vector per pair of consecutive layers, both drawn
        # uniformly from [-1, 1).
        self.weights = []
        self.biases = []
        for n_in, n_out in zip(self.layers[:-1], self.layers[1:]):
            self.weights.append(tf.Variable(tf.random_uniform([n_in, n_out], -1, 1)))
            self.biases.append(tf.Variable(tf.random_uniform([n_out], -1, 1)))

        # Chain the layers: output = activation(previous_output @ w + b).
        self.output = self.input
        for w, b in zip(self.weights, self.biases):
            self.output = self.activation(tf.matmul(self.output, w) + b)

        # Initialise only this network's variables. Running the global initialiser here would
        # also reset every other variable in the graph, e.g. the trained weights of another
        # model that shares a caller-supplied session.
        self.session.run(tf.variables_initializer(self.weights + self.biases))


    def train(self, train_in, train_out, loss_func=None, optimizer=None, batch_size=10, epochs=1):
        """ Train the network weights with provided data. Trained weights can be accessed inside of
            the tensorflow session stored as `self.session`.

        Inputs
            train_in: Array of input rows, one sample per row.
            train_out: Array of target rows aligned with `train_in`.
            loss_func: Callable following the `tf.losses.*` convention, i.e. called as
                `loss_func(labels, predictions)`. Defaults to tf.losses.mean_squared_error.
            optimizer: A tf.train optimizer instance. Defaults to plain gradient descent with
                a learning rate of 0.01.
            batch_size: Number of consecutive rows fed per optimisation step.
            epochs: Number of optimisation steps to run. Each step consumes a single batch, so
                this is a step count rather than a number of full passes over the data.
        Returns
            <None>
        """
        if loss_func is None:
            loss_func = tf.losses.mean_squared_error
        if optimizer is None:
            optimizer = tf.train.GradientDescentOptimizer(0.01)

        # Remember which variables exist before the training ops are built, so that anything
        # the optimizer creates (e.g. momentum / Adam slot variables) can be initialised
        # without touching weights that are already trained.
        known_names = {v.name for v in tf.global_variables()}

        # tf.losses functions take (labels, predictions); the order is irrelevant for the
        # default MSE but matters for asymmetric losses.
        loss_val = loss_func(self.train_targets, self.output)
        train_step = optimizer.minimize(loss_val)

        new_variables = [v for v in tf.global_variables() if v.name not in known_names]
        if new_variables:
            self.session.run(tf.variables_initializer(new_variables))

        for i in range(epochs):
            # Step i uses the rows starting at offset batch_size * i, wrapping around to the
            # start of the data once the end is reached.
            in_batch = np.roll(train_in, -batch_size * i, 0)[:batch_size]
            out_batch = np.roll(train_out, -batch_size * i, 0)[:batch_size]
            self.session.run(train_step, feed_dict={self.input: in_batch, self.train_targets: out_batch})


    def evaluate(self, in_vector):
        """ Runs the model on a numpy array representing a collection of input data

        Inputs
            in_vector: Array of input rows fed to the network's input tensor.

        Returns
            <np.ndarray>: The network output for every input row.
        """
        return self.session.run(self.output, feed_dict={self.input: in_vector})

In [82]:
# Keep only the embedded points whose label is not 2 and scale them by their largest
# absolute coordinate, so every value lies in [-1, 1].
binary_points = ld_dataset[labels != 2]
norm_ld_dataset = binary_points / abs(binary_points).max()

In [93]:
# Fit a one-hidden-layer network that maps an embedded point to its label (labels other
# than 2 only), then run it back over the training inputs.
# The input width is taken from the data and the targets are reshaped with -1, so the cell
# works for any embedding dimension and for any random sample drawn from the database
# (the number of rows with label != 2 changes from sample to sample).
binary_targets = labels[labels != 2].reshape((-1, 1))
ffnn = FFNN([norm_ld_dataset.shape[1], 256, 1])
ffnn.train(norm_ld_dataset, binary_targets, batch_size=100, epochs=5000)
predicted_output = ffnn.evaluate(norm_ld_dataset)