## Tensorflow Run

This is the core TensorFlow run. It sets up the model by training a DNN on the training data and then predicts the class labels for the test data.

This notebook may fail when it is run more than once in the same kernel: the model cell then raises `IndexError: list index out of range`.
Restart the kernel in that case.


In [None]:
import psycopg2
import numpy as np
import tensorflow as tf
import tflearn

# Load CSV file, indicate that the last column represents labels
from tflearn.data_utils import load_csv

In [None]:
# Open a connection to the PostgreSQL server on localhost and get a cursor
db_params = {
    "host": "localhost",
    "database": "postgres",
    "user": "postgres",
    "password": "",
}
conn = psycopg2.connect(**db_params)
cur = conn.cursor()

In [None]:
# Folder in which the data files are stored (empty string = current directory).
# NOTE(review): presumably used as a plain string prefix when building file
# paths, in which case a non-empty value needs a trailing path separator.
data_folder = ""

# Name of the parcels table; also the stem of the CSV file names
parcels_table = "aoi2020"

In [3]:
# tflearn.init_graph(gpu_memory_fraction=0.0)

nclass = 11   # number of classes (length of the categorical labels and of the softmax output)
nepoch = 50   # number of training epochs
nrun = 0      # run index, part of the train/test/predictions CSV file names

# Load the training and test CSVs. The last column (target_column=-1) is the
# class label, which load_csv turns into a categorical vector of length nclass.
# The path prefix is `data_folder` from the configuration cell; the previous
# name `data_` was never defined and raised a NameError on a fresh kernel.
data, labels = load_csv(f'{data_folder}{parcels_table}_train_{nrun}.csv', target_column=-1,
                        categorical_labels=True, n_classes=nclass)

test_data, test_labels = load_csv(f'{data_folder}{parcels_table}_test_{nrun}.csv', target_column=-1,
                                  categorical_labels=True, n_classes=nclass)

# Preprocessing function
def preprocess(profiles, columns_to_delete):
    """Drop the given columns from every row and return a float32 array.

    Args:
        profiles: sequence of rows (each a sequence of values convertible
            to float once the unwanted columns are removed).
        columns_to_delete: iterable of column positions to remove from
            every row.

    Returns:
        np.ndarray of dtype float32 holding the remaining columns.

    Raises:
        IndexError: if a column position does not exist in a row.
        ValueError: if a remaining value cannot be converted to float32.

    The rows passed in are left untouched: the columns are removed from
    per-row copies, so the caller's data is not modified.
    """
    rows = [list(profile) for profile in profiles]

    # Remove the highest positions first so the lower ones stay valid
    for column in sorted(columns_to_delete, reverse=True):
        for row in rows:
            row.pop(column)

    return np.array(rows, dtype=np.float32)

# Column positions to drop from every row (identifier columns such as 'pid')
to_ignore = [0, 1]

# Turn the training rows into a float32 feature matrix.
# A string-to-float32 conversion error raised here usually means the data
# still contains NA() values; remove them from the data first.
data = preprocess(profiles=data, columns_to_delete=to_ignore)

In [4]:
# Start from an empty default graph so this cell can be re-run in the same
# kernel. Without the reset, rebuilding the network on top of the graph left
# over from a previous run fails with "IndexError: list index out of range".
tf.compat.v1.reset_default_graph()

# Build neural network: one input per feature column, two fully connected
# hidden layers of 32 units, and a softmax output with one unit per class
net = tflearn.input_data(shape=[None, len(data[0])])

net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
#net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, nclass, activation='softmax')
net = tflearn.regression(net)

# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(data, labels, n_epoch=nepoch, batch_size=32, show_metric=True)

Training Step: 58549  | total loss: 0.95439 | time: 2.429s
| Adam | epoch: 050 | loss: 0.95439 - acc: 0.6407 -- iter: 37440/37442
Training Step: 58550  | total loss: 0.98672 | time: 2.431s
| Adam | epoch: 050 | loss: 0.98672 - acc: 0.6266 -- iter: 37442/37442
--


In [5]:
# Apply the trained model to predict the class label for the test data and
# write one CSV row per test sample: the sample id, its true class index and
# the predicted probability (in percent) of every class.
# The path prefix is `data_folder` from the configuration cell; the previous
# name `data_` was never defined and raised a NameError.
predictions_path = f"{data_folder}{parcels_table}_predictions_{nrun}.csv"

# The `with` block closes the file so every row is flushed to disk
with open(predictions_path, 'w') as fw:
    header = "id,class" + "".join(f",prob{k}" for k in range(nclass))
    fw.write(header + '\n')

    # Check predictions for the samples not used in training
    for row, row_label in zip(test_data, test_labels):
        # Columns 0 and 1 are not features; column 1 is written out as the id
        sample = row[2:]
        # Position of the 1 in the categorical label vector = true class index
        slabel = row_label.tolist().index(1)
        pred = model.predict([sample])
        probs = "".join(",{:6.2f}".format(100 * pred[0][k]) for k in range(nclass))
        fw.write(f"{row[1]},{slabel}{probs}\n")
