In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

# TensorFlow and tf.keras
import tensorflow as tf
tf.enable_eager_execution()
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

print(tf.__version__)

In [None]:
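# Categorical feature columns (these are one-hot encoded further down,
# separately from the numeric columns, which are min-max scaled):

#land_surface_condition
#foundation_type
#roof_type
#ground_floor_type
#other_floor_type
#position
#plan_configuration
#legal_ownership_status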

In [None]:
# Load Training Data
# Features and labels live in separate CSV files; the building_id column is
# removed from both so that only feature / label columns remain.
X_train = pd.read_csv("train_values.csv").drop('building_id', axis='columns')
Y_train = pd.read_csv("train_labels.csv").drop('building_id', axis='columns')

In [None]:
# One Hot Encoding Categorical Columns
categorical_cols = ['land_surface_condition','foundation_type','roof_type','ground_floor_type','other_floor_type','position','plan_configuration','legal_ownership_status']

# Only the training frame is converted here. The test frame (X_test) is not
# loaded until the testing cell further down, so touching it in this cell
# raises NameError when the notebook is run top to bottom on a fresh kernel.
for col in categorical_cols:
    X_train[col] = pd.Categorical(X_train[col])

# One indicator column per category, named "<column>_<category>". The
# per-column pieces are placed side by side in the order of categorical_cols,
# built with a single concat instead of growing the frame inside a loop.
dummies = pd.concat(
    [pd.get_dummies(X_train[col], prefix=col) for col in categorical_cols],
    axis=1)

In [None]:
# Feature Scaling
# The scaler is fitted on the training features only and is kept in `scaler`
# so that later cells can reuse it.
scaler = MinMaxScaler()

# The categorical columns are left out of scaling; their one-hot encoding is
# already held in `dummies` and is appended in the next cell.
X_train = X_train.drop(columns=categorical_cols)
scaled_values = scaler.fit_transform(X_train)
X_train = pd.DataFrame(scaled_values, columns=X_train.columns)

# Reset the guard that the next cell checks before appending the dummies.
concated = False
print(X_train.shape)

In [None]:
# Add categorical columns after feature scaling
# `concated` guards against appending the one-hot columns a second time when
# this cell is re-run without re-running the scaling cell first.
if not concated:
    X_train = pd.concat([X_train, dummies], axis=1)
    concated = True

In [None]:
# Prepare Data To Be Inserted Into Tensorflow Model
# Draw ONE random sample of rows and look up the labels of those same rows.
# Sampling features and labels independently (two separate .sample() calls)
# pairs each feature row with the label of an unrelated row.
# NOTE(review): this relies on X_train and Y_train sharing the same row index,
# i.e. train_values.csv and train_labels.csv listing buildings in the same
# order -- confirm.
X_training_test = X_train.sample(1024)

# Shift the labels down by one so that they start at 0, which is what the
# sparse cross-entropy loss and the argmax over the model outputs expect.
# NOTE(review): assumes the raw labels start at 1 -- confirm.
Y_training_test = Y_train.loc[X_training_test.index] - 1

dataset = tf.data.Dataset.from_tensor_slices((tf.cast(X_training_test.values, tf.float32),
            tf.cast(Y_training_test.values, tf.int32)))
# One iteration over `dataset` yields the sample 10 times, in batches of 32.
dataset = dataset.shuffle(1000).repeat(10).batch(32)

In [None]:
# Model Declaration
# Fully connected classifier: 68 input features -> Dense(68, tanh)
# -> Dense(128, relu) -> Dense(3, softmax). The last layer therefore outputs
# class probabilities, not raw logits.
model = keras.Sequential([
    keras.layers.Dense(68, activation=tf.nn.tanh, input_dim=68),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(3, activation=tf.nn.softmax),
])

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
def loss(model, x, y):
  """Return the mean sparse categorical cross-entropy of `model` on a batch.

  The model's final layer already applies softmax, so `model(x)` yields class
  probabilities. Feeding those to a loss that expects logits would apply
  softmax a second time, which distorts the loss and flattens the gradients.
  The cross-entropy used here takes probabilities directly.

  Args:
    model: callable mapping a batch of features to per-class probabilities.
    x: batch of input features.
    y: integer class labels for the batch (0-based).

  Returns:
    Scalar tensor: the cross-entropy averaged over the batch.
  """
  probabilities = model(x)
  per_example = tf.keras.losses.sparse_categorical_crossentropy(y, probabilities)
  return tf.reduce_mean(per_example)

def grad(model, inputs, targets):
  """Compute the batch loss and its gradients for one training step.

  Args:
    model: model whose `trainable_variables` are differentiated.
    inputs: batch of input features, forwarded to `loss`.
    targets: labels for the batch, forwarded to `loss`.

  Returns:
    A pair `(loss_value, gradients)`, where `gradients` holds one entry per
    variable in `model.trainable_variables`, in the same order.
  """
  # The forward pass runs under the tape so it can be differentiated.
  with tf.GradientTape() as recorder:
    batch_loss = loss(model, inputs, targets)
  gradients = recorder.gradient(batch_loss, model.trainable_variables)
  return batch_loss, gradients

In [None]:
# Plain gradient descent (learning rate 0.03), used by the manual training loop.
optimizer = tf.train.GradientDescentOptimizer(0.03)

# Step counter that the training loop passes to optimizer.apply_gradients().
global_step = tf.Variable(0)

In [None]:
## Note: Rerunning this cell uses the same model variables
from tensorflow import contrib
tfe = contrib.eager

# keep results for plotting
train_loss_results = []
train_accuracy_results = []

num_epochs = 50

for epoch in range(num_epochs):
  # Fresh metric objects so every epoch reports its own averages.
  epoch_loss_avg = tfe.metrics.Mean()
  epoch_accuracy = tfe.metrics.Accuracy()

  # Training loop - one pass over `dataset`, batch by batch
  for x, y in dataset:
    # Optimize the model
    loss_value, grads = grad(model, x, y)
    optimizer.apply_gradients(zip(grads, model.trainable_variables),
                              global_step)

    # Track progress
    epoch_loss_avg(loss_value)  # add current batch loss

    # compare predicted label to actual label (forward pass after the update)
    probabilities = model(x)
    predictions = tf.argmax(probabilities, axis=1, output_type=tf.int32)

    # Give the predictions the same shape as the labels instead of a
    # hard-coded [32, 1], so a batch that does not hold exactly 32 rows
    # (a smaller final batch, or a different batch size) does not make
    # tf.reshape fail.
    predictions = tf.reshape(predictions, tf.shape(y))

    # The metric is called as (labels, predictions).
    epoch_accuracy(y, predictions)

  # end epoch
  train_loss_results.append(epoch_loss_avg.result())
  train_accuracy_results.append(epoch_accuracy.result())

  if epoch % 2 == 0:
    print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.8%}".format(epoch,
                                                                epoch_loss_avg.result(),
                                                                epoch_accuracy.result()))

In [None]:
# Load Testing Data
# The raw frame is kept in `test_data`, so building_id stays available there.
test_data = pd.read_csv("test_values.csv")
X_test = test_data.drop('building_id', axis='columns')

# One Hot Encoding Testing Data
categorical_cols = ['land_surface_condition','foundation_type','roof_type','ground_floor_type','other_floor_type','position','plan_configuration','legal_ownership_status']

for col in categorical_cols:
    X_test[col] = pd.Categorical(X_test[col])

# Indicator columns named "<column>_<category>", laid out in the order of
# categorical_cols. Note that this rebinds `dummies` to the test encoding.
dummies = pd.concat(
    [pd.get_dummies(X_test[col], prefix=col) for col in categorical_cols],
    axis=1)
    
# Feature Scaling Testing Data
X_test = X_test.drop(columns=categorical_cols)
# Reuse the scaler that was fitted on the training features. Calling
# fit_transform here would refit it on the test data, so the same raw value
# would map to a different scaled value than the one the model was trained on.
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
print(X_test.shape)


# Add categorical columns after feature scaling
X_test = pd.concat([X_test, dummies],axis=1)

# Line the columns up with the training frame: a category that never occurs
# in the test file would otherwise leave its indicator column missing (and
# shift every later feature). Missing indicators are filled with 0; columns
# unknown to the training frame are dropped.
# NOTE(review): assumes X_train already has its one-hot columns appended at
# this point, as it does when the notebook is run top to bottom -- confirm.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# Prepare Data To Be Inserted Into Tensorflow Model
# No labels for the test set are loaded anywhere in this notebook (there is
# no `Y_test`), so the dataset carries features only and is meant for
# prediction rather than for computing accuracy.
# TODO: if a labelled hold-out set becomes available, load it and select its
# rows with X_testing_test.index so features and labels stay paired.
X_testing_test = X_test.sample(1024)

# No shuffle / repeat here: batches are yielded in the row order of
# X_testing_test, so the i-th prediction belongs to the i-th sampled row
# (X_testing_test.index gives its position in test_data).
test_dataset = tf.data.Dataset.from_tensor_slices(
    tf.cast(X_testing_test.values, tf.float32))
test_dataset = test_dataset.batch(32)