In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Load the training split
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists
tor_df = pd.read_csv("/home/jeremydiaz/tornadoesr/data/raw/tor_train_set.csv")


# Predictors: every column from position 3 onward, as a float Variable
tornado_predictors = tor_df.iloc[:, 3:]
train_predictors_Variable = Variable(
    torch.from_numpy(tornado_predictors.values).float()
)


# Outcome: the column at position 2, selected with a list so it stays 2-D,
# then converted ndarray -> float tensor -> Variable
tornado_outcome = tor_df.iloc[:, [2]]
train_outcome_Variable = Variable(
    torch.from_numpy(tornado_outcome.values).float()
)

In [None]:
# Load the test split
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists
test_df = pd.read_csv("/home/jeremydiaz/tornadoesr/data/raw/tor_test_set.csv")


# Predictors: every column from position 3 onward, as a float Variable
test_predictors = test_df.iloc[:, 3:]
test_predictors_Variable = Variable(
    torch.from_numpy(test_predictors.values).float()
)


# Outcome: the column at position 2, selected with a list so it stays 2-D,
# then converted ndarray -> float tensor -> Variable
test_outcome = test_df.iloc[:, [2]]
test_outcome_Variable = Variable(
    torch.from_numpy(test_outcome.values).float()
)

In [None]:
def convert_prop_dam_to_binary(property_damage_values):
    """Convert continuous property-damage values to binary damage indicators.

    Every entry equal to the minimum of the input is mapped to 0 and every
    other entry is mapped to 1. The intent is to flag whether or not a tornado
    caused any damage, with the smallest value standing for "no damage".

    Parameters
    ----------
    property_damage_values : torch Tensor / Variable
        Property-damage values; the notebook passes a single column of
        shape (n, 1). The input is NOT modified.

    Returns
    -------
    torch Tensor / Variable
        Binary values with the same shape and dtype as the input.
    """
    # View of the tensor's storage as an ndarray; it is only read from here on
    prop_dam_array = property_damage_values.data.numpy()

    # Nothing to binarise (and min() of an empty array would raise)
    if prop_dam_array.size == 0:
        return Variable(torch.from_numpy(prop_dam_array.copy()))

    # Take the minimum ONCE, before any value is replaced. Recomputing it while
    # overwriting entries with 0/1 compares later rows against the wrong minimum
    # as soon as all of the true-minimum rows have been processed.
    lowest_value = prop_dam_array.min()

    # Vectorised comparison builds a fresh array, so the caller's tensor is untouched
    binary_array = (prop_dam_array != lowest_value).astype(prop_dam_array.dtype)

    return Variable(torch.from_numpy(binary_array))

In [None]:
# Binarise the outcomes of both splits (training first, then test)
train_Y_binary, test_Y_binary = [
    convert_prop_dam_to_binary(outcome)
    for outcome in (train_outcome_Variable, test_outcome_Variable)
]

<br><br><br>

# Logistic Regression - Manual

In [None]:
# Inspect the shape of the training predictors; the second dimension is the
# per-row feature count that the model's first nn.Linear layer must accept.
train_predictors_Variable.size()

In [None]:
# Fix the RNG so the weight initialisation of the model built below is reproducible
torch.manual_seed(123)


class LogisticRegression(torch.nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Parameters
    ----------
    n_features : int, optional
        Number of predictor columns per observation. Defaults to 51, the
        width previously hard-coded for this notebook's data.
    """

    def __init__(self, n_features=51):
        super(LogisticRegression, self).__init__()
        self.logistic_layer = nn.Sequential(nn.Linear(n_features, 1),
                                            nn.Sigmoid())

    def forward(self, x):
        """Return one probability per row of ``x``, shape (n_rows, 1)."""
        return self.logistic_layer(x)


# Instantiate the logistic-regression model
classifier = LogisticRegression()

# Binary cross-entropy on the sigmoid outputs, minimised by Adam with its default settings
loss_function = nn.BCELoss()
optimizer = torch.optim.Adam(params=classifier.parameters())

In [None]:
# Per-iteration loss histories, stored as plain Python floats for plotting
loss_list = []
test_loss_list = []

# Full-batch training: every iteration runs the whole training set through the model
for i in range(1000):
    # Clear the gradients left over from the previous iteration
    optimizer.zero_grad()

    predictions = classifier(train_predictors_Variable)
    test_predictions = classifier(test_predictors_Variable)

    loss = loss_function(predictions, train_Y_binary)
    test_loss = loss_function(test_predictions, test_Y_binary)

    # float() converts a single-element tensor to a Python number. Indexing the
    # loss with [0] fails on PyTorch versions where the loss is a 0-dim tensor
    # (0.4 warns, later releases raise IndexError).
    loss_list.append(float(loss.data))
    test_loss_list.append(float(test_loss.data))

    # Only the training loss drives the update; the test loss is tracked for monitoring
    loss.backward()
    optimizer.step()

In [None]:
# Learning curves: training vs. test loss recorded at every iteration
fig, ax = plt.subplots()
ax.plot(loss_list, label='train')
ax.plot(test_loss_list, label='test')
ax.set_xlabel('Training iteration')
ax.set_ylabel('Binary cross-entropy loss')
ax.set_title('Logistic regression: training vs. test loss')
ax.legend();

In [None]:
# Score the test predictors with the fitted model
test_predictions = classifier(test_predictors_Variable)

# Threshold each predicted probability at 0.5: below -> class 0, otherwise -> class 1
plain_prediction_list = [
    0 if row.data.numpy()[0] < 0.5 else 1
    for row in test_predictions
]

In [None]:
# Plain-Python copy of the binary test labels, used by the scikit-learn metrics below
test_Y_binary_list = test_Y_binary.data.numpy().tolist()

In [None]:
# Fraction of test observations whose predicted class matches the true class
accuracy_score(y_true=test_Y_binary_list, y_pred=plain_prediction_list)

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted classes. Passing the predictions first
# transposes the matrix, swapping false positives and false negatives.
confusion_matrix(test_Y_binary_list, plain_prediction_list)

<br><br><br>

# Adding hidden layers - Neural Network

In [None]:
# Fix the RNG so the weight initialisation of the model built below is reproducible
torch.manual_seed(123)


class NeuralNetwork(torch.nn.Module):
    """Binary classifier with one ReLU hidden layer and a sigmoid output.

    Parameters
    ----------
    n_features : int, optional
        Number of predictor columns per observation. Defaults to 51, the
        width previously hard-coded for this notebook's data.
    n_hidden : int, optional
        Number of units in the hidden layer. Defaults to 26, the size
        previously hard-coded.
    """

    def __init__(self, n_features=51, n_hidden=26):
        super(NeuralNetwork, self).__init__()
        self.hidden_layer = nn.Sequential(nn.Linear(n_features, n_hidden),
                                          nn.ReLU())
        self.output_layer = nn.Sequential(nn.Linear(n_hidden, 1),
                                          nn.Sigmoid())

    def forward(self, x):
        """Return one probability per row of ``x``, shape (n_rows, 1)."""
        hidden_output = self.hidden_layer(x)
        return self.output_layer(hidden_output)


# Instantiate the neural network
classifier = NeuralNetwork()

# Binary cross-entropy on the sigmoid outputs, minimised by Adam with its default settings
loss_function = nn.BCELoss()
optimizer = torch.optim.Adam(params=classifier.parameters())

In [None]:
# Per-iteration loss histories, stored as plain Python floats for plotting
loss_list = []
test_loss_list = []

# Full-batch training: every iteration runs the whole training set through the model
for i in range(1000):
    # Clear the gradients left over from the previous iteration
    optimizer.zero_grad()

    predictions = classifier(train_predictors_Variable)
    test_predictions = classifier(test_predictors_Variable)

    loss = loss_function(predictions, train_Y_binary)
    test_loss = loss_function(test_predictions, test_Y_binary)

    # float() converts a single-element tensor to a Python number. Indexing the
    # loss with [0] fails on PyTorch versions where the loss is a 0-dim tensor
    # (0.4 warns, later releases raise IndexError).
    loss_list.append(float(loss.data))
    test_loss_list.append(float(test_loss.data))

    # Only the training loss drives the update; the test loss is tracked for monitoring
    loss.backward()
    optimizer.step()

In [None]:
# Learning curves: training vs. test loss recorded at every iteration
fig, ax = plt.subplots()
ax.plot(loss_list, label='train')
ax.plot(test_loss_list, label='test')
ax.set_xlabel('Training iteration')
ax.set_ylabel('Binary cross-entropy loss')
ax.set_title('Neural network: training vs. test loss')
ax.legend();

In [None]:
# Score the test predictors with the fitted model
test_predictions = classifier(test_predictors_Variable)

# Threshold each predicted probability at 0.5: below -> class 0, otherwise -> class 1
plain_prediction_list = [
    0 if row.data.numpy()[0] < 0.5 else 1
    for row in test_predictions
]

In [None]:
# Fraction of test observations whose predicted class matches the true class
accuracy_score(y_true=test_Y_binary_list, y_pred=plain_prediction_list)

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted classes. Passing the predictions first
# transposes the matrix, swapping false positives and false negatives.
confusion_matrix(test_Y_binary_list, plain_prediction_list)

<br><br><br>