In [1]:
# Initial Set-Up

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from time import process_time
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.metrics import mean_squared_error as MSE
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

##### 1.	What is a neural network? What are the general steps required to build a neural network? 

You start by importing the data to be analyzed.  Then you create the model (typically using Sequential).  Next, you use the .add function to add the hidden layers for the model, specifying how many nodes are in each layer and an activation function as well.  For the output layer, you designate the number of possible outputs, and for a categorization problem, you use the activation function of "softmax."  After creating the model and its layers, you compile the model.  When compiling, you specify and optimizer such as Adam, as well as the loss function and possibly a list of metrics to be used.  After compiling, you fit the model to the data, specifying the features and the targets and adding optional parameter for number of epochs, callback functions, and a validation_split.

##### 2.	Generally, how do you check the performance of a neural network? Why? 

After fitting the model, it's typical to use a validation split rather than cross-validation.  With large datasets, a single score is sufficient.  You can also tune the model by assessing its performance while experimenting with the number of layers and/or the number of nodes in each layer, observing changes in the accuracy scores along the way.  The callback function EarlyStopping in keras can be useful during this process, as it allows you to run a higher number of epochs and stopping when the accuracy score stops improving.  This process will help you to avoid overfitting or underfitting the data, identifying a model that is highly performant.

##### 3.	Create a neural network using keras to predict the outcome of either of these datasets: 
##### Cardiac Arrhythmia: https://archive.ics.uci.edu/ml/datasets/Arrhythmia 
##### Abalone age: https://archive.ics.uci.edu/ml/datasets/Abalone


In [2]:
# Import data

col_names=['sex','length','diameter','height','whole_weight','shucked_weight','viscera_weight','shell_weight','rings']

abalone_df = pd.read_csv('abalone.data', header = None, names = col_names, delimiter=',')

abalone_df.head()

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [3]:
abalone_df.describe()

Unnamed: 0,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
count,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0
mean,0.523992,0.407881,0.139516,0.828742,0.359367,0.180594,0.238831,9.933684
std,0.120093,0.09924,0.041827,0.490389,0.221963,0.109614,0.139203,3.224169
min,0.075,0.055,0.0,0.002,0.001,0.0005,0.0015,1.0
25%,0.45,0.35,0.115,0.4415,0.186,0.0935,0.13,8.0
50%,0.545,0.425,0.14,0.7995,0.336,0.171,0.234,9.0
75%,0.615,0.48,0.165,1.153,0.502,0.253,0.329,11.0
max,0.815,0.65,1.13,2.8255,1.488,0.76,1.005,29.0


In [4]:
# Use one-hot encoding since the sex column has three categories that are not ordinal

# Encode sex column
one_hot = pd.get_dummies(abalone_df['sex'])
abalone_df = abalone_df.drop(['sex'],axis = 1)
abalone_encoded=abalone_df.join(one_hot)
abalone_encoded.head()

Unnamed: 0,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings,F,I,M
0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,0,0,1
1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,0,0,1
2,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,1,0,0
3,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,0,0,1
4,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,0,1,0


In [5]:
X=abalone_encoded[['shell_weight', 'shucked_weight', 'I']]
y=abalone_df['rings']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 43)  

sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

In [11]:
n_cols = X.shape[1] 

t1_start = process_time() 

model_1 = Sequential()
model_1.add(Dense(7000, activation='relu', input_shape = (n_cols,)))
model_1.add(Dense(7000, activation='relu'))    
model_1.add(Dense(1))
model_1.compile(optimizer='adam', loss='mean_squared_error')

model_1.fit(X_train_scaled, y_train, validation_data = (X_test_scaled,y_test))

t1_stop = process_time()

print ('Processing time = ', t1_stop - t1_start)

Processing time =  207.953125


##### 4.	Write another algorithm to predict the same result as the previous question using either KNN or logistic regression.

In [7]:
t1_start = process_time() 

model_2=KNR(n_neighbors=3)
model_2.fit(X_train_scaled, y_train)
y_pred = model_2.predict(X_test)
score = MSE(y_pred, y_test)
print('loss: ', score)

t1_stop = process_time()

print ('Processing time = ', t1_stop - t1_start)

loss:  13.019315966684387
Processing time =  0.03125




##### 5.	Create a neural network using pytorch to predict the same result as question 3. 

In [8]:
#create tensors = matrices 
X_train = torch.FloatTensor(X_train) 
X_test = torch.FloatTensor(X_test)

y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

print(X_train)

ValueError: could not determine the shape of object type 'DataFrame'

In [None]:
#artificial neural network
class ANN_Model(nn.Module):
    def __init__(self, input_features=8,hidden1=20,hidden2=20,out_features=2):
        super().__init__() #super is a computed indirect reference. So, it isolates changes
        # and makes sure that children in the layers of multiple inheritence are calling
        #the right parents
        self.layer_1_connection = nn.Linear(input_features, hidden1)
        self.layer_2_connection = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_features)
        
    def forward(self, x):
        #apply activation functions
        x = F.relu(self.layer_1_connection(x))
        x = F.relu(self.layer_2_connection(x))
        x = self.out(x)
        return x

In [None]:
torch.manual_seed(43)

#create instance of model
ann = ANN_Model()
#loss function
loss_function = nn.CrossEntropyLoss()

#optimizer
optimizer = torch.optim.Adam(ann.parameters(),lr=0.01)
#run model through multiple epochs/iterations
final_loss = []
n_epochs = 500
for epoch in range(n_epochs):
    y_pred = ann.forward(X_train)
    loss = loss_function(y_pred, y_train)
    final_loss.append(loss)
    
    if epoch % 10 == 1:
        print(f'Epoch number: {epoch} with loss: {loss}')
        
    optimizer.zero_grad() #zero the gradient before running backwards propagation
    loss.backward() 
    optimizer.step() #perform one optimization step each epoch

##### 6.	Compare the performance of the neural networks to the other model you created. Which performed better? Why do you think that is?

### DataCamp

![Intro%20to%20Deep%20Learning%20in%20Python.png](attachment:Intro%20to%20Deep%20Learning%20in%20Python.png)