# FS Consulting

## Overview


---

### Load Preprocessed Data
---

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
from torchvision import datasets, transforms
import model_1
from torch import optim

# Read the preprocessed feature and target tables from disk.
# The DataFrames get their own names so that `train_set` / `test_set` only
# ever refer to the TensorDatasets built below.
train_df = pd.read_csv('./data/Preprocessed_TrainingSet.csv')
train_targets = pd.read_csv('./data/Preprocessed_TrainingTargets.csv')
test_df = pd.read_csv('./data/Preprocessed_TestingSet.csv')
test_targets = pd.read_csv('./data/Preprocessed_TestingTargets.csv')

# Wrap features/targets as float tensors and batch them 64 at a time.
train_set = torch.utils.data.TensorDataset(torch.Tensor(np.array(train_df)), torch.Tensor(np.array(train_targets)))
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 64, shuffle = True)

test_set = torch.utils.data.TensorDataset(torch.Tensor(np.array(test_df)), torch.Tensor(np.array(test_targets)))
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 64, shuffle = True)

# Sanity check: pull one batch from each loader and report its shape.
# The builtin next() is used because DataLoader iterators in current PyTorch
# releases no longer provide a `.next()` method.
dataiter = iter(train_loader)
features, targets = next(dataiter)
print("Training Set Tensors:")
print("  Features:",features.shape)
print("  Targets: ",targets.shape)
dataiter = iter(test_loader)
features, targets = next(dataiter)
print("Test Set Tensors:")
print("  Features:",features.shape)
print("  Targets: ",targets.shape)

Training Set Tensors:
  Features: torch.Size([64, 908])
  Targets:  torch.Size([64, 1])
Test Set Tensors:
  Features: torch.Size([64, 908])
  Targets:  torch.Size([64, 1])


### Load Model

In [10]:
def load_checkpoint(filepath):
    """Rebuild a model and its training objects from a saved checkpoint.

    The checkpoint must be a dict containing the keys read below:
    'input_size', 'output_size', 'hidden_layers', 'drop_out', 'state_dict',
    'criterion', 'optimizer' (a pickled optimizer object) and 'learning_rate'.

    Args:
        filepath: Checkpoint location, passed straight to ``torch.load``.

    Returns:
        Tuple ``(model, criterion, optimizer, learning_rate)``. The optimizer
        is a new instance of the saved optimizer's class that is bound to the
        returned model's parameters and carries the saved optimizer state.
    """
    # NOTE(security): the checkpoint stores whole pickled objects (criterion,
    # optimizer), so loading it executes pickle code. Only load trusted files.
    # Newer PyTorch refuses to unpickle such objects unless weights_only=False
    # is passed; older releases reject that keyword with a TypeError.
    # map_location='cpu' lets a checkpoint written on a GPU machine load on a
    # CPU-only one (the model below is constructed without an explicit device).
    try:
        checkpoint = torch.load(filepath, map_location='cpu', weights_only=False)
    except TypeError:
        checkpoint = torch.load(filepath, map_location='cpu')

    model = model_1.JobPerformance(
        checkpoint['input_size'],
        checkpoint['output_size'],
        checkpoint['hidden_layers'],
        checkpoint['drop_out'],
    )
    model.load_state_dict(checkpoint['state_dict'])
    criterion = checkpoint['criterion']
    learning_rate = checkpoint['learning_rate']

    # The unpickled optimizer still references the parameter tensors that were
    # pickled with it, not the parameters of the model built above, so stepping
    # it would never update `model`. Re-create the optimizer on the new
    # parameters and copy the saved state (hyperparameters, moment estimates).
    # Assumes the saved optimizer used a single parameter group covering all
    # model parameters in order - TODO confirm if custom groups are introduced.
    saved_optimizer = checkpoint['optimizer']
    optimizer = type(saved_optimizer)(model.parameters(), lr=learning_rate)
    optimizer.load_state_dict(saved_optimizer.state_dict())

    return model, criterion, optimizer, learning_rate

# Restore the network and its training objects from the last saved checkpoint.
# Criterion and optimizer come from the checkpoint itself, so nothing is
# re-created by hand in this cell.
model, criterion, optimizer, learning_rate = load_checkpoint('checkpoint.pth')

# Echo each restored object under its own heading, followed by a blank line.
summary = (
    ("Our Model: \n", model),
    ("State Dict Keys:\n", model.state_dict().keys()),
    ("Criterion:", criterion),
    ("Optimizer:", optimizer),
)
for heading, value in summary:
    print(heading, value, "\n")

print("")

Our Model: 
 JobPerformance(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=908, out_features=780, bias=True)
    (1): Linear(in_features=780, out_features=524, bias=True)
    (2): Linear(in_features=524, out_features=256, bias=True)
    (3): Linear(in_features=256, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.0)
) 

State Dict Keys:
 odict_keys(['hidden_layers.0.weight', 'hidden_layers.0.bias', 'hidden_layers.1.weight', 'hidden_layers.1.bias', 'hidden_layers.2.weight', 'hidden_layers.2.bias', 'hidden_layers.3.weight', 'hidden_layers.3.bias', 'output.weight', 'output.bias']) 

Criterion: MSELoss() 

Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.003
    weight_decay: 0
) 




### Create New Model

Pass in model configuration parameters.

In [3]:
# Hyperparameters for a freshly initialised network.
learning_rate = 0.003
dropout_percentage = 0.0

# 908 input features -> four hidden layers -> a single output value.
hidden_layer_sizes = [780, 524, 256, 64]
model = model_1.JobPerformance(908, 1, hidden_layer_sizes, dropout_percentage)

# Mean-squared-error loss, optimised with Adam over all model parameters.
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print(model)

JobPerformance(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=908, out_features=780, bias=True)
    (1): Linear(in_features=780, out_features=524, bias=True)
    (2): Linear(in_features=524, out_features=256, bias=True)
    (3): Linear(in_features=256, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.0)
)


### Train Model


In [11]:
# Train with the helper from model_1; it hands back two results that are kept
# as the training and test loss histories (presumably one entry per epoch -
# confirm against model_1.train).
num_epochs = 25
train_losses, test_losses = model_1.train(
    model,
    train_loader,
    test_loader,
    criterion,
    optimizer,
    epochs=num_epochs,
)


Epoch: 1
  Train Loss (MSE):     46,355.8
  Test Loss  (MSE):     66,536.8
  Accuracy:                -90.3
Epoch: 2
  Train Loss (MSE):     46,355.8
  Test Loss  (MSE):     66,542.1
  Accuracy:                -90.7


KeyboardInterrupt: 

### Plot Results 

In [3]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt

plt.plot(train_losses, label='Training Loss')
plt.plot(test_losses, label='Validation Loss')
plt.legend(frameon=False)

NameError: name 'train_losses' is not defined

### Save Model

In [7]:
# Collect everything load_checkpoint reads back: the constructor arguments
# for the network, the training objects, and the learned weights.
state = model.state_dict()
hidden_sizes = [layer.out_features for layer in model.hidden_layers]

checkpoint = {}
# Architecture
checkpoint['input_size'] = model.input_size
checkpoint['output_size'] = model.output_size
checkpoint['hidden_layers'] = hidden_sizes
checkpoint['drop_out'] = model.dropout_percentage
# Training objects (stored as whole objects, not state dicts)
checkpoint['criterion'] = criterion
checkpoint['optimizer'] = optimizer
checkpoint['learning_rate'] = learning_rate
# Learned parameters
checkpoint['state_dict'] = state

print("State dict keys:", state.keys())

torch.save(checkpoint, 'checkpoint.pth')

State dict keys: odict_keys(['hidden_layers.0.weight', 'hidden_layers.0.bias', 'hidden_layers.1.weight', 'hidden_layers.1.bias', 'hidden_layers.2.weight', 'hidden_layers.2.bias', 'hidden_layers.3.weight', 'hidden_layers.3.bias', 'output.weight', 'output.bias'])


### Best Results

### Ideas
- Try a 90/10 train/test split of the data
- See if there is a way to iterate through different 90/10 splits of the data (e.g., k-fold cross-validation)
- Consider different architecture or a larger number of layers
- Try different loss functions
- Try different learning rates



### Minimum Objective MSE = 172,400
### Target Objective MSE = 52,500


### Best MSE = 35,000
- Layers: [908,780,524,256,64,1]
- Training setup: [loss=MSE, optimizer=Adam, lr=0.003, dropout=0.0]
- Epochs: 