# FS Consulting


## Load New Data

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
from torchvision import datasets, transforms
import model_1
from torch import optim
from torch.utils.data.sampler import RandomSampler

# Load the preprocessed feature matrix that the model will score.
# pd.read_csv raises on a missing or unreadable file, so execution only
# reaches the lines below when the file itself could be read.
production_data = pd.read_csv('./data/1814_Preprocessed_TestSet.csv')

# Placeholder targets: one all-NaN float column with the same number of rows
# as production_data. Production data carries no labels, but the
# TensorDataset built from these frames needs a target tensor of equal length.
production_targets = pd.DataFrame(index=range(production_data.shape[0]),columns=range(1),dtype='float')

# read_csv either returns a DataFrame or raises, so a type check can never
# fail. Test for an empty frame (no rows or no columns) instead, which is the
# failure that would otherwise pass silently.
if production_data.empty:
    print("New data failed to import.")
else:
    print("New data successfully imported.")

New data successfully imported.


## Preprocess Data & Setup Data Loader

In [2]:
# Placeholder: any additional preprocessing steps belong here. As written, the
# frames are converted to tensors exactly as they were loaded.

# torch.Tensor(...) converts the NumPy arrays to tensors of torch's default
# floating-point type.
feature_tensor = torch.Tensor(np.array(production_data))
target_tensor = torch.Tensor(np.array(production_targets))

# Bind the dataset to its own name rather than overwriting production_data:
# the DataFrame stays available and this cell can be re-run on its own.
production_dataset = torch.utils.data.TensorDataset(feature_tensor, target_tensor)

# shuffle=False keeps batches in row order, so predictions can later be
# matched back to input rows by position.
production_loader = torch.utils.data.DataLoader(production_dataset, batch_size = 32, shuffle = False)

# A loader with zero batches means there is nothing to predict on.
if len(production_loader) > 0:
    print("New data successfully preprocessed and is ready for predictions.")
else:
    print("Preprocessing failed.")

New data successfully preprocessed and is ready for predictions.


## Load Model

In [6]:
def load_checkpoint(filepath, map_location=None):
    """Rebuild a JobPerformance model and its training objects from a checkpoint.

    Args:
        filepath: Path to a checkpoint file readable by ``torch.load``. The
            loaded object is indexed with the keys 'input_size',
            'output_size', 'hidden_layers', 'drop_out', 'state_dict',
            'criterion', 'optimizer' and 'learning_rate'.
        map_location: Forwarded unchanged to ``torch.load``. The default
            (None) keeps torch.load's standard behaviour; pass 'cpu' to open
            a checkpoint that was saved from a GPU on a CPU-only machine.

    Returns:
        A tuple ``(model, criterion, optimizer, learning_rate)``: the model
        with the saved weights loaded, followed by the three values stored
        under the corresponding checkpoint keys.

    Raises:
        KeyError: If the checkpoint lacks one of the keys listed above
            (assuming the checkpoint is a dict).
    """
    # NOTE(security): torch.load unpickles arbitrary Python objects. Only open
    # checkpoint files that come from a trusted source.
    checkpoint = torch.load(filepath, map_location=map_location)

    # Re-create the architecture from the saved hyper-parameters, then restore
    # the trained weights into it.
    model = model_1.JobPerformance(checkpoint['input_size'],checkpoint['output_size'],checkpoint['hidden_layers'],checkpoint['drop_out'])
    model.load_state_dict(checkpoint['state_dict'])

    # The remaining entries are handed back exactly as they were stored.
    criterion = checkpoint['criterion']
    optimizer = checkpoint['optimizer']
    learning_rate = checkpoint['learning_rate']

    return model, criterion, optimizer, learning_rate

# Restore the trained model together with the loss, optimizer and learning
# rate that were stored alongside it.
model, criterion, optimizer, learning_rate = load_checkpoint('checkpoint_v1.pth')

# Echo what was restored. Each entry is printed as "<label> <value> \n",
# i.e. followed by one blank line.
for summary_label, summary_value in (
    ("Our Model: \n", model),
    ("State Dict Keys:\n", model.state_dict().keys()),
    ("Criterion:", criterion),
    ("Optimizer:", optimizer),
):
    print(summary_label, summary_value, "\n")


Our Model: 
 JobPerformance(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=1814, out_features=1054, bias=True)
    (1): Linear(in_features=1054, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): Linear(in_features=256, out_features=64, bias=True)
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): Linear(in_features=32, out_features=16, bias=True)
  )
  (output): Linear(in_features=16, out_features=1, bias=True)
  (dropout): Dropout(p=0.0)
) 

State Dict Keys:
 odict_keys(['hidden_layers.0.weight', 'hidden_layers.0.bias', 'hidden_layers.1.weight', 'hidden_layers.1.bias', 'hidden_layers.2.weight', 'hidden_layers.2.bias', 'hidden_layers.3.weight', 'hidden_layers.3.bias', 'hidden_layers.4.weight', 'hidden_layers.4.bias', 'hidden_layers.5.weight', 'hidden_layers.5.bias', 'output.weight', 'output.bias']) 

Criterion: SmoothL1Loss() 

Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)


## Obtain & Save Predictions

In [18]:
# Score the production batches. model_1.predict is defined outside this
# notebook; it is expected to return a tensor with one prediction per input
# row, in loader order -- TODO confirm against model_1.
predictions_tensor = model_1.predict(model, production_loader)

# .cpu() is a no-op for tensors already on the CPU and lets .numpy() succeed
# when the predictions live on a GPU. Values are rounded to 2 decimals.
predictions_array = predictions_tensor.detach().cpu().numpy().round(2)
output_dataframe = pd.DataFrame(predictions_array)

raw_test_set = pd.read_csv('./data/TestSet.csv')

# Fail loudly if the prediction count does not match the raw rows; an
# index-aligned assignment would otherwise fill the gap with NaN or drop the
# surplus without any warning.
if len(output_dataframe) != len(raw_test_set):
    raise ValueError(
        "Prediction count (%d) does not match the number of rows in TestSet.csv (%d)."
        % (len(output_dataframe), len(raw_test_set))
    )

# Assign by position: row i of the raw test set receives prediction i.
raw_test_set['job_performance'] = output_dataframe.iloc[:,0].values
raw_test_set.to_csv(r'./data/predictions_v1.csv', index=False)
print("Predictions data saved to CSV file.")

Predictions data saved to CSV file.


## Expected Results
Expected mean squared error (MSE): 31,000–35,000.