In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from torch import torch, nn
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import warnings
import pickle
import os

In [1]:
def split_sequences(input_sequences, output_sequence, n_steps_in, n_steps_out):
    """Cut aligned series into sliding (input window, target window) pairs.

    For every start row ``i`` the input window is
    ``input_sequences[i : i + n_steps_in]``. The matching target is taken from
    the last column of ``output_sequence`` and begins on the final row of the
    input window: rows ``i + n_steps_in - 1`` up to, but not including,
    ``i + n_steps_in + n_steps_out - 1``.

    Args:
        input_sequences: Row-sliceable sequence; its length bounds the windows.
        output_sequence: Array indexed as ``[rows, -1]`` (two-dimensional).
        n_steps_in: Number of rows in each input window.
        n_steps_out: Number of rows in each target window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the collected windows.
        Both are empty when no complete window fits.
    """
    total_rows = len(input_sequences)
    windows = []
    targets = []
    for start in range(total_rows):
        in_stop = start + n_steps_in
        out_stop = in_stop + n_steps_out - 1
        # Stop as soon as the target window would run past the data.
        if out_stop > total_rows:
            break
        window = input_sequences[start:in_stop]
        target = output_sequence[in_stop - 1:out_stop, -1]
        windows.append(window)
        targets.append(target)
    return np.array(windows), np.array(targets)

In [7]:
# Load the saved model for NAICS sector 11 and rebuild the scaled, windowed
# tensors that the prediction cell feeds to it.
# NOTE(review): the recorded output of this cell is
# "AttributeError: Can't get attribute 'LSTM' on <module '__main__'>", i.e. the
# unpickler looks for a class named LSTM in this session. Define or import that
# class in an earlier cell before running this line.
# NOTE(review): torch.load unpickles the file; only load model files from a
# trusted source.
lstm = torch.load('lstm_models_pt/naics_11_model.pt')
df = pd.read_csv('revised_processed_data.csv')
# Keep only the rows whose NAICS value is 11.
all0 = df[df['NAICS'] == 11]
# pred_len is passed to split_sequences as n_steps_out and is also the number
# of trailing windows held out as the test set below.
# Do not change
pred_len = 2
# Features are every column except the target (TOT_EMP), NAICS and YEAR.
X = all0.drop(columns=['TOT_EMP','NAICS','YEAR'])
y = all0['TOT_EMP'].values
# feed_days is passed to split_sequences as n_steps_in (rows per input window).
feed_days = 8
mm = MinMaxScaler()
ss = StandardScaler()

# Scalers are fitted on all rows of this sector; mm is reused by the
# prediction cell to invert the scaling of the model output.
X_trans = ss.fit_transform(X)
y_trans = mm.fit_transform(y.reshape(-1, 1)) 
# Build sliding windows: X_ss holds feed_days rows per sample, y_mm holds
# pred_len target values per sample.
X_ss, y_mm = split_sequences(X_trans, y_trans, feed_days, pred_len)
# The last pred_len windows form the test split; everything before is train.
X_train = X_ss[:-pred_len]
X_test = X_ss[-pred_len:]

y_train = y_mm[:-pred_len]
y_test = y_mm[-pred_len:]
X_train_tensors = torch.Tensor(X_train)
X_test_tensors = torch.Tensor(X_test)

y_train_tensors = torch.Tensor(y_train)
y_test_tensors = torch.Tensor(y_test)
# Reshape to (samples, feed_days, features). The windows produced by
# split_sequences already have feed_days rows each, so the layout is unchanged.
X_train_tensors_final = torch.reshape(X_train_tensors,   
                                   (X_train_tensors.shape[0], feed_days, 
                                   X_train_tensors.shape[2]))
X_test_tensors_final = torch.reshape(X_test_tensors,  
                                   (X_test_tensors.shape[0], feed_days, 
                                   X_test_tensors.shape[2])) 
# Sector titles; the plotting cell uses entry 0 for the figure title.
naics_titles = np.array(['Agriculture, Forestry, Fishing and Hunting',
       'Mining, Quarrying, and Oil and Gas Extraction', 'Utilities',
       'Construction', 'Manufacturing', 'Wholesale Trade', 'Retail Trade',
       'Transportation and Warehousing', 'Information',
       'Finance and Insurance', 'Real Estate and Rental and Leasing',
       'Professional, Scientific, and Technical Services',
       'Management of Companies and Enterprises',
       'Administrative and Support and Waste Management and Remediation Services',
       'Educational Services', 'Health Care and Social Assistance',
       'Arts, Entertainment, and Recreation',
       'Accommodation and Food Services',
       'Other Services (except Public Administration)',
       'Federal, State, and Local Government'],
      dtype=object)

AttributeError: Can't get attribute 'LSTM' on <module '__main__'>

In [4]:
# Do not change this cell
# Predict the final test window, map prediction and target back to the
# original TOT_EMP scale, and plot the prediction against the observed series.
# Relies on names created by the data-preparation cell: lstm, mm, y, pred_len,
# X_test_tensors_final, y_test_tensors and naics_titles.
test_predict = lstm(X_test_tensors_final[-1].unsqueeze(0)) # get the last sample
test_predict = test_predict.detach().numpy()
test_predict = mm.inverse_transform(test_predict)
test_predict = test_predict[0].tolist()

test_target = y_test_tensors[-1].detach().numpy() # last sample again
test_target = mm.inverse_transform(test_target.reshape(1, -1))
test_target = test_target[0].tolist()

#Plotting
plt.figure(figsize=(10,6)) #plotting
# One x position per observed value; the y-data must be the observed series
# itself (a scalar here cannot be drawn against a list of x positions).
a = list(range(len(y)))
plt.plot(a, y, label='Actual data')
# The prediction line starts at the last observation before the predicted
# steps so it connects to the actual curve, then covers the final pred_len
# positions. Deriving the range from len(y) keeps it the same length as
# pred_trend (pred_len + 1 points).
c = list(range(len(y) - pred_len - 1, len(y)))
pred_trend = np.insert(test_predict,0,y[-pred_len-1])
plt.plot(c, pred_trend, label='Prediction for 2022-2023 data')
plt.xticks([i for i in range(19)])
plt.axvline(x=16, c='r', linestyle='--')
plt.xlabel("Years Since 2005")
plt.ylabel("Total Employment")
plt.title(f"Total Employment across {naics_titles[0]} over time")
plt.legend()
plt.show()
plt.clf()

NameError: name 'lstm' is not defined