In [1]:
# UT-TOR-DATA-PT-01-2020-U-C Group Project 3
# Final project
# Neural Network. Model 1 testing on July 2020 data
# (c) Boris Smirnov

In [2]:
# Depedences and Constants
from sqlalchemy import create_engine
from config import PGPASSWORD
import pandas as pd

db_connect_str = f"postgresql://postgres:{PGPASSWORD}@project-3.c0wevqvgoxbl.us-east-2.rds.amazonaws.com:5432/project3"

In [3]:
# Connect to the database and fetch all the data
engine = create_engine(db_connect_str)
connection = engine.connect()
query = 'select * from prognosis order by "FED Id"'
df = pd.read_sql(query, connection)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 338 entries, 0 to 337
Columns: 135 entries, FED Id to employed...15+ by mode of transportation other methods FC108373
dtypes: float64(55), int64(79), object(1)
memory usage: 356.6+ KB


In [4]:
# One-hot encode province Ids
# First change numeric Geo Ids to Alpha Codes
province_dct = {
    10: "NL",
    11: "PE",
    12: "NS",
    13: "NB",
    24: "QC",
    35: "ON",
    46: "MB",
    47: "SK",
    48: "AB",
    59: "BC",
    60: "YT",
    61: "NT",
    62: "NU" 
}
df['Prov Id'] = df['Prov Id'].map(province_dct)

# Then encode province id as one-hot and replace old column new new columns
one_hot = pd.get_dummies(df['Prov Id'])
df = df.drop('Prov Id', axis=1)
df = df.join(one_hot)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 338 entries, 0 to 337
Columns: 147 entries, FED Id to YT
dtypes: float64(55), int64(78), object(1), uint8(13)
memory usage: 358.3+ KB


In [5]:
# Convert pandas dataframe to numpy array
data = df.values
X = data[:, 2:]
y = data[:, 1] # winning party only (projection)

In [23]:
# Step 1: Label-encode data set
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

In [24]:
# Step 2: One-hot encoding
from keras.utils import to_categorical

one_hot_y = to_categorical(encoded_y, num_classes=6)
one_hot_y

array([[0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       ...,
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.]], dtype=float32)

In [19]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler model and fit it to the training data
X_scaler = StandardScaler().fit(X)

# Transform the data set
X_scaled = X_scaler.transform(X)

In [20]:
# Load the model
from tensorflow.keras.models import load_model

model = load_model("model1-146-30--40_trained.h5")
model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_36 (Dense)             (None, 146)               21316     
_________________________________________________________________
dense_37 (Dense)             (None, 30)                4410      
_________________________________________________________________
dense_38 (Dense)             (None, 6)                 186       
Total params: 25,912
Trainable params: 25,912
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Evaluate the model using the testing data
model_loss, model_accuracy = model.evaluate(
    X_scaled, one_hot_y, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

338/338 - 0s - loss: nan - accuracy: 0.6953
Loss: nan, Accuracy: 0.6952662467956543


In [26]:
# Trying to get prediction results
predictions = model.predict_classes(np.array(X_scaled))
parties = label_encoder.inverse_transform(predictions)
df['Predicted Pid'] = pd.Series(parties)
df[df['Winning Pid'] != df['Predicted Pid']].count()

FED Id           103
Winning Pid      103
lib_age          103
lib_tenure       103
cpc_age          103
                ... 
PE               103
QC               103
SK               103
YT               103
Predicted Pid    103
Length: 148, dtype: int64

In [29]:
df.loc[df['Winning Pid'] != df['Predicted Pid'], ['FED Id', 'Winning Pid', 'Predicted Pid']]

Unnamed: 0,FED Id,Winning Pid,Predicted Pid
0,10001,LIB,BQ
1,10002,LIB,BQ
2,10003,LIB,BQ
3,10004,LIB,BQ
4,10005,LIB,BQ
...,...,...,...
325,59033,LIB,NDP
328,59036,LIB,NDP
332,59040,LIB,NDP
334,59042,LIB,CPC
