## Preprocessing

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping  # Import EarlyStopping
import keras_tuner as kt
import sqlite3


In [2]:
# Connect to DB and import data into a pandas dataframe
connection = sqlite3.connect('../data/db.sqlite')
ratings = pd.read_sql_query("SELECT * FROM ratings", connection)
clean = pd.read_sql_query("SELECT * FROM clean", connection)
dummies = pd.read_sql_query("SELECT * FROM dummies", connection)

connection.close()



In [3]:
dummies.head()

Unnamed: 0,Binary Rating,SIC Code,Current Ratio,Long-term Debt / Capital,Debt/Equity Ratio,Gross Margin,Operating Margin,EBIT Margin,EBITDA Margin,Pre-Tax Profit Margin,...,Sector_Durbl,Sector_Enrgy,Sector_Hlth,Sector_Manuf,Sector_Money,Sector_NoDur,Sector_Other,Sector_Shops,Sector_Telcm,Sector_Utils
0,1,4941.0,1.1507,0.4551,0.8847,77.623,19.4839,19.4839,28.9834,13.6093,...,0,0,0,0,0,0,0,0,0,1
1,1,7374.0,1.1129,0.0072,0.0073,43.6619,19.8327,19.8327,23.9379,20.8699,...,0,0,0,0,0,0,0,0,0,0
2,1,5065.0,1.9276,0.2924,0.4255,11.9008,3.3173,3.3173,3.6338,3.0536,...,0,0,0,0,0,0,0,1,0,0
3,1,4941.0,0.8358,0.4708,0.9491,64.5096,18.4549,18.4549,27.9377,15.1135,...,0,0,0,0,0,0,0,0,0,1
4,1,5122.0,1.2931,0.2644,0.4036,3.8385,1.3269,1.3269,1.5847,1.2304,...,0,0,0,0,0,0,0,1,0,0


In [4]:
# Indicate Model Number 
model = 7

# Split our preprocessed data into our features and target arrays
X = dummies.drop(['Binary Rating'], axis=1)
y = dummies['Binary Rating']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# Hidden layer
nn.add(tf.keras.layers.Dense(units=4,
             input_dim=number_input_features, activation="relu"))

# Hidden layer
nn.add(tf.keras.layers.Dense(
    units=2, activation="relu"))

# Hidden layer
nn.add(tf.keras.layers.Dense(
    units=1, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
EPOCHS = 75
mini_path = f'../models/model{model}/checkpoints/'
checkpoint_filepath = mini_path+'weights.epoch_{epoch:02d}.hdf5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_freq=5,
    verbose=True)

# Fit the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=EPOCHS, callbacks=[model_checkpoint_callback])



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 4)                 5804      
                                                                 
 dense_1 (Dense)             (None, 2)                 10        
                                                                 
 dense_2 (Dense)             (None, 1)                 3         
                                                                 
 dense_3 (Dense)             (None, 1)                 2         
                                                                 
Total params: 5,819
Trainable params: 5,819
Non-trainable params: 0
_________________________________________________________________
Epoch 1/75


2024-03-18 16:46:27.785424: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


  1/183 [..............................] - ETA: 38s - loss: 0.6966 - accuracy: 0.2812
Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5
 30/183 [===>..........................] - ETA: 0s - loss: 0.7005 - accuracy: 0.5688 
Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weights.epoch_01.hdf5

Epoch 1: saving model to ../models/model7/checkpoints/weigh

In [5]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


61/61 - 0s - loss: 0.8524 - accuracy: 0.6998 - 91ms/epoch - 1ms/step
Loss: 0.8523546457290649, Accuracy: 0.6997950673103333


In [6]:
# Export our model to HDF5 file
nn.save(f'../models/model{model}/model.h5')