In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf

In [2]:
# Import our input dataset
companies_df = pd.read_csv("All.csv")
companies_df.head()

Unnamed: 0,sector,Name,symbol,code,year,Current ratio,Quick Ratio,Cash ratio,Operating cash flow ratio,Working capital ratio,...,EV / EBIT,EV / Free Cash Flow,EV / Invested Capital,EV / Revenue,P/E Ratio,Price/Book,Dividend Per Share,Altman Z-Score,Piotroski Score,Classification
0,Academic & Education,Global Education Communities,GEC,A&E 01,2013,0.63,0.61,0.2,-0.1,0.63,...,-6.2x,-4.0x,0.9x,0.9x,-5.9x,1.1x,0.0,-3.1,0,0
1,Academic & Education,Global Education Communities,GEC,A&E 01,2014,0.78,0.76,0.16,-0.25,0.78,...,-9.0x,-2.6x,0.7x,0.5x,-14.3x,1.0x,0.0,-0.9,6,0
2,Academic & Education,Global Education Communities,GEC,A&E 01,2015,0.27,0.26,0.05,0.08,0.27,...,-22.7x,16.6x,1.0x,1.9x,-18.7x,1.1x,0.0,-0.7,4,0
3,Academic & Education,Global Education Communities,GEC,A&E 01,2016,0.76,0.73,0.27,-0.04,0.76,...,54.3x,84.6x,1.1x,2.4x,6.4x,1.5x,0.0,2.9,4,1
4,Academic & Education,Global Education Communities,GEC,A&E 01,2017,0.32,0.31,0.12,0.18,0.32,...,162.1x,28.4x,1.2x,3.4x,30.6x,1.8x,0.0,1.9,4,1


In [3]:
# Replacing x suffix from the DataFrame

columns_with_x = ['EV/EBITDA', 'EV / EBIT', 'EV / Free Cash Flow',
                    'EV / Invested Capital', 'EV / Revenue', 'P/E Ratio', 'Price/Book']

# Function to remove 'x' and convert to numeric
def remove_x_convert_numeric(value):
    if isinstance(value, str):
        return pd.to_numeric(value.replace('x', ''), errors='coerce')
    return value

# Apply the function to the specified columns
for column in columns_with_x:
    companies_df[column] = companies_df[column].apply(remove_x_convert_numeric)


In [4]:
companies_df.head()

Unnamed: 0,sector,Name,symbol,code,year,Current ratio,Quick Ratio,Cash ratio,Operating cash flow ratio,Working capital ratio,...,EV / EBIT,EV / Free Cash Flow,EV / Invested Capital,EV / Revenue,P/E Ratio,Price/Book,Dividend Per Share,Altman Z-Score,Piotroski Score,Classification
0,Academic & Education,Global Education Communities,GEC,A&E 01,2013,0.63,0.61,0.2,-0.1,0.63,...,-6.2,-4.0,0.9,0.9,-5.9,1.1,0.0,-3.1,0,0
1,Academic & Education,Global Education Communities,GEC,A&E 01,2014,0.78,0.76,0.16,-0.25,0.78,...,-9.0,-2.6,0.7,0.5,-14.3,1.0,0.0,-0.9,6,0
2,Academic & Education,Global Education Communities,GEC,A&E 01,2015,0.27,0.26,0.05,0.08,0.27,...,-22.7,16.6,1.0,1.9,-18.7,1.1,0.0,-0.7,4,0
3,Academic & Education,Global Education Communities,GEC,A&E 01,2016,0.76,0.73,0.27,-0.04,0.76,...,54.3,84.6,1.1,2.4,6.4,1.5,0.0,2.9,4,1
4,Academic & Education,Global Education Communities,GEC,A&E 01,2017,0.32,0.31,0.12,0.18,0.32,...,162.1,28.4,1.2,3.4,30.6,1.8,0.0,1.9,4,1


In [5]:
# Define the Columns to Convert to float
columns_to_convert = [
    'Interest coverage ratio',
    'The Long-Term Debt Coverage Ratio',
    'Total debt to EBITDA ratio',
    'Inventory turnover ratio',
    'Receivables turnover ratio',
    'Working capital turnover ratio',
    'EBITDA'
]

def convert_to_float(df, columns):
    for column in columns:
        # Remove commas and convert to float
        df[column] = pd.to_numeric(df[column].str.replace(',', ''), errors='coerce')

# Apply the conversion function to the DataFrame
convert_to_float(companies_df, columns_to_convert)


In [6]:
# Drop columns from the DataFrame
companies_df.drop(columns=['sector', 'symbol', 'code', 'Altman Z-Score'], inplace=True)

In [7]:
# Convert categorical data to numeric using one-hot encoding
dummies_companies_df = pd.get_dummies(companies_df)

In [None]:
# render_username = 'forecating_companies_future_user'
# render_password = 'qRvkrYzSYuo6TvDWAh8SMcQokhT5pYyb'
# render_host = 'dpg-cog4tdmv3ddc73e67q00-a.ohio-postgres.render.com'
# database = 'forecating_companies_future'
# engine = create_engine(f"postgresql+psycopg2://{render_username}:{render_password}@{render_host}:5432/{database}")

In [None]:
# df.to_sql('Company_Data', engine)

In [9]:
# Splitting the data into features and target
y = dummies_companies_df['Classification'].values
X = dummies_companies_df.drop(columns='Classification').values


In [10]:
# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [11]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
#Define the Model with Keras Tuner
def create_model(hp):
    model = tf.keras.models.Sequential()
    # Activation for hidden layers
    hidden_activation = hp.Choice('hidden_activation', ['softmax', 'relu', 'tanh'])

    # First layer
    model.add(tf.keras.layers.Dense(
        units=hp.Int('first_units', min_value=10, max_value=100, step=10),
        activation=hidden_activation,
        input_shape=(X_train_scaled.shape[1],)))

    # Additional hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        model.add(tf.keras.layers.Dense(
            units=hp.Int(f'units_layer_{i}', min_value=10, max_value=100, step=10),
            activation=hidden_activation))

    # Output layer with tunable activation
    output_activation = hp.Choice('output_activation', ['softmax', 'relu', 'tanh'])
    model.add(tf.keras.layers.Dense(1, activation=output_activation))

    # Compile the model based on the chosen activation for the output
    if output_activation == 'sigmoid':
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    else:
        # You might need to define a different setup if not using sigmoid
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model



In [17]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/129.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [18]:
# Import the kerastuner library
import keras_tuner as kt
from kerastuner.tuners import RandomSearch

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

  from kerastuner.tuners import RandomSearch


In [19]:
# Import the kerastuner library
import keras_tuner as kt
from kerastuner.tuners import RandomSearch

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

Trial 60 Complete [00h 00m 04s]
val_accuracy: 0.08520179241895676

Best val_accuracy So Far: 0.08520179241895676
Total elapsed time: 00h 02m 30s


In [None]:
# Get top 3 model hyperparameters and print the values
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
    print(param.values)

{'hidden_activation': 'softmax', 'first_units': 30, 'num_layers': 5, 'units_layer_0': 60, 'output_activation': 'relu', 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0, 'units_layer_1': 10, 'units_layer_2': 10, 'units_layer_3': 10, 'units_layer_4': 10}
{'hidden_activation': 'relu', 'first_units': 10, 'num_layers': 2, 'units_layer_0': 10, 'output_activation': 'softmax', 'units_layer_1': 10, 'units_layer_2': 90, 'units_layer_3': 20, 'units_layer_4': 40, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}
{'hidden_activation': 'tanh', 'first_units': 10, 'num_layers': 4, 'units_layer_0': 90, 'output_activation': 'relu', 'units_layer_1': 90, 'units_layer_2': 10, 'units_layer_3': 10, 'units_layer_4': 50, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}


In [20]:
# Evaluate the top 3 models against the test dataset
top_models = tuner.get_best_models(3)
for i, model in enumerate(top_models, start=1):
    model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)
    print(f"Model {i}: Loss: {model_loss}, Accuracy: {model_accuracy}")

14/14 - 0s - loss: nan - accuracy: 0.0852 - 173ms/epoch - 12ms/step
Model 1: Loss: nan, Accuracy: 0.08520179241895676
14/14 - 0s - loss: nan - accuracy: 0.0852 - 209ms/epoch - 15ms/step
Model 2: Loss: nan, Accuracy: 0.08520179241895676
14/14 - 0s - loss: nan - accuracy: 0.0852 - 203ms/epoch - 14ms/step
Model 3: Loss: nan, Accuracy: 0.08520179241895676


In [21]:
# Creating the model with the specified hyperparameters
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(50, activation='relu', input_dim=X_train_scaled.shape[1]),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1, activation='tanh')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [22]:
# Train the model
model.fit(X_train_scaled, y_train, epochs=20, validation_split=0.2)

# Evaluate the model using the test data
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test loss: {loss}, Test accuracy: {accuracy}")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: nan, Test accuracy: 0.08520179241895676


In [None]:
# Saving the model
model.save("Forecasting_bankrupt_companies.h5")