In [8]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers.legacy import Adam


# 1. Data Loading 


In [9]:
# Read the merged loan dataset from disk, then preview its first rows.
merged_data_path = '/Users/eduardoangeli/Library/CloudStorage/OneDrive-Fanshawec.ca/Capstone/Dataset/merged_data.csv'
merged_data = pd.read_csv(merged_data_path)
merged_data.head()

Unnamed: 0,home_ownership,annual_inc,loan_amnt,dti,emp_length,label
0,MORTGAGE,55000.0,3600.0,0.1998,10+ years,1
1,MORTGAGE,65000.0,24700.0,0.1998,10+ years,1
2,MORTGAGE,63000.0,20000.0,0.1998,10+ years,1
3,MORTGAGE,110000.0,35000.0,0.1998,10+ years,1
4,MORTGAGE,104433.0,10400.0,0.1998,3 years,1


# 2. Feature Engineering

In [10]:
# One-hot encode the two categorical columns so that every feature is numeric.
encoded_data = pd.get_dummies(merged_data, columns=['home_ownership', 'emp_length'])

# Separate the features (X) from the target (y).
X = encoded_data.drop(columns=['label'])
# 'Unnamed: 0' holds 0, 1, 2, ... in the preview of merged_data, i.e. it looks
# like a row index that was written into the CSV rather than an applicant
# attribute. Leaving it in X would let the model fit on row position, so it is
# removed whenever it is present (errors='ignore' keeps this a no-op otherwise).
X = X.drop(columns=['Unnamed: 0'], errors='ignore')
y = merged_data['label']

# Work with plain NumPy arrays from here on.
X = X.to_numpy()
y = y.to_numpy()

# Shuffle the rows with a fixed seed so that the train/test split is identical
# on every "Restart Kernel -> Run All".
SPLIT_SEED = 42
indices = np.random.default_rng(SPLIT_SEED).permutation(X.shape[0])
X_shuffled = X[indices]
y_shuffled = y[indices]

# Calculate the split index
test_size = 0.2  # 20% for testing
split_index = int(X.shape[0] * (1 - test_size))

# Split the data. NOTE: no feature scaling is applied in this cell.
X_train, X_test = X_shuffled[:split_index], X_shuffled[split_index:]
y_train, y_test = y_shuffled[:split_index], y_shuffled[split_index:]

# 3. Model Training and Evaluation

In [11]:
# Cast every split to float32 in one pass; the order of the names on the left
# matches the order of the arrays on the right.
X_train, X_test, y_train, y_test = (
    split.astype('float32') for split in (X_train, X_test, y_train, y_test)
)

In [12]:
# Report, for each split in turn (X_train, X_test, y_train, y_test), whether it
# contains at least one NaN.
nan_flags = [np.isnan(split).any() for split in (X_train, X_test, y_train, y_test)]
print(*nan_flags)

True True False False


In [13]:
# Clean the feature matrices with np.nan_to_num's default settings
# (NaN entries are replaced by 0.0).
X_train, X_test = (np.nan_to_num(features) for features in (X_train, X_test))

In [14]:
# Build the network: a single sigmoid unit fed by every input feature.
n_features = X_train.shape[1]
model = Sequential([
    Dense(1, activation='sigmoid', input_shape=(n_features,)),
])

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs in mini-batches of 32; the held-out split is passed as the
# validation data reported after each epoch.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x28c029780>

# 4. Return a (mock) prediction to the user

In [15]:
def generate_random_user_and_predict():
    """Print a randomly generated loan applicant and a mock loan decision.

    The applicant's attributes are drawn with the ``random`` module, packed
    into a one-row DataFrame and one-hot encoded. The decision itself is a
    mock: it is a random pick between 0 and 1 and does not use the encoded
    frame. Nothing is returned; the report is written to standard output.
    """
    # Allowed values for the two categorical attributes.
    ownership_options = ['MORTGAGE', 'RENT', 'OWN']
    tenure_options = ['< 1 year', '1 year', '2 years', '3 years', '4 years', '5 years',
                      '6 years', '7 years', '8 years', '9 years', '10+ years']

    # Draw the applicant. The entries are evaluated top to bottom, so the
    # sequence of random draws is: ownership, income, loan amount, DTI, tenure.
    applicant = {
        'home_ownership': random.choice(ownership_options),
        'annual_inc': random.uniform(20000, 150000),  # income between 20k and 150k
        'loan_amnt': random.uniform(500, 40000),  # loan amount between $500 and $40k
        'dti': random.uniform(0.1, 0.9),  # DTI between 10% and 90%
        'emp_length': random.choice(tenure_options),
    }

    # One-row frame with the categorical attributes one-hot encoded.
    applicant_frame = pd.DataFrame([applicant])
    applicant_encoded = pd.get_dummies(applicant_frame, columns=['home_ownership', 'emp_length'])

    # TODO: Add any necessary scaling or further processing here

    # Mock prediction (randomly assigning acceptance or rejection)
    decision = random.choice([0, 1])
    verdict = "ACCEPTED" if decision == 1 else "REJECTED"

    # Assemble the report once; only the verdict differs between outcomes.
    report_lines = [
        "Generated user with:",
        f"Home Ownership: {applicant['home_ownership']}",
        f"Annual Income: ${applicant['annual_inc']:.2f}",
        f"Loan Amount: ${applicant['loan_amnt']:.2f}",
        f"DTI: {applicant['dti']*100:.2f}%",
        f"Employment Length: {applicant['emp_length']}",
        "",
        f"Prediction: Loan {verdict}",
    ]
    print("\n".join(report_lines))

# Example usage
generate_random_user_and_predict()


Generated user with:
Home Ownership: MORTGAGE
Annual Income: $120858.45
Loan Amount: $16371.13
DTI: 34.65%
Employment Length: 4 years

Prediction: Loan ACCEPTED


# 5. Save the model for conversion

In [16]:
# Write the trained model to disk for deployment on the cloud.
model_output_path = '/Users/eduardoangeli/Documents/GitHub/smartcredit/TF_Model.h5'
model.save(model_output_path)


  saving_api.save_model(
