# In [None]:
#DEEP LEARNING LOAN DATA PROJECT
# Import necessary libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations
import matplotlib.pyplot as plt  # For plotting graphs
import seaborn as sns  # For making statistical graphics
from sklearn.model_selection import train_test_split  # For splitting the dataset into training and testing sets
from sklearn.preprocessing import StandardScaler  # For feature scaling
from sklearn.metrics import confusion_matrix, classification_report  # For model evaluation
import tensorflow as tf  # For building and training neural network models
from tensorflow import keras  # High-level API for TensorFlow

# Load and clean the dataset
# Read the CSV, discard every row that contains a missing value, and replace
# the categorical 'purpose' column with one indicator column per category.
df = pd.get_dummies(
    pd.read_csv('loan_data.csv').dropna(),
    columns=['purpose'],
)

# Separate the label from the predictors
# 'credit.policy' is the column to predict; every other column is a feature.
y = df['credit.policy']
X = df.drop(columns=['credit.policy'])

# Hold out 20% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
# NOTE(review): the split is not stratified; if the two classes turn out to
# be imbalanced, consider passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features
# The scaler learns its statistics from the training rows only; the same
# fitted transformation is then applied to both splits, so the test rows do
# not influence the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Define the feed-forward binary classifier
# No input shape is declared, so the layer weights are created the first
# time the model is called on data.
model = keras.Sequential()
model.add(keras.layers.Dense(32, activation='relu'))    # Hidden layer 1: 32 ReLU units
model.add(keras.layers.Dense(16, activation='relu'))    # Hidden layer 2: 16 ReLU units
model.add(keras.layers.Dense(1, activation='sigmoid'))  # Output: one sigmoid unit giving a score in (0, 1)

# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Training the model
# Validation metrics are computed on a slice carved out of the training data
# (validation_split) instead of on X_test / y_test. Monitoring training on
# the test set would mean the final test evaluation is no longer performed
# on data the workflow has never looked at.
model.fit(
    X_train,
    y_train,
    epochs=100,            # Number of passes over the training data
    batch_size=32,         # Number of samples per gradient update
    validation_split=0.2,  # Last 20% of the training rows are held out for validation
)

# Evaluating the model on the test data
# evaluate() returns the loss followed by the compiled metrics (accuracy).
test_loss, test_acc = model.evaluate(X_test, y_test)
# Report the held-out metrics explicitly instead of discarding them.
print(f"Test loss: {test_loss:.4f} - Test accuracy: {test_acc:.4f}")

# Generating predictions and evaluating the model
# Threshold the sigmoid outputs at 0.5 to turn scores into 0/1 class labels.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(confusion_matrix(y_test, y_pred))  # Counts of true vs. predicted labels
print(classification_report(y_test, y_pred))  # Per-class precision, recall, F1 and support

