In [27]:
import pandas as pd

# Load the uploaded dataset
file_path = 'financial_risk_assessment.csv'
data = pd.read_csv(file_path)

# Display the first few rows to understand its structure
data.head()


Unnamed: 0,Age,Gender,Education Level,Marital Status,Income,Credit Score,Loan Amount,Loan Purpose,Employment Status,Years at Current Job,Payment History,Debt-to-Income Ratio,Assets Value,Number of Dependents,City,State,Country,Previous Defaults,Marital Status Change,Risk Rating
0,49,Male,PhD,Divorced,72799.0,688.0,45713.0,Business,Unemployed,19,Poor,0.154313,120228.0,0.0,Port Elizabeth,AS,Cyprus,2.0,2,Low
1,57,Female,Bachelor's,Widowed,,690.0,33835.0,Auto,Employed,6,Fair,0.14892,55849.0,0.0,North Catherine,OH,Turkmenistan,3.0,2,Medium
2,21,Non-binary,Master's,Single,55687.0,600.0,36623.0,Home,Employed,8,Fair,0.362398,180700.0,3.0,South Scott,OK,Luxembourg,3.0,2,Medium
3,59,Male,Bachelor's,Single,26508.0,622.0,26541.0,Personal,Unemployed,2,Excellent,0.454964,157319.0,3.0,Robinhaven,PR,Uganda,4.0,2,Medium
4,25,Non-binary,Bachelor's,Widowed,49427.0,766.0,36528.0,Personal,Unemployed,10,Fair,0.143242,287140.0,,New Heather,IL,Namibia,3.0,1,Low


In [28]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# Drop rows where Loan Purpose (the target) is missing
data_clean = data.dropna(subset=['Loan Purpose'])

# Encode categorical features
label_encoders = {}
categorical_cols = ['Gender', 'Education Level', 'Marital Status', 'Employment Status', 'Payment History', 'City', 'State', 'Country', 'Risk Rating', 'Loan Purpose']

for col in categorical_cols:
    le = LabelEncoder()
    data_clean[col] = le.fit_transform(data_clean[col].astype(str))
    label_encoders[col] = le

# Impute missing values for numerical columns
imputer = SimpleImputer(strategy='mean')
numerical_cols = ['Age', 'Income', 'Credit Score', 'Loan Amount', 'Years at Current Job', 'Debt-to-Income Ratio', 'Assets Value', 'Number of Dependents', 'Previous Defaults', 'Marital Status Change']

data_clean[numerical_cols] = imputer.fit_transform(data_clean[numerical_cols])

# Separate features (X) and target (y)
X = data_clean.drop(columns=['Loan Purpose'])
y = data_clean['Loan Purpose']

# Normalize the numerical data
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the processed data shape
X_train.shape, X_val.shape, y_train.shape, y_val.shape


((12000, 19), (3000, 19), (12000,), (3000,))

In [30]:
# Build the neural network model
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(len(data_clean['Loan Purpose'].unique()), activation='softmax')
])


# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

# Evaluate the model
loss, accuracy = model.evaluate(X_val, y_val)
print('Validation accuracy:', accuracy)

# Print training and validation accuracy for each epoch
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

for epoch in range(len(train_accuracy)):
    print(f"Epoch {epoch+1}: Training Accuracy = {train_accuracy[epoch]*100:.2f}%, Validation Accuracy = {val_accuracy[epoch]*100:.2f}%")


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.2451 - loss: 25.7511 - val_accuracy: 0.2640 - val_loss: 3.5868
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.2471 - loss: 3.6842 - val_accuracy: 0.2400 - val_loss: 1.9484
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2600 - loss: 3.4187 - val_accuracy: 0.2500 - val_loss: 1.9586
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2431 - loss: 2.9286 - val_accuracy: 0.2543 - val_loss: 1.8762
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2554 - loss: 2.2135 - val_accuracy: 0.2367 - val_loss: 1.6820
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2498 - loss: 2.1645 - val_accuracy: 0.2520 - val_loss: 1.8427
Epoch 7/10
[1m375/375[0m [32m━━━━━━