### Goldsmiths University of London
### Author....: Carlos Manuel de Oliveira Alves
### Student...: cdeol003
### Created...: 24/02/2023
### FYP.......: NeuroCredit

In [36]:
# Import libraries to split the dataset into training and test sets
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset with random data with loan records
df = pd.read_csv("data.csv")

# Encode categorical variables from the dataset
df_encoded = pd.get_dummies(df)

# Split dataset into training and testing sets
X = df_encoded.drop(["approval_status_Approved", "approval_status_Rejected"], axis=1)
y = df_encoded["approval_status_Approved"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize the numerical variables
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# Predict the approval status for new input variables
new_data = pd.DataFrame({
    'credit_history': 'Good',
    'employment_status': 'Employed',
    'collateral': 'Other',
    'payment_history': 'Excellent',
    'type_of_credit_accounts': 'Mortgage',
    'public_records_and_collections': 'Other',
    'purpose_of_loan': 'Other',
    'income': 34017,
    'assets_value': 18704,
    'debt_to_income_ratio': 73,
    'length_of_credit_history': 13,
    'number_of_credit_inquiries': 2,
    'number_of_credit_accounts': 5,
    'number_of_credit_accounts_opened_last_12_months': 0,
    'current_balance_of_credit_accounts': 6316,
    'total_credit_limit': 6875,
    'total_credit_utilization': 22,
    'loan_amount': 2067,
    'saving_account_balance': 5985
}, index=[0])

# Encode the new data
new_data_encoded = pd.get_dummies(new_data)

# Ensure that new_data_encoded has the same columns as X_train
new_data_encoded = new_data_encoded.reindex(columns=X_train.columns, fill_value=0)

# Scale the new data
new_data_scaled = scaler.transform(new_data_encoded)

# Predict the approval status
approval_prediction = lr.predict(new_data_scaled)
approval_probability = lr.predict_proba(new_data_scaled)[:, 1]

if approval_prediction == 0:
    print("The loan is approved")
else:
    print("The loan is rejected")

# Print the approval probability
print("Approval Probability: {:.2f}%".format(approval_probability.item() * 100))


The loan is approved
Approval Probability: 47.89%
