### Goldsmiths University of London
### Author....: Carlos Manuel de Oliveira Alves
### Student...: cdeol003
### Created...: 24/02/2023
### FYP.......: NeuroCredit

In [72]:
# Import libraries for data loading, model training, train/test splitting, and feature scaling
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Cell purpose: fit a logistic-regression approval model on data.csv, then
# score one hand-entered applicant (test data 1) and report the decision,
# the approval probability, and the hold-out accuracy.

# Cut-off applied to the predicted approval probability: the loan is reported
# as approved when the probability is strictly greater than this value.
# NOTE(review): the other test cells in this notebook use 0.5, and
# lr.predict / lr.score also use the model's built-in 0.5 cut-off, so the
# accuracy printed below is not measured at this 0.4 threshold. Confirm that
# the lower threshold is intentional.
APPROVAL_THRESHOLD = 0.4

# Load the dataset with random data with loan records
df = pd.read_csv("data.csv")

# One-hot encode every categorical column (the target column included, which
# yields approval_status_Approved / approval_status_Rejected).
df_encoded = pd.get_dummies(df)

# Features are everything except the two target indicator columns; the label
# is the "Approved" indicator.
X = df_encoded.drop(["approval_status_Approved", "approval_status_Rejected"], axis=1)
y = df_encoded["approval_status_Approved"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise features; the scaler is fitted on the training split only and
# then re-used for the test split and for the new applicant.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# New applicant - test data 1.
# Reference row (the values below, followed by what appears to be the expected label):
# Good,Employed,Other,Excellent,Mortgage,Other,Other,34017,18704,73,13,2,5,0,6316,6875,22,2067,5985,Approved
new_data = pd.DataFrame({
    'credit_history': 'Good',
    'employment_status': 'Employed',
    'collateral': 'Other',
    'payment_history': 'Excellent',
    'type_of_credit_accounts': 'Mortgage',
    'public_records_and_collections': 'Other',
    'purpose_of_loan': 'Other',
    'income': 34017,
    'assets_value': 18704,
    'debt_to_income_ratio': 73,
    'length_of_credit_history': 13,
    'number_of_credit_inquiries': 2,
    'number_of_credit_accounts': 5,
    'number_of_credit_accounts_opened_last_12_months': 0,
    'current_balance_of_credit_accounts': 6316,
    'total_credit_limit': 6875,
    'total_credit_utilization': 22,
    'loan_amount': 2067,
    'saving_account_balance': 5985
}, index=[0])

# Encode the new data
new_data_encoded = pd.get_dummies(new_data)

# Align to the training feature layout: indicator columns the single row does
# not produce are added as 0, and any column absent from X_train (e.g. a
# category value never seen in training, or a typo) is silently dropped.
new_data_encoded = new_data_encoded.reindex(columns=X_train.columns, fill_value=0)

# Scale the new data with the scaler fitted on the training split
new_data_scaled = scaler.transform(new_data_encoded)

# Model's own class prediction (built-in 0.5 cut-off); kept so the name stays
# available to any later cell. With the 0.4 threshold above, the decision
# printed below can differ from this value.
approval_prediction = lr.predict(new_data_scaled)

# Probability of the positive ("Approved") class, as a one-element array
approval_probability = lr.predict_proba(new_data_scaled)[:, 1]

# Single threshold test on a plain Python float. This is what the previous
# nested if/else reduced to: lr.predict only returns the positive class when
# the probability is above 0.5, so its "prediction == 1 and probability < 0.4"
# branch could never run.
loan_approved = approval_probability.item() > APPROVAL_THRESHOLD
print(">> The loan is approved" if loan_approved else ">> The loan is rejected")

# Print the approval probability
print("Approval Probability..: {:.2f}%".format(approval_probability.item() * 100))

# Evaluate the model accuracy on the held-out 30% split
print("Accuracy of the Model.: {:.2f}%".format(lr.score(X_test_scaled, y_test) * 100))


>> The loan is approved
Approval Probability..: 47.89%
Accuracy of the Model.: 87.00%


In [69]:
# Import libraries for data loading, model training, train/test splitting, and feature scaling
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Cell purpose: fit a logistic-regression approval model on data.csv, then
# score one hand-entered applicant (test data 2) and report the decision,
# the approval probability, and the hold-out accuracy.

# Cut-off applied to the predicted approval probability: the loan is reported
# as approved when the probability is strictly greater than this value.
# 0.5 is the same cut-off lr.predict and lr.score apply internally.
APPROVAL_THRESHOLD = 0.5

# Load the dataset with random data with loan records
df = pd.read_csv("data.csv")

# One-hot encode every categorical column (the target column included, which
# yields approval_status_Approved / approval_status_Rejected).
df_encoded = pd.get_dummies(df)

# Features are everything except the two target indicator columns; the label
# is the "Approved" indicator.
X = df_encoded.drop(["approval_status_Approved", "approval_status_Rejected"], axis=1)
y = df_encoded["approval_status_Approved"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise features; the scaler is fitted on the training split only and
# then re-used for the test split and for the new applicant.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# New applicant - test data 2.
# Reference row (the values below, followed by what appears to be the expected label):
# Good,Self-Employed,House,Fair,Personal,Bankruptcy,Debt Consolidation,12769,12859,91,27,4,1,0,7840,17289,50,3130,4953,Rejected
new_data = pd.DataFrame({
    'credit_history': 'Good',
    'employment_status': 'Self-Employed',
    'collateral': 'House',
    'payment_history': 'Fair',
    'type_of_credit_accounts': 'Personal',
    'public_records_and_collections': 'Bankruptcy',
    'purpose_of_loan': 'Debt Consolidation',
    'income': 12769,
    'assets_value': 12859,
    'debt_to_income_ratio': 91,
    'length_of_credit_history': 27,
    'number_of_credit_inquiries': 4,
    'number_of_credit_accounts': 1,
    'number_of_credit_accounts_opened_last_12_months': 0,
    'current_balance_of_credit_accounts': 7840,
    'total_credit_limit': 17289,
    'total_credit_utilization': 50,
    'loan_amount': 3130,
    'saving_account_balance': 4953
}, index=[0])

# Encode the new data
new_data_encoded = pd.get_dummies(new_data)

# Align to the training feature layout: indicator columns the single row does
# not produce are added as 0, and any column absent from X_train (e.g. a
# category value never seen in training, or a typo) is silently dropped.
new_data_encoded = new_data_encoded.reindex(columns=X_train.columns, fill_value=0)

# Scale the new data with the scaler fitted on the training split
new_data_scaled = scaler.transform(new_data_encoded)

# Model's own class prediction (built-in 0.5 cut-off); kept so the name stays
# available to any later cell.
approval_prediction = lr.predict(new_data_scaled)

# Probability of the positive ("Approved") class, as a one-element array
approval_probability = lr.predict_proba(new_data_scaled)[:, 1]

# Single threshold test on a plain Python float. The previous nested if/else
# re-checked lr.predict's result against the same 0.5 cut-off, so two of its
# four branches could never run. A probability of exactly 0.5 is reported as
# rejected.
loan_approved = approval_probability.item() > APPROVAL_THRESHOLD
print(">> The loan is approved" if loan_approved else ">> The loan is rejected")

# Print the approval probability
print("Approval Probability..: {:.2f}%".format(approval_probability.item() * 100))

# Evaluate the model accuracy on the held-out 30% split
print("Accuracy of the Model.: {:.2f}%".format(lr.score(X_test_scaled, y_test) * 100))


>> The loan is rejected
Approval Probability..: 8.84%
Accuracy of the Model.: 87.00%


In [68]:
# Import libraries for data loading, model training, train/test splitting, and feature scaling
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Cell purpose: fit a logistic-regression approval model on data.csv, then
# score one hand-entered applicant (test data 3) and report the decision,
# the approval probability, and the hold-out accuracy.

# Cut-off applied to the predicted approval probability: the loan is reported
# as approved when the probability is strictly greater than this value.
# 0.5 is the same cut-off lr.predict and lr.score apply internally.
APPROVAL_THRESHOLD = 0.5

# Load the dataset with random data with loan records
df = pd.read_csv("data.csv")

# One-hot encode every categorical column (the target column included, which
# yields approval_status_Approved / approval_status_Rejected).
df_encoded = pd.get_dummies(df)

# Features are everything except the two target indicator columns; the label
# is the "Approved" indicator.
X = df_encoded.drop(["approval_status_Approved", "approval_status_Rejected"], axis=1)
y = df_encoded["approval_status_Approved"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise features; the scaler is fitted on the training split only and
# then re-used for the test split and for the new applicant.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# New applicant - test data 3.
# Reference row (the values below, followed by what appears to be the expected label):
# Good,Employed,Other,Poor,Student,None,Home Improvement,18561,23741,12,100,1,4,2,18277,18047,33,2411,16367,Approved
# NOTE(review): pandas >= 2.0 read_csv treats the literal text "None" as a
# missing value by default. If data.csv stores this category as "None", the
# training frame may have no public_records_and_collections_None column, in
# which case the indicator built here is dropped by the reindex below -
# confirm against the installed pandas version (see keep_default_na).
new_data = pd.DataFrame({
    'credit_history': 'Good',
    'employment_status': 'Employed',
    'collateral': 'Other',
    'payment_history': 'Poor',
    'type_of_credit_accounts': 'Student',
    'public_records_and_collections': 'None',
    'purpose_of_loan': 'Home Improvement',
    'income': 18561,
    'assets_value': 23741,
    'debt_to_income_ratio': 12,
    'length_of_credit_history': 100,
    'number_of_credit_inquiries': 1,
    'number_of_credit_accounts': 4,
    'number_of_credit_accounts_opened_last_12_months': 2,
    'current_balance_of_credit_accounts': 18277,
    'total_credit_limit': 18047,
    'total_credit_utilization': 33,
    'loan_amount': 2411,
    'saving_account_balance': 16367
}, index=[0])

# Encode the new data
new_data_encoded = pd.get_dummies(new_data)

# Align to the training feature layout: indicator columns the single row does
# not produce are added as 0, and any column absent from X_train (e.g. a
# category value never seen in training, or a typo) is silently dropped.
new_data_encoded = new_data_encoded.reindex(columns=X_train.columns, fill_value=0)

# Scale the new data with the scaler fitted on the training split
new_data_scaled = scaler.transform(new_data_encoded)

# Model's own class prediction (built-in 0.5 cut-off); kept so the name stays
# available to any later cell.
approval_prediction = lr.predict(new_data_scaled)

# Probability of the positive ("Approved") class, as a one-element array
approval_probability = lr.predict_proba(new_data_scaled)[:, 1]

# Single threshold test on a plain Python float. The previous nested if/else
# re-checked lr.predict's result against the same 0.5 cut-off, so two of its
# four branches could never run. A probability of exactly 0.5 is reported as
# rejected.
loan_approved = approval_probability.item() > APPROVAL_THRESHOLD
print(">> The loan is approved" if loan_approved else ">> The loan is rejected")

# Print the approval probability
print("Approval Probability..: {:.2f}%".format(approval_probability.item() * 100))

# Evaluate the model accuracy on the held-out 30% split
print("Accuracy of the Model.: {:.2f}%".format(lr.score(X_test_scaled, y_test) * 100))


>> The loan is approved
Approval Probability..: 66.63%
Accuracy of the Model.: 87.00%


In [67]:
# Import libraries for data loading, model training, train/test splitting, and feature scaling
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Cell purpose: fit a logistic-regression approval model on data.csv, then
# score one hand-entered applicant (test data 4) and report the decision,
# the approval probability, and the hold-out accuracy.

# Cut-off applied to the predicted approval probability: the loan is reported
# as approved when the probability is strictly greater than this value.
# 0.5 is the same cut-off lr.predict and lr.score apply internally.
APPROVAL_THRESHOLD = 0.5

# Load the dataset with random data with loan records
df = pd.read_csv("data.csv")

# One-hot encode every categorical column (the target column included, which
# yields approval_status_Approved / approval_status_Rejected).
df_encoded = pd.get_dummies(df)

# Features are everything except the two target indicator columns; the label
# is the "Approved" indicator.
X = df_encoded.drop(["approval_status_Approved", "approval_status_Rejected"], axis=1)
y = df_encoded["approval_status_Approved"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise features; the scaler is fitted on the training split only and
# then re-used for the test split and for the new applicant.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# New applicant - test data 4.
# Reference row (the values below, followed by what appears to be the expected label):
# Good,Self-Employed,Other,Good,Auto,Other,Other,18477,35372,30,51,2,4,3,13319,2090,26,3460,17630,Approved
new_data = pd.DataFrame({
    'credit_history': 'Good',
    'employment_status': 'Self-Employed',
    'collateral': 'Other',
    'payment_history': 'Good',
    'type_of_credit_accounts': 'Auto',
    'public_records_and_collections': 'Other',
    'purpose_of_loan': 'Other',
    'income': 18477,
    'assets_value': 35372,
    'debt_to_income_ratio': 30,
    'length_of_credit_history': 51,
    'number_of_credit_inquiries': 2,
    'number_of_credit_accounts': 4,
    'number_of_credit_accounts_opened_last_12_months': 3,
    'current_balance_of_credit_accounts': 13319,
    'total_credit_limit': 2090,
    'total_credit_utilization': 26,
    'loan_amount': 3460,
    'saving_account_balance': 17630
}, index=[0])

# Encode the new data
new_data_encoded = pd.get_dummies(new_data)

# Align to the training feature layout: indicator columns the single row does
# not produce are added as 0, and any column absent from X_train (e.g. a
# category value never seen in training, or a typo) is silently dropped.
new_data_encoded = new_data_encoded.reindex(columns=X_train.columns, fill_value=0)

# Scale the new data with the scaler fitted on the training split
new_data_scaled = scaler.transform(new_data_encoded)

# Model's own class prediction (built-in 0.5 cut-off); kept so the name stays
# available to any later cell.
approval_prediction = lr.predict(new_data_scaled)

# Probability of the positive ("Approved") class, as a one-element array
approval_probability = lr.predict_proba(new_data_scaled)[:, 1]

# Single threshold test on a plain Python float. The previous nested if/else
# re-checked lr.predict's result against the same 0.5 cut-off, so two of its
# four branches could never run. A probability of exactly 0.5 is reported as
# rejected.
loan_approved = approval_probability.item() > APPROVAL_THRESHOLD
print(">> The loan is approved" if loan_approved else ">> The loan is rejected")

# Print the approval probability
print("Approval Probability..: {:.2f}%".format(approval_probability.item() * 100))

# Evaluate the model accuracy on the held-out 30% split
print("Accuracy of the Model.: {:.2f}%".format(lr.score(X_test_scaled, y_test) * 100))

>> The loan is approved
Approval Probability..: 79.76%
Accuracy of the Model.: 87.00%


In [66]:
# Import libraries for data loading, model training, train/test splitting, and feature scaling
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Cell purpose: fit a logistic-regression approval model on data.csv, then
# score one hand-entered applicant (test data 5) and report the decision,
# the approval probability, and the hold-out accuracy.

# Cut-off applied to the predicted approval probability: the loan is reported
# as approved when the probability is strictly greater than this value.
# 0.5 is the same cut-off lr.predict and lr.score apply internally.
APPROVAL_THRESHOLD = 0.5

# Load the dataset with random data with loan records
df = pd.read_csv("data.csv")

# One-hot encode every categorical column (the target column included, which
# yields approval_status_Approved / approval_status_Rejected).
df_encoded = pd.get_dummies(df)

# Features are everything except the two target indicator columns; the label
# is the "Approved" indicator.
X = df_encoded.drop(["approval_status_Approved", "approval_status_Rejected"], axis=1)
y = df_encoded["approval_status_Approved"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise features; the scaler is fitted on the training split only and
# then re-used for the test split and for the new applicant.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# New applicant - test data 5.
# Reference row (the values below, followed by what appears to be the expected label):
# Excellent,Employed,Land,Fair,Student,Bankruptcy,Home Improvement,41282,13191,38,54,5,3,3,15807,5887,92,3152,19206,Rejected
new_data = pd.DataFrame({
    'credit_history': 'Excellent',
    'employment_status': 'Employed',
    'collateral': 'Land',
    'payment_history': 'Fair',
    'type_of_credit_accounts': 'Student',
    'public_records_and_collections': 'Bankruptcy',
    'purpose_of_loan': 'Home Improvement',
    'income': 41282,
    'assets_value': 13191,
    'debt_to_income_ratio': 38,
    'length_of_credit_history': 54,
    'number_of_credit_inquiries': 5,
    'number_of_credit_accounts': 3,
    'number_of_credit_accounts_opened_last_12_months': 3,
    'current_balance_of_credit_accounts': 15807,
    'total_credit_limit': 5887,
    'total_credit_utilization': 92,
    'loan_amount': 3152,
    'saving_account_balance': 19206
}, index=[0])

# Encode the new data
new_data_encoded = pd.get_dummies(new_data)

# Align to the training feature layout: indicator columns the single row does
# not produce are added as 0, and any column absent from X_train (e.g. a
# category value never seen in training, or a typo) is silently dropped.
new_data_encoded = new_data_encoded.reindex(columns=X_train.columns, fill_value=0)

# Scale the new data with the scaler fitted on the training split
new_data_scaled = scaler.transform(new_data_encoded)

# Model's own class prediction (built-in 0.5 cut-off); kept so the name stays
# available to any later cell.
approval_prediction = lr.predict(new_data_scaled)

# Probability of the positive ("Approved") class, as a one-element array
approval_probability = lr.predict_proba(new_data_scaled)[:, 1]

# Single threshold test on a plain Python float. The previous nested if/else
# re-checked lr.predict's result against the same 0.5 cut-off, so two of its
# four branches could never run. A probability of exactly 0.5 is reported as
# rejected.
loan_approved = approval_probability.item() > APPROVAL_THRESHOLD
print(">> The loan is approved" if loan_approved else ">> The loan is rejected")

# Print the approval probability
print("Approval Probability..: {:.2f}%".format(approval_probability.item() * 100))

# Evaluate the model accuracy on the held-out 30% split
print("Accuracy of the Model.: {:.2f}%".format(lr.score(X_test_scaled, y_test) * 100))

>> The loan is rejected
Approval Probability..: 0.33%
Accuracy of the Model.: 87.00%
