In [28]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [29]:
# Read the training CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer="train.csv")

In [30]:
# Data preprocessing:
# keep only rows with no missing value in any column, then discard 'Loan_ID'.
data = data.dropna(how='any').drop(columns=['Loan_ID'])

In [31]:
# Replace each of the columns below with integer codes. A single LabelEncoder
# is refit for every column, so once the loop ends `le` only holds the mapping
# learned from the last column ('Property_Area').
le = LabelEncoder()
for column in ('Gender', 'Married', 'Dependents', 'Education',
               'Self_Employed', 'Property_Area'):
    data[column] = le.fit_transform(data[column])

In [32]:
# Positional split: every column except the last becomes the feature matrix,
# the last column becomes the target vector (both as NumPy arrays).
X, y = data.iloc[:, :-1].to_numpy(), data.iloc[:, -1].to_numpy()

In [33]:
# Split BEFORE scaling. Fitting StandardScaler on all of X (as this notebook
# did previously) lets the held-out rows contribute to the mean/variance used
# to transform the training rows, i.e. test-set information leaks into
# training and the evaluation becomes optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transformation to the test rows.
# NOTE: `X` itself is intentionally left unscaled now; use `sc.transform(X)`
# if a scaled copy of the full matrix is needed.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [35]:
# Applying machine learning algorithms
# Candidate classifiers to compare, as (short label, unfitted estimator) pairs.
# random_state is pinned on the tree-based models because both use random
# numbers while fitting; without a seed their cross-validation scores change
# from run to run. The value 0 matches the seed used for train_test_split.
models = [
    ('LR', LogisticRegression()),
    ('DT', DecisionTreeClassifier(random_state=0)),
    ('RF', RandomForestClassifier(random_state=0)),
]

In [36]:
# Fresh accumulators for the comparison loop: per-model arrays of fold scores
# and the matching model labels, kept in the same order.
results, names = [], []

In [37]:
# Score every candidate with 10-fold cross-validation on the training split,
# record its per-fold accuracies and label, and print the mean accuracy.
for name, model in models:
    cv_results = cross_val_score(
        estimator=model, X=X_train, y=y_train, scoring='accuracy', cv=10
    )
    names.append(name)
    results.append(cv_results)
    print(f'{name}: {cv_results.mean()}')

LR: 0.8159536541889482
DT: 0.7411764705882353
RF: 0.8216577540106951


In [38]:
# Evaluating the model performance
# Fit a random forest on the whole training split and predict the held-out
# test split. random_state is fixed so the forest, and therefore the reported
# test metrics, are identical on every Restart & Run All; an unseeded forest
# gives slightly different numbers each run.
model = RandomForestClassifier(random_state=0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [39]:
# Overall accuracy of the predictions on the held-out test split.
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
# Per-class precision / recall / F1 / support for the same predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.7708333333333334
              precision    recall  f1-score   support

           N       0.83      0.41      0.55        49
           Y       0.76      0.96      0.85        95

    accuracy                           0.77       144
   macro avg       0.80      0.68      0.70       144
weighted avg       0.78      0.77      0.74       144

