## Imports

In [None]:
# Standard
import json
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os

# Encoding
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

# Scale
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer

# Models
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Scoring
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from joblib import load


In [None]:
# Base name (without extension) of the pre-processed dataset to evaluate;
# it is also echoed in the summary printed after the evaluation loop.
file_name = 'df'

# Build the path with os.path.join so the notebook also runs on non-Windows
# systems; on Windows this yields the same '.\processed_data\<name>.pki' path
# as the previous hard-coded string.
# NOTE: read_pickle can execute arbitrary code while unpickling -- only load
# files that were produced by this project's own preprocessing step.
df = pd.read_pickle(os.path.join('.', 'processed_data', f'{file_name}.pki'))

In [None]:
# Preview the first five rows as a quick sanity check of the loaded frame.
df.head(n=5)

In [None]:
def _add_income_features(frame):
    """Return a copy of *frame* with the engineered income features.

    Adds ``TotalIncome`` (applicant + co-applicant income), ``EMI`` (loan
    amount divided by loan term) and ``BalanceIncome`` (``TotalIncome`` minus
    ``EMI``), then drops the four raw columns they were derived from.

    The work is done on a copy so that assigning columns on the frames
    returned by ``train_test_split`` cannot raise pandas'
    SettingWithCopyWarning.
    """
    frame = frame.copy()
    frame['TotalIncome'] = frame['ApplicantIncome'] + frame['CoapplicantIncome']
    frame['EMI'] = frame['LoanAmount'] / frame['Loan_Amount_Term']
    frame['BalanceIncome'] = frame['TotalIncome'] - frame['EMI']
    return frame.drop(
        columns=['ApplicantIncome', 'CoapplicantIncome', 'Loan_Amount_Term', 'LoanAmount']
    )


# Running sums of the per-iteration scores; averaged over `iters` at the end.
acc = 0.0
f1 = 0.0
iters = 10

for it in range(iters):
    #===================================General Processing====================================
    # Fresh OS-entropy seed per iteration so every repetition uses a different split.
    r = int.from_bytes(os.urandom(8), byteorder='little') % 4294967295
    train, test = train_test_split(df, test_size=.25, random_state=r)

    # Same feature engineering for both splits, applied through one helper.
    train = _add_income_features(train)
    X_train, y_train = train.drop(columns=['Loan_ID', 'Loan_Status']), train['Loan_Status']

    test = _add_income_features(test)
    X_test, y_test = test.drop(columns=['Loan_ID', 'Loan_Status']), test['Loan_Status']

    # The sections below are alternative experiments; exactly one should be
    # active at a time. In all of them the scaler is fitted on the training
    # split only and then applied to the test split.

    #========================================Logistic Regression===================================

    # scaler = StandardScaler()
    # X_train_scaled = scaler.fit_transform(X_train)
    # X_test_scaled = scaler.transform(X_test)

    # # clf = LogisticRegression(solver='liblinear', penalty='l1', class_weight='balanced', C=0.11) # Liblinear
    # clf = LogisticRegression(solver='saga', l1_ratio=0.1, penalty='elasticnet', class_weight='balanced', C=0.01) # Saga

    # model = clf.fit(X_train_scaled, y_train)
    # y_pred = model.predict(X_test_scaled)

    #=======================================Support Vector Machine====================================
    # scaler = StandardScaler()
    # X_train_scaled = scaler.fit_transform(X_train)
    # X_test_scaled = scaler.transform(X_test)
    # clf = SVC(class_weight='balanced', cache_size=10000, kernel='sigmoid', C = .21, gamma='auto')
    # model = clf.fit(X_train_scaled, y_train)

    # y_pred = model.predict(X_test_scaled)

    #======================================Bagging Extra Trees============================================
    # base_model = ExtraTreesClassifier(bootstrap=True, oob_score=True, class_weight='balanced_subsample', criterion='gini', max_features='log2', min_samples_leaf=3, n_estimators=100)
    # r = random.randint(1, 101)
    # model = BaggingClassifier(base_model, n_estimators=100, bootstrap_features=True, random_state=r, n_jobs=-1)
    # model = model.fit(X_train, y_train)
    # y_pred = model.predict(X_test)

    #=====================================Bagging Gradient Boosting======================================
    # r = random.randint(1, 101)
    # model = BaggingClassifier(GradientBoostingClassifier(criterion='friedman_mse', loss='log_loss', max_features='sqrt', n_estimators=100), bootstrap_features=True, random_state=r, n_estimators=50)
    # model = model.fit(X_train, y_train)
    # y_pred = model.predict(X_test)

    #====================================Bagging MLP==============================================
    scaler = StandardScaler()
    # Fit the scaler on the training data only; re-fitting it on the test
    # split would standardise the test set with its own statistics, so the
    # model would be scored on differently-scaled features than it was
    # trained on.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    r = random.randint(1, 101)
    # NOTE: per the scikit-learn docs `learning_rate` is only used by the
    # 'sgd' solver, so 'invscaling' has no effect together with 'adam'.
    mlp = MLPClassifier(
        activation='logistic',
        alpha=.0001,
        hidden_layer_sizes=200,
        learning_rate='invscaling',
        max_iter=500,
        solver='adam',
    )
    # The base estimator is passed positionally because its keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2; the
    # positional form works with both old and new releases.
    model = BaggingClassifier(mlp, bootstrap_features=True, random_state=r, n_estimators=50)
    model = model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    #========================================Scoring===============================================
    curr_f1 = f1_score(y_pred=y_pred, y_true=y_test)
    curr_acc = accuracy_score(y_pred=y_pred, y_true=y_test)
    f1 += curr_f1
    acc += curr_acc
    print(f'It: {it}')
    print(f'Acc: {curr_acc}')
    print(f'F1: {curr_f1}')


# Mean scores over all random splits.
print(f'Accuracy of model: {file_name} is: {acc / iters}')
print(f'f1 score of model: {file_name} is: {f1 / iters}')
