<a href="https://colab.research.google.com/github/alvin-98/credit-card-approval-prediction/blob/main/credit_card_approval_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Credit Card Approval Prediction Based on Credit Card Application Data using Logistic Regression 
#####*A DataCamp Project*


####Loading and exploring data

In [28]:
import pandas as pd
ip_data = pd.read_csv("crx.data", header=None)
ip_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,b,30.83,0.0,u,g,w,v,1.25,t,t,1,f,g,202,0,+
1,a,58.67,4.46,u,g,q,h,3.04,t,t,6,f,g,43,560,+
2,a,24.5,0.5,u,g,q,h,1.5,t,f,0,f,g,280,824,+
3,b,27.83,1.54,u,g,w,v,3.75,t,t,5,t,g,100,3,+
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120,0,+


In [None]:
ip_data_description = ip_data.describe()
print(ip_data_description)

print("\n")

ip_data_info = ip_data.info()
print(ip_data_info)

print("\n")

ip_data.tail(17)

####Pre-processing

In [None]:
import numpy as np

print(ip_data.tail(17))

ip_data = ip_data.replace(to_replace="?", value=np.nan)

print(ip_data.tail(17))

In [None]:
ip_data.fillna(ip_data.mean(), inplace=True)

print(ip_data.isnull().sum())

In [None]:
for col in ip_data.columns:
    if ip_data[col].dtypes == 'object':
        ip_data = ip_data.fillna(ip_data[col].value_counts().index[0])

print(ip_data.isnull().sum())

####Converting non-numeric data into numeric data

In [31]:
from sklearn.preprocessing import LabelEncoder

le=LabelEncoder()

for col in ip_data.columns.to_numpy():
    if ip_data[col].dtypes=='object':
        ip_data[col]=le.fit_transform(ip_data[col])

####Spliting dataset into training and test data

In [32]:
from sklearn.model_selection import train_test_split

ip_data = ip_data.drop([11, 13], axis=1)
ip_data = ip_data.to_numpy()

X,y = ip_data[:,0:12] , ip_data[:,13]

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.33,random_state=42)

####Rescaling values

In [33]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0,1))
rescaledX_train = scaler.fit_transform(X_train)
rescaledX_test = scaler.fit_transform(X_test)

####Training logistic regression model

In [None]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()

logreg.fit(rescaledX_train, y_train)

####Determining the accuracy of the model 

In [35]:
from sklearn.metrics import confusion_matrix, accuracy_score

y_pred = logreg.predict(rescaledX_test)

print("Accuracy of logistic regression classifier: ", accuracy_score(y_test, y_pred))

print(confusion_matrix(y_test, y_pred))

Accuracy of logistic regression classifier:  0.8333333333333334
[[92 11]
 [27 98]]


####Grid searching and making the model better

In [36]:
from sklearn.model_selection import GridSearchCV

tol = [0.01, 0.001, 0.0001]
max_iter = [100, 150, 200]

param_grid = dict(tol = [*tol], max_iter = [*max_iter])

In [37]:
grid_model = GridSearchCV(estimator=logreg, param_grid=param_grid, cv=5)

rescaledX = scaler.fit_transform(X)

grid_model_result = grid_model.fit(rescaledX, y)

best_score, best_params = grid_model_result.best_score_, grid_model_result.best_params_
print("Best: %f using %s" % (best_score, best_params))

Best: 0.850725 using {'max_iter': 100, 'tol': 0.01}
