# Problem Statement:
The primary objective of this project is to predict the approval or rejection of credit card applications.
The challenge lies in understanding the key factors influencing credit card approval decisions and
building a predictive model to assist in the decision-making process.

In [2]:
import pandas as pd
import numpy as np
# The plotting functions (plt.plot, plt.show, ...) live in matplotlib.pyplot;
# the top-level matplotlib package does not expose them, so alias the submodule.
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the test split from the working directory and preview its first five rows.
test_data = pd.read_csv("test_data.csv")
test_data.head(5)

Unnamed: 0,ID,Gender,Has a car,Has a property,Children count,Income,Employment status,Education level,Marital status,Dwelling,Age,Employment length,Has a mobile phone,Has a work phone,Has a phone,Has an email,Job title,Family member count,Account age,Is high risk
0,5091261,F,N,Y,0,202500.0,State servant,Secondary / secondary special,Separated,House / apartment,-16834,-1692,1,0,0,0,Medicine staff,1.0,-6.0,0
1,5096963,M,Y,N,0,675000.0,Commercial associate,Higher education,Married,House / apartment,-18126,-948,1,0,1,0,Managers,2.0,-16.0,0
2,5087880,F,N,N,0,234000.0,State servant,Higher education,Civil marriage,House / apartment,-21967,-5215,1,0,0,1,Core staff,2.0,-52.0,0
3,5021949,F,Y,Y,0,445500.0,Commercial associate,Higher education,Married,House / apartment,-12477,-456,1,0,0,0,Managers,2.0,-54.0,0
4,5105705,F,Y,N,0,225000.0,Working,Secondary / secondary special,Married,Municipal apartment,-12155,-667,1,0,0,0,Laborers,2.0,-48.0,0


In [5]:
# Read the training split from the working directory and preview its first five rows.
train_data = pd.read_csv("train_data.csv")
train_data.head(5)

Unnamed: 0,ID,Gender,Has a car,Has a property,Children count,Income,Employment status,Education level,Marital status,Dwelling,Age,Employment length,Has a mobile phone,Has a work phone,Has a phone,Has an email,Job title,Family member count,Account age,Is high risk
0,5037048,M,Y,Y,0,135000.0,Working,Secondary / secondary special,Married,With parents,-16271,-3111,1,0,0,0,Core staff,2.0,-17.0,0
1,5044630,F,Y,N,1,135000.0,Commercial associate,Higher education,Single / not married,House / apartment,-10130,-1651,1,0,0,0,Accountants,2.0,-1.0,0
2,5079079,F,N,Y,2,180000.0,Commercial associate,Secondary / secondary special,Married,House / apartment,-12821,-5657,1,0,0,0,Laborers,4.0,-38.0,0
3,5112872,F,Y,Y,0,360000.0,Commercial associate,Higher education,Single / not married,House / apartment,-20929,-2046,1,0,0,1,Managers,1.0,-11.0,0
4,5105858,F,N,N,0,270000.0,Working,Secondary / secondary special,Separated,House / apartment,-16207,-515,1,0,1,0,,1.0,-41.0,0


In [17]:
# Dimensions of the training frame as a (rows, columns) tuple.
train_data.shape

(29165, 20)

In [6]:
# Per-column dtype and non-null count for the training split.
# In the recorded output, "Job title" is the only column with missing values
# (20138 non-null out of 29165 rows).
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29165 entries, 0 to 29164
Data columns (total 20 columns):
ID                     29165 non-null int64
Gender                 29165 non-null object
Has a car              29165 non-null object
Has a property         29165 non-null object
Children count         29165 non-null int64
Income                 29165 non-null float64
Employment status      29165 non-null object
Education level        29165 non-null object
Marital status         29165 non-null object
Dwelling               29165 non-null object
Age                    29165 non-null int64
Employment length      29165 non-null int64
Has a mobile phone     29165 non-null int64
Has a work phone       29165 non-null int64
Has a phone            29165 non-null int64
Has an email           29165 non-null int64
Job title              20138 non-null object
Family member count    29165 non-null float64
Account age            29165 non-null float64
Is high risk           29165 non-null int64

In [7]:
# Per-column dtype and non-null count for the test split.
# In the recorded output, "Job title" is the only column with missing values
# (4996 non-null out of 7292 rows).
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7292 entries, 0 to 7291
Data columns (total 20 columns):
ID                     7292 non-null int64
Gender                 7292 non-null object
Has a car              7292 non-null object
Has a property         7292 non-null object
Children count         7292 non-null int64
Income                 7292 non-null float64
Employment status      7292 non-null object
Education level        7292 non-null object
Marital status         7292 non-null object
Dwelling               7292 non-null object
Age                    7292 non-null int64
Employment length      7292 non-null int64
Has a mobile phone     7292 non-null int64
Has a work phone       7292 non-null int64
Has a phone            7292 non-null int64
Has an email           7292 non-null int64
Job title              4996 non-null object
Family member count    7292 non-null float64
Account age            7292 non-null float64
Is high risk           7292 non-null int64
dtypes: float64(3), int64(9), object(8)

In [16]:
# Summary statistics for the numeric columns of the training split.
# NOTE(review): in the recorded output "Employment length" has max 365243 while
# its min and quartiles are all negative — this looks like a placeholder value
# rather than a real duration; confirm before modeling.
# "Has a mobile phone" is constant there (min = max = 1, std = 0).
train_data.describe()

Unnamed: 0,ID,Children count,Income,Age,Employment length,Has a mobile phone,Has a work phone,Has a phone,Has an email,Family member count,Account age,Is high risk
count,29165.0,29165.0,29165.0,29165.0,29165.0,29165.0,29165.0,29165.0,29165.0,29165.0,29165.0,29165.0
mean,5078232.0,0.43079,186890.4,-15979.47749,59257.761255,1.0,0.22431,0.294977,0.090279,2.197531,-26.137734,0.01711
std,41824.0,0.741882,101409.6,4202.997485,137655.883458,0.0,0.417134,0.45604,0.286587,0.912189,16.486702,0.129682
min,5008804.0,0.0,27000.0,-25152.0,-15713.0,1.0,0.0,0.0,0.0,1.0,-60.0,0.0
25%,5042047.0,0.0,121500.0,-19444.0,-3153.0,1.0,0.0,0.0,0.0,2.0,-39.0,0.0
50%,5074666.0,0.0,157500.0,-15565.0,-1557.0,1.0,0.0,0.0,0.0,2.0,-24.0,0.0
75%,5114629.0,1.0,225000.0,-12475.0,-412.0,1.0,0.0,1.0,0.0,3.0,-12.0,0.0
max,5150485.0,19.0,1575000.0,-7705.0,365243.0,1.0,1.0,1.0,1.0,20.0,0.0,1.0


In [14]:
# Summary statistics for the numeric columns of the test split.
# NOTE(review): in the recorded output "Employment length" has max 365243 while
# its min and quartiles are all negative — this looks like a placeholder value
# rather than a real duration; confirm before modeling.
# "Has a mobile phone" is constant there (min = max = 1, std = 0).
test_data.describe()

Unnamed: 0,ID,Children count,Income,Age,Employment length,Has a mobile phone,Has a work phone,Has a phone,Has an email,Family member count,Account age,Is high risk
count,7292.0,7292.0,7292.0,7292.0,7292.0,7292.0,7292.0,7292.0,7292.0,7292.0,7292.0,7292.0
mean,5078209.0,0.428415,185867.2,-15957.958722,59283.630691,1.0,0.230389,0.294158,0.087493,2.202139,-26.270022,0.016045
std,42082.43,0.74435,103296.4,4190.99001,137642.577749,0.0,0.421111,0.455695,0.282576,0.909726,16.563032,0.125657
min,5008809.0,0.0,27000.0,-25152.0,-15661.0,1.0,0.0,0.0,0.0,1.0,-60.0,0.0
25%,5041912.0,0.0,117000.0,-19382.0,-3141.0,1.0,0.0,0.0,0.0,2.0,-39.0,0.0
50%,5069416.0,0.0,157500.0,-15522.0,-1534.0,1.0,0.0,0.0,0.0,2.0,-24.0,0.0
75%,5115503.0,1.0,225000.0,-12454.0,-397.0,1.0,0.0,1.0,0.0,3.0,-12.0,0.0
max,5150487.0,14.0,1575000.0,-7489.0,365243.0,1.0,1.0,1.0,1.0,15.0,0.0,1.0
