## 1. Importing important Libraries

In [1]:
import pandas as pd

## 2. Importing data

In [2]:
data1 = pd.read_csv('Fraud_check (1).csv',sep=',')
data1.head()

Unnamed: 0,Undergrad,Marital.Status,Taxable.Income,City.Population,Work.Experience,Urban
0,NO,Single,68833,50047,10,YES
1,YES,Divorced,33700,134075,18,YES
2,NO,Married,36925,160205,30,YES
3,YES,Single,50190,193264,15,YES
4,NO,Married,81002,27533,28,NO


## 3. Data Understanding

In [3]:
data1.shape

(600, 6)

In [4]:
data1.isnull().sum()

Undergrad          0
Marital.Status     0
Taxable.Income     0
City.Population    0
Work.Experience    0
Urban              0
dtype: int64

In [5]:
data1.dtypes

Undergrad          object
Marital.Status     object
Taxable.Income      int64
City.Population     int64
Work.Experience     int64
Urban              object
dtype: object

## 4. Data Preparation

In [6]:
y = []
for itr in data1['Taxable.Income']:
    if itr <= 30000:
        y.append('Risky')
    else:
        y.append('Good')
data1['y'] = y

In [7]:
data1.head()

Unnamed: 0,Undergrad,Marital.Status,Taxable.Income,City.Population,Work.Experience,Urban,y
0,NO,Single,68833,50047,10,YES,Good
1,YES,Divorced,33700,134075,18,YES,Good
2,NO,Married,36925,160205,30,YES,Good
3,YES,Single,50190,193264,15,YES,Good
4,NO,Married,81002,27533,28,NO,Good


In [8]:
data1.y.unique()

array(['Good', 'Risky'], dtype=object)

In [9]:
data1.isnull().sum()

Undergrad          0
Marital.Status     0
Taxable.Income     0
City.Population    0
Work.Experience    0
Urban              0
y                  0
dtype: int64

In [10]:
data1.dtypes

Undergrad          object
Marital.Status     object
Taxable.Income      int64
City.Population     int64
Work.Experience     int64
Urban              object
y                  object
dtype: object

In [11]:
from sklearn.preprocessing import LabelEncoder

In [12]:
le = LabelEncoder()
data1['Encoded y'] = le.fit_transform(data1['y'])

In [13]:
data1.head()

Unnamed: 0,Undergrad,Marital.Status,Taxable.Income,City.Population,Work.Experience,Urban,y,Encoded y
0,NO,Single,68833,50047,10,YES,Good,0
1,YES,Divorced,33700,134075,18,YES,Good,0
2,NO,Married,36925,160205,30,YES,Good,0
3,YES,Single,50190,193264,15,YES,Good,0
4,NO,Married,81002,27533,28,NO,Good,0


In [14]:
data1.drop(labels=['Taxable.Income','y'],axis=1,inplace=True)

In [15]:
data1['Undergrad'] = le.fit_transform(data1['Undergrad'])
data1['Marital.Status'] = le.fit_transform(data1['Marital.Status'])
data1['Urban'] = le.fit_transform(data1['Marital.Status'])

In [16]:
data1.head()

Unnamed: 0,Undergrad,Marital.Status,City.Population,Work.Experience,Urban,Encoded y
0,0,2,50047,10,2,0
1,1,0,134075,18,0,0
2,0,1,160205,30,1,0
3,1,2,193264,15,2,0
4,0,1,27533,28,1,0


In [17]:
data1['Encoded y'].unique()

array([0, 1])

In [18]:
data1['Undergrad'].unique()

array([0, 1])

In [19]:
data1['Marital.Status'].unique()

array([2, 0, 1])

In [20]:
data1['Urban'].unique()

array([2, 0, 1], dtype=int64)

## 5. Model Building

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
X = data1.drop('Encoded y',axis= 1)
Y = data1[['Encoded y']]

In [23]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.20,random_state = 12)

In [24]:
print(X_train.shape,Y_train.shape)

(480, 5) (480, 1)


In [25]:
print(X_test.shape,Y_test.shape)

(120, 5) (120, 1)


##  6. Model Training

In [26]:
from sklearn.ensemble import RandomForestClassifier

In [27]:
rf_model = RandomForestClassifier(n_estimators=100, criterion='gini',max_depth=None)

In [28]:
rf_model.fit(X_train,Y_train)

  rf_model.fit(X_train,Y_train)


RandomForestClassifier()

## 7. Model Testing

In [29]:
y_pred_train = rf_model.predict(X_train)
y_pred_train

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,

In [30]:
y_pred_test = rf_model.predict(X_test)
y_pred_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## 7. Model Evaluation

In [31]:
from sklearn.metrics import accuracy_score as ac

In [32]:
ac(Y_train,y_pred_train)

1.0

In [33]:
ac(Y_test,y_pred_test)

0.7583333333333333

In [34]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [35]:
confusion_matrix(Y_train,y_pred_train)

array([[369,   0],
       [  0, 111]], dtype=int64)

In [36]:
confusion_matrix(Y_test,y_pred_test)

array([[91, 16],
       [13,  0]], dtype=int64)