# **Credit Scoring Model**

In [None]:
import pandas as pd

# Load the raw dataset from the CSV file and render it as the cell output.
csv_path = 'bank.csv'
df = pd.read_csv(csv_path)
display(df)

## Data Preparation

### Preprocessing (One-hot encoding)

In [None]:
# Columns expanded into one indicator column per category value.
categorical_columns = ['job', 'marital', 'education', 'contact', 'month', 'poutcome']
dummy_df = pd.get_dummies(df, columns=categorical_columns)


In [None]:
# Convert the 'no'/'yes' columns (including the target 'y') to 0/1 integers.
# Series.map leaves NaN for any value that is not a key of the mapping.
yes_no_to_int = {'no': 0, 'yes': 1}
for binary_column in ('default', 'housing', 'loan', 'y'):
    dummy_df[binary_column] = dummy_df[binary_column].map(yes_no_to_int)

### Data Separation into x and y

In [None]:
# Target column taken from both frames: the un-encoded labels from the raw
# data (y_temp) and the 0/1-encoded labels from the preprocessed data (y).
y_temp, y = df['y'], dummy_df['y']
y

In [None]:
# Feature matrices: everything except the target column 'y'.
# x_temp keeps the raw columns; x holds the encoded columns used for training.
x_temp = df.drop(columns='y')
x = dummy_df.drop(columns='y')
x

### Split into 80% train and 20% test data

In [None]:
from sklearn.model_selection import train_test_split
# Split the raw and the encoded data in a single call. train_test_split draws
# one set of shuffled train/test indices and applies it to every array passed
# in, so the raw (*_temp) rows are guaranteed to correspond to the encoded
# rows. Previously this relied on two separate calls happening to share the
# same random_state. The outputs come back as a (train, test) pair per input,
# in input order. test_size=0.2 holds out 20% of the rows; random_state=100
# makes the shuffle reproducible.
(
    x_train_temp, x_test_temp,
    x_train, x_test,
    y_train_temp, y_test_temp,
    y_train, y_test,
) = train_test_split(x_temp, x, y_temp, y, test_size=0.2, random_state=100)

"""     random_state : int, RandomState instance or None, default=None
        Controls the shuffling applied to the data before applying the split.
        Pass an int for reproducible output across multiple function calls.
        
        Use a new random number generator seeded by the given integer. 
        Using an int will produce the same results across different calls. 
        However, it may be worthwhile checking that your results are stable 
        across a number of different distinct random seeds. Popular integer 
        random seeds are 0 and 42. Integer values must be in the range [0, 2**32 - 1].
        The seed is a starting point for a sequence of pseudorandom numbers. 
        If you start from the same seed, you get the very same sequence.
"""

## Building the Model

### Training the Model

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with otherwise default hyper-parameters. The seed is fixed
# (same value as the train/test split) so that the fitted trees, and therefore
# the reported accuracy/precision, are identical from run to run; an unseeded
# forest gives slightly different metrics on every execution.
rf = RandomForestClassifier(random_state=100)
rf.fit(x_train, y_train)

In [None]:
# Number of rows in the held-out test set.
len(x_test)

In [None]:
# Replace the shuffled row labels left by the split with a fresh 0..n-1 index
# on both the encoded and the raw test features (the old labels are dropped).
x_test = x_test.reset_index(drop=True)
x_test_temp = x_test_temp.reset_index(drop=True)

In [None]:
# Predict the 0/1 class label for every row of the encoded test features.
y_rf_pred = rf.predict(x_test)

In [None]:
# Show the raw array of predicted labels.
y_rf_pred

In [None]:
# Wrap the predicted labels in a DataFrame so they can be joined to the test rows.
prediction = pd.DataFrame(y_rf_pred)
prediction

In [None]:
# Give the single prediction column a readable name.
prediction.columns = ['Loan Approved']
prediction

In [None]:
# Turn the numeric class labels back into the 'no'/'yes' strings for display.
label_names = {0: 'no', 1: 'yes'}
readable_labels = prediction['Loan Approved'].map(label_names)
prediction['Loan Approved'] = readable_labels
prediction

In [None]:
# Put each prediction next to the raw (un-encoded) test row it was made for.
# pd.concat(axis=1) aligns rows by index label, not by position, so both
# frames are placed on a fresh 0..n-1 index right here. If the raw test rows
# still carried their shuffled labels, the result would be a NaN-padded outer
# join instead of a side-by-side table.
test_table_prediction = pd.concat(
    [x_test_temp.reset_index(drop=True), prediction.reset_index(drop=True)],
    axis=1,
)
test_table_prediction

## Checking accuracy and precision

In [None]:
from sklearn.metrics import accuracy_score,precision_score

In [None]:
# Score the predictions against the true test labels.
# accuracy: share of all test rows predicted correctly.
# precision: share of predicted positives (label 1) that are truly positive.
accuracy = accuracy_score(y_test, y_rf_pred)
precision = precision_score(y_test, y_rf_pred)
for metric_label, metric_value in (("Accuracy:", accuracy), ("Precision:", precision)):
    print(metric_label, metric_value)