In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the dataset
data = pd.read_csv('german_credit_data (1).csv')
data

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose
0,0,67,male,2,own,,little,1169,6,radio/TV
1,1,22,female,2,own,little,moderate,5951,48,radio/TV
2,2,49,male,1,own,little,,2096,12,education
3,3,45,male,2,free,little,little,7882,42,furniture/equipment
4,4,53,male,2,free,little,little,4870,24,car
...,...,...,...,...,...,...,...,...,...,...
995,995,31,female,1,own,little,,1736,12,furniture/equipment
996,996,40,male,3,own,little,little,3857,30,car
997,997,38,male,2,own,little,,804,12,radio/TV
998,998,23,male,2,free,little,little,1845,45,radio/TV


In [3]:
data.columns


Index(['Unnamed: 0', 'Age', 'Sex', 'Job', 'Housing', 'Saving accounts',
       'Checking account', 'Credit amount', 'Duration', 'Purpose'],
      dtype='object')

In [4]:
data.isnull().sum()

Unnamed: 0            0
Age                   0
Sex                   0
Job                   0
Housing               0
Saving accounts     183
Checking account    394
Credit amount         0
Duration              0
Purpose               0
dtype: int64

In [5]:
saving_accounts_mode = data['Saving accounts'].mode()[0]
data['Saving accounts'].fillna(saving_accounts_mode, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Saving accounts'].fillna(saving_accounts_mode, inplace=True)


In [6]:
Checking_account_mode = data['Checking account'].mode()[0]
data['Checking account'].fillna(Checking_account_mode, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Checking account'].fillna(Checking_account_mode, inplace=True)


In [7]:
data.isnull().sum()

Unnamed: 0          0
Age                 0
Sex                 0
Job                 0
Housing             0
Saving accounts     0
Checking account    0
Credit amount       0
Duration            0
Purpose             0
dtype: int64

In [8]:
from sklearn.preprocessing import LabelEncoder
cols = ['Sex','Housing','Saving accounts','Checking account','Purpose']
le = LabelEncoder()

for col in cols:
    data[col] = le.fit_transform(data[col])

data

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose
0,0,67,1,2,1,0,0,1169,6,5
1,1,22,0,2,1,0,1,5951,48,5
2,2,49,1,1,1,0,0,2096,12,3
3,3,45,1,2,0,0,0,7882,42,4
4,4,53,1,2,0,0,0,4870,24,1
...,...,...,...,...,...,...,...,...,...,...
995,995,31,0,1,1,0,0,1736,12,4
996,996,40,1,3,1,0,0,3857,30,1
997,997,38,1,2,1,0,0,804,12,5
998,998,23,1,2,0,0,0,1845,45,5


In [9]:
X = data.drop('Credit amount', axis=1)
y = data['Credit amount']

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [12]:
y_pred = model.predict(X_test)

In [13]:
data = pd.DataFrame({
              'Creadit Amount':y_test,
              'Predictions':y_pred})
data

Unnamed: 0,Creadit Amount,Predictions
521,3190,2087.57
737,4380,2769.40
740,2325,2477.01
660,1297,2091.00
411,7253,4315.94
...,...,...
408,3235,3319.61
332,7408,10372.37
208,6568,2799.89
613,3632,1966.74
