In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
data = pd.read_csv('GermanCredit.csv')

In [3]:
# Checking if any rows are missing data, False will indicate no data missing
data.isnull().any().any()

False

In [4]:
data.sample(5)

Unnamed: 0,Duration,Amount,InstallmentRatePercentage,ResidenceDuration,Age,NumberExistingCredits,NumberPeopleMaintenance,Telephone,ForeignWorker,Class,...,OtherInstallmentPlans.Bank,OtherInstallmentPlans.Stores,OtherInstallmentPlans.None,Housing.Rent,Housing.Own,Housing.ForFree,Job.UnemployedUnskilled,Job.UnskilledResident,Job.SkilledEmployee,Job.Management.SelfEmp.HighlyQualified
134,60,10144,2,4,21,1,1,0,1,Good,...,0,0,1,0,1,0,0,0,1,0
753,30,5771,4,2,25,2,1,1,1,Good,...,0,0,1,0,1,0,0,0,1,0
600,7,2329,1,1,45,1,1,1,1,Good,...,0,0,1,0,1,0,0,0,1,0
419,18,1042,4,2,33,1,1,1,1,Bad,...,0,0,1,0,1,0,0,0,1,0
275,9,2697,1,2,32,1,2,1,1,Good,...,0,0,1,0,1,0,0,0,1,0


In [5]:
data.shape

(1000, 62)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 62 columns):
Duration                                  1000 non-null int64
Amount                                    1000 non-null int64
InstallmentRatePercentage                 1000 non-null int64
ResidenceDuration                         1000 non-null int64
Age                                       1000 non-null int64
NumberExistingCredits                     1000 non-null int64
NumberPeopleMaintenance                   1000 non-null int64
Telephone                                 1000 non-null int64
ForeignWorker                             1000 non-null int64
Class                                     1000 non-null object
CheckingAccountStatus.lt.0                1000 non-null int64
CheckingAccountStatus.0.to.200            1000 non-null int64
CheckingAccountStatus.gt.200              1000 non-null int64
CheckingAccountStatus.none                1000 non-null int64
CreditHistory.NoCredit.Al

In [7]:
# All the columns are int64 except the label class which is categorical
# Lets change the categories to be int64 data type
def checkWorthiness(worthiness):
    if worthiness == "Good":
        return 1
    else:
        return 0

In [8]:
data["Worthiness"] = data["Class"].apply(checkWorthiness)
data.sample(5)

Unnamed: 0,Duration,Amount,InstallmentRatePercentage,ResidenceDuration,Age,NumberExistingCredits,NumberPeopleMaintenance,Telephone,ForeignWorker,Class,...,OtherInstallmentPlans.Stores,OtherInstallmentPlans.None,Housing.Rent,Housing.Own,Housing.ForFree,Job.UnemployedUnskilled,Job.UnskilledResident,Job.SkilledEmployee,Job.Management.SelfEmp.HighlyQualified,Worthiness
680,6,1538,1,2,56,1,1,1,1,Good,...,0,1,0,1,0,0,0,1,0,1
139,12,1881,2,2,44,1,1,0,1,Good,...,0,1,1,0,0,0,1,0,0,1
167,11,1577,4,1,20,1,1,1,1,Good,...,0,1,0,1,0,0,0,1,0,1
85,12,1412,4,2,29,2,1,0,1,Good,...,0,1,0,1,0,0,0,0,1,1
358,12,776,4,2,28,1,1,1,1,Good,...,0,1,0,1,0,0,0,1,0,1


In [9]:
train, test = train_test_split(data, test_size=0.3)

In [10]:
clf = DecisionTreeClassifier()
clf = clf.fit(train[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]], 
              train["Worthiness"])
clf.feature_importances_

array([0.11190238, 0.15621753, 0.02135184, 0.03733511, 0.06023935,
       0.01872175, 0.00260945, 0.01732603, 0.00499894, 0.02021839,
       0.00463902, 0.01306766, 0.10907617, 0.01916366, 0.03053375,
       0.00347926, 0.0059312 , 0.01824377, 0.00189318, 0.00662717,
       0.03806671, 0.00445451, 0.        , 0.00361588, 0.02223422,
       0.        , 0.        , 0.0161767 , 0.        , 0.01141082,
       0.00512401, 0.01058918, 0.00463902, 0.02135234, 0.00347926,
       0.00463902, 0.01546339, 0.03107208, 0.02113916, 0.00187625,
       0.        , 0.01158641, 0.00626267, 0.        , 0.01537001,
       0.01521809, 0.        , 0.00622861, 0.00703286, 0.0052189 ,
       0.        , 0.01082438, 0.        , 0.01406884, 0.        ,
       0.        , 0.00486229, 0.00642326, 0.011037  , 0.00695853,
       0.        ])

In [11]:
predictions = clf.predict(test[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]])
print(classification_report(test["Worthiness"], predictions, target_names=['Good', 'Bad']))

              precision    recall  f1-score   support

        Good       0.58      0.50      0.54        98
         Bad       0.77      0.82      0.80       202

    accuracy                           0.72       300
   macro avg       0.67      0.66      0.67       300
weighted avg       0.71      0.72      0.71       300



In [13]:
# Visualize it

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
clf = RandomForestClassifier()
clf = clf.fit(train[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]], 
              train["Worthiness"])
predictions = clf.predict(test[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]])
print(classification_report(test["Worthiness"], predictions, target_names=['Good', 'Bad']))

              precision    recall  f1-score   support

        Good       0.56      0.41      0.47        98
         Bad       0.75      0.85      0.79       202

    accuracy                           0.70       300
   macro avg       0.66      0.63      0.63       300
weighted avg       0.69      0.70      0.69       300





In [15]:
clf.feature_importances_

array([0.07201331, 0.09803819, 0.03247832, 0.02656475, 0.10164911,
       0.018106  , 0.01004358, 0.01749629, 0.00089151, 0.01711014,
       0.01332071, 0.01394442, 0.07205245, 0.00758558, 0.01481913,
       0.01952802, 0.01032223, 0.02641344, 0.01319876, 0.01056973,
       0.01222521, 0.01911971, 0.00212216, 0.00453375, 0.00976247,
       0.        , 0.00169224, 0.01148842, 0.00295917, 0.02268941,
       0.01065761, 0.00611349, 0.00040915, 0.02234989, 0.02037395,
       0.01272785, 0.01418902, 0.01139217, 0.00591803, 0.00873939,
       0.01850877, 0.00952878, 0.00618886, 0.        , 0.00680261,
       0.00460469, 0.00820956, 0.01702474, 0.01363852, 0.01061412,
       0.00897937, 0.00883274, 0.00578993, 0.01787817, 0.00997541,
       0.01533494, 0.00620713, 0.0015982 , 0.01058768, 0.01265947,
       0.01142755])

In [16]:
# Try partial dependence plots

In [17]:
from xgboost.sklearn import XGBClassifier

In [18]:
clf = XGBClassifier()
clf = clf.fit(train[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]], 
              train["Worthiness"])
predictions = clf.predict(test[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]])
print(classification_report(test["Worthiness"], predictions, target_names=['Good', 'Bad']))

              precision    recall  f1-score   support

        Good       0.65      0.45      0.53        98
         Bad       0.77      0.88      0.82       202

    accuracy                           0.74       300
   macro avg       0.71      0.67      0.68       300
weighted avg       0.73      0.74      0.73       300



In [19]:
from catboost import CatBoostClassifier

In [20]:
clf = CatBoostClassifier()
clf = clf.fit(train[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]], 
              train["Worthiness"])
predictions = clf.predict(test[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]])
print(classification_report(test["Worthiness"], predictions, target_names=['Good', 'Bad']))

Learning rate set to 0.015146
0:	learn: 0.6863760	total: 58.2ms	remaining: 58.1s
1:	learn: 0.6791506	total: 62.6ms	remaining: 31.2s
2:	learn: 0.6723016	total: 66.7ms	remaining: 22.2s
3:	learn: 0.6662854	total: 70.7ms	remaining: 17.6s
4:	learn: 0.6599095	total: 73.2ms	remaining: 14.6s
5:	learn: 0.6545430	total: 75.5ms	remaining: 12.5s
6:	learn: 0.6489821	total: 77.8ms	remaining: 11s
7:	learn: 0.6437929	total: 79.8ms	remaining: 9.9s
8:	learn: 0.6400447	total: 81.9ms	remaining: 9.02s
9:	learn: 0.6350034	total: 84ms	remaining: 8.31s
10:	learn: 0.6296253	total: 86.1ms	remaining: 7.74s
11:	learn: 0.6245213	total: 88.2ms	remaining: 7.26s
12:	learn: 0.6195474	total: 90.5ms	remaining: 6.87s
13:	learn: 0.6151812	total: 92.6ms	remaining: 6.52s
14:	learn: 0.6099560	total: 94.7ms	remaining: 6.22s
15:	learn: 0.6046086	total: 96.7ms	remaining: 5.95s
16:	learn: 0.5997522	total: 98.9ms	remaining: 5.72s
17:	learn: 0.5968263	total: 99.9ms	remaining: 5.45s
18:	learn: 0.5932693	total: 102ms	remaining: 5.25

224:	learn: 0.3443642	total: 502ms	remaining: 1.73s
225:	learn: 0.3437848	total: 504ms	remaining: 1.73s
226:	learn: 0.3431193	total: 506ms	remaining: 1.72s
227:	learn: 0.3426197	total: 508ms	remaining: 1.72s
228:	learn: 0.3419380	total: 510ms	remaining: 1.72s
229:	learn: 0.3410954	total: 513ms	remaining: 1.72s
230:	learn: 0.3404746	total: 515ms	remaining: 1.71s
231:	learn: 0.3400333	total: 517ms	remaining: 1.71s
232:	learn: 0.3392406	total: 520ms	remaining: 1.71s
233:	learn: 0.3387301	total: 521ms	remaining: 1.71s
234:	learn: 0.3379691	total: 523ms	remaining: 1.7s
235:	learn: 0.3370874	total: 525ms	remaining: 1.7s
236:	learn: 0.3363907	total: 527ms	remaining: 1.7s
237:	learn: 0.3357382	total: 529ms	remaining: 1.69s
238:	learn: 0.3351335	total: 531ms	remaining: 1.69s
239:	learn: 0.3345334	total: 532ms	remaining: 1.69s
240:	learn: 0.3341292	total: 534ms	remaining: 1.68s
241:	learn: 0.3335944	total: 536ms	remaining: 1.68s
242:	learn: 0.3329852	total: 538ms	remaining: 1.67s
243:	learn: 0.3

408:	learn: 0.2545017	total: 837ms	remaining: 1.21s
409:	learn: 0.2542546	total: 839ms	remaining: 1.21s
410:	learn: 0.2537580	total: 841ms	remaining: 1.2s
411:	learn: 0.2535180	total: 843ms	remaining: 1.2s
412:	learn: 0.2530843	total: 844ms	remaining: 1.2s
413:	learn: 0.2527845	total: 846ms	remaining: 1.2s
414:	learn: 0.2525002	total: 848ms	remaining: 1.2s
415:	learn: 0.2521126	total: 851ms	remaining: 1.19s
416:	learn: 0.2516800	total: 853ms	remaining: 1.19s
417:	learn: 0.2515004	total: 855ms	remaining: 1.19s
418:	learn: 0.2512288	total: 857ms	remaining: 1.19s
419:	learn: 0.2507201	total: 859ms	remaining: 1.19s
420:	learn: 0.2502841	total: 861ms	remaining: 1.18s
421:	learn: 0.2500462	total: 863ms	remaining: 1.18s
422:	learn: 0.2497162	total: 865ms	remaining: 1.18s
423:	learn: 0.2492223	total: 867ms	remaining: 1.18s
424:	learn: 0.2484737	total: 868ms	remaining: 1.17s
425:	learn: 0.2481751	total: 870ms	remaining: 1.17s
426:	learn: 0.2479009	total: 872ms	remaining: 1.17s
427:	learn: 0.247

568:	learn: 0.1965086	total: 1.16s	remaining: 882ms
569:	learn: 0.1961368	total: 1.17s	remaining: 880ms
570:	learn: 0.1956579	total: 1.17s	remaining: 878ms
571:	learn: 0.1952777	total: 1.17s	remaining: 875ms
572:	learn: 0.1947679	total: 1.17s	remaining: 873ms
573:	learn: 0.1945041	total: 1.17s	remaining: 871ms
574:	learn: 0.1943048	total: 1.18s	remaining: 869ms
575:	learn: 0.1941545	total: 1.18s	remaining: 868ms
576:	learn: 0.1941265	total: 1.18s	remaining: 868ms
577:	learn: 0.1937223	total: 1.19s	remaining: 866ms
578:	learn: 0.1934347	total: 1.19s	remaining: 864ms
579:	learn: 0.1931178	total: 1.19s	remaining: 862ms
580:	learn: 0.1928394	total: 1.19s	remaining: 859ms
581:	learn: 0.1924840	total: 1.19s	remaining: 857ms
582:	learn: 0.1921644	total: 1.2s	remaining: 855ms
583:	learn: 0.1918168	total: 1.2s	remaining: 853ms
584:	learn: 0.1914927	total: 1.2s	remaining: 851ms
585:	learn: 0.1909940	total: 1.2s	remaining: 849ms
586:	learn: 0.1907140	total: 1.2s	remaining: 846ms
587:	learn: 0.190

803:	learn: 0.1340903	total: 1.66s	remaining: 405ms
804:	learn: 0.1339655	total: 1.66s	remaining: 402ms
805:	learn: 0.1335995	total: 1.66s	remaining: 400ms
806:	learn: 0.1334288	total: 1.66s	remaining: 398ms
807:	learn: 0.1332243	total: 1.67s	remaining: 396ms
808:	learn: 0.1330421	total: 1.67s	remaining: 394ms
809:	learn: 0.1327948	total: 1.67s	remaining: 392ms
810:	learn: 0.1325642	total: 1.67s	remaining: 390ms
811:	learn: 0.1323312	total: 1.68s	remaining: 388ms
812:	learn: 0.1320945	total: 1.68s	remaining: 386ms
813:	learn: 0.1319360	total: 1.68s	remaining: 384ms
814:	learn: 0.1318082	total: 1.68s	remaining: 382ms
815:	learn: 0.1316190	total: 1.68s	remaining: 380ms
816:	learn: 0.1314540	total: 1.69s	remaining: 378ms
817:	learn: 0.1312794	total: 1.69s	remaining: 375ms
818:	learn: 0.1310724	total: 1.69s	remaining: 373ms
819:	learn: 0.1309162	total: 1.69s	remaining: 371ms
820:	learn: 0.1306898	total: 1.69s	remaining: 369ms
821:	learn: 0.1303669	total: 1.69s	remaining: 367ms
822:	learn: 

963:	learn: 0.1056212	total: 1.99s	remaining: 74.1ms
964:	learn: 0.1055109	total: 1.99s	remaining: 72.1ms
965:	learn: 0.1053844	total: 1.99s	remaining: 70ms
966:	learn: 0.1052786	total: 1.99s	remaining: 68ms
967:	learn: 0.1050943	total: 1.99s	remaining: 65.9ms
968:	learn: 0.1048992	total: 2s	remaining: 63.8ms
969:	learn: 0.1047560	total: 2s	remaining: 61.8ms
970:	learn: 0.1045288	total: 2s	remaining: 59.7ms
971:	learn: 0.1043062	total: 2s	remaining: 57.7ms
972:	learn: 0.1041356	total: 2s	remaining: 55.6ms
973:	learn: 0.1039433	total: 2s	remaining: 53.5ms
974:	learn: 0.1037303	total: 2.01s	remaining: 51.5ms
975:	learn: 0.1036411	total: 2.01s	remaining: 49.4ms
976:	learn: 0.1035377	total: 2.01s	remaining: 47.3ms
977:	learn: 0.1034286	total: 2.01s	remaining: 45.3ms
978:	learn: 0.1033281	total: 2.02s	remaining: 43.2ms
979:	learn: 0.1032149	total: 2.02s	remaining: 41.3ms
980:	learn: 0.1030897	total: 2.02s	remaining: 39.2ms
981:	learn: 0.1029245	total: 2.03s	remaining: 37.1ms
982:	learn: 0.1

In [21]:
from lightgbm import LGBMClassifier

In [22]:
clf = LGBMClassifier()
clf = clf.fit(train[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]], 
              train["Worthiness"])
predictions = clf.predict(test[["Duration", "Amount", "InstallmentRatePercentage", 
                     "ResidenceDuration", "Age", "NumberExistingCredits", 
                     "NumberPeopleMaintenance", "Telephone", "ForeignWorker", "CheckingAccountStatus.lt.0",
                     "CheckingAccountStatus.0.to.200", "CheckingAccountStatus.gt.200", "CheckingAccountStatus.none", 
                     "CreditHistory.NoCredit.AllPaid", "CreditHistory.ThisBank.AllPaid", "CreditHistory.PaidDuly", 
                     "CreditHistory.Delay", "CreditHistory.Critical", "Purpose.NewCar", "Purpose.UsedCar",
                     "Purpose.Furniture.Equipment", "Purpose.Radio.Television", "Purpose.DomesticAppliance", 
                     "Purpose.Repairs", "Purpose.Education", "Purpose.Vacation", 
                     "Purpose.Retraining", "Purpose.Business", "Purpose.Other", "SavingsAccountBonds.lt.100",
                     "SavingsAccountBonds.100.to.500", "SavingsAccountBonds.500.to.1000", "SavingsAccountBonds.gt.1000", 
                     "SavingsAccountBonds.Unknown", "EmploymentDuration.lt.1", "EmploymentDuration.1.to.4", 
                     "EmploymentDuration.4.to.7", "EmploymentDuration.gt.7", "EmploymentDuration.Unemployed", "Personal.Male.Divorced.Seperated",
                     "Personal.Female.NotSingle", "Personal.Male.Single", "Personal.Male.Married.Widowed", 
                     "Personal.Female.Single", "OtherDebtorsGuarantors.None", "OtherDebtorsGuarantors.CoApplicant", 
                     "OtherDebtorsGuarantors.Guarantor", "Property.RealEstate", "Property.Insurance", "Property.CarOther",
                     "Property.Unknown", "OtherInstallmentPlans.Bank", "OtherInstallmentPlans.Stores", 
                     "OtherInstallmentPlans.None", "Housing.Rent", "Housing.Own", 
                     "Housing.ForFree", "Job.UnemployedUnskilled", "Job.UnskilledResident", "Job.SkilledEmployee",
                     "Job.Management.SelfEmp.HighlyQualified"]])
print(classification_report(test["Worthiness"], predictions, target_names=['Good', 'Bad']))

              precision    recall  f1-score   support

        Good       0.62      0.49      0.55        98
         Bad       0.78      0.86      0.81       202

    accuracy                           0.74       300
   macro avg       0.70      0.67      0.68       300
weighted avg       0.73      0.74      0.73       300

