In [35]:
from ucimlrepo import fetch_ucirepo 
  
# Fetch the Statlog (German Credit Data) dataset, UCI repository id 144.
statlog_german_credit_data = fetch_ucirepo(id=144)

# Features and targets as pandas DataFrames.
# Work on copies: later cells rename and overwrite columns of X in place, and
# those edits should neither alter the frames held by the fetched dataset
# object nor write into what may be a slice of a larger table.
X = statlog_german_credit_data.data.features.copy()
y = statlog_german_credit_data.data.targets.copy()

# Dataset-level metadata (name, abstract, number of instances, ...).
print(statlog_german_credit_data.metadata)

# Per-variable information.
print(statlog_german_credit_data.variables)


{'uci_id': 144, 'name': 'Statlog (German Credit Data)', 'repository_url': 'https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data', 'data_url': 'https://archive.ics.uci.edu/static/public/144/data.csv', 'abstract': 'This dataset classifies people described by a set of attributes as good or bad credit risks. Comes in two formats (one all numeric). Also comes with a cost matrix', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1000, 'num_features': 20, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Other', 'Marital Status', 'Age', 'Occupation'], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1994, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5NC77', 'creators': ['Hans Hofmann'], 'intro_paper': None, 'additional_info': {'summary': 'Two datasets are provided.  the original dataset, in the form provided by

In [36]:
# Show the feature column labels as loaded (generic 'AttributeN' names).
X.columns

Index(['Attribute1', 'Attribute2', 'Attribute3', 'Attribute4', 'Attribute5',
       'Attribute6', 'Attribute7', 'Attribute8', 'Attribute9', 'Attribute10',
       'Attribute11', 'Attribute12', 'Attribute13', 'Attribute14',
       'Attribute15', 'Attribute16', 'Attribute17', 'Attribute18',
       'Attribute19', 'Attribute20'],
      dtype='object')

In [37]:
# Display the target frame; it has a single 'class' column.
y

Unnamed: 0,class
0,1
1,2
2,1
3,1
4,2
...,...
995,1
996,1
997,1
998,2


In [38]:
# Descriptive replacements for the generic 'Attribute1' ... 'Attribute20'
# labels, listed in the same order as the columns they rename.
column_names = [
    "Status_Checking_Account",   # Attribute1
    "Duration_Months",           # Attribute2
    "Credit_History",            # Attribute3
    "Purpose",                   # Attribute4
    "Credit_Amount",             # Attribute5
    "Savings_Account_Bonds",     # Attribute6
    "Employment_Since",          # Attribute7
    "Installment_Rate",          # Attribute8
    "Personal_Status_Sex",       # Attribute9
    "Other_Debtors_Guarantors",  # Attribute10
    "Present_Residence_Since",   # Attribute11
    "Property",                  # Attribute12
    "Age",                       # Attribute13
    "Other_Installment_Plans",   # Attribute14
    "Housing",                   # Attribute15
    "Number_Credits",            # Attribute16
    "Job",                       # Attribute17
    "People_Liable",             # Attribute18
    "Telephone",                 # Attribute19
    "Foreign_Worker",            # Attribute20
]

# Relabel the feature columns with the descriptive names; the assignment is
# positional, so the list order must match the existing column order.
X.columns = column_names

In [39]:
# List the distinct raw codes of every object-dtype (categorical) column so
# they can be matched against the label dictionaries.
object_columns = [name for name in X.columns if X[name].dtype == 'object']
for name in object_columns:
    distinct_codes = X[name].unique()
    print(f"{name}: {distinct_codes}\n")


Status_Checking_Account: ['A11' 'A12' 'A14' 'A13']

Credit_History: ['A34' 'A32' 'A33' 'A30' 'A31']

Purpose: ['A43' 'A46' 'A42' 'A40' 'A41' 'A49' 'A44' 'A45' 'A410' 'A48']

Savings_Account_Bonds: ['A65' 'A61' 'A63' 'A64' 'A62']

Employment_Since: ['A75' 'A73' 'A74' 'A71' 'A72']

Personal_Status_Sex: ['A93' 'A92' 'A91' 'A94']

Other_Debtors_Guarantors: ['A101' 'A103' 'A102']

Property: ['A121' 'A122' 'A124' 'A123']

Other_Installment_Plans: ['A143' 'A141' 'A142']

Housing: ['A152' 'A153' 'A151']

Job: ['A173' 'A172' 'A174' 'A171']

Telephone: ['A192' 'A191']

Foreign_Worker: ['A201' 'A202']



In [40]:
# Code -> label lookup for the Status_Checking_Account column
# (status of the existing checking account).
status_checking_account_map = {
    "A11": "< 0 DM",
    "A12": "0 <= ... < 200 DM",
    "A13": ">= 200 DM / salary assignment",
    "A14": "no checking account",
}

# Code -> label lookup for the Credit_History column.
credit_history_map = {
    "A30": "no credits taken/ all paid",
    "A31": "all credits paid back duly",
    "A32": "existing credits paid back duly till now",
    "A33": "delay in paying off in the past",
    "A34": "critical account/ other credits existing",
}

# Code -> label lookup for the Purpose column (what the credit is for).
# 'A47' is defined here but is not among the codes observed in this column.
purpose_map = {
    "A40": "car (new)",
    "A41": "car (used)",
    "A42": "furniture/equipment",
    "A43": "radio/TV",
    "A44": "domestic appliances",
    "A45": "repairs",
    "A46": "education",
    "A47": "vacation",
    "A48": "retraining",
    "A49": "business",
    "A410": "others",
}

# Code -> label lookup for the Savings_Account_Bonds column.
savings_account_map = {
    "A61": "< 100 DM",
    "A62": "100 <= ... < 500 DM",
    "A63": "500 <= ... < 1000 DM",
    "A64": ">= 1000 DM",
    "A65": "unknown/ no savings account",
}

# Code -> label lookup for the Employment_Since column
# (length of present employment).
employment_since_map = {
    "A71": "unemployed",
    "A72": "< 1 year",
    "A73": "1 <= ... < 4 years",
    "A74": "4 <= ... < 7 years",
    "A75": ">= 7 years",
}

# Code -> label lookup for the Personal_Status_Sex column.
# 'A95' is defined here but is not among the codes observed in this column.
personal_status_sex_map = {
    "A91": "male : divorced/separated",
    "A92": "female : divorced/separated/married",
    "A93": "male : single",
    "A94": "male : married/widowed",
    "A95": "female : single",
}

# Code -> label lookup for the Other_Debtors_Guarantors column.
other_debtors_map = {"A101": "none", "A102": "co-applicant", "A103": "guarantor"}

# Code -> label lookup for the Property column.
property_map = {
    "A121": "real estate",
    "A122": "building society savings / life insurance",
    "A123": "car or other",
    "A124": "unknown / no property",
}

# Code -> label lookup for the Other_Installment_Plans column.
installment_plans_map = {"A141": "bank", "A142": "stores", "A143": "none"}

# Code -> label lookup for the Housing column.
housing_map = {"A151": "rent", "A152": "own", "A153": "for free"}

# Code -> label lookup for the Job column.
job_map = {
    "A171": "unemployed/unskilled - non-resident",
    "A172": "unskilled - resident",
    "A173": "skilled employee/official",
    "A174": "highly qualified/self-employed",
}

# Code -> label lookup for the Telephone column.
# Double quotes let the apostrophe in the label appear without escaping.
telephone_map = {
    "A191": "none",
    "A192": "yes, registered under the customer's name",
}

# Code -> label lookup for the Foreign_Worker column.
foreign_worker_map = {"A201": "yes", "A202": "no"}


In [41]:
# Decode the categorical 'A..' codes into readable labels, one column at a time.
# Each coded column is paired with the dictionary that decodes it.
column_label_maps = {
    'Status_Checking_Account': status_checking_account_map,
    'Credit_History': credit_history_map,
    'Purpose': purpose_map,
    'Savings_Account_Bonds': savings_account_map,
    'Employment_Since': employment_since_map,
    'Personal_Status_Sex': personal_status_sex_map,
    'Other_Debtors_Guarantors': other_debtors_map,
    'Property': property_map,
    'Other_Installment_Plans': installment_plans_map,
    'Housing': housing_map,
    'Job': job_map,
    'Telephone': telephone_map,
    'Foreign_Worker': foreign_worker_map,
}

for column, code_to_label in column_label_maps.items():
    # Series.map(dict) turns every value missing from the dict into NaN, so a
    # plain .map() would blank the column if this cell were run a second time
    # (the decoded labels are not keys). Accept both raw codes and labels that
    # are already decoded, and fail loudly on anything else instead of
    # silently producing NaN.
    accepted_values = set(code_to_label) | set(code_to_label.values())
    unexpected = set(X[column].dropna().unique()) - accepted_values
    if unexpected:
        raise ValueError(
            f"{column}: values without a label mapping: "
            f"{sorted(map(str, unexpected))}"
        )
    # Falling back to the value itself leaves already-decoded labels untouched.
    X.loc[:, column] = X[column].map(
        lambda value: code_to_label.get(value, value)
    )



In [42]:
# Display the feature frame to check that the codes were replaced by labels.
X

Unnamed: 0,Status_Checking_Account,Duration_Months,Credit_History,Purpose,Credit_Amount,Savings_Account_Bonds,Employment_Since,Installment_Rate,Personal_Status_Sex,Other_Debtors_Guarantors,Present_Residence_Since,Property,Age,Other_Installment_Plans,Housing,Number_Credits,Job,People_Liable,Telephone,Foreign_Worker
0,< 0 DM,6,critical account/ other credits existing,radio/TV,1169,unknown/ no savings account,>= 7 years,4,male : single,none,4,real estate,67,none,own,2,skilled employee/official,1,"yes, registered under the customer's name",yes
1,0 <= ... < 200 DM,48,existing credits paid back duly till now,radio/TV,5951,< 100 DM,1 <= ... < 4 years,2,female : divorced/separated/married,none,2,real estate,22,none,own,1,skilled employee/official,1,none,yes
2,no checking account,12,critical account/ other credits existing,education,2096,< 100 DM,4 <= ... < 7 years,2,male : single,none,3,real estate,49,none,own,1,unskilled - resident,2,none,yes
3,< 0 DM,42,existing credits paid back duly till now,furniture/equipment,7882,< 100 DM,4 <= ... < 7 years,2,male : single,guarantor,4,building society savings / life insurance,45,none,for free,1,skilled employee/official,2,none,yes
4,< 0 DM,24,delay in paying off in the past,car (new),4870,< 100 DM,1 <= ... < 4 years,3,male : single,none,4,unknown / no property,53,none,for free,2,skilled employee/official,2,none,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,no checking account,12,existing credits paid back duly till now,furniture/equipment,1736,< 100 DM,4 <= ... < 7 years,3,female : divorced/separated/married,none,4,real estate,31,none,own,1,unskilled - resident,1,none,yes
996,< 0 DM,30,existing credits paid back duly till now,car (used),3857,< 100 DM,1 <= ... < 4 years,4,male : divorced/separated,none,4,building society savings / life insurance,40,none,own,1,highly qualified/self-employed,1,"yes, registered under the customer's name",yes
997,no checking account,12,existing credits paid back duly till now,radio/TV,804,< 100 DM,>= 7 years,4,male : single,none,4,car or other,38,none,own,1,skilled employee/official,1,none,yes
998,< 0 DM,45,existing credits paid back duly till now,radio/TV,1845,< 100 DM,1 <= ... < 4 years,4,male : single,none,4,unknown / no property,23,none,for free,1,skilled employee/official,1,"yes, registered under the customer's name",yes


In [43]:
# Column dtypes and non-null counts after decoding.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Status_Checking_Account   1000 non-null   object
 1   Duration_Months           1000 non-null   int64 
 2   Credit_History            1000 non-null   object
 3   Purpose                   1000 non-null   object
 4   Credit_Amount             1000 non-null   int64 
 5   Savings_Account_Bonds     1000 non-null   object
 6   Employment_Since          1000 non-null   object
 7   Installment_Rate          1000 non-null   int64 
 8   Personal_Status_Sex       1000 non-null   object
 9   Other_Debtors_Guarantors  1000 non-null   object
 10  Present_Residence_Since   1000 non-null   int64 
 11  Property                  1000 non-null   object
 12  Age                       1000 non-null   int64 
 13  Other_Installment_Plans   1000 non-null   object
 14  Housing                  

In [44]:
# Summary statistics for the numeric columns.
X.describe()

Unnamed: 0,Duration_Months,Credit_Amount,Installment_Rate,Present_Residence_Since,Age,Number_Credits,People_Liable
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,20.903,3271.258,2.973,2.845,35.546,1.407,1.155
std,12.058814,2822.736876,1.118715,1.103718,11.375469,0.577654,0.362086
min,4.0,250.0,1.0,1.0,19.0,1.0,1.0
25%,12.0,1365.5,2.0,2.0,27.0,1.0,1.0
50%,18.0,2319.5,3.0,3.0,33.0,1.0,1.0
75%,24.0,3972.25,4.0,4.0,42.0,2.0,1.0
max,72.0,18424.0,4.0,4.0,75.0,4.0,2.0
