# Libraries Installation

In [None]:
pip install tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
pip install pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [688]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Dataset Pre-processing

In [792]:
data = pd.read_csv("german.data", header=None, delim_whitespace=True)

In [793]:
# add column names to the dataframe
column_names = ['status', 'duration', 'credit_history', 'purpose', 'credit_amount', 'savings_account',
                'employment_duration', 'installment_rate', 'status_sex', 'other_debtors', 'present_residence',
                'property', 'age', 'other_installment_plans', 'housing', 'number_credits', 'job', 'people_liable',
                'telephone', 'foreign_worker', 'class']
data.columns = column_names


In [794]:
# create a dictionary to map categorical values to numerical values
status_map = {'A11': 1, 'A12': 2, 'A13': 3, 'A14': 4}
credit_history_map = {'A30': 1, 'A31': 2, 'A32': 3, 'A33': 4, 'A34': 5}
purpose_map = {'A40': 1, 'A41': 2, 'A42': 3, 'A43': 4, 'A44': 5,
               'A45': 6, 'A46': 7, 'A48': 8, 'A49': 9, 'A410': 10}
savings_map = {'A61': 1, 'A62': 2, 'A63': 3, 'A64': 4, 'A65': 5}
employment_map = {'A71': 1, 'A72': 2, 'A73': 3, 'A74': 4, 'A75': 5}
status_sex_map = {'A91': 1, 'A92': 2, 'A93': 3, 'A94': 4, 'A95': 5}
other_debtors_map = {'A101': 1, 'A102': 2, 'A103': 3}
property_map = {'A121': 1, 'A122': 2, 'A123': 3, 'A124': 4}
other_installment_plans_map = {'A141': 1, 'A142': 2, 'A143': 3}
housing_map = {'A151': 1, 'A152': 2, 'A153': 3}
job_map = {'A171': 1, 'A172': 2, 'A173': 3, 'A174': 4}
telephone_map = {'A191': 0, 'A192': 1}
foreign_worker_map = {'A201': 0, 'A202': 1}

In [795]:
# replace categorical values with numerical values using the created dictionaries
data['status'] = data['status'].map(status_map)
data['credit_history'] = data['credit_history'].map(credit_history_map)
data['purpose'] = data['purpose'].map(purpose_map)
data['savings_account'] = data['savings_account'].map(savings_map)
data['employment_duration'] = data['employment_duration'].map(employment_map)
data['status_sex'] = data['status_sex'].map(status_sex_map)
data['other_debtors'] = data['other_debtors'].map(other_debtors_map)
data['property'] = data['property'].map(property_map)
data['other_installment_plans'] = data['other_installment_plans'].map(other_installment_plans_map)
data['housing'] = data['housing'].map(housing_map)
data['job'] = data['job'].map(job_map)
data['telephone'] = data['telephone'].map(telephone_map)
data['foreign_worker'] = data['foreign_worker'].map(foreign_worker_map)

In [796]:
data['class'] = data['class'].replace({1: 0, 2: 1})

In [797]:
data['class'] = data['class'].astype(int)

In [807]:
data.head()

Unnamed: 0,status,duration,credit_history,purpose,credit_amount,savings_account,employment_duration,installment_rate,status_sex,other_debtors,...,property,age,other_installment_plans,housing,number_credits,job,people_liable,telephone,foreign_worker,class
0,1,6,5,4,1169,5,5,4,3,1,...,1,67,3,2,2,3,1,1,0,0
1,2,48,3,4,5951,1,3,2,2,1,...,1,22,3,2,1,3,1,0,0,1
2,4,12,5,7,2096,1,4,2,3,1,...,1,49,3,2,1,2,2,0,0,0
3,1,42,3,3,7882,1,4,2,3,3,...,2,45,3,3,1,3,2,0,0,0
4,1,24,4,1,4870,1,3,3,3,1,...,4,53,3,3,2,3,2,0,0,1




In [799]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    data.drop('class', axis=1), data['class'], test_size=0.2)

In [800]:
# Standardize the input data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# XgBOOST

In [801]:
model = XGBClassifier(
    booster='gbtree',
    n_estimators=1000,
    max_depth=9,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=1,
    reg_alpha=0.1,
    reg_lambda=1,
    objective='binary:logistic',
    eval_metric='logloss',
    early_stopping_rounds=10
)

In [802]:
# Train the model
eval_set = [(X_test, y_test)]
model.fit(X_train, y_train, eval_set=eval_set, verbose=False)

# Performance Evaluation

In [803]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.3f}")
print(f"F1 Score: {f1:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")

Accuracy: 0.835
F1 Score: 0.718
Precision: 0.712
Recall: 0.724


# Save the model 

In [None]:
import pickle

# Save the model to disk
filename = 'xgboost_model.sav'
pickle.dump(model, open(filename, 'wb'))
