#### Catboost model

In [3]:
import pandas as pd
catboost_data = pd.read_csv("output_file.csv")
catboost_data.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,Loan_purpose,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,Personal,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,Personal,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,Business,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,Personal,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,Business,no


In [20]:
#renaming columns for easier understanding
catboost_data.rename(columns={'LP': 'loan_purpose'}, inplace=True)

# Renaming loan purpose acc balance and loan status columns columns
catboost_data.rename(columns={
    'LP': 'Loan_Purpose',
    'y': 'Loan_status',
    'balance':'acc_balance'
}, inplace=True)
# loan access = account balance multiply by 10
catboost_data['Loan_access'] = catboost_data['acc_balance'] * 10

#convert loan access column to float
# create a new column repayment amount which is the amount a customer will return after 
#borrowing a loan
catboost_data['Loan_access'] = catboost_data['Loan_access'].astype(float)
catboost_data['repayment_amount'] = catboost_data['Loan_access'] * 1.12

column_order = [
    'age', 'job', 'marital', 'education', 'default', 'acc_balance', 'housing', 
    'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays', 
    'previous', 'poutcome', 'Loan_purpose', 'Loan_access','repayment_amount','Loan_status'  # Loan status moved to the end
]

# Reorder the columns in the DataFrame
catboost_data = catboost_data[column_order]

model_df = catboost_data.drop(['day', 'duration', 'campaign', 'pdays','previous', 'contact','month', 'poutcome'], axis=1)


In [21]:
model_df.head()

Unnamed: 0,age,job,marital,education,default,acc_balance,housing,loan,Loan_purpose,Loan_access,repayment_amount,Loan_status
0,30,unemployed,married,primary,no,1787,no,no,Personal,17870.0,20014.4,no
1,33,services,married,secondary,no,4789,yes,yes,Personal,47890.0,53636.8,no
2,35,management,single,tertiary,no,1350,yes,no,Business,13500.0,15120.0,no
3,30,management,married,tertiary,no,1476,yes,yes,Personal,14760.0,16531.2,no
4,59,blue-collar,married,secondary,no,0,yes,no,Business,0.0,0.0,no


In [34]:
c_boost = model_df[['age','job','marital','education', 'default', 'acc_balance', 'housing', 'loan','Loan_access','repayment_amount','Loan_status']]
c_boost.head(5)

Unnamed: 0,age,job,marital,education,default,acc_balance,housing,loan,Loan_access,repayment_amount,Loan_status
0,30,unemployed,married,primary,no,1787,no,no,17870.0,20014.4,no
1,33,services,married,secondary,no,4789,yes,yes,47890.0,53636.8,no
2,35,management,single,tertiary,no,1350,yes,no,13500.0,15120.0,no
3,30,management,married,tertiary,no,1476,yes,yes,14760.0,16531.2,no
4,59,blue-collar,married,secondary,no,0,yes,no,0.0,0.0,no


In [46]:
from catboost import CatBoostClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

new_boost = c_boost
# Load and prepare your data
X = new_boost.drop('Loan_status', axis=1)
y = new_boost['Loan_status']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Assuming you know which columns are categorical (if any)
categorical_features_indices = [i for i, col in enumerate(X.columns) if X[col].dtype == 'object']

# Initialize CatBoost Classifier
model_catboost = CatBoostClassifier(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    cat_features=categorical_features_indices,
    verbose=10,
    auto_class_weights='Balanced'
)

# Train the model
model_catboost.fit(X_train, y_train, eval_set=(X_test, y_test))

# Predict the training set and test set
y_train_pred = model_catboost.predict(X_train)
y_test_pred = model_catboost.predict(X_test)

# Calculate and print training and testing accuracies
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"Training Accuracy: {train_accuracy*100:.2f}%")
print(f"Testing Accuracy: {test_accuracy*100:.2f}%")


0:	learn: 0.6880266	test: 0.6908433	best: 0.6908433 (0)	total: 58ms	remaining: 5.75s
10:	learn: 0.6529055	test: 0.6710635	best: 0.6710635 (10)	total: 312ms	remaining: 2.53s
20:	learn: 0.6397924	test: 0.6684595	best: 0.6682445 (18)	total: 605ms	remaining: 2.27s
30:	learn: 0.6268118	test: 0.6643286	best: 0.6643286 (30)	total: 1.36s	remaining: 3.02s
40:	learn: 0.6198378	test: 0.6636199	best: 0.6627287 (35)	total: 1.91s	remaining: 2.75s
50:	learn: 0.6147606	test: 0.6619870	best: 0.6619870 (50)	total: 2.12s	remaining: 2.03s
60:	learn: 0.6068178	test: 0.6626966	best: 0.6617586 (55)	total: 2.37s	remaining: 1.52s
70:	learn: 0.6024347	test: 0.6651355	best: 0.6617586 (55)	total: 2.6s	remaining: 1.06s
80:	learn: 0.5964414	test: 0.6683342	best: 0.6617586 (55)	total: 2.85s	remaining: 669ms
90:	learn: 0.5903443	test: 0.6688999	best: 0.6617586 (55)	total: 3.09s	remaining: 306ms
99:	learn: 0.5832356	test: 0.6681596	best: 0.6617586 (55)	total: 3.33s	remaining: 0us

bestTest = 0.661758639
bestIteration 

In [43]:
def get_user_response(prompt, expected_type='string'):
    """Prompt user for input and return the appropriate type based on expected_type."""
    while True:
        response = input(prompt + ": ")
        if expected_type == 'float':
            try:
                return float(response)
            except ValueError:
                print("Invalid input. Please enter a valid number.")
        elif expected_type == 'string':
            return response.strip()  # Use strip to remove any leading/trailing spaces
        else:
            return response

def collect_user_details():
    print("Hi! How may I help you?")
    service_request = get_user_response("Please type 'check loan status' to proceed or 'exit' to quit")

    if service_request.lower() == "check loan status":
        new_user = {
            'age': get_user_response("What is your age", 'float'),
            'job': get_user_response("What is your current job title"),
            'marital': get_user_response("What is your marital status"),
            'education': get_user_response("May I know your education"),
            'default': get_user_response("Have you defaulted any loan"),
            'acc_balance': get_user_response("Account balance", 'float'),
            'housing': get_user_response("Do you have a housing loan"),
            'loan': get_user_response("Do you have a loan currently"),
            'loan_purpose': get_user_response("Is this a Personal Loan or Business Loan?")
        }

        # Perform calculations
        new_user['Loan_access'] = 10 * new_user['acc_balance']
        new_user['repayment_amount'] = 1.12 * new_user['Loan_access']

        # Numerical value based on loan purpose, safely converting to lowercase
        loan_purpose_value = 1 if new_user['loan_purpose'].lower() == "personal" else 2 if new_user['loan_purpose'].lower() == "business" else 0
        new_user['loan_purpose_value'] = loan_purpose_value

        # Print an overview of the details entered by the client
        print("\nPlease review your provided details:")
        for key, value in new_user.items():
            print(f"{key}: {value}")

        return new_user
    else:
        print("Thank you for your time. Goodbye!")
        return None

# Example of how you might use the collect_user_details function
new_user = collect_user_details()
if new_user:
    # Assuming some model and DataFrame conversion here for the sake of example
    # Predict and provide further interactions based on the new_user details
    pass


Hi! How may I help you?


Please type 'check loan status' to proceed or 'exit' to quit:  check loan status
What is your age:  34
What is your current job title:  management
What is your marital status:  single
May I know your education:  secondary
Have you defaulted any loan:  no
Account balance:  2300
Do you have a housing loan:  noo
Do you have a loan currently:  no
Is this a Personal Loan or Business Loan?:  0



Please review your provided details:
age: 34.0
job: management
marital: single
education: secondary
default: no
acc_balance: 2300.0
housing: noo
loan: no
loan_purpose: 0
Loan_access: 23000.0
repayment_amount: 25760.000000000004
loan_purpose_value: 0


In [47]:
loan_status_prediction = model_catboost.predict(new_user_df)[0] 
print(f"Your current Loan Status is {loan_status_prediction}.")
print("Incase of further clarifications send and email to loanprohphets@co.le")

if loan_status_prediction == 'yes':
    print("Based on your current information,You qualify to proceed for a Loan request.")
    
    # Asking user for more information
    more_info = input("Do you need more Loan information? (yes/no) ")
    
    if more_info.lower() == 'yes':
        print("Your Loan requested earns an interest of 12% per annum. To get your Loan application form, send an email to loanprophets@co.le.")
else:
    print("Sorry, you do not qualify to proceed for a Loan application.Thank you for your time.Try again next time")

Your current Loan Status is yes.
Incase of further clarifications send and email to loanprohphets@co.le
Based on your current information,You qualify to proceed for a Loan request.


Do you need more Loan information? (yes/no)  yes


Your Loan requested earns an interest of 12% per annum. To get your Loan application form, send an email to loanprophets@co.le.


In [38]:
#save the model
model_catboost.save_model('final_model.cbm', format='cbm')