In [10]:
# Import necessary libraries
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
import joblib

# Load the datasets
credit_card_df = pd.read_csv('/content/Credit_card.csv')
daily_household_transactions_df = pd.read_csv('/content/Daily Household Transactions.csv')
loan_approval_df = pd.read_csv('/content/loan_approval_dataset.csv')  # Load the loan approval dataset

# Define productive and non-productive categories
productive_categories = [
    'Transportation', 'Education', 'Healthcare', 'Groceries', 'Investment',
    'Utilities', 'Rent', 'Insurance', 'Savings', 'Household',
    'Public Provident Fund', 'Life Insurance', 'Interest', 'Tax refund',
    'Fixed Deposit', 'Recurring Deposit'
]
non_productive_categories = [
    'Entertainment', 'Dining out', 'Subscription', 'Luxury', 'Gambling',
    'Alcohol', 'Tobacco', 'Cosmetics', 'Fashion', 'Leisure',
    'Festivals', 'Apparel', 'Gift', 'Social Life', 'Tourism',
    'Beauty', 'Grooming'
]

# Redefine the function to label transactions
def label_transaction(row):
    if row['Category'] in productive_categories:
        return 'Productive'
    elif row['Category'] in non_productive_categories:
        return 'Non-Productive'
    else:
        return 'Unknown'

# Apply the labeling function
daily_household_transactions_df['Label'] = daily_household_transactions_df.apply(label_transaction, axis=1)

# Drop rows with 'Unknown' label
daily_household_transactions_df = daily_household_transactions_df[daily_household_transactions_df['Label'] != 'Unknown']

# Assign synthetic Ind_ID to daily household transactions
np.random.seed(42)  # for reproducibility
num_unique_ids = credit_card_df['Ind_ID'].nunique()
synthetic_ids = np.random.choice(credit_card_df['Ind_ID'].unique(), len(daily_household_transactions_df))

daily_household_transactions_df['Ind_ID'] = synthetic_ids

# Aggregate transaction data by synthetic Ind_ID
agg_transactions_df = daily_household_transactions_df.groupby('Ind_ID').agg({
    'Amount': ['sum', 'mean', 'std'],
    'Label': lambda x: (x == 'Productive').sum() / len(x)  # ratio of productive transactions
}).reset_index()

# Flatten the column hierarchy
agg_transactions_df.columns = ['Ind_ID', 'Total_Amount', 'Mean_Amount', 'Std_Amount', 'Productive_Ratio']

# Merge aggregated transaction data with credit card data
merged_df = pd.merge(credit_card_df, agg_transactions_df, on='Ind_ID', how='inner')

# Assuming there is no direct match to merge with the loan approval dataset, we will not merge but handle them separately

# Feature Engineering: Handle missing values and encode categorical variables

# Fill missing values for numerical columns with their mean
numerical_cols = ['Annual_income', 'Birthday_count', 'Std_Amount']
for col in numerical_cols:
    merged_df[col].fillna(merged_df[col].mean(), inplace=True)

# Fill missing values for categorical columns with the mode
categorical_cols = ['GENDER', 'Type_Occupation']
for col in categorical_cols:
    merged_df[col].fillna(merged_df[col].mode()[0], inplace=True)

# Encode categorical variables using one-hot encoding
merged_df = pd.get_dummies(merged_df, columns=['GENDER', 'Car_Owner', 'Propert_Owner', 'Type_Income', 'EDUCATION',
                                               'Marital_status', 'Housing_type', 'Type_Occupation'], drop_first=True)

# Select features and target variable
features = merged_df.drop(columns=['Ind_ID'])
target = merged_df['Productive_Ratio'].apply(lambda x: 1 if x > 0.5 else 0)  # Binary target based on productive ratio

# Split the data into training and testing sets using stratified sampling
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, stratify=target, random_state=42)

# Function to evaluate models using cross-validation
def evaluate_model(model, X_train, y_train):
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return np.mean(scores), np.std(scores)

# Initialize models
logistic_regression = LogisticRegression(max_iter=1000)
decision_tree = DecisionTreeClassifier(random_state=42)
random_forest = RandomForestClassifier(random_state=42)

# Evaluate models
logistic_regression_score = evaluate_model(logistic_regression, X_train, y_train)
decision_tree_score = evaluate_model(decision_tree, X_train, y_train)
random_forest_score = evaluate_model(random_forest, X_train, y_train)

# Print evaluation scores
print("Logistic Regression Score:", logistic_regression_score)
print("Decision Tree Score:", decision_tree_score)
print("Random Forest Score:", random_forest_score)

# Train the best performing model (Random Forest in this case)
best_model = random_forest
best_model.fit(X_train, y_train)

# Feature importance analysis
feature_importances = best_model.feature_importances_
features_df = pd.DataFrame({'Feature': X_train.columns, 'Importance': feature_importances})
features_df = features_df.sort_values(by='Importance', ascending=False)

print(features_df.head(10))

# Function to classify financial status, loan eligibility, and suggest a dynamic step-by-step plan
def classify_financial_status_and_suggest_plan(model):
    input_data = {
        'Annual_income': annual_income.value,
        'Birthday_count': birthday_count.value,
        'Employed_days': employed_days.value,
        'Mobile_phone': int(mobile_phone.value),
        'Work_Phone': int(work_phone.value),
        'Phone': int(phone.value),
        'EMAIL_ID': int(email_id.value),
        'Family_Members': family_members.value,
        'Total_Amount': total_amount.value,
        'Mean_Amount': mean_amount.value,
        'Std_Amount': std_amount.value,
        'Productive_Ratio': productive_ratio.value,
        'GENDER_F': int(gender_f.value),
        'Car_Owner_Y': int(car_owner_y.value),
        'Propert_Owner_Y': int(propert_owner_y.value),
        'Type_Income_' + type_income.value: 1,
        'EDUCATION_' + education.value: 1,
        'Marital_status_' + marital_status.value: 1,
        'Housing_type_' + housing_type.value: 1,
        'Type_Occupation_' + type_occupation.value: 1
    }

    # Ensure all necessary columns are present
    missing_cols = set(features.columns) - set(input_data.keys())
    for col in missing_cols:
        input_data[col] = 0

    input_df = pd.DataFrame([input_data])

    # Reorder columns to match the training data
    input_df = input_df[features.columns]

    # Predict using the trained model
    prediction = model.predict(input_df)
    stability = 'Financially Stable' if prediction[0] == 1 else 'Not Financially Stable'

    # Determine loan eligibility and suggested loan amount
    cibil_score = cibil_score_input.value
    bank_assets_value = bank_assets_value_input.value
    loan_eligibility = 'Eligible' if stability == 'Financially Stable' and cibil_score > 650 else 'Not Eligible'
    suggested_loan_amount = 0
    if loan_eligibility == 'Eligible':
        suggested_loan_amount = (0.2 * annual_income.value + 0.5 * bank_assets_value) / 2

    # Generate a dynamic step-by-step financial plan
    steps = []
    if stability == 'Not Financially Stable':
        steps.append("1. **Reduce Non-Productive Expenses:** Focus on cutting down spending in non-essential categories.")
        if productive_ratio.value < 0.3:
            steps.append("2. **Increase Productive Spending:** Ensure essential needs like healthcare and education are prioritized.")
        if annual_income.value < 30000:
            steps.append("3. **Increase Income:** Consider strategies like upskilling, taking up a side job, or seeking a raise.")
        if bank_assets_value < 5000:
            steps.append("4. **Build Savings:** Start by setting aside a small portion of your income each month to build an emergency fund.")
        if cibil_score < 650:
            steps.append("5. **Improve Credit Score:** Pay off outstanding debts, avoid late payments, and reduce credit utilization.")
        steps.append("6. **Track and Monitor:** Regularly review your expenses and savings. Use budgeting tools or apps to keep track of your financial progress.")

    else:
        steps.append("1. **Maintain Financial Stability:** Continue with your current financial habits to maintain stability.")
        if cibil_score < 700:
            steps.append("2. **Improve Credit Score:** Even though you are financially stable, a higher credit score can provide better loan options. Consider reducing credit card balances and ensuring timely payments.")
        if total_amount.value > annual_income.value * 0.5:
                        steps.append("3. **Optimize Spending:** Your current expenses are over half of your income. Consider optimizing your spending to ensure more is directed towards savings and investments.")
        steps.append("4. **Invest for the Future:** Explore investment options like retirement accounts, mutual funds, or low-risk savings plans to grow your wealth.")
        steps.append("5. **Plan for Long-Term Goals:** Start planning for significant financial goals such as buying a house, funding education, or retirement.")

    # Convert the steps list into a readable format
    plan_text = "\n".join(steps)

    # Return the classification, loan eligibility, suggested loan amount, and the personalized plan
    return stability, loan_eligibility, suggested_loan_amount, plan_text

# Create input widgets
annual_income = widgets.FloatText(description='Annual Income:')
birthday_count = widgets.IntText(description='Birthday Count:')
employed_days = widgets.IntText(description='Employed Days:')
mobile_phone = widgets.Checkbox(description='Mobile Phone:')
work_phone = widgets.Checkbox(description='Work Phone:')
phone = widgets.Checkbox(description='Phone:')
email_id = widgets.Checkbox(description='Email ID:')
family_members = widgets.IntText(description='Family Members:')
total_amount = widgets.FloatText(description='Total Amount:')
mean_amount = widgets.FloatText(description='Mean Amount:')
std_amount = widgets.FloatText(description='Std Amount:')
productive_ratio = widgets.FloatText(description='Productive Ratio:')
gender_f = widgets.Checkbox(description='Female:')
car_owner_y = widgets.Checkbox(description='Car Owner:')
propert_owner_y = widgets.Checkbox(description='Property Owner:')
type_income = widgets.Dropdown(options=['Commercial associate', 'Pensioner', 'State servant', 'Student'], description='Type Income:')
education = widgets.Dropdown(options=['Higher education', 'Incomplete higher', 'Secondary / secondary special'], description='Education:')
marital_status = widgets.Dropdown(options=['Married', 'Single / not married', 'Widow / Widower'], description='Marital Status:')
housing_type = widgets.Dropdown(options=['House / apartment', 'Municipal apartment', 'Office apartment'], description='Housing Type:')
type_occupation = widgets.Dropdown(options=['Management', 'Laborers', 'Private service staff', 'Sales staff'], description='Type Occupation:')
cibil_score_input = widgets.IntText(description='CIBIL Score:')
bank_assets_value_input = widgets.FloatText(description='Bank Assets Value:')

# Display input widgets
display(annual_income, birthday_count, employed_days, mobile_phone, work_phone, phone, email_id, family_members,
        total_amount, mean_amount, std_amount, productive_ratio, gender_f, car_owner_y, propert_owner_y, type_income,
        education, marital_status, housing_type, type_occupation, cibil_score_input, bank_assets_value_input)

# Button to classify the financial status, loan eligibility, and suggest a dynamic step-by-step plan
classify_button = widgets.Button(description="Classify and Suggest Plan")

# Function to handle button click
def on_classify_button_clicked(b):
    stability, loan_eligibility, suggested_loan_amount, plan_text = classify_financial_status_and_suggest_plan(best_model)
    print(f"The financial status is classified as: {stability}")
    print(f"Loan Eligibility: {loan_eligibility}")
    if loan_eligibility == 'Eligible':
        print(f"Suggested Loan Amount: {suggested_loan_amount}")
    print("Personalized Financial Plan:")
    print(plan_text)

classify_button.on_click(on_classify_button_clicked)

# Display the button
display(classify_button)

# Save the model to a file
model_filename = '/content/model.pkl'
joblib.dump(best_model, model_filename)
print(f"Model saved to {model_filename}")



Logistic Regression Score: (0.8297544260422616, 0.004416605945265355)
Decision Tree Score: (1.0, 0.0)
Random Forest Score: (0.9980392156862745, 0.0039215686274509665)
             Feature  Importance
12  Productive_Ratio    0.635423
9       Total_Amount    0.052490
10       Mean_Amount    0.047919
3      Employed_days    0.032715
2     Birthday_count    0.032046
1      Annual_income    0.025530
11        Std_Amount    0.023457
8     Family_Members    0.012944
0           CHILDREN    0.010179
15   Propert_Owner_Y    0.008373


FloatText(value=0.0, description='Annual Income:')

IntText(value=0, description='Birthday Count:')

IntText(value=0, description='Employed Days:')

Checkbox(value=False, description='Mobile Phone:')

Checkbox(value=False, description='Work Phone:')

Checkbox(value=False, description='Phone:')

Checkbox(value=False, description='Email ID:')

IntText(value=0, description='Family Members:')

FloatText(value=0.0, description='Total Amount:')

FloatText(value=0.0, description='Mean Amount:')

FloatText(value=0.0, description='Std Amount:')

FloatText(value=0.0, description='Productive Ratio:')

Checkbox(value=False, description='Female:')

Checkbox(value=False, description='Car Owner:')

Checkbox(value=False, description='Property Owner:')

Dropdown(description='Type Income:', options=('Commercial associate', 'Pensioner', 'State servant', 'Student')…

Dropdown(description='Education:', options=('Higher education', 'Incomplete higher', 'Secondary / secondary sp…

Dropdown(description='Marital Status:', options=('Married', 'Single / not married', 'Widow / Widower'), value=…

Dropdown(description='Housing Type:', options=('House / apartment', 'Municipal apartment', 'Office apartment')…

Dropdown(description='Type Occupation:', options=('Management', 'Laborers', 'Private service staff', 'Sales st…

IntText(value=0, description='CIBIL Score:')

FloatText(value=0.0, description='Bank Assets Value:')

Button(description='Classify and Suggest Plan', style=ButtonStyle())

Model saved to /content/model.pkl
