In [10]:
import random
import pandas as pd
import numpy as np

# Reference year used when computing how long a group has existed and how
# long a member has belonged to it.
current_year = 2025

# Value ranges that the dataset generator iterates over
employment_statuses = ['employed', 'unemployed']
categories = ['daily', 'weekly', 'monthly']
# Ordered from best to worst; each label maps to a factor in get_repayment_factor.
repayment_ratings = ['Excellent', 'Better', 'Good', 'Bad', 'Poor']
# 100 random ages drawn with replacement from 1..99 (np.random.randint
# excludes the upper bound), so the list may contain duplicates.
ages = list(np.random.randint(1, 100, size=100))

# Inclusive bounds for the randomly drawn total saving of a member.
total_saving_min = 1
total_saving_max = 10_000_000
guardian_options = [True, False]

def get_total_saving_times(category, times_per_period):
    """Return the total number of saving cycles for a saving schedule.

    Args:
        category: Saving frequency; one of 'daily', 'weekly' or 'monthly'.
        times_per_period: Number of savings made within one period of that
            frequency.

    Returns:
        ``times_per_period`` multiplied by the number of periods used for
        the frequency (365 for daily, 52 for weekly, 12 for monthly).

    Raises:
        ValueError: If ``category`` is not one of the supported frequencies.
            Previously this case silently returned ``None``, which only
            failed later in arithmetic far away from the real cause.
    """
    periods_by_category = {'daily': 365, 'weekly': 52, 'monthly': 12}
    if category not in periods_by_category:
        raise ValueError(
            f"Unknown saving category {category!r}; "
            f"expected one of {sorted(periods_by_category)}"
        )
    return times_per_period * periods_by_category[category]

def get_repayment_factor(rating):
    """Translate a repayment rating label into its numeric factor.

    Args:
        rating: One of 'Excellent', 'Better', 'Good', 'Bad' or 'Poor'.

    Returns:
        The factor for the label: 1.0, 0.8, 0.6, 0.4 or 0.2 respectively.

    Raises:
        KeyError: If ``rating`` is not one of the known labels.
    """
    labels = ('Excellent', 'Better', 'Good', 'Bad', 'Poor')
    factors = (1.0, 0.8, 0.6, 0.4, 0.2)
    factor_by_label = dict(zip(labels, factors))
    return factor_by_label[rating]

def calculate_loan_features(row):
    """Compute the saving rating and loan amounts for one member record.

    Args:
        row: Mapping that provides the keys 'TotalCurrentSaving',
            'UserSavingsMade', 'CompletedSavingCycles', 'TotalSavingCycles',
            'RecentLoanPaymentStatus', 'IkiminaCreatedYear', 'UserJoinedYear'
            and 'HasGuardian'.

    Returns:
        A dict with the keys 'SavingStatus', 'SavingRatingFactor',
        'SavingRatio', 'ExpectedLoan', 'BoostAmount' and 'AllowedLoan'.
        Numeric results are rounded to two decimals. A member whose total
        saving, savings made or completed cycles is zero or negative gets
        the 'Poor' rating and zero for every amount.

    Raises:
        KeyError: If a required key is missing from ``row`` or the repayment
            status is not a label known to ``get_repayment_factor``.
    """
    if row['TotalCurrentSaving'] <= 0 or row['UserSavingsMade'] <= 0 or row['CompletedSavingCycles'] <= 0:
        return {
            'SavingStatus': 'Poor',
            'SavingRatingFactor': 0.2,
            'SavingRatio': 0.0,
            'ExpectedLoan': 0.0,
            'BoostAmount': 0.0,
            'AllowedLoan': 0.0,
        }

    avg_saving = row['TotalCurrentSaving'] / max(row['UserSavingsMade'], 1)
    saving_ratio = row['UserSavingsMade'] / max(row['CompletedSavingCycles'], 1)

    # Project the savings forward: assume the member keeps saving at the same
    # ratio for the cycles that are still to come.
    remaining_cycles = row['TotalSavingCycles'] - row['CompletedSavingCycles']
    expected_additional_saves = remaining_cycles * saving_ratio
    expected_contribution_cycles = row['UserSavingsMade'] + expected_additional_saves
    expected_total_saving = avg_saving * expected_contribution_cycles

    # Determine saving rating. Bands are checked from best to worst and the
    # factor of a band equals its threshold. The top band uses `>=` instead of
    # `==` so that a ratio above 1.0 (more savings than completed cycles) is
    # rated "Excellent" rather than falling through to "Better".
    rating_bands = (
        (1.0, "Excellent"),
        (0.8, "Better"),
        (0.6, "Good"),
        (0.4, "Bad"),
    )
    rating_factor, user_rating = 0.2, "Poor"
    for threshold, label in rating_bands:
        if saving_ratio >= threshold:
            rating_factor, user_rating = threshold, label
            break

    expected_loan = expected_total_saving * rating_factor

    remain_times_rating = remaining_cycles / max(row['TotalSavingCycles'], 1)
    repayment_factor = get_repayment_factor(row['RecentLoanPaymentStatus'])

    # Membership duration rating: share of the group's lifetime (in years)
    # during which the member has belonged to it, capped at 1.0.
    ikimina_life_span = current_year - row['IkiminaCreatedYear']
    member_duration = current_year - row['UserJoinedYear']
    if ikimina_life_span <= 0:
        # Group created in the reference year (or later): avoid dividing by zero.
        membership_duration_rating = 1.0
    else:
        membership_duration_rating = min(member_duration / ikimina_life_span, 1.0)

    # NOTE(review): a member WITH a guardian gets the lower factor (0.5);
    # confirm that this direction is intended.
    guardian_factor = 0.5 if row['HasGuardian'] else 1.0

    # Final score (age/employment removed, weights redistributed).
    # The four weights add up to 1.0.
    total_score = (
        (remain_times_rating * 0.4444) +
        (repayment_factor * 0.3333) +
        (membership_duration_rating * 0.1667) +
        (guardian_factor * 0.0556)
    )

    # The boost is at most 50% of the expected loan, scaled by the score.
    max_boost = expected_loan * 0.50
    boost_amount = max_boost * total_score
    allowed_loan = expected_loan + boost_amount

    # Never lend more than twice the current saving, and never a negative amount.
    allowed_loan = min(allowed_loan, row['TotalCurrentSaving'] * 2)
    allowed_loan = max(allowed_loan, 0)

    return {
        'SavingStatus': user_rating,
        'SavingRatingFactor': rating_factor,
        'SavingRatio': round(saving_ratio, 2),
        'ExpectedLoan': round(expected_loan, 2),
        'BoostAmount': round(boost_amount, 2),
        'AllowedLoan': round(allowed_loan, 2),
    }


def _draw_saving_progress(total_cycles):
    """Randomly draw (total_saving, completed, savings_made) for one member."""
    # About 5% of the members are generated without any saving activity.
    if random.random() < 0.05:
        return 0, 0, 0

    total_saving = random.randint(total_saving_min, total_saving_max)
    # Between 10% and 100% of the cycles are completed, and between 10% and
    # 100% of the completed cycles actually received a saving.
    completed = int(total_cycles * random.uniform(0.1, 1.0))
    savings_made = int(completed * random.uniform(0.1, 1.0))
    return total_saving, completed, savings_made


def generate_all_combinations():
    """Build the synthetic member dataset for every attribute combination.

    For each saving category and each allowed number of savings per period,
    one row is produced for every combination of repayment rating, age,
    guardian flag and employment status. Saving progress and the join/creation
    years are drawn at random, and the loan features computed by
    ``calculate_loan_features`` are merged into each row.

    Returns:
        A pandas DataFrame with one row per generated member; 'MemberID'
        counts up from 1 in generation order.
    """
    from itertools import product

    # Allowed "savings per period" values for each category. Categories
    # without an entry here produce no rows.
    times_by_category = {
        'daily': [1],
        'weekly': range(1, 5),    # 4 values
        'monthly': range(1, 16),  # 15 values
    }

    data = []
    member_id = 1

    for category in categories:
        for times in times_by_category.get(category, ()):
            total_cycles = get_total_saving_times(category, times)

            for repayment, age, guardian, employment in product(
                repayment_ratings, ages, guardian_options, employment_statuses
            ):
                total_saving, completed, savings_made = _draw_saving_progress(total_cycles)

                # The group is created no later than the year the member joined.
                joined = random.randint(2016, 2024)
                created = random.randint(2015, joined)

                row = {
                    'MemberID': member_id,
                    'SavingFrequency': category,
                    'SavingTimesPerPeriod': times,
                    'TotalSavingCycles': total_cycles,
                    'CompletedSavingCycles': completed,
                    'UserSavingsMade': savings_made,
                    'TotalCurrentSaving': total_saving,
                    'IkiminaCreatedYear': created,
                    'UserJoinedYear': joined,
                    'EmploymentStatus': employment,
                    'HasGuardian': guardian,
                    'Age': age,
                    'RecentLoanPaymentStatus': repayment,
                }
                row.update(calculate_loan_features(row))
                data.append(row)
                member_id += 1

    return pd.DataFrame(data)

# Generate the dataset, write it to a CSV file in the current working
# directory (without the DataFrame index), then report the row count and
# show the first ten rows.
df = generate_all_combinations()
df.to_csv("ikimina_loan_Dataset3.csv", index=False)
print(f"✅ Dataset saved with {len(df)} rows")
print(df.head(10))


✅ Dataset saved with 40000 rows
   MemberID SavingFrequency  SavingTimesPerPeriod  TotalSavingCycles  \
0         1           daily                     1                365   
1         2           daily                     1                365   
2         3           daily                     1                365   
3         4           daily                     1                365   
4         5           daily                     1                365   
5         6           daily                     1                365   
6         7           daily                     1                365   
7         8           daily                     1                365   
8         9           daily                     1                365   
9        10           daily                     1                365   

   CompletedSavingCycles  UserSavingsMade  TotalCurrentSaving  \
0                    348               40             8945993   
1                    106               21    