In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Generate sample data for 100 entries
np.random.seed(42)  # For reproducibility

# Generate IDs (20 unique clients, each with 5 months of data)
ids = np.repeat(np.arange(1, 21), 5)

# Generate months
months = pd.date_range(start='2023-01', periods=5, freq='ME').strftime('%Y-%m').tolist() * 20

# Generate locations
locations = np.random.choice(['urban', 'suburban', 'rural'], size=100)

# Generate active status
active_statuses = np.random.choice(['active', 'inactive'], size=100)

# Generate MFIs
mfis = np.random.choice(['MFI_A', 'MFI_B', 'MFI_C'], size=100)


In [3]:

# Assuming these weights and penalties are defined
active_status_weight = 10
location_change_penalty = 5
mfi_penalty = 3

# Calculate credit score function
def calculate_credit_score(group):
    score = 0
    active_months = group['active_status'].sum()
    
    # Rule: The longer the active status, the better.
    active_status_score = active_months * active_status_weight
    score += active_status_score
    
    # Rule: Location changes in one MFI more than 2 times in a year.
    location_change_penalty_score = 0
    for mfi, mfi_group in group.groupby('MFI'):
        location_changes = (mfi_group['location'] != mfi_group['location'].shift()).sum() - 1
        if location_changes > 2:
            location_change_penalty_score += location_change_penalty
    
    score -= location_change_penalty_score
    
    # Rule: Borrowing from multiple MFIs
    mfi_penalty_score = 0
    mfi_counts = group['MFI'].nunique()
    if mfi_counts > 1:
        mfi_penalty_score += (mfi_counts - 1) * mfi_penalty
    
    score -= mfi_penalty_score
    
    # Creating the score calculation detail string
    calculation_detail = f"({active_months}*{active_status_weight})+({0 if location_change_penalty_score == 0 else location_change_penalty_score // location_change_penalty}*{location_change_penalty})-({0 if mfi_penalty_score == 0 else (mfi_penalty_score // mfi_penalty)}*{mfi_penalty})"
    
    return pd.Series({'credit_score': score, 'score_calculation': calculation_detail})

# Example DataFrame
np.random.seed(42)
data_size = 100

df = pd.DataFrame({
    'id': np.random.randint(1, 21, data_size),
    'month': np.random.randint(1, 13, data_size),
    'active_status': np.random.choice([True, False], data_size),
    'location': np.random.choice(['Location1', 'Location2', 'Location3', 'Location4'], data_size),
    'MFI': np.random.choice(['MFI1', 'MFI2', 'MFI3', 'MFI4'], data_size)
})

# Group by 'id' and apply the calculate_credit_score function
df_grouped = df.groupby('id').apply(calculate_credit_score).reset_index()

print(df_grouped)


    id  credit_score   score_calculation
0    1            21  (3*10)+(0*5)-(3*3)
1    2             4  (1*10)+(0*5)-(2*3)
2    3            27  (3*10)+(0*5)-(1*3)
3    4            17  (2*10)+(0*5)-(1*3)
4    5             4  (1*10)+(0*5)-(2*3)
5    6             7  (1*10)+(0*5)-(1*3)
6    7            21  (3*10)+(0*5)-(3*3)
7    8            31  (4*10)+(0*5)-(3*3)
8    9            21  (3*10)+(0*5)-(3*3)
9   10            24  (3*10)+(0*5)-(2*3)
10  11            14  (2*10)+(0*5)-(2*3)
11  12            31  (4*10)+(0*5)-(3*3)
12  13             7  (1*10)+(0*5)-(1*3)
13  14            14  (2*10)+(0*5)-(2*3)
14  15            29  (4*10)+(1*5)-(2*3)
15  16            11  (2*10)+(0*5)-(3*3)
16  17            14  (2*10)+(0*5)-(2*3)
17  18            14  (2*10)+(0*5)-(2*3)
18  19            17  (2*10)+(0*5)-(1*3)
19  20            17  (2*10)+(0*5)-(1*3)


  df_grouped = df.groupby('id').apply(calculate_credit_score).reset_index()
