In [124]:
import numpy as np
import pandas as pd
import skfuzzy as fuzz
from skfuzzy import control as ctrl

In [125]:
synthetic_data = pd.read_csv('labelled_data.csv')
synthetic_data.head()

Unnamed: 0,wealth - salary,wealth - investment gain,wealth - rental income,wealth - business revenue,wealth - inheritance,wealth - others,fund - salary,fund - investment gain,fund - rental income,fund - business revenue,...,Annual Income,Adjusted Annual Income,Income Multiplier Based on Age,Wealth Indicator,Net Worth,Proposed Subscription Amount,Net Worth / Total Commitment,Income / Total Commitment,Risk Score,Will Default
0,False,True,False,False,False,False,False,False,True,False,...,151128.0,151128.0,3.0,50000,503384.0,29000,17.358069,5.21131,187,False
1,False,True,True,False,False,False,True,False,False,False,...,119520.0,119520.0,4.0,100000,578080.0,29000,19.933793,4.121379,232,False
2,True,True,False,False,False,False,False,False,False,False,...,116431.92,116431.92,4.0,50000,515727.68,25000,20.629107,4.657277,243,False
3,False,True,False,False,False,False,False,False,False,True,...,130478.4,130478.4,3.5,50000,506674.4,20000,25.33372,6.52392,190,False
4,False,False,True,False,False,False,True,False,False,False,...,139432.8,139432.8,3.5,50000,538014.8,22000,24.455218,6.337855,197,False


In [126]:
synthetic_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 28 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   wealth - salary                 1000 non-null   bool   
 1   wealth - investment gain        1000 non-null   bool   
 2   wealth - rental income          1000 non-null   bool   
 3   wealth - business revenue       1000 non-null   bool   
 4   wealth - inheritance            1000 non-null   bool   
 5   wealth - others                 1000 non-null   bool   
 6   fund - salary                   1000 non-null   bool   
 7   fund - investment gain          1000 non-null   bool   
 8   fund - rental income            1000 non-null   bool   
 9   fund - business revenue         1000 non-null   bool   
 10  fund - inheritance              1000 non-null   bool   
 11  fund - others                   1000 non-null   bool   
 12  Age                             100

In [127]:
age = ctrl.Antecedent(np.arange(18, 81, 1), 'age')
income = ctrl.Antecedent(np.arange(0, 1000001, 1000), 'income')  # Income from 0 to 1M
net_worth = ctrl.Antecedent(np.arange(0, 10000001, 10000), 'net_worth')  # Net worth from 0 to 10M
risk = ctrl.Consequent(np.arange(0, 101, 1), 'risk')

In [128]:
age['young'] = fuzz.trimf(age.universe, [18, 18, 35])
age['middle_aged'] = fuzz.trimf(age.universe, [30, 45, 60])
age['old'] = fuzz.trimf(age.universe, [55, 80, 80])
age['missing'] = fuzz.trimf(age.universe, [0, 0, 0])  # For missing values

income['low'] = fuzz.trimf(income.universe, [0, 0, 200000])
income['medium'] = fuzz.trimf(income.universe, [100000, 500000, 900000])
income['high'] = fuzz.trimf(income.universe, [800000, 1000000, 1000000])
income['missing'] = fuzz.trimf(income.universe, [0, 0, 0])  # Missing data

net_worth['low'] = fuzz.trimf(net_worth.universe, [0, 0, 2000000])
net_worth['medium'] = fuzz.trimf(net_worth.universe, [1000000, 5000000, 8000000])
net_worth['high'] = fuzz.trimf(net_worth.universe, [7000000, 10000000, 10000000])
net_worth['missing'] = fuzz.trimf(net_worth.universe, [0, 0, 0])  # Missing data


In [129]:
risk['low'] = fuzz.trimf(risk.universe, [0, 0, 50])
risk['moderate'] = fuzz.trimf(risk.universe, [25, 50, 75])
risk['high'] = fuzz.trimf(risk.universe, [50, 100, 100])

In [130]:
rule1 = ctrl.Rule(age['young'] & income['low'] & net_worth['low'], risk['high'])
rule2 = ctrl.Rule(age['middle_aged'] & income['medium'] & net_worth['medium'], risk['moderate'])
rule3 = ctrl.Rule(age['old'] & income['high'] & net_worth['high'], risk['low'])
rule4 = ctrl.Rule(age['old'] & income['low'], risk['high'])

In [131]:
rule_missing_income = ctrl.Rule(income['missing'], risk['moderate'])
rule_missing_net_worth = ctrl.Rule(net_worth['missing'], risk['moderate'])
rule_missing_age = ctrl.Rule(age['missing'], risk['moderate'])

In [132]:
risk_ctrl = ctrl.ControlSystem([rule1, rule2, rule3, rule4, rule_missing_income, rule_missing_net_worth, rule_missing_age])
risk_simulation = ctrl.ControlSystemSimulation(risk_ctrl)

In [147]:
def calculate_risk(row):
    try:
        # If data is missing, set the fuzzy system to the "missing" category
        if np.isnan(row['Age']):
            risk_simulation.input['age'] = 0  # Assign the "missing" membership function
        else:
            risk_simulation.input['age'] = row['Age']
        
        if np.isnan(row['Annual Income']):
            risk_simulation.input['income'] = 0  # Assign the "missing" membership function
        else:
            risk_simulation.input['income'] = row['Annual Income']
        
        if np.isnan(row['Net Worth']):
            risk_simulation.input['net_worth'] = 0  # Assign the "missing" membership function
        else:
            risk_simulation.input['net_worth'] = row['Net Worth']

        # Perform the fuzzy logic simulation
        risk_simulation.compute()

        # Return the computed risk
        return risk_simulation.output['risk']

    except KeyError as e:
        print(f"Error: {e}, possibly missing rules or incorrect input")
        return None

In [148]:
data = {
    'Age': [25, 40, np.nan, 65],
    'Annual Income': [50000, np.nan, 1000000, 700000],
    'Net Worth': [300000, 5000000, np.nan, 9000000]
}

synthetic_data = pd.DataFrame(data)
print("Initial DataFrame:\n", synthetic_data)

Initial DataFrame:
     Age  Annual Income  Net Worth
0  25.0        50000.0   300000.0
1  40.0            NaN  5000000.0
2   NaN      1000000.0        NaN
3  65.0       700000.0  9000000.0


In [149]:
synthetic_data['calculated_risk'] = synthetic_data.apply(calculate_risk, axis=1)

In [150]:
synthetic_data.head()

Unnamed: 0,Age,Annual Income,Net Worth,calculated_risk
0,25.0,50000.0,300000.0,81.331699
1,40.0,,5000000.0,50.0
2,,1000000.0,,50.0
3,65.0,700000.0,9000000.0,50.0


In [95]:
for index, row in synthetic_data.iterrows():
    age_value = row['Age']
    income_value = row['Annual Income']
    net_worth_value = row['Net Worth']

    print(age_value, income_value, net_worth_value)
    
    risk_simulation.input['age'] = age_value
    risk_simulation.input['income'] = income_value
    risk_simulation.input['net_worth'] = net_worth_value
    
    risk_simulation.compute()
    
    print(f"Index: {index}, Calculated Risk: {risk_simulation.output['risk']}")


43 151128.0 503384.0


KeyError: 'risk'

In [120]:
risk_simulation.input['age'] = 20
risk_simulation.input['income'] = 120
risk_simulation.input['net_worth'] = 500

In [121]:
risk_simulation.compute()

In [122]:
print(f"Calculated Risk: {risk_simulation.output['risk']}")

KeyError: 'risk'

In [45]:
print(risk_simulation)

<skfuzzy.control.controlsystem.ControlSystemSimulation object at 0x16fea4500>
