# Credit Score Classification with Fuzzy Mamdani and Sugeno

### Anggota Kelompok

- Ahmad Farhan QF
- Rafif Muhammad

### Penjelasan Tugas

1.	Untuk pengerjaan tugas besar tersebut, mahasiswa diharapkan setidaknya melakukan hal-hal berikut:
- Prapemrosesan data untuk memastikan kualitas dataset, seperti pemilihan fitur, penanganan missing value, dll.
- Mengimplementasikan Fuzzy System menggunakan metode Mamdani dan Sugeno
- Mengevaluasi kinerja Fuzzy System metode Mamdani dan Sugeno pada dataset studi kasus. Gunakan metrik evaluasi seperti akurasi, F1-score, atau metrik lain yang sesuai. Bandingkan kinerja metode Mamdani dan Sugeno, dan lakukan analisis. 

2. Bahasa pemrograman yang digunakan adalah Python. Jika mahasiswa membuat Fuzzy System tanpa library maka akan menjadi nilai tambah dalam Tugas Besar ini.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score
import skfuzzy as fuzz
from skfuzzy import control

### Preprosessing

In [2]:
data = pd.read_csv('Credit_Score_Classification_Dataset.csv')

In [3]:
data.head()

Unnamed: 0,Age,Gender,Income,Education,Marital_Status,Number_of_Children,Home_Ownership,Credit_Score
0,25,Female,50000,Bachelor's Degree,Single,0,Rented,High
1,30,Male,100000,Master's Degree,Married,2,Owned,High
2,35,Female,75000,Doctorate,Married,1,Owned,High
3,40,Male,125000,High School Diploma,Single,0,Owned,High
4,45,Female,100000,Bachelor's Degree,Married,3,Owned,High


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164 entries, 0 to 163
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Age                 164 non-null    int64 
 1   Gender              164 non-null    object
 2   Income              164 non-null    int64 
 3   Education           164 non-null    object
 4   Marital_Status      164 non-null    object
 5   Number_of_Children  164 non-null    int64 
 6   Home_Ownership      164 non-null    object
 7   Credit_Score        164 non-null    object
dtypes: int64(3), object(5)
memory usage: 10.4+ KB


In [5]:
data.isnull().sum()

Age                   0
Gender                0
Income                0
Education             0
Marital_Status        0
Number_of_Children    0
Home_Ownership        0
Credit_Score          0
dtype: int64

In [6]:
print(f'Data Duplikat Sebanyak : {data.duplicated().sum()}')

data_duplikat = data[data.duplicated(keep=False)]

data_duplikat

Data Duplikat Sebanyak : 62


Unnamed: 0,Age,Gender,Income,Education,Marital_Status,Number_of_Children,Home_Ownership,Credit_Score
29,25,Female,55000,Bachelor's Degree,Single,0,Rented,Average
30,30,Male,105000,Master's Degree,Married,2,Owned,High
31,35,Female,80000,Doctorate,Married,1,Owned,High
32,40,Male,130000,High School Diploma,Single,0,Owned,High
33,45,Female,105000,Bachelor's Degree,Married,3,Owned,High
...,...,...,...,...,...,...,...,...
159,29,Female,27500,High School Diploma,Single,0,Rented,Low
160,34,Male,47500,Associate's Degree,Single,0,Rented,Average
161,39,Female,62500,Bachelor's Degree,Married,2,Owned,High
162,44,Male,87500,Master's Degree,Single,0,Owned,High


In [7]:
list_data_object = list(data.select_dtypes(include=['object']).columns)

list_data_object

['Gender', 'Education', 'Marital_Status', 'Home_Ownership', 'Credit_Score']

In [8]:
for list in list_data_object:
    print(f'{list} : {data[list].unique()}')

Gender : ['Female' 'Male']
Education : ["Bachelor's Degree" "Master's Degree" 'Doctorate' 'High School Diploma'
 "Associate's Degree"]
Marital_Status : ['Single' 'Married']
Home_Ownership : ['Rented' 'Owned']
Credit_Score : ['High' 'Average' 'Low']


#### Encoded Data

In [9]:
# Encode categorical variables
def encode_data(df):
    df_encoded = df.copy()
    
    df_encoded['Gender'] = df_encoded['Gender'].map({'Female': 0, 'Male': 1})
    
    edu_map = {'High School Diploma': 0, "Bachelor's Degree": 1, "Master's Degree": 2, 'Doctorate': 3}
    df_encoded['Education'] = df_encoded['Education'].map(edu_map)
    
    df_encoded['Marital_Status'] = df_encoded['Marital_Status'].map({'Single': 0, 'Married': 1})
    
    df_encoded['Home_Ownership'] = df_encoded['Home_Ownership'].map({'Rented': 0, 'Owned': 1})
    
    credit_map = {'Low': 0, 'Average': 1, 'High': 2}
    df_encoded['Credit_Score'] = df_encoded['Credit_Score'].map(credit_map)
    
    return df_encoded

encoded_data = encode_data(data)

encoded_data.head()


Unnamed: 0,Age,Gender,Income,Education,Marital_Status,Number_of_Children,Home_Ownership,Credit_Score
0,25,0,50000,1.0,0,0,0,2
1,30,1,100000,2.0,1,2,1,2
2,35,0,75000,3.0,1,1,1,2
3,40,1,125000,0.0,0,0,1,2
4,45,0,100000,1.0,1,3,1,2


In [10]:
encoded_data.isnull().sum()

Age                    0
Gender                 0
Income                 0
Education             25
Marital_Status         0
Number_of_Children     0
Home_Ownership         0
Credit_Score           0
dtype: int64

In [11]:
encoded_data[encoded_data.isnull().any(axis=1)]

Unnamed: 0,Age,Gender,Income,Education,Marital_Status,Number_of_Children,Home_Ownership,Credit_Score
6,26,0,40000,,0,0,0,1
13,32,1,55000,,0,0,0,1
18,28,0,30000,,0,0,0,0
23,53,1,115000,,1,0,1,2
25,34,1,45000,,0,0,0,1
35,26,0,45000,,0,0,0,1
42,32,1,57500,,0,0,0,1
47,28,0,32500,,0,0,0,0
52,53,1,122500,,1,0,1,2
54,34,1,47500,,0,0,0,1


#### Fuzzy Mamdani by own

In [12]:
def fuzzify_income(income):
    """Returns membership degree for low, medium, high income"""
    low = max(0, min(1, (60000 - income) / 30000))
    medium = max(0, min((income - 40000) / 20000, (100000 - income) / 20000))
    high = max(0, min(1, (income - 80000) / 20000))
    return {'low': low, 'medium': medium, 'high': high}

def fuzzify_age(age):
    """Returns membership degree for young, middle, old"""
    young = max(0, min(1, (30 - age) / 10))
    middle = max(0, min((age - 25) / 10, (45 - age) / 10))
    old = max(0, min(1, (age - 35) / 10))
    return {'young': young, 'middle': middle, 'old': old}

def inference_mamdani(income_fuzzy, age_fuzzy):
    """Define simple fuzzy rules and apply Mamdani inference"""
    # Rule base: use min() for AND, max() for OR (Mamdani)
    rules = []
    # Example rules
    rules.append(('high', min(income_fuzzy['high'], age_fuzzy['middle'])))
    rules.append(('medium', min(income_fuzzy['medium'], age_fuzzy['middle'])))
    rules.append(('low', min(income_fuzzy['low'], age_fuzzy['young'])))
    rules.append(('medium', min(income_fuzzy['medium'], age_fuzzy['young'])))
    rules.append(('high', min(income_fuzzy['high'], age_fuzzy['old'])))

    # Aggregation
    agg = {'low': 0, 'medium': 0, 'high': 0}
    for label, value in rules:
        agg[label] = max(agg[label], value)

    return agg

def defuzzify(agg):
    """Centroid method for defuzzification (simplified)"""
    # Assign crisp values: low=0, medium=1, high=2
    numerator = agg['low'] * 0 + agg['medium'] * 1 + agg['high'] * 2
    denominator = agg['low'] + agg['medium'] + agg['high']
    if denominator == 0:
        return 1  # default to medium if no rule fired
    return numerator / denominator

# Apply Mamdani fuzzy system to all rows in dataset
predicted_scores_mamdani = []

for idx, row in encoded_data.iterrows():
    age_f = fuzzify_age(row['Age'])
    income_f = fuzzify_income(row['Income'])
    agg = inference_mamdani(income_f, age_f)
    crisp_output = defuzzify(agg)
    # Convert output to discrete label (0, 1, 2)
    if crisp_output < 0.5:
        label = 0  # Low
    elif crisp_output < 1.5:
        label = 1  # Medium
    else:
        label = 2  # High
    predicted_scores_mamdani.append(label)

# Add predictions to DataFrame
encoded_data['Predicted_Mamdani'] = predicted_scores_mamdani
encoded_data[['Credit_Score', 'Predicted_Mamdani']]


Unnamed: 0,Credit_Score,Predicted_Mamdani
0,2,1
1,2,2
2,2,1
3,2,2
4,2,2
...,...,...
159,0,0
160,1,1
161,2,1
162,2,2


#### Fuzzy Mamdani but with library skfuzzy

In [13]:
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl

# 1. DEFINISI VARIABEL FUZZY
age = ctrl.Antecedent(np.arange(20, 61, 1), 'age')
income = ctrl.Antecedent(np.arange(20000, 150001, 1000), 'income')
credit_score = ctrl.Consequent(np.arange(0, 3.01, 0.01), 'credit_score')

# 2. DEFINISI FUNGSI KEANGGOTAAN
age['young'] = fuzz.trimf(age.universe, [20, 25, 35])
age['middle'] = fuzz.trimf(age.universe, [30, 40, 50])
age['old'] = fuzz.trimf(age.universe, [45, 55, 60])

income['low'] = fuzz.trimf(income.universe, [20000, 40000, 60000])
income['medium'] = fuzz.trimf(income.universe, [50000, 75000, 100000])
income['high'] = fuzz.trimf(income.universe, [90000, 120000, 150000])

credit_score['low'] = fuzz.trimf(credit_score.universe, [0, 0, 1])
credit_score['medium'] = fuzz.trimf(credit_score.universe, [0, 1, 2])
credit_score['high'] = fuzz.trimf(credit_score.universe, [1, 2, 2])

# 3. DEFINISI RULES
rule1 = ctrl.Rule(income['high'] & age['middle'], credit_score['high'])
rule2 = ctrl.Rule(income['medium'] & age['middle'], credit_score['medium'])
rule3 = ctrl.Rule(income['low'] & age['young'], credit_score['low'])
rule4 = ctrl.Rule(income['medium'] & age['young'], credit_score['medium'])
rule5 = ctrl.Rule(income['high'] & age['old'], credit_score['high'])

# 4. SISTEM KONTROL
credit_ctrl = ctrl.ControlSystem([rule1, rule2, rule3, rule4, rule5])
credit_simulator = ctrl.ControlSystemSimulation(credit_ctrl)

# 5. FUNGSI DISKRETISASI OUTPUT
def discretize_output(value):
    if value < 0.5:
        return 0  # Low
    elif value < 1.5:
        return 1  # Medium
    else:
        return 2  # High

# 6. PREDIKSI UNTUK SEMUA DATA
predicted_scores_mamdani_skfuzzy = []

for idx, row in encoded_data.iterrows():
    credit_simulator.input['age'] = row['Age']
    credit_simulator.input['income'] = row['Income']
    try:
        credit_simulator.compute()
        score = credit_simulator.output['credit_score']
        label = discretize_output(score)
    except:
        label = 1  # default to medium if error
    predicted_scores_mamdani_skfuzzy.append(label)

# 7. SIMPAN KE DATAFRAME
encoded_data['Predicted_Mamdani_SKFUZZY'] = predicted_scores_mamdani_skfuzzy


#### Fuzzy Sugeno by Own

In [14]:
# 1. Fuzzifikasi variabel input: Income dan Age
def fuzzify_income(income):
    """Membership degrees for income: low, medium, high"""
    low = max(0, min(1, (60000 - income) / 30000))
    medium = max(0, min((income - 40000) / 20000, (100000 - income) / 20000))
    high = max(0, min(1, (income - 80000) / 20000))
    return {'low': low, 'medium': medium, 'high': high}

def fuzzify_age(age):
    """Membership degrees for age: young, middle, old"""
    young = max(0, min(1, (30 - age) / 10))
    middle = max(0, min((age - 25) / 10, (45 - age) / 10))
    old = max(0, min(1, (age - 35) / 10))
    return {'young': young, 'middle': middle, 'old': old}

# 2. Inferensi Sugeno: Setiap rule menghasilkan output linier (z) dan bobot firing (w)
def inference_sugeno(income_val, age_val, income_fuzzy, age_fuzzy):
    rules = []

    # Rule 1: Income high & Age middle
    w1 = min(income_fuzzy['high'], age_fuzzy['middle'])
    z1 = 0.00001 * income_val + 0.02 * age_val + 0.5
    rules.append((w1, z1))

    # Rule 2: Income medium & Age middle
    w2 = min(income_fuzzy['medium'], age_fuzzy['middle'])
    z2 = 0.00001 * income_val + 0.01 * age_val + 0.3
    rules.append((w2, z2))

    # Rule 3: Income low & Age young
    w3 = min(income_fuzzy['low'], age_fuzzy['young'])
    z3 = 0.000005 * income_val + 0.01 * age_val + 0.2
    rules.append((w3, z3))

    # Rule 4: Income medium & Age young
    w4 = min(income_fuzzy['medium'], age_fuzzy['young'])
    z4 = 0.000007 * income_val + 0.015 * age_val + 0.3
    rules.append((w4, z4))

    # Rule 5: Income high & Age old
    w5 = min(income_fuzzy['high'], age_fuzzy['old'])
    z5 = 0.00001 * income_val + 0.015 * age_val + 0.6
    rules.append((w5, z5))

    # 3. Agregasi: Hitung rata-rata tertimbang (weighted average)
    weighted_sum = sum(weight * output for weight, output in rules)
    total_weight = sum(weight for weight, _ in rules)

    if total_weight == 0:
        return 1.0  # default: medium
    return weighted_sum / total_weight

# 4. Konversi output z menjadi label diskret
def discretize_output(output):
    if output < 0.5:
        return 0  # Low
    elif output < 1.5:
        return 1  # Medium
    else:
        return 2  # High

# 5. Terapkan ke seluruh data
predicted_scores_sugeno = []

for idx, row in encoded_data.iterrows():
    age_val = row['Age']
    income_val = row['Income']
    
    age_f = fuzzify_age(age_val)
    income_f = fuzzify_income(income_val)

    output = inference_sugeno(income_val, age_val, income_f, age_f)
    label = discretize_output(output)

    predicted_scores_sugeno.append(label)

# Tambahkan hasil ke DataFrame
encoded_data['Predicted_Sugeno'] = predicted_scores_sugeno
encoded_data[['Credit_Score', 'Predicted_Sugeno']]


Unnamed: 0,Credit_Score,Predicted_Sugeno
0,2,1
1,2,2
2,2,1
3,2,2
4,2,2
...,...,...
159,0,1
160,1,1
161,2,1
162,2,2


##### Sugeno didn't have any way to use library

#### Evaluasi

In [15]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Mapping label asli ke angka
true_labels = encoded_data['Credit_Score'].map({'Low': 0, 'Medium': 1, 'High': 2}).values
pred_mamdani_scratch = encoded_data['Predicted_Mamdani'].values
pred_mamdani_skfuzzy = encoded_data['Predicted_Mamdani_SKFUZZY'].values
pred_sugeno = encoded_data['Predicted_Sugeno'].values

# Hitung metrik
def get_metrics(true, pred):
    acc = accuracy_score(true, pred)
    report = classification_report(true, pred, output_dict=True)
    cm = confusion_matrix(true, pred)
    return acc, report, cm

acc1, report1, cm1 = get_metrics(true_labels, pred_mamdani_scratch)
acc2, report2, cm2 = get_metrics(true_labels, pred_mamdani_skfuzzy)
acc3, report3, cm3 = get_metrics(true_labels, pred_sugeno)

# Visualisasi confusion matrix
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

for ax, cm, title in zip(axes, [cm1, cm2, cm3], 
                         ['Mamdani Manual', 'Mamdani skfuzzy', 'Sugeno']):
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix: {title}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()

# Ringkasan metrik
for name, acc, report in zip(
    ['Mamdani Manual', 'Mamdani skfuzzy', 'Sugeno'],
    [acc1, acc2, acc3],
    [report1, report2, report3]
):
    print(f"\n{name} Accuracy: {acc:.4f}")
    print("Macro Avg Precision:", round(report['macro avg']['precision'], 4))
    print("Macro Avg Recall:", round(report['macro avg']['recall'], 4))
    print("Macro Avg F1-score:", round(report['macro avg']['f1-score'], 4))


  return x.astype(dtype, copy=copy, casting=casting)


ValueError: Input y_true contains NaN.

#### Visualisasi

In [None]:
# --- Bar Chart untuk Perbandingan Metrik ---

metrics_names = ['Accuracy', 'Precision', 'Recall', 'F1-score']
mamdani_vals = [acc1, report1['macro avg']['precision'], report1['macro avg']['recall'], report1['macro avg']['f1-score']]
skfuzzy_vals = [acc2, report2['macro avg']['precision'], report2['macro avg']['recall'], report2['macro avg']['f1-score']]
sugeno_vals = [acc3, report3['macro avg']['precision'], report3['macro avg']['recall'], report3['macro avg']['f1-score']]

x = np.arange(len(metrics_names))
width = 0.25

fig, ax = plt.subplots(figsize=(10, 6))
rects1 = ax.bar(x - width, mamdani_vals, width, label='Mamdani Manual')
rects2 = ax.bar(x, skfuzzy_vals, width, label='Mamdani skfuzzy')
rects3 = ax.bar(x + width, sugeno_vals, width, label='Sugeno')

ax.set_ylabel('Scores')
ax.set_title('Comparison of Evaluation Metrics')
ax.set_xticks(x)
ax.set_xticklabels(metrics_names)
ax.legend()
plt.ylim(0, 1.1)
plt.show()


# --- Scatterplot Income vs Age, warna berdasarkan kelas asli dan bentuk marker berdasarkan prediksi Sugeno ---

# Mapping numerik ke label string (untuk legend lebih enak)
label_map = {0: 'Low', 1: 'Medium', 2: 'High'}
encoded_data['Credit_Score_Label'] = encoded_data['Credit_Score'].map(label_map)
encoded_data['Predicted_Sugeno_Label'] = encoded_data['Predicted_Sugeno'].map(label_map)

plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=encoded_data,
    x='Income',
    y='Age',
    hue='Credit_Score_Label',
    style='Predicted_Sugeno_Label',
    palette='Set1',
    s=100,
    alpha=0.7
)
plt.title('Scatterplot of Income vs Age\nColor: True Credit Score, Marker: Sugeno Prediction')
plt.xlabel('Income')
plt.ylabel('Age')
plt.legend(title='Legend', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
