Import Library

Exploratory Data Analysis

Feature Engineering

Data lebih condong ke kanan

Modeling

In [1]:
import pandas as pd
import numpy as np
import os # Added for path manipulation
import kagglehub # Added for Kaggle dataset download
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import resample

# 1. Chargement (Simulé selon votre path)
# Download latest version using kagglehub
download_path = kagglehub.dataset_download("lihxlhx/give-me-some-credit")

print("Path to dataset files:", download_path)

# Load the training data from the downloaded path
df = pd.read_csv(os.path.join(download_path, 'cs-training.csv'))
# Pour l'exemple, supposons que 'df' est déjà chargé comme dans le PDF [cite: 27]

# 2. Nettoyage (Basé sur le PDF [cite: 188-190])
# Remplacement des valeurs manquantes par la médiane/mode
df['NumberOfDependents'].fillna(df['NumberOfDependents'].mode()[0], inplace=True)
df['MonthlyIncome'].fillna(df['MonthlyIncome'].median(), inplace=True)

# 3. Sélection des Features (Les 10 colonnes demandées) et de la Cible
features = [
    'RevolvingUtilizationOfUnsecuredLines', 'age',
    'NumberOfTime30-59DaysPastDueNotWorse', 'DebtRatio',
    'MonthlyIncome', 'NumberOfOpenCreditLinesAndLoans',
    'NumberOfTimes90DaysLate', 'NumberRealEstateLoansOrLines',
    'NumberOfTime60-89DaysPastDueNotWorse', 'NumberOfDependents'
]
target = 'SeriousDlqin2yrs'

X = df[features]
y = df[target]

# 4. Split Train/Test (Comme dans le PDF [cite: 297])
# Séparation Train/Test
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Technique de l'Oversampling pour booster l'apprentissage du R-learning
df_train = pd.concat([X_train_raw, y_train], axis=1)
df_sain = df_train[df_train[target] == 0]
df_detresse = df_train[df_train[target] == 1]

# On multiplie les cas de détresse par 5 pour "forcer" l'agent à les voir
df_detresse_upsampled = resample(df_detresse, replace=True, n_samples=len(df_detresse)*5, random_state=42)
df_balanced = pd.concat([df_sain, df_detresse_upsampled]).sample(frac=1).reset_index(drop=True)

X_train_balanced = df_balanced[features]
y_train_balanced = df_balanced[target].values

# Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced)
X_test_scaled = scaler.transform(X_test_raw)

Downloading from https://www.kaggle.com/api/v1/datasets/download/lihxlhx/give-me-some-credit?dataset_version_number=1...


100%|██████████| 5.16M/5.16M [00:00<00:00, 46.9MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/lihxlhx/give-me-some-credit/versions/1


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['NumberOfDependents'].fillna(df['NumberOfDependents'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['MonthlyIncome'].fillna(df['MonthlyIncome'].median(), inplace=True)


In [2]:
# ---------------------------------------------------------
# 2. L'AGENT AMÉLIORÉ (Weighted Rewards + Decay)
# ---------------------------------------------------------

class ImprovedCreditRLAgent:
    def __init__(self, learning_rate=0.001, epsilon_start=1.0, epsilon_end=0.01, decay_rate=0.99995):
        # Gestion de l'exploration décroissante
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.decay_rate = decay_rate

        # Modèles linéaires (Q-function approximation)
        # 'huber' loss est plus robuste aux outliers que 'squared_error'
        self.q_model_0 = SGDRegressor(loss='huber', learning_rate='adaptive', eta0=learning_rate)
        self.q_model_1 = SGDRegressor(loss='huber', learning_rate='adaptive', eta0=learning_rate)

        self.is_fitted = False

    def _initialize_models(self, state_dim):
        dummy_X = np.zeros((1, state_dim))
        self.q_model_0.partial_fit(dummy_X, [0])
        self.q_model_1.partial_fit(dummy_X, [0])
        self.is_fitted = True

    def select_action(self, state, training=True):
        state = state.reshape(1, -1)
        if not self.is_fitted:
            self._initialize_models(state.shape[1])

        # Epsilon-Greedy avec Decay
        # Si on est en mode test (training=False), on exploite à 100%
        current_epsilon = self.epsilon if training else 0.0

        if np.random.rand() < current_epsilon:
            return np.random.choice([0, 1])

        q0 = self.q_model_0.predict(state)[0]
        q1 = self.q_model_1.predict(state)[0]
        return 0 if q0 > q1 else 1

    def update(self, state, action, reward):
        state = state.reshape(1, -1)
        if not self.is_fitted:
            self._initialize_models(state.shape[1])

        # Mise à jour du modèle concerné
        if action == 0:
            self.q_model_0.partial_fit(state, [reward])
        else:
            self.q_model_1.partial_fit(state, [reward])

        # Mise à jour de l'epsilon (Decay)
        self.epsilon = max(self.epsilon_end, self.epsilon * self.decay_rate)

In [3]:
# ---------------------------------------------------------
# 3. ENTRAÎNEMENT AVEC RÉCOMPENSES ASYMÉTRIQUES
# ---------------------------------------------------------

agent = ImprovedCreditRLAgent(learning_rate=0.001) # Utilise la classe précédente
cumulative_reward = 0

print(f"Entraînement sur {len(X_train_scaled)} échantillons (équilibrés)...")

for i in range(len(X_train_scaled)):
    state = X_train_scaled[i]
    true_label = y_train_balanced[i]

    # Choix de l'action
    action = agent.select_action(state, training=True)

    # --- SYSTÈME DE RÉCOMPENSE CORRIGÉ ---
    if action == true_label:
        if true_label == 1:
            reward = 100  # Récompense massive pour avoir trouvé un cas rare
        else:
            reward = 1    # Récompense de base
    else:
        if true_label == 1 and action == 0:
            reward = -250 # PÉNALITÉ EXTRÊME : Ne surtout pas rater un défaut
        else:
            reward = -10  # Pénalité pour fausse alerte (augmentée pour éviter le spam de '1')

    # Mise à jour
    agent.update(state, action, reward)

    if i % 20000 == 0:
        print(f"Progrès: {i}/{len(X_train_scaled)} | Epsilon: {agent.epsilon:.3f}")


Entraînement sur 133184 échantillons (équilibrés)...
Progrès: 0/133184 | Epsilon: 1.000
Progrès: 20000/133184 | Epsilon: 0.368
Progrès: 40000/133184 | Epsilon: 0.135
Progrès: 60000/133184 | Epsilon: 0.050
Progrès: 80000/133184 | Epsilon: 0.018
Progrès: 100000/133184 | Epsilon: 0.010
Progrès: 120000/133184 | Epsilon: 0.010


In [4]:
y_pred_rl = []
agent.epsilon = 0

# Plus ce seuil est BAS, plus vous capturez de personnes en détresse
# Essayez 0.10 ou 0.05
custom_threshold = 0.35

for i in range(len(X_test_scaled)):
    state = X_test_scaled[i].reshape(1, -1)

    q0 = agent.q_model_0.predict(state)[0]
    q1 = agent.q_model_1.predict(state)[0]

    # Transformation Softmax pour obtenir une probabilité entre 0 et 1
    exp_q = np.exp([q0, q1] - np.max([q0, q1])) # Stabilité numérique
    probs = exp_q / exp_q.sum()
    prob_detresse = probs[1]

    # Décision basée sur le seuil
    if prob_detresse > custom_threshold:
        action = 1
    else:
        action = 0

    y_pred_rl.append(action)

print(f"\n--- RÉSULTATS R-LEARNING (SEUIL PROBABILISTE: {custom_threshold}) ---")
print(confusion_matrix(y_test, y_pred_rl))
print(classification_report(y_test, y_pred_rl))


--- RÉSULTATS R-LEARNING (SEUIL PROBABILISTE: 0.35) ---
[[41783   237]
 [ 2640   340]]
              precision    recall  f1-score   support

           0       0.94      0.99      0.97     42020
           1       0.59      0.11      0.19      2980

    accuracy                           0.94     45000
   macro avg       0.76      0.55      0.58     45000
weighted avg       0.92      0.94      0.92     45000

