# Logistic regression

## 1. Setup and Imports

In [1]:
import os
import sys

# Directory this code runs from. When `__file__` is not defined (the NameError
# case, e.g. an interactive kernel) fall back to the current working directory.
try:
    base_path = os.path.dirname(__file__)
except NameError:
    base_path = os.getcwd()

# 1) Parent directory (one level up)
parent_dir = os.path.abspath(os.path.join(base_path, '..'))

# 2) Grandparent directory (two levels up)
two_up_dir = os.path.abspath(os.path.join(base_path, '..', '..'))

# Append each directory only if it is missing, so re-running this cell does
# not keep growing sys.path with duplicate entries.
for _extra_dir in (parent_dir, two_up_dir):
    if _extra_dir not in sys.path:
        sys.path.append(_extra_dir)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time

# Import the environment and agents
from permuted_mnist.env.permuted_mnist import PermutedMNISTEnv
from models.Logistic_Regression.logistic_regression import Agent as Logistic_Agent
from models.MLP.mlp_v0 import Agent as torch_mlp_Agent

# Reaching this line means every import above completed without raising.
print("✓ Imports successful")

✓ Imports successful


We fix the seed for reproducibility:

In [3]:
# Single seed shared across the notebook: passed to env.set_seed and to the
# Logistic_Agent constructor.
seed = 42

We fix the number of CPUs:

In [4]:
# Forwarded to the Logistic_Agent constructor. Presumably the number of
# parallel workers the agent may use - TODO confirm against the agent code.
n_jobs = 2

## 2. Create the Environment

Let's create an environment with 10 different permuted tasks:

In [5]:
# Build the benchmark environment with 10 episodes (one permuted task each)
env = PermutedMNISTEnv(number_episodes=10)

# Seed the environment so runs are repeatable
env.set_seed(seed)

# Report the environment configuration, one item per output line
print(
    f"Environment created with {env.number_episodes} permuted tasks",
    f"Training set size: {env.train_size} samples",
    f"Test set size: {env.test_size} samples",
    sep="\n",
)

Environment created with 10 permuted tasks
Training set size: 60000 samples
Test set size: 10000 samples


## 3. Logistic Regression

In [6]:
def Logistic_regression(C, max_iter, max_tasks=1):
    """Train and evaluate a fresh logistic regression agent on the environment's tasks.

    The environment is reset and re-seeded first, so every call starts from the
    same state. For each task the agent is reset, trained on the task's
    training split and scored on its test split with ``env.evaluate``; the
    time spent in training plus prediction is measured per task.

    Relies on the notebook-level names ``env``, ``seed``, ``n_jobs`` and
    ``Logistic_Agent``.

    Args:
        C: Regularization constant forwarded to ``Logistic_Agent``.
        max_iter: Number of iterations forwarded to ``Logistic_Agent``.
        max_tasks: Maximum number of tasks to evaluate. Defaults to 1 (the
            quick-test setting); pass ``None`` to run until the environment
            has no tasks left.

    Returns:
        Mean accuracy over the evaluated tasks, or ``nan`` when no task was
        evaluated.
    """
    # Reset environment for fresh start
    env.reset()
    env.set_seed(seed)

    # Create logistic regression agent
    logistic_agent = Logistic_Agent(C, max_iter, n_jobs, seed)

    # Track performance
    logistic_regression_accuracies = []
    logistic_regression_times = []

    print(f"Evaluating Logistic regression Agent with max_iter={max_iter} and C={C}:")

    task_num = 1
    # The limit is checked before fetching, so no task is requested from the
    # environment only to be thrown away.
    while max_tasks is None or task_num <= max_tasks:
        task = env.get_next_task()
        if task is None:
            # Environment has no more tasks.
            break

        # Reset agent for new task
        logistic_agent.reset()

        # Time training + prediction with a monotonic clock
        start_time = time.perf_counter()
        logistic_agent.train(task['X_train'], task['y_train'])
        predictions = logistic_agent.predict(task['X_test'])
        elapsed_time = time.perf_counter() - start_time

        accuracy = env.evaluate(predictions, task['y_test'])

        logistic_regression_accuracies.append(accuracy)
        logistic_regression_times.append(elapsed_time)

        print(f"Task {task_num}: Accuracy = {accuracy:.2%}, Time = {elapsed_time:.4f}s")
        task_num += 1

    # np.mean([]) would emit a RuntimeWarning; report the situation explicitly.
    if not logistic_regression_accuracies:
        print("\nLogistic regression Agent Summary: no task was evaluated.")
        return float("nan")

    mean_accuracy = np.mean(logistic_regression_accuracies)
    std_accuracy = np.std(logistic_regression_accuracies)
    total_time = np.sum(logistic_regression_times)

    print("\nLogistic regression Agent Summary:")
    print(f"  Mean accuracy: {mean_accuracy:.2%} ± {std_accuracy:.2%}")
    print(f"  Total time: {total_time:.2f}s")

    return mean_accuracy

In [7]:
# Get the first task. The environment is reset and re-seeded first (the same
# steps Logistic_regression performs) so this cell gives the same task when
# it is re-run or executed after other cells have already consumed tasks.
env.reset()
env.set_seed(seed)
task = env.get_next_task()
assert task is not None, "the environment returned no task"

In [None]:
# Number of features per flattened image, as used by the reshapes below
n_pixels = 784

# Draw the pixel permutation from a seeded generator so this check is
# reproducible instead of depending on the global NumPy random state.
perm = np.random.default_rng(seed).permutation(n_pixels)

# Flatten the images, then build the permuted variants
X_train_flat = task['X_train'].reshape(-1, n_pixels)
X_test_flat = task['X_test'].reshape(-1, n_pixels)
y_test = task['y_test'].ravel()

X_train_perm = X_train_flat[:, perm]
X_test_perm = X_test_flat[:, perm]
# Independent copy for the baseline run (reshape may return a view of the task data)
X_train_same = X_train_flat.copy()

# NOTE(review): the agent is built with its default arguments here, whereas
# Logistic_regression passes (C, max_iter, n_jobs, seed) - confirm the defaults
# are the intended configuration for this check.
agent = Logistic_Agent()

# 1) Train / test without the extra permutation. The flattened test set is
#    used so both runs feed the agent inputs of the same shape.
agent.train(X_train_same, task['y_train'])
acc1 = np.mean(agent.predict(X_test_flat) == y_test)

# 2) Train / test with the same permutation applied to both sets
agent.reset()
agent.train(X_train_perm, task['y_train'])
acc2 = np.mean(agent.predict(X_test_perm) == y_test)

print("acc without perm:", acc1, "acc with perm:", acc2)

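La régression logistique ne dépend pas de l’ordre des pixels : elle traite chaque pixel comme une variable d’entrée distincte, avec son propre poids, et n’exploite aucune corrélation spatiale. Par conséquent, tant que la même permutation est appliquée aux données d’entraînement et de test, l’accuracy reste la même (aux fluctuations numériques de l’optimiseur près), quelle que soit la permutation. On peut donc se contenter d’entraîner et d’évaluer le modèle sur une seule permutation (une seule tâche).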

In [None]:
# Hyperparameter grid: regularization constants and iteration counts to try
Cs = [0.1]
max_iters = [100]

# Start below any reachable accuracy and initialise every "best" variable, so
# the summary cannot hit an undefined name when no run improves on the start
# value (empty grid, or a mean accuracy that is nan).
best_mean_accuracy = float("-inf")
best_C = None
best_max_iter = None

for max_iter in max_iters:
    for C in Cs:
        mean_accuracy = Logistic_regression(C, max_iter)
        print('-' * 50)
        # Strict comparison: on ties the first combination tried is kept
        if mean_accuracy > best_mean_accuracy:
            best_mean_accuracy = mean_accuracy
            best_C = C
            best_max_iter = max_iter

if best_C is None:
    print("No hyperparameter combination produced a valid mean accuracy.")
else:
    print(f"The best regularization constant is: {best_C}")
    print(f"The best number of iterations is: {best_max_iter}")
    print(f"The associated mean accuracy is: {best_mean_accuracy:0.4f}")

Evaluating Logistic regression Agent with max_iter=100 and C=0.1:
