# Apprentissage de vecteurs de mots pour l’analyse de sentiments

Ce notebook explore l’article Learning Word Vectors for Sentiment Analysis (Maas et al., 2011), qui introduit une méthode de plongement lexical supervisé intégrant des informations de polarité pour améliorer la classification de sentiments.

## Imports

In [None]:
import os
import re
import random

import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm

from scipy.special import expit

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

## Pré-processing

In [16]:
base_dir = "aclImdb"
train_dir = os.path.join(base_dir, "train")
test_dir = os.path.join(base_dir, "test")

def load_reviews_from_dir(directory, label):
    data = []
    for fname in os.listdir(directory):
        if fname.endswith(".txt"):
            with open(os.path.join(directory, fname), encoding="utf-8") as f:
                text = f.read()
                data.append((text, label))
    return data

def load_all_data():
    train_pos = load_reviews_from_dir(os.path.join(train_dir, "pos"), 1)
    train_neg = load_reviews_from_dir(os.path.join(train_dir, "neg"), 0)
    test_pos  = load_reviews_from_dir(os.path.join(test_dir,  "pos"), 1)
    test_neg  = load_reviews_from_dir(os.path.join(test_dir,  "neg"), 0)
    return train_pos + train_neg + test_pos + test_neg, len(train_pos) + len(train_neg), len(test_pos) + len(test_neg)

raw_data, len_train, len_test = load_all_data()
df = pd.DataFrame(raw_data, columns=["text", "label"])

def clean_text(text):
    text = re.sub(r"<br\s*/?>", " ", text)  # remplace <br> et <br /> par un espace
    text = re.sub(r"\s+", " ", text)        # normalise les espaces
    return text.strip()

df["text"] = df["text"].apply(clean_text)

vectorizer = CountVectorizer(max_features=5050, token_pattern=r"(?u)\b\w+\b")
vectorizer.fit(df["text"])
full_vocab = vectorizer.get_feature_names_out()

# --- Exclusion des 50 mots les plus fréquents comme dans l'article ---
excluded_words = full_vocab[:50]
vocab = full_vocab[50:]  # Top 5000 mots après exclusion des 50 premiers

def filter_tokens(text, vocab_set):
    tokens = text.split()
    return " ".join([tok for tok in tokens if tok in vocab_set])

vocab_set = set(vocab)
df["filtered_text"] = df["text"].apply(lambda x: filter_tokens(x, vocab_set))

In [18]:
print(df.head(10))

                                                text  label  \
0  Bromwell High is a cartoon comedy. It ran at t...      1   
1  Homelessness (or Houselessness as George Carli...      1   
2  Brilliant over-acting by Lesley Ann Warren. Be...      1   
3  This is easily the most underrated film inn th...      1   
4  This is not the typical Mel Brooks film. It wa...      1   
5  This isn't the comedic Robin Williams, nor is ...      1   
6  Yes its an art... to successfully make a slow ...      1   
7  In this "critically acclaimed psychological th...      1   
8  THE NIGHT LISTENER (2006) **1/2 Robin Williams...      1   
9  You know, Robin Williams, God bless him, is co...      1   

                                       filtered_text  
0  is a cartoon ran at the same time as some othe...  
1  as has been an issue for years but never a pla...  
2  by dramatic lady have ever and love scenes in ...  
3  is easily the most underrated film the its doe...  
4  is not the typical was much 

# Entrainement par vecteur de mots (non supervisé) - Ajustement des hyperparmètres par un algorithme génétique ave cun nombre fixe d'epochs

In [20]:
def softmax_probs(theta_k, R, b):
    logits = np.dot(theta_k, R) + b
    logits = logits - np.max(logits)  # stabilité numérique
    exps = np.exp(logits)
    return exps / np.sum(exps)

class Model:
    def __init__(self, lambda_reg=0.1, nu_reg=0.1, learning_rate=0.01, beta=10):
        self.lambda_reg = lambda_reg
        self.nu_reg = nu_reg
        self.learning_rate = learning_rate
        self.beta = beta

    def train(self, vocab, filtered_texts, epochs=3):
        vectorizer = CountVectorizer(vocabulary=list(vocab))
        X = vectorizer.fit_transform(filtered_texts)
        X = X.toarray()  # shape: (n_docs, vocab_size)

        n_docs, vocab_size = X.shape

        self.R = np.random.normal(0, 0.01, size=(self.beta, vocab_size))
        self.b = np.zeros(vocab_size)

        theta = np.random.normal(0, 0.01, size=(n_docs, self.beta))

        for epoch in range(epochs):
            print(f"Epoch {epoch+1}/{epochs}")
            total_log_likelihood = 0.0
            for k in tqdm(range(n_docs)):
                x_k = X[k]
                if x_k.sum() == 0:
                    continue
                # E-step : optimiser θ_k
                for _ in range(3):
                    probs = softmax_probs(theta[k], self.R, self.b)
                    grad_theta = self.R @ (x_k - probs * x_k.sum()) - self.lambda_reg * theta[k]
                    theta[k] += self.learning_rate * grad_theta

                # Log-likelihood
                log_probs = np.dot(x_k, np.log(probs + 1e-9))
                total_log_likelihood += log_probs

            for k in range(n_docs):
                x_k = X[k]
                if x_k.sum() == 0:
                    continue
                probs = softmax_probs(theta[k], self.R, self.b)
                err = x_k - probs * x_k.sum()
                grad_R = np.outer(theta[k], err)
                grad_b = err

                self.R += self.learning_rate * (grad_R - self.nu_reg * self.R)
                self.b += self.learning_rate * grad_b

            avg_ll = total_log_likelihood / n_docs
            print(f"[Epoch {epoch+1}] Avg semantic log-likelihood: {avg_ll:.4f}")
        
        self.avg_log_likelihood = avg_ll

    def compute_objective(self):
        return self.avg_log_likelihood - self.lambda_reg * np.sum(self.R**2) - self.nu_reg * np.sum(self.b**2)

In [None]:
# Espace de recherche des hyperparamètres
param_space = {
    'beta': [25, 50, 75, 100],
    'lambda_reg': [1e-4, 1e-3, 1e-2, 1e-1],
    'nu_reg': [1e-5, 1e-4, 1e-3, 1e-2],
    'learning_rate': [0.001, 0.005, 0.01, 0.05],
}

# Génère un individu
def generate_individual():
    return {
        'beta': random.choice(param_space['beta']),
        'lambda_reg': random.choice(param_space['lambda_reg']),
        'nu_reg': random.choice(param_space['nu_reg']),
        'learning_rate': random.choice(param_space['learning_rate']),
    }

# Évalue un individu (doit appeler ton modèle ici)
def evaluate(individual, vocab, text):
    model = Model(
        beta=individual['beta'],
        lambda_reg=individual['lambda_reg'],
        nu_reg=individual['nu_reg'],
        learning_rate=individual['learning_rate']
    )
    model.train(vocab, text, epochs=3)
    return model.compute_objective()  # À maximiser

# Crossover
def crossover(parent1, parent2):
    child = {}
    for key in parent1:
        child[key] = random.choice([parent1[key], parent2[key]])
    return child

# Mutation
def mutate(individual, mutation_rate=0.1):
    for key in individual:
        if random.random() < mutation_rate:
            individual[key] = random.choice(param_space[key])
    return individual

# Algorithme principal
def genetic_search(vocab, text, generations=10, population_size=10):
    population = [generate_individual() for _ in range(population_size)]
    
    for generation in range(generations):
        scored = [(ind, evaluate(ind, vocab, text)) for ind in population]
        scored.sort(key=lambda x: x[1], reverse=True)  # Maximize objective

        print(f"\nGeneration {generation + 1}, Best score: {scored[0][1]:.4f}")
        print(f"Best individual: {scored[0][0]}\n")

        survivors = [ind for ind, _ in scored[:population_size // 2]]

        # Reproduction
        next_gen = survivors.copy()
        while len(next_gen) < population_size:
            parents = random.sample(survivors, 2)
            child = mutate(crossover(parents[0], parents[1]))
            next_gen.append(child)

        population = next_gen

    return scored[0]

best = genetic_search(vocab, df.iloc[:len_train]["filtered_text"], generations=10, population_size=10)
print(f"Best individual: {best}")

Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 701.04it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4082
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 702.86it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4842
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 718.04it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2120
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 719.09it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.2758
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 728.26it/s]


[Epoch 2] Avg semantic log-likelihood: -946.0647
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 661.98it/s]


[Epoch 3] Avg semantic log-likelihood: -3155.9929
Epoch 1/3


100%|██████████| 25000/25000 [00:06<00:00, 4014.97it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3225
Epoch 2/3


100%|██████████| 25000/25000 [00:06<00:00, 4124.88it/s]


[Epoch 2] Avg semantic log-likelihood: -2044.1404
Epoch 3/3


100%|██████████| 25000/25000 [00:09<00:00, 2680.32it/s]


[Epoch 3] Avg semantic log-likelihood: -3187.0841
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 725.70it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3473
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 714.93it/s]


[Epoch 2] Avg semantic log-likelihood: -943.5485
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 720.84it/s]


[Epoch 3] Avg semantic log-likelihood: -2580.8963
Epoch 1/3


100%|██████████| 25000/25000 [00:06<00:00, 4120.38it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.2262
Epoch 2/3


100%|██████████| 25000/25000 [00:06<00:00, 3659.43it/s]


[Epoch 2] Avg semantic log-likelihood: -3197.2842
Epoch 3/3


100%|██████████| 25000/25000 [00:08<00:00, 2887.70it/s]


[Epoch 3] Avg semantic log-likelihood: -3202.9233
Epoch 1/3


100%|██████████| 25000/25000 [00:09<00:00, 2675.35it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3318
Epoch 2/3


100%|██████████| 25000/25000 [00:09<00:00, 2568.79it/s]


[Epoch 2] Avg semantic log-likelihood: -941.6370
Epoch 3/3


100%|██████████| 25000/25000 [00:10<00:00, 2488.34it/s]


[Epoch 3] Avg semantic log-likelihood: -1105.3191
Epoch 1/3


100%|██████████| 25000/25000 [00:11<00:00, 2130.48it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3032
Epoch 2/3


100%|██████████| 25000/25000 [00:12<00:00, 1985.91it/s]


[Epoch 2] Avg semantic log-likelihood: -1381.0017
Epoch 3/3


100%|██████████| 25000/25000 [00:14<00:00, 1715.17it/s]


[Epoch 3] Avg semantic log-likelihood: -3173.3338
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 718.88it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4068
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 710.22it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4820
Epoch 3/3


100%|██████████| 25000/25000 [00:35<00:00, 701.75it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1942
Epoch 1/3


100%|██████████| 25000/25000 [00:06<00:00, 4125.95it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4068
Epoch 2/3


100%|██████████| 25000/25000 [00:06<00:00, 3992.83it/s]


[Epoch 2] Avg semantic log-likelihood: -943.7583
Epoch 3/3


100%|██████████| 25000/25000 [00:06<00:00, 4016.37it/s]


[Epoch 3] Avg semantic log-likelihood: -956.7991
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 723.21it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.2905
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 707.40it/s]


[Epoch 2] Avg semantic log-likelihood: -1518.6679
Epoch 3/3


100%|██████████| 25000/25000 [00:40<00:00, 617.33it/s]


[Epoch 3] Avg semantic log-likelihood: -3212.4793

Generation 1, Best score: -949.6968
Best individual: {'beta': 100, 'lambda_reg': 0.001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 717.39it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4072
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 712.11it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4829
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 723.61it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2040
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 715.21it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4097
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 715.98it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4862
Epoch 3/3


100%|██████████| 25000/25000 [00:35<00:00, 705.52it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2273
Epoch 1/3


100%|██████████| 25000/25000 [00:06<00:00, 4102.80it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3965
Epoch 2/3


100%|██████████| 25000/25000 [00:05<00:00, 4181.37it/s]


[Epoch 2] Avg semantic log-likelihood: -943.6596
Epoch 3/3


100%|██████████| 25000/25000 [00:06<00:00, 4125.87it/s]


[Epoch 3] Avg semantic log-likelihood: -956.4805
Epoch 1/3


100%|██████████| 25000/25000 [00:09<00:00, 2698.19it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3498
Epoch 2/3


100%|██████████| 25000/25000 [00:09<00:00, 2607.75it/s]


[Epoch 2] Avg semantic log-likelihood: -942.0147
Epoch 3/3


100%|██████████| 25000/25000 [00:09<00:00, 2572.79it/s]


[Epoch 3] Avg semantic log-likelihood: -1279.6728
Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 712.46it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3692
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 693.21it/s]


[Epoch 2] Avg semantic log-likelihood: -943.4069
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 724.05it/s]


[Epoch 3] Avg semantic log-likelihood: -2782.1899
Epoch 1/3


100%|██████████| 25000/25000 [00:06<00:00, 4121.98it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4038
Epoch 2/3


100%|██████████| 25000/25000 [00:06<00:00, 4029.77it/s]


[Epoch 2] Avg semantic log-likelihood: -943.6861
Epoch 3/3


100%|██████████| 25000/25000 [00:06<00:00, 4131.93it/s]


[Epoch 3] Avg semantic log-likelihood: -956.3298
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 731.30it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4075
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 713.72it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4840
Epoch 3/3


100%|██████████| 25000/25000 [00:35<00:00, 712.57it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2070
Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 711.93it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3547
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 713.94it/s]


[Epoch 2] Avg semantic log-likelihood: -942.9041
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 719.61it/s]


[Epoch 3] Avg semantic log-likelihood: -1672.1687
Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 695.04it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4083
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 710.84it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4855
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 718.02it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2189
Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 705.63it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4092
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 695.42it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4844
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 721.92it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2112

Generation 2, Best score: -949.6620
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 668.32it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4073
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 648.92it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4826
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 677.09it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2011
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 676.48it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4093
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 657.05it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4839
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 684.98it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2058
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 668.05it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4071
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 657.07it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4831
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 663.79it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1993
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 682.68it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4087
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 682.52it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4866
Epoch 3/3


100%|██████████| 25000/25000 [00:38<00:00, 654.89it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2318
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 657.96it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4082
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 674.08it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4842
Epoch 3/3


100%|██████████| 25000/25000 [00:39<00:00, 635.59it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2124
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 644.87it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4094
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 672.82it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4847
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 658.59it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2156
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 681.26it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4056
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 666.11it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4832
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 680.45it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2079
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 665.09it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4047
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 674.10it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4817
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 685.65it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1923
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 658.46it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4079
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 686.65it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4835
Epoch 3/3


100%|██████████| 25000/25000 [00:35<00:00, 695.42it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2025
Epoch 1/3


100%|██████████| 25000/25000 [00:11<00:00, 2154.24it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4119
Epoch 2/3


100%|██████████| 25000/25000 [00:12<00:00, 1995.37it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4945
Epoch 3/3


100%|██████████| 25000/25000 [00:13<00:00, 1911.61it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2752

Generation 3, Best score: -949.6443
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 661.82it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4056
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 680.16it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4800
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 685.61it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1780
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 679.86it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4071
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 647.13it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4821
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 663.01it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1922
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 675.98it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4065
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 652.81it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4811
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 661.14it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1877
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 675.36it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4083
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 657.61it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4825
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 671.50it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1956
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 650.27it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4065
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 675.50it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4813
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 670.03it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1861
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 665.76it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4050
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 672.42it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4801
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 685.92it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1805
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 678.85it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4084
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 675.09it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4844
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 676.49it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2147
Epoch 1/3


100%|██████████| 25000/25000 [00:41<00:00, 598.67it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4066
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 662.64it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4835
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 664.49it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2057
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 650.06it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4086
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 670.48it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4861
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 675.12it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2293
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 684.09it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4080
Epoch 2/3


100%|██████████| 25000/25000 [00:42<00:00, 592.45it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4835
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 669.97it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2040

Generation 4, Best score: -949.6214
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 668.46it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4065
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 661.25it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4805
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 678.13it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1827
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 669.67it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4062
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 665.94it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4825
Epoch 3/3


100%|██████████| 25000/25000 [00:38<00:00, 655.23it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2007
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 681.11it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4064
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 670.62it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4824
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 666.70it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2018
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 670.57it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4060
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 656.62it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4806
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 668.11it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1799
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 651.38it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4078
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 678.84it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4843
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 674.06it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2147
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 657.84it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4043
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 674.14it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4813
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 662.15it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1904
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 681.09it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4088
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 679.34it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4828
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 667.98it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1984
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 667.87it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4102
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 685.12it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4862
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 674.85it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2231
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 673.40it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4069
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 656.14it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4851
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 669.85it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2218
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 670.49it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4054
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 688.13it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4811
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 669.94it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1870

Generation 5, Best score: -949.6260
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 667.42it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4077
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 678.18it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4845
Epoch 3/3


100%|██████████| 25000/25000 [00:38<00:00, 651.35it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2168
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 658.21it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4065
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 683.67it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4830
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 678.95it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2022
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 657.96it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4074
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 670.37it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4831
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 660.12it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2032
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 678.77it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4096
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 671.81it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4847
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 666.70it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2121
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 680.74it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4062
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 660.91it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4836
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 679.13it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2079
Epoch 1/3


100%|██████████| 25000/25000 [00:09<00:00, 2677.91it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4155
Epoch 2/3


100%|██████████| 25000/25000 [00:09<00:00, 2564.55it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4966
Epoch 3/3


100%|██████████| 25000/25000 [00:09<00:00, 2525.87it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2715
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 675.00it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4074
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 663.28it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4842
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 679.13it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2115
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 671.98it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4066
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 670.77it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4829
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 665.36it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2015
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 675.86it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4089
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 660.33it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4853
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 669.79it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2209
Epoch 1/3


100%|██████████| 25000/25000 [00:11<00:00, 2133.02it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4109
Epoch 2/3


100%|██████████| 25000/25000 [00:12<00:00, 1993.41it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4908
Epoch 3/3


100%|██████████| 25000/25000 [00:12<00:00, 2013.70it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2463

Generation 6, Best score: -949.6454
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 680.25it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4068
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 665.73it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4831
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 660.61it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2002
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 668.91it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4093
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 658.40it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4831
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 664.91it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2013
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 664.25it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4067
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 682.61it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4835
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 668.24it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2097
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 689.69it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4054
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 670.39it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4814
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 660.82it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1887
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 675.77it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4062
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 671.07it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4812
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 676.24it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1845
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 669.65it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4071
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 666.79it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4818
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 665.60it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1891
Epoch 1/3


100%|██████████| 25000/25000 [00:41<00:00, 601.41it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4095
Epoch 2/3


100%|██████████| 25000/25000 [00:44<00:00, 562.55it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4857
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 683.18it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2212
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 670.76it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4072
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 648.10it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4826
Epoch 3/3


100%|██████████| 25000/25000 [00:38<00:00, 644.86it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1982
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 648.12it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4073
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 667.56it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4819
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 674.00it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1916
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 651.57it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.3534
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 674.33it/s]


[Epoch 2] Avg semantic log-likelihood: -943.3882
Epoch 3/3


100%|██████████| 25000/25000 [00:43<00:00, 580.75it/s]


[Epoch 3] Avg semantic log-likelihood: -2686.9090

Generation 7, Best score: -949.6325
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 705.99it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4080
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 729.52it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4824
Epoch 3/3


100%|██████████| 25000/25000 [00:33<00:00, 741.52it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1948
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 728.16it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4062
Epoch 2/3


100%|██████████| 25000/25000 [00:33<00:00, 747.37it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4832
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 728.19it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2050
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 715.21it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4088
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 717.91it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4847
Epoch 3/3


100%|██████████| 25000/25000 [00:33<00:00, 740.44it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2156
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 724.91it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4075
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 706.16it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4816
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 732.97it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1915
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 727.97it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4071
Epoch 2/3


100%|██████████| 25000/25000 [00:33<00:00, 748.99it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4827
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 720.53it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2018
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 733.23it/s]


[Epoch 1] Avg semantic log-likelihood: -1323.6953
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 702.69it/s]


[Epoch 2] Avg semantic log-likelihood: -3172.4484
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 685.05it/s]


[Epoch 3] Avg semantic log-likelihood: -3158.2357
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 728.87it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4074
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 723.68it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4838
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 725.18it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2060
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 731.33it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4080
Epoch 2/3


100%|██████████| 25000/25000 [00:33<00:00, 739.47it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4822
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 731.84it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1951
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 729.44it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4082
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 734.35it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4840
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 735.06it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2094
Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 706.50it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4044
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 734.04it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4801
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 724.81it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1823

Generation 8, Best score: -949.6257
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 727.44it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4070
Epoch 2/3


100%|██████████| 25000/25000 [00:33<00:00, 745.53it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4831
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 727.83it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2051
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 722.47it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4075
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 728.60it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4840
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 733.05it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2149
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 725.36it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4082
Epoch 2/3


100%|██████████| 25000/25000 [00:34<00:00, 726.84it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4817
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 722.43it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1885
Epoch 1/3


100%|██████████| 25000/25000 [00:34<00:00, 721.42it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4073
Epoch 2/3


100%|██████████| 25000/25000 [00:33<00:00, 745.60it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4822
Epoch 3/3


100%|██████████| 25000/25000 [00:35<00:00, 708.13it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1943
Epoch 1/3


100%|██████████| 25000/25000 [00:35<00:00, 713.79it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4062
Epoch 2/3


100%|██████████| 25000/25000 [00:35<00:00, 712.08it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4830
Epoch 3/3


100%|██████████| 25000/25000 [00:34<00:00, 729.88it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2041
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 687.11it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4074
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 684.36it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4839
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 663.98it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2103
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 675.28it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4104
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 674.71it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4864
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 681.61it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2264
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 678.50it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4083
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 680.93it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4846
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 686.08it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2110
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 692.28it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4066
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 671.29it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4816
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 683.34it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1913
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 683.99it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4079
Epoch 2/3


100%|██████████| 25000/25000 [00:38<00:00, 656.72it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4891
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 678.26it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2544

Generation 9, Best score: -949.6318
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 671.92it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4057
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 669.12it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4812
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 676.37it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1918
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 673.00it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4094
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 690.94it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4853
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 673.64it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2189
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 682.32it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4040
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 681.72it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4802
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 676.63it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1813
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 681.86it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4063
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 687.01it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4833
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 678.72it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2058
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 688.30it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4076
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 671.11it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4839
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 683.38it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2091
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 655.69it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4064
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 676.74it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4819
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 680.63it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1954
Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 690.08it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4073
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 685.49it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4856
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 666.69it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2253
Epoch 1/3


100%|██████████| 25000/25000 [00:38<00:00, 656.69it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.2754
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 658.44it/s]


[Epoch 2] Avg semantic log-likelihood: -1594.8655
Epoch 3/3


100%|██████████| 25000/25000 [00:40<00:00, 609.88it/s]


[Epoch 3] Avg semantic log-likelihood: -3150.3703
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 675.09it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4049
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 681.19it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4810
Epoch 3/3


100%|██████████| 25000/25000 [00:37<00:00, 673.16it/s]


[Epoch 3] Avg semantic log-likelihood: -949.1863
Epoch 1/3


100%|██████████| 25000/25000 [00:37<00:00, 671.14it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4075
Epoch 2/3


100%|██████████| 25000/25000 [00:37<00:00, 660.37it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4833
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 691.41it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2018

Generation 10, Best score: -949.6247
Best individual: {'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}

Best individual: ({'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001}, np.float64(-949.6247132291654))


In [22]:
best

({'beta': 100, 'lambda_reg': 0.0001, 'nu_reg': 0.0001, 'learning_rate': 0.001},
 np.float64(-949.6247132291654))

In [23]:
model = Model(
    beta= best[0]['beta'], 
    lambda_reg = best[0]['lambda_reg'], 
    nu_reg = best[0]['nu_reg'], 
    learning_rate = best[0]['learning_rate']
)
model.train(vocab, df.iloc[:len_train]["filtered_text"], epochs=3)
model.compute_objective()

Epoch 1/3


100%|██████████| 25000/25000 [00:36<00:00, 685.69it/s]


[Epoch 1] Avg semantic log-likelihood: -1324.4088
Epoch 2/3


100%|██████████| 25000/25000 [00:36<00:00, 687.20it/s]


[Epoch 2] Avg semantic log-likelihood: -957.4850
Epoch 3/3


100%|██████████| 25000/25000 [00:36<00:00, 687.66it/s]


[Epoch 3] Avg semantic log-likelihood: -949.2149


np.float64(-949.6580182020165)

# Modèle pout la classification supervisé (avec EarlyStop et Logisitique Regression, basé sur les hyperparamètres de l'algorithme génétique)

In [None]:
class ModelWithEarlyStopping:
    def __init__(self, lambda_reg=0.1, nu_reg=0.1, learning_rate=0.01, beta=10, alpha_sup=1.0, vocab=None):
        self.lambda_reg = lambda_reg
        self.nu_reg = nu_reg
        self.learning_rate = learning_rate
        self.beta = beta
        self.alpha_sup = alpha_sup  # pondération du terme supervisé
        self.vocab = vocab

    def train(self, filtered_texts, vocab=None, earlyStop=3, max_epochs=100):
        if vocab:
            self.vocab = vocab  # pour usage futur
        vocab = self.vocab
        vectorizer = CountVectorizer(vocabulary=list(vocab))
        X = vectorizer.fit_transform(filtered_texts).toarray()
        
        self.vectorizer = vectorizer

        n_docs, vocab_size = X.shape
        self.R = np.random.normal(0, 0.01, size=(self.beta, vocab_size))
        self.b = np.zeros(vocab_size)
        theta = np.random.normal(0, 0.01, size=(n_docs, self.beta))

        best_avg_ll = float('-inf')
        best_params = {}
        no_improve_count = 0

        for epoch in range(max_epochs):
            print(f"Epoch {epoch + 1}/{max_epochs}")
            total_log_likelihood = 0.0

            for k in range(n_docs):
                x_k = X[k]
                if x_k.sum() == 0:
                    continue

                for _ in range(3):
                    probs = softmax_probs(theta[k], self.R, self.b)
                    grad_theta = self.R @ (x_k - probs * x_k.sum()) - self.lambda_reg * theta[k]
                    theta[k] += self.learning_rate * grad_theta

                log_probs = np.dot(x_k, np.log(probs + 1e-9))
                total_log_likelihood += log_probs

            for k in range(n_docs):
                x_k = X[k]
                if x_k.sum() == 0:
                    continue
                probs = softmax_probs(theta[k], self.R, self.b)
                err = x_k - probs * x_k.sum()
                grad_R = np.outer(theta[k], err)
                grad_b = err

                self.R += self.learning_rate * (grad_R - self.nu_reg * self.R)
                self.b += self.learning_rate * grad_b

            avg_ll = total_log_likelihood / n_docs
            print(f"[Epoch {epoch+1}] Avg semantic log-likelihood: {avg_ll:.4f}")

            if avg_ll > best_avg_ll + 1e-4:
                best_avg_ll = avg_ll
                best_params = {
                    'R': self.R.copy(),
                    'b': self.b.copy(),
                    'theta': theta.copy()
                }
                no_improve_count = 0
            else:
                no_improve_count += 1
                if no_improve_count >= earlyStop:
                    print(f"Early stopping at epoch {epoch+1}. Best avg log-likelihood: {best_avg_ll:.4f}")
                    break

        self.R = best_params['R']
        self.b = best_params['b']
        self.theta = best_params['theta']
        self.avg_log_likelihood = best_avg_ll

    def train_classifier(self, y):
        """
        Entraîne une régression logistique binaire sur les vecteurs theta_k appris.
        """
        assert hasattr(self, 'theta'), "Train the model first to get theta."
        self.classifier = LinearSVC()
        self.classifier.fit(self.theta, y)
        print("Régression logistique entraînée.")

    def fit(self, filtered_texts, y, vocab=None, earlyStop=3, max_epochs=100):
        """
        Entraîne le modèle et la régression logistique sur les textes filtrés et les labels.
        """
        self.train(filtered_texts, vocab, earlyStop, max_epochs)
        self.train_classifier(y)

    def predict(self, filtered_texts):
        """
        Prédit les labels binaires pour de nouveaux textes.
        """
        assert hasattr(self, 'classifier'), "Train the classifier first."
        X = self.vectorizer.transform(filtered_texts).toarray()
        n_docs = X.shape[0]
        theta = np.zeros((n_docs, self.beta))

        for k in range(n_docs):
            x_k = X[k]
            if x_k.sum() == 0:
                continue
            theta_k = np.zeros(self.beta)
            for _ in range(3):
                probs = softmax_probs(theta_k, self.R, self.b)
                grad_theta = self.R @ (x_k - probs * x_k.sum()) - self.lambda_reg * theta_k
                theta_k += self.learning_rate * grad_theta
            theta[k] = theta_k

        preds = self.classifier.predict(theta)
        return preds

    def compute_objective(self):
        return self.avg_log_likelihood - self.lambda_reg * np.sum(self.R**2) - self.nu_reg * np.sum(self.b**2)


In [28]:
model = ModelWithEarlyStopping(
    beta= best[0]['beta'], 
    lambda_reg = best[0]['lambda_reg'], 
    nu_reg = best[0]['nu_reg'], 
    learning_rate = best[0]['learning_rate'],
    vocab=vocab
)
model.fit(df.iloc[:len_train]["filtered_text"], df.iloc[:len_train]["label"])

Epoch 1/100
[Epoch 1] Avg semantic log-likelihood: -1324.4051
Epoch 2/100
[Epoch 2] Avg semantic log-likelihood: -957.4814
Epoch 3/100
[Epoch 3] Avg semantic log-likelihood: -949.1931
Epoch 4/100
[Epoch 4] Avg semantic log-likelihood: -945.7415
Epoch 5/100
[Epoch 5] Avg semantic log-likelihood: -943.3462
Epoch 6/100
[Epoch 6] Avg semantic log-likelihood: -941.0435
Epoch 7/100
[Epoch 7] Avg semantic log-likelihood: -938.3610
Epoch 8/100
[Epoch 8] Avg semantic log-likelihood: -935.9909
Epoch 9/100
[Epoch 9] Avg semantic log-likelihood: -933.8892
Epoch 10/100
[Epoch 10] Avg semantic log-likelihood: -931.4956
Epoch 11/100
[Epoch 11] Avg semantic log-likelihood: -929.2984
Epoch 12/100
[Epoch 12] Avg semantic log-likelihood: -927.2420
Epoch 13/100
[Epoch 13] Avg semantic log-likelihood: -925.1888
Epoch 14/100
[Epoch 14] Avg semantic log-likelihood: -923.1411
Epoch 15/100
[Epoch 15] Avg semantic log-likelihood: -921.1808
Epoch 16/100
[Epoch 16] Avg semantic log-likelihood: -919.1951
Epoch 17/

In [29]:
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [44]:
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)

In [60]:
print("Log-likelihood:", model.compute_objective())
preds = model.predict(df["filtered_text"].iloc[len_train:])
print(classification_report(df["label"].iloc[len_train:], preds, target_names=["neg", "pos"]))

Log-likelihood: -844.5055963099514
              precision    recall  f1-score   support

         neg       0.62      0.98      0.76     12500
         pos       0.95      0.39      0.55     12500

    accuracy                           0.68     25000
   macro avg       0.78      0.68      0.65     25000
weighted avg       0.78      0.68      0.65     25000



In [100]:
row = df.iloc[random.choice(range(len_train, len_train + len_test))]
pred = model.predict([row["filtered_text"]])[0]
print(f"Predicted: {"pos" if pred == 1 else "neg"}, Actual: {"pos" if row['label'] == 1 else "neg"}, Text: {row['text'][:100]}...")

Predicted: pos, Actual: pos, Text: LACKAWANNA BLUES is a fine stage play by Ruben Santiago-Hudson and an even finer film as the author ...


## Comparaison des performances avec d'autres approches

In [49]:
vectorizer = CountVectorizer(vocabulary=list(vocab))
X = vectorizer.transform(df["filtered_text"])
X = X.toarray()  # shape: (n_docs, vocab_size)

X_train = X[:len_train]
X_test = X[len_train:]
y_train = df["label"].values[:len_train]
y_test = df["label"].values[len_train:]

#### Bag of words

In [50]:
clf = LinearSVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(f"[BoW] Accuracy: {accuracy_score(y_test, y_pred):.4f}")

[BoW] Accuracy: 0.8174


#### LSA

In [51]:
svd = TruncatedSVD(n_components=100)
X_lsa_train = svd.fit_transform(X_train)
X_lsa_test = svd.transform(X_test)

clf = LinearSVC()
clf.fit(X_lsa_train, y_train)
y_pred = clf.predict(X_lsa_test)
print(f"[LSA] Accuracy: {accuracy_score(y_test, y_pred):.4f}")

[LSA] Accuracy: 0.7668


#### LDA

In [52]:
lda = LatentDirichletAllocation(n_components=50, max_iter=10, random_state=42)
X_lda_train = lda.fit_transform(X_train)
X_lda_test = lda.transform(X_test)

clf = LinearSVC()
clf.fit(X_lda_train, y_train)
y_pred = clf.predict(X_lda_test)
print(f"[LDA] Accuracy: {accuracy_score(y_test, y_pred):.4f}")

[LDA] Accuracy: 0.7942


Modèle sémantique seul

In [53]:
def doc_features_from_R(X_data, R):
    sums = X_data.sum(axis=1, keepdims=True)
    sums[sums == 0] = 1  # évite division par zéro
    return (X_data @ R.T) / sums

X_r_train = doc_features_from_R(X_train, R)
X_r_test = doc_features_from_R(X_test, R)

clf = LinearSVC()
clf.fit(X_r_train, y_train)
y_pred = clf.predict(X_r_test)
print(f"[Semantic Only] Accuracy: {accuracy_score(y_test, y_pred):.4f}")

[Semantic Only] Accuracy: 0.6711


#### Modèle complet

In [54]:
def doc_sentiment_features(X_data, R, psi, bc):
    feats = doc_features_from_R(X_data, R)
    sentiment_score = expit(feats @ psi + bc).reshape(-1, 1)
    return np.hstack([feats, sentiment_score])  # concat ψ info

X_full_train = doc_sentiment_features(X_train, R, psi, bc)
X_full_test = doc_sentiment_features(X_test, R, psi, bc)

clf = LinearSVC()
clf.fit(X_full_train, y_train)
y_pred = clf.predict(X_full_test)
print(f"[Semantic + Sentiment] Accuracy: {accuracy_score(y_test, y_pred):.4f}")

[Semantic + Sentiment] Accuracy: 0.6711


#### Concat BoW et modèle sémantique seul

In [55]:
X_comb_train = np.hstack([X_r_train, X_train])
X_comb_test = np.hstack([X_r_test, X_test])

clf = LinearSVC()
clf.fit(X_comb_train, y_train)
y_pred = clf.predict(X_comb_test)
print(f"[Semantic + BoW] Accuracy: {accuracy_score(y_test, y_pred):.4f}")

[Semantic + BoW] Accuracy: 0.8190
