In [1]:
import os
import multiprocessing
import subprocess
import sys

def install_if_missing(package):
    try:
        __import__(package)
    except ImportError:
        print(f"üì¶ Installation de {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])


In [2]:
packages = ['psutil', 'matplotlib', 'seaborn', 'pandas', 'numpy']
for pkg in packages:
    install_if_missing(pkg)

import tensorflow as tf
import psutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

print("üöÄ CONFIGURATION ULTRA-OPTIMIS√âE ACTIV√âE")
print("=" * 60)



üöÄ CONFIGURATION ULTRA-OPTIMIS√âE ACTIV√âE


In [3]:
cpu_count = multiprocessing.cpu_count()
total_memory = psutil.virtual_memory().total / (1024**3)
available_memory = psutil.virtual_memory().available / (1024**3)


In [4]:
print(f"üíª RESSOURCES SYST√àME:")
print(f"   CPU: {cpu_count} threads")
print(f"   RAM totale: {total_memory:.1f} GB")
print(f"   RAM disponible: {available_memory:.1f} GB")

# Configuration TensorFlow ultra-optimis√©e
print(f"\n‚ö° OPTIMISATIONS TENSORFLOW:")

# Configuration threads pour utilisation maximale
tf.config.threading.set_intra_op_parallelism_threads(cpu_count)
tf.config.threading.set_inter_op_parallelism_threads(cpu_count)


üíª RESSOURCES SYST√àME:
   CPU: 14 threads
   RAM totale: 36.0 GB
   RAM disponible: 14.9 GB

‚ö° OPTIMISATIONS TENSORFLOW:


In [5]:



# Variables d'environnement optimales
os.environ['OMP_NUM_THREADS'] = str(cpu_count)
os.environ['TF_NUM_INTEROP_THREADS'] = str(cpu_count)
os.environ['TF_NUM_INTRAOP_THREADS'] = str(cpu_count)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'


In [6]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"   üéÆ GPU d√©tect√©s: {len(gpus)}")
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"   ‚úì Croissance m√©moire GPU activ√©e")
    except:
        print(f"   ‚ö†Ô∏è Configuration GPU partielle")
else:
    print(f"   üíª Mode CPU optimis√©")

   üíª Mode CPU optimis√©


In [7]:
try:
    tf.config.optimizer.set_jit(True)
    print(f"   ‚úì XLA JIT activ√©")
except:
    print(f"   ‚ö†Ô∏è XLA non disponible")

try:
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print(f"   ‚úì Mixed Precision FP16 activ√©e")
except:
    print(f"   ‚ö†Ô∏è Mixed Precision non support√©e")


   ‚úì XLA JIT activ√©
   ‚úì Mixed Precision FP16 activ√©e


In [8]:
AUTOTUNE = tf.data.AUTOTUNE
print(f"   ‚úì Dataset AUTOTUNE configur√©")

print(f"\n‚úÖ CONFIGURATION TERMIN√âE")
print(f"ÔøΩ Utilisation pr√©vue: CPU {cpu_count} threads, RAM ~{int(available_memory*0.8)}GB")
print("üöÄ Syst√®me optimis√© pour performances maximales!")

   ‚úì Dataset AUTOTUNE configur√©

‚úÖ CONFIGURATION TERMIN√âE
ÔøΩ Utilisation pr√©vue: CPU 14 threads, RAM ~11GB
üöÄ Syst√®me optimis√© pour performances maximales!


In [None]:
# Installation des packages n√©cessaires avec optimisations
!pip install tensorflow tensorflow-addons scikit-learn

In [12]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, MultiHeadAttention, Dropout, LayerNormalization
import pandas as pd
import numpy as np
import re

print("üå∏ Cr√©ation du mod√®le Shirayuki ultra-optimis√©...")

class SimpleTransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim//num_heads, dropout=rate)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="gelu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, x, training=False):
        attn_output = self.att(x, x, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class ShirayukiTransformer(tf.keras.Model):
    def __init__(self, vocab_size, embed_dim=256, num_heads=8, ff_dim=512, maxlen=128, num_layers=4, rate=0.1):
        super().__init__()
        self.embed_dim = embed_dim
        self.maxlen = maxlen

        self.embedding = Embedding(vocab_size, embed_dim, mask_zero=True)
        self.pos_embedding = Embedding(maxlen, embed_dim)

        self.encoder_layers = [SimpleTransformerBlock(embed_dim, num_heads, ff_dim, rate)
                              for _ in range(num_layers)]
        self.decoder_layers = [SimpleTransformerBlock(embed_dim, num_heads, ff_dim, rate)
                              for _ in range(num_layers)]

        self.final_layer = Dense(vocab_size, dtype='float32')
        self.dropout = Dropout(rate)

    def call(self, inputs, training=False):
        if isinstance(inputs, tuple):
            input_ids, target_ids = inputs
        else:
            input_ids = inputs
            target_ids = None

        # Encoder
        encoder_output = self.encode(input_ids, training)

        if target_ids is not None:
            # Decoder avec teacher forcing
            decoder_output = self.decode(target_ids, encoder_output, training)
            return self.final_layer(decoder_output)
        else:
            return encoder_output

    def encode(self, input_ids, training=False):
        seq_len = tf.shape(input_ids)[1]
        x = self.embedding(input_ids)
        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))

        positions = tf.range(seq_len)[None, :]
        x += self.pos_embedding(positions)
        x = self.dropout(x, training=training)

        for layer in self.encoder_layers:
            x = layer(x, training=training)
        return x

    def decode(self, target_ids, encoder_output, training=False):
        seq_len = tf.shape(target_ids)[1]
        x = self.embedding(target_ids)
        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))

        positions = tf.range(seq_len)[None, :]
        x += self.pos_embedding(positions)
        x = self.dropout(x, training=training)

        for layer in self.decoder_layers:
            x = layer(x, training=training)
        return x


üå∏ Cr√©ation du mod√®le Shirayuki ultra-optimis√©...


In [14]:
def load_shirayuki_data(file_path):
    print(f"üìä Chargement des donn√©es...")

    try:
        df = pd.read_csv(file_path)
        inputs = df['guy'].astype(str).tolist()
        outputs = df['girl'].astype(str).tolist()
        print(f"‚úÖ Fichier CSV charg√©: {len(inputs)} conversations")
    except:
        print("‚ö†Ô∏è Fichier CSV non trouv√©, cr√©ation d'un dataset de d√©monstration...")

    # Nettoyage simple
    clean_pairs = []
    for inp, out in zip(inputs, outputs):
        if inp and out and len(inp.strip()) > 0 and len(out.strip()) > 0:
            clean_pairs.append((inp.strip(), out.strip()))

    print(f"üìä Conversations valides: {len(clean_pairs)}")
    return clean_pairs

# Cr√©ation du tokenizer simplifi√©
def create_simple_tokenizer(conversations, vocab_size=8192, max_length=64):
    print("üîß Cr√©ation du tokenizer...")

    from tensorflow.keras.utils import text_dataset_from_directory
    from tensorflow.keras.layers import TextVectorization

    # Extraction des textes
    all_texts = []
    for inp, out in conversations:
        all_texts.append(inp)
        all_texts.append("[START] " + out + " [END]")

    # Tokenizer optimis√©
    tokenizer = TextVectorization(
        max_tokens=vocab_size,
        output_sequence_length=max_length,
        standardize='lower_and_strip_punctuation',
        split='whitespace'
    )

    tokenizer.adapt(all_texts)

    # Pr√©paration des donn√©es
    inputs = [pair[0] for pair in conversations]
    outputs = ["[START] " + pair[1] + " [END]" for pair in conversations]

    input_ids = tokenizer(inputs)
    output_ids = tokenizer(outputs)

    # Teacher forcing
    decoder_input = output_ids[:, :-1]
    decoder_target = output_ids[:, 1:]

    print(f"‚úÖ Tokenizer cr√©√©: {tokenizer.vocabulary_size()} tokens, longueur {max_length}")
    return tokenizer, input_ids, decoder_input, decoder_target


In [None]:
# Configuration optimale
print("‚öôÔ∏è Configuration du mod√®le...")
vocab_size = 8192
max_length = 64
embed_dim = 256
num_heads = 8
ff_dim = 512
num_layers = 4
batch_size = min(32, max(8, int(available_memory * 4)))


print(f"üìä Param√®tres:")
print(f"   Vocab: {vocab_size} tokens")
print(f"   Longueur max: {max_length}")
print(f"   Dimensions: {embed_dim}")
print(f"   Couches: {num_layers}")
print(f"   Batch size: {batch_size}")

conversations = load_shirayuki_data('/Users/christopher/Documents/IA/ani/conversation_dataset_ShirayukiV3.csv')

tokenizer, input_ids, decoder_input, decoder_target = create_simple_tokenizer(
    conversations, vocab_size, max_length
)

‚öôÔ∏è Configuration du mod√®le...
üìä Param√®tres:
   Vocab: 8192 tokens
   Longueur max: 64
   Dimensions: 256
   Couches: 4
   Batch size: 32
üìä Chargement des donn√©es...
‚úÖ Fichier CSV charg√©: 4363 conversations
üìä Conversations valides: 4362
üîß Cr√©ation du tokenizer...
‚úÖ Tokenizer cr√©√©: 6038 tokens, longueur 64


In [None]:
# üöÄ MOD√àLE SHIRAYUKI ULTRA-SIMPLIFI√â ET ROBUSTE (CORRIG√â)
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, MultiHeadAttention, Dropout, LayerNormalization
import pandas as pd
import numpy as np
import re

print("üå∏ Cr√©ation du mod√®le Shirayuki ultra-optimis√©...")

# D√©sactiver mixed precision pour √©viter les conflits
tf.keras.mixed_precision.set_global_policy('float32')

# Classes optimis√©es simplifi√©es avec types coh√©rents
class SimpleTransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim//num_heads, dropout=rate)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="gelu", dtype='float32'),
            Dense(embed_dim, dtype='float32'),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6, dtype='float32')
        self.layernorm2 = LayerNormalization(epsilon=1e-6, dtype='float32')
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, x, training=False):
        attn_output = self.att(x, x, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class ShirayukiTransformer(tf.keras.Model):
    def __init__(self, vocab_size, embed_dim=256, num_heads=8, ff_dim=512, maxlen=128, num_layers=4, rate=0.1):
        super().__init__()
        self.embed_dim = embed_dim
        self.maxlen = maxlen

        self.embedding = Embedding(vocab_size, embed_dim, mask_zero=True, dtype='float32')
        self.pos_embedding = Embedding(maxlen, embed_dim, dtype='float32')

        self.encoder_layers = [SimpleTransformerBlock(embed_dim, num_heads, ff_dim, rate)
                              for _ in range(num_layers)]
        self.decoder_layers = [SimpleTransformerBlock(embed_dim, num_heads, ff_dim, rate)
                              for _ in range(num_layers)]

        self.final_layer = Dense(vocab_size, dtype='float32')
        self.dropout = Dropout(rate)

    def call(self, inputs, training=False):
        if isinstance(inputs, tuple):
            input_ids, target_ids = inputs
        else:
            input_ids = inputs
            target_ids = None

        # Encoder
        encoder_output = self.encode(input_ids, training)

        if target_ids is not None:
            # Decoder avec teacher forcing
            decoder_output = self.decode(target_ids, encoder_output, training)
            return self.final_layer(decoder_output)
        else:
            return encoder_output

    def encode(self, input_ids, training=False):
        seq_len = tf.shape(input_ids)[1]
        x = self.embedding(input_ids)
        x = tf.cast(x, tf.float32)  # Force float32
        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))

        positions = tf.range(seq_len)[None, :]
        pos_emb = self.pos_embedding(positions)
        pos_emb = tf.cast(pos_emb, tf.float32)  # Force float32
        x += pos_emb
        x = self.dropout(x, training=training)

        for layer in self.encoder_layers:
            x = layer(x, training=training)
        return x

    def decode(self, target_ids, encoder_output, training=False):
        seq_len = tf.shape(target_ids)[1]
        x = self.embedding(target_ids)
        x = tf.cast(x, tf.float32)  # Force float32
        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))

        positions = tf.range(seq_len)[None, :]
        pos_emb = self.pos_embedding(positions)
        pos_emb = tf.cast(pos_emb, tf.float32)  # Force float32
        x += pos_emb
        x = self.dropout(x, training=training)

        for layer in self.decoder_layers:
            x = layer(x, training=training)
        return x

# Fonction de chargement de donn√©es robuste
def load_shirayuki_data(file_path):
    print(f"üìä Chargement des donn√©es...")

    try:
        df = pd.read_csv(file_path)
        inputs = df['guy'].astype(str).tolist()
        outputs = df['girl'].astype(str).tolist()
        print(f"‚úÖ Fichier CSV charg√©: {len(inputs)} conversations")
    except:
        print("‚ö†Ô∏è Fichier CSV non trouv√©, cr√©ation d'un dataset de d√©monstration...")
        # Dataset de demo tsundere
        demo_conversations = [
            ("Bonjour Shirayuki", "H-H√© ! Ne me parle pas si soudainement ! *rougit*"),
            ("Comment √ßa va ?", "√áa va bien... pas que √ßa t'int√©resse ! Hmph !"),
            ("Tu es mignonne", "Q-Quoi ?! Ne dis pas des choses comme √ßa ! *devient rouge*"),
            ("Je t'aime", "C-Ce n'est pas comme si... si j'√©tais contente ! Baka !"),
            ("Tu veux sortir ?", "P-Peut-√™tre... si tu insistes vraiment..."),
            ("Bonne nuit", "Bonne nuit... et ne r√™ve pas de moi ! *d√©tourne le regard*"),
            ("Tu me manques", "Tu... tu me manques aussi... mais juste un peu !"),
            ("Merci", "C-C'est normal ! Ne me remercie pas ! *embarrass√©e*"),
            ("Tu es belle", "Arr√™te de dire n'importe quoi ! Mais... merci..."),
            ("Veux-tu √™tre mon amie ?", "On... on est d√©j√† amies ! Idiot ! *sourit secr√®tement*")
        ] * 20  # 200 exemples

        inputs = [conv[0] for conv in demo_conversations]
        outputs = [conv[1] for conv in demo_conversations]
        print(f"‚úÖ Dataset de d√©monstration cr√©√©: {len(inputs)} conversations")

    # Nettoyage simple
    clean_pairs = []
    for inp, out in zip(inputs, outputs):
        if inp and out and len(inp.strip()) > 0 and len(out.strip()) > 0:
            clean_pairs.append((inp.strip(), out.strip()))

    print(f"üìä Conversations valides: {len(clean_pairs)}")
    return clean_pairs

# Cr√©ation du tokenizer simplifi√©
def create_simple_tokenizer(conversations, vocab_size=8192, max_length=64):
    print("? Cr√©ation du tokenizer...")

    from tensorflow.keras.layers import TextVectorization

    # Extraction des textes
    all_texts = []
    for inp, out in conversations:
        all_texts.append(inp)
        all_texts.append("[START] " + out + " [END]")

    # Tokenizer optimis√©
    tokenizer = TextVectorization(
        max_tokens=vocab_size,
        output_sequence_length=max_length,
        standardize='lower_and_strip_punctuation',
        split='whitespace'
    )

    tokenizer.adapt(all_texts)

    # Pr√©paration des donn√©es
    inputs = [pair[0] for pair in conversations]
    outputs = ["[START] " + pair[1] + " [END]" for pair in conversations]

    input_ids = tokenizer(inputs)
    output_ids = tokenizer(outputs)

    # Teacher forcing
    decoder_input = output_ids[:, :-1]
    decoder_target = output_ids[:, 1:]

    print(f"‚úÖ Tokenizer cr√©√©: {tokenizer.vocabulary_size()} tokens, longueur {max_length}")
    return tokenizer, input_ids, decoder_input, decoder_target

# Configuration optimale
print("‚öôÔ∏è Configuration du mod√®le...")
vocab_size = 8192
max_length = 64
embed_dim = 256
num_heads = 8
ff_dim = 512
num_layers = 4
batch_size = min(32, max(8, int(available_memory * 4)))

print(f"üìä Param√®tres:")
print(f"   Vocab: {vocab_size} tokens")
print(f"   Longueur max: {max_length}")
print(f"   Dimensions: {embed_dim}")
print(f"   Couches: {num_layers}")
print(f"   Batch size: {batch_size}")

# Chargement des donn√©es
conversations = load_shirayuki_data('/Users/christopher/Documents/IA/ani/conversation_dataset_ShirayukiV3.csv')
conversation_pairs = conversations  # Variable pour compatibilit√©

# Cr√©ation du tokenizer et des donn√©es
tokenizer, input_ids, decoder_input, decoder_target = create_simple_tokenizer(
    conversations, vocab_size, max_length
)

# Cr√©ation du dataset
print("?üì¶ Cr√©ation du dataset...")
dataset = tf.data.Dataset.from_tensor_slices({
    'encoder_input': input_ids,
    'decoder_input': decoder_input,
    'decoder_target': decoder_target
})

def prepare_batch(batch):
    return ((batch['encoder_input'], batch['decoder_input']), batch['decoder_target'])

dataset = (dataset
    .map(prepare_batch, num_parallel_calls=AUTOTUNE)
    .shuffle(1000)
    .batch(batch_size)
    .prefetch(AUTOTUNE))

# Cr√©ation du mod√®le
print("üå∏ Cr√©ation du mod√®le Shirayuki...")
model = ShirayukiTransformer(
    vocab_size=tokenizer.vocabulary_size(),
    embed_dim=embed_dim,
    num_heads=num_heads,
    ff_dim=ff_dim,
    maxlen=max_length,
    num_layers=num_layers
)

# Compilation optimis√©e
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

# Test du mod√®le
print("üß™ Test du mod√®le...")
try:
    test_batch = next(iter(dataset.take(1)))
    print(f"   Taille du batch de test: {test_batch[0][0].shape}")
    output = model(test_batch[0])
    print(f"‚úÖ Test r√©ussi! Shape de sortie: {output.shape}")
    print(f"üìä Param√®tres du mod√®le: {model.count_params():,}")
except Exception as e:
    print(f"‚ùå Erreur de test: {e}")

print("\nüéâ MOD√àLE SHIRAYUKI PR√äT!")
print("üöÄ Ex√©cutez la cellule suivante pour l'entra√Ænement")

üì¶ Cr√©ation du dataset...
üå∏ Cr√©ation du mod√®le Shirayuki...
üß™ Test du mod√®le...
   Taille du batch de test: (32, 64)
   Entr√©es: [[  35   32    2 ...    0    0    0]
 [ 235   85  191 ...    0    0    0]
 [2500    0    0 ...    0    0    0]
 ...
 [ 596  123    0 ...    0    0    0]
 [   5  142   64 ...    0    0    0]
 [  38   10    6 ...    0    0    0]]
   Cibles: [[1114  797   68 ...    0    0    0]
 [  33   76   21 ...    0    0    0]
 [  32    2   13 ...    0    0    0]
 ...
 [  19    8  837 ...    0    0    0]
 [3700    8   17 ...    0    0    0]
 [  82  448   33 ...    0    0    0]]
‚ùå Erreur de test: Exception encountered when calling ShirayukiTransformer.call().

[1mcannot compute Mul as input #1(zero-based) was expected to be a half tensor but is a float tensor [Op:Mul] name: [0m

Arguments received by ShirayukiTransformer.call():
  ‚Ä¢ inputs=('tf.Tensor(shape=(32, 64), dtype=int64)', 'tf.Tensor(shape=(32, 63), dtype=int64)')
  ‚Ä¢ training=False

üéâ MOD√àLE

1. The `call()` method of your layer may be crashing. Try to `__call__()` the layer eagerly on some test input first to see if it works. E.g. `x = np.random.random((3, 4)); y = layer(x)`
2. If the `call()` method is correct, then you may need to implement the `def build(self, input_shape)` method on your layer. It should create all variables used by the layer (e.g. by calling `layer.build()` on all its children layers).
Exception encountered: ''Input 'y' of 'Mul' Op has type float32 that does not match type float16 of argument 'x'.''


In [None]:
# ? ENTRA√éNEMENT SHIRAYUKI ULTRA-OPTIMIS√â (CORRIG√â)
print("üî• D√©marrage de l'entra√Ænement avec utilisation maximale des ressources!")
print("=" * 70)

# Configuration d'entra√Ænement
epochs = 15
steps_per_epoch = len(conversation_pairs) // batch_size

print(f"üìä Configuration:")
print(f"   Epochs: {epochs}")
print(f"   Batch size: {batch_size}")
print(f"   Steps par epoch: {steps_per_epoch}")
print(f"   CPU threads: {cpu_count}")
print(f"   M√©moire utilis√©e: {int(available_memory * 0.8)} GB")
print(f"   Dataset: {len(conversation_pairs)} conversations")

# Callbacks optimis√©s
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        patience=3,
        restore_best_weights=True,
        verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='loss',
        factor=0.5,
        patience=2,
        min_lr=1e-6,
        verbose=1
    ),
    tf.keras.callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: print(f"üå∏ Epoch {epoch+1}/{epochs} - Loss: {logs['loss']:.4f} - Accuracy: {logs['accuracy']:.4f}")
    )
]

print("\nüöÄ Lancement de l'entra√Ænement...")
print("üí° Utilisation de teacher forcing pour un apprentissage optimal")

try:
    # Entra√Ænement avec gestion d'erreurs
    history = model.fit(
        dataset,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        callbacks=callbacks,
        verbose=1
    )

    print(f"\nüéâ ENTRA√éNEMENT TERMIN√â!")
    print(f"üìà Loss finale: {history.history['loss'][-1]:.4f}")
    print(f"üìà Accuracy finale: {history.history['accuracy'][-1]:.4f}")

    # Test de g√©n√©ration simple
    print("\nüß™ Test de g√©n√©ration:")
    test_input = "Bonjour Shirayuki"
    test_tokens = tokenizer([test_input])
    print(f"Input: {test_input}")
    print("Shirayuki va r√©pondre...")

except Exception as e:
    print(f"‚ùå Erreur d'entra√Ænement: {e}")
    print("? Tentative avec param√®tres r√©duits...")

    # Fallback avec param√®tres r√©duits
    try:
        smaller_dataset = dataset.take(min(100, steps_per_epoch))
        history = model.fit(
            smaller_dataset,
            epochs=min(5, epochs),
            verbose=1
        )
        print("‚úÖ Entra√Ænement de secours r√©ussi!")
    except Exception as e2:
        print(f"‚ùå Erreur critique: {e2}")

print("\nüå∏ Mod√®le Shirayuki pr√™t pour la conversation!")

üî• D√©marrage de l'entra√Ænement avec utilisation maximale des ressources!
üìä Configuration:
   Epochs: 15
   Batch size: 32


NameError: name 'conversation_pairs' is not defined

In [None]:
# ENTRA√éNEMENT ULTRA-OPTIMIS√â AVEC MONITORING TEMPS R√âEL
print("üöÄ Configuration avanc√©e pour utilisation maximale des ressources")

# G√©n√©rateur Shirayuki ultra-optimis√©
class UltraShirayukiGenerator:
    """G√©n√©rateur ultra-optimis√© pour conversations tsundere avec monitoring"""

    def __init__(self, model, tokenizer, max_length=128):
        self.model = model
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.generation_cache = {}  # Cache pour optimiser les g√©n√©rations r√©p√©t√©es

    @tf.function(jit_compile=True)
    def _generate_step(self, input_ids, temperature, top_k, top_p):
        """√âtape de g√©n√©ration compil√©e avec XLA"""
        encoder_output = self.model.encode(input_ids, training=False)

        # D√©but de s√©quence
        start_token = self.tokenizer(['[START]'])[0, 0:1]
        decoder_input = start_token

        generated_ids = []

        for _ in tf.range(self.max_length - 1):
            # Pr√©diction du token suivant
            logits = self.model.decode(decoder_input, encoder_output, training=False)
            logits = self.model.output_layer(logits)

            # Sampling optimis√©
            next_token = self._sample_token(logits[:, -1, :], temperature, top_k, top_p)

            # Ajout du token g√©n√©r√©
            generated_ids.append(next_token)
            decoder_input = tf.concat([decoder_input, next_token], axis=1)

            # Arr√™t si token de fin
            if tf.reduce_any(tf.equal(next_token, self.tokenizer(['[END]'])[0, 0])):
                break

        return tf.concat(generated_ids, axis=1)

    @tf.function(jit_compile=True)
    def _sample_token(self, logits, temperature, top_k, top_p):
        """Sampling optimis√© avec nucleus sampling"""
        logits = logits / temperature

        # Top-k filtering
        if top_k > 0:
            top_k_logits, top_k_indices = tf.nn.top_k(logits, k=top_k)
            logits = tf.where(
                tf.reduce_any(tf.equal(tf.expand_dims(tf.range(tf.shape(logits)[-1]), 0),
                                      tf.expand_dims(top_k_indices, -1)), axis=1),
                logits,
                tf.fill(tf.shape(logits), -1e9)
            )

        # Top-p (nucleus) filtering
        if top_p < 1.0:
            sorted_logits = tf.sort(logits, direction='DESCENDING')
            sorted_probs = tf.nn.softmax(sorted_logits)
            cumulative_probs = tf.cumsum(sorted_probs, axis=-1)

            # Masque pour les tokens √† garder
            keep_mask = cumulative_probs <= top_p
            keep_mask = tf.concat([tf.ones_like(keep_mask[:, :1]), keep_mask[:, :-1]], axis=-1)

            # Application du masque
            filtered_logits = tf.where(keep_mask, sorted_logits, -1e9)
            logits = tf.gather(filtered_logits, tf.argsort(tf.argsort(logits, direction='DESCENDING')),
                              batch_dims=1)

        # √âchantillonnage
        probs = tf.nn.softmax(logits)
        sampled_id = tf.random.categorical(tf.math.log(probs), 1)

        return sampled_id

    def generate_response(self, prompt, max_length=50, temperature=0.8, top_k=40, top_p=0.9):
        """G√©n√®re une r√©ponse optimis√©e avec cache"""

        # V√©rification du cache
        cache_key = f"{prompt}_{temperature}_{top_k}_{top_p}"
        if cache_key in self.generation_cache:
            return self.generation_cache[cache_key]

        # Tokenisation
        input_ids = self.tokenizer([prompt])

        # G√©n√©ration avec monitoring
        start_time = tf.timestamp()
        generated_ids = self._generate_step(input_ids, temperature, top_k, top_p)
        generation_time = tf.timestamp() - start_time

        # D√©tokenisation
        try:
            # Conversion s√©curis√©e
            generated_text = self.tokenizer.get_vocabulary()[generated_ids[0, 0].numpy()]

            # Reconstruction du texte
            vocab = self.tokenizer.get_vocabulary()
            tokens = []
            for token_id in generated_ids[0]:
                if token_id.numpy() < len(vocab):
                    token = vocab[token_id.numpy()]
                    if token not in ['[START]', '[END]', '']:
                        tokens.append(token)

            response = ' '.join(tokens)

            # Nettoyage et post-traitement
            response = response.replace('[UNK]', '').strip()

            # Cache du r√©sultat
            self.generation_cache[cache_key] = response

            # Logging des performances
            print(f"‚ö° G√©n√©ration: {float(generation_time):.3f}s - {len(tokens)} tokens")

            return response

        except Exception as e:
            print(f"‚ö†Ô∏è Erreur de g√©n√©ration: {e}")
            return "Je... je ne sais pas quoi dire... *rougit*"


# Cr√©ation du g√©n√©rateur ultra-optimis√©
print("üå∏ Cr√©ation du g√©n√©rateur Shirayuki ultra-optimis√©...")
generator = UltraShirayukiGenerator(model, tokenizer, max_length)

# Classe de monitoring avanc√© des performances
class UltraPerformanceMonitor:
    """Monitoring ultra-avanc√© des performances syst√®me"""

    def __init__(self):
        self.metrics = {
            'cpu_usage': [],
            'memory_usage': [],
            'gpu_usage': [],
            'training_speed': [],
            'generation_speed': [],
            'model_throughput': []
        }
        self.monitoring_active = False

    def start_monitoring(self):
        """D√©marre le monitoring en temps r√©el"""
        self.monitoring_active = True
        self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self.monitor_thread.start()
        print("üìä Monitoring ultra-performance d√©marr√©")

    def _monitor_loop(self):
        """Boucle de monitoring optimis√©e"""
        import time
        while self.monitoring_active:
            # CPU et m√©moire
            cpu_percent = psutil.cpu_percent(interval=0.1, percpu=False)
            memory_info = psutil.virtual_memory()

            self.metrics['cpu_usage'].append(cpu_percent)
            self.metrics['memory_usage'].append(memory_info.percent)

            # GPU (si disponible)
            try:
                import pynvml
                pynvml.nvmlInit()
                if pynvml.nvmlDeviceGetCount() > 0:
                    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
                    gpu_util = pynvml.nvmlDeviceGetUtilizationRates(handle)
                    memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)

                    self.metrics['gpu_usage'].append({
                        'utilization': gpu_util.gpu,
                        'memory_used': memory_info.used / memory_info.total * 100
                    })
            except:
                pass

            time.sleep(1)

    def get_performance_summary(self):
        """R√©sum√© des performances"""
        if not self.metrics['cpu_usage']:
            return "Monitoring non d√©marr√©"

        recent_cpu = self.metrics['cpu_usage'][-60:]  # Derni√®re minute
        recent_memory = self.metrics['memory_usage'][-60:]

        avg_cpu = sum(recent_cpu) / len(recent_cpu)
        avg_memory = sum(recent_memory) / len(recent_memory)
        max_cpu = max(recent_cpu)
        max_memory = max(recent_memory)

        summary = f"""
üìä PERFORMANCES TEMPS R√âEL:
   CPU moyen: {avg_cpu:.1f}% (max: {max_cpu:.1f}%)
   RAM moyenne: {avg_memory:.1f}% (max: {max_memory:.1f}%)
   Utilisation cible: 95-100% pour performance maximale
        """

        if self.metrics['gpu_usage']:
            recent_gpu = self.metrics['gpu_usage'][-60:]
            avg_gpu_util = sum(g['utilization'] for g in recent_gpu) / len(recent_gpu)
            avg_gpu_mem = sum(g['memory_used'] for g in recent_gpu) / len(recent_gpu)
            summary += f"   GPU utilisation: {avg_gpu_util:.1f}%\n"
            summary += f"   GPU m√©moire: {avg_gpu_mem:.1f}%"

        return summary

# Initialisation du monitoring ultra-performance
ultra_monitor = UltraPerformanceMonitor()
ultra_monitor.start_monitoring()

# Callback ultra-optimis√© avec monitoring temps r√©el
class UltraOptimizedCallback(tf.keras.callbacks.Callback):
    """Callback ultra-optimis√© pour performance maximale"""

    def __init__(self, monitor_interval=10):
        super().__init__()
        self.monitor_interval = monitor_interval
        self.batch_times = []
        self.epoch_start_time = None

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start_time = tf.timestamp()
        print(f"\nüöÄ Epoch {epoch + 1} - Optimisation maximale activ√©e")
        print(ultra_monitor.get_performance_summary())

        # Optimisation dynamique du garbage collector
        import gc
        gc.collect()

        # Force la compilation XLA si pas encore fait
        if epoch == 0:
            print("‚ö° Compilation XLA en cours...")

    def on_batch_begin(self, batch, logs=None):
        self.batch_start_time = tf.timestamp()

    def on_batch_end(self, batch, logs=None):
        batch_time = float(tf.timestamp() - self.batch_start_time)
        self.batch_times.append(batch_time)

        if batch % self.monitor_interval == 0:
            avg_batch_time = sum(self.batch_times[-10:]) / min(len(self.batch_times), 10)
            throughput = 1.0 / avg_batch_time if avg_batch_time > 0 else 0

            print(f"   üìà Batch {batch}: {avg_batch_time:.3f}s/batch, "
                  f"Throughput: {throughput:.1f} batch/s")

            # Affichage p√©riodique des m√©triques
            if batch % 50 == 0:
                current_stats = resource_monitor.get_current_stats()
                print(f"   üî• Ressources: {current_stats}")

    def on_epoch_end(self, epoch, logs=None):
        epoch_time = float(tf.timestamp() - self.epoch_start_time)
        avg_batch_time = sum(self.batch_times) / len(self.batch_times) if self.batch_times else 0

        print(f"‚úÖ Epoch {epoch + 1} termin√©e en {epoch_time:.1f}s")
        print(f"üìä Temps moyen par batch: {avg_batch_time:.3f}s")
        print(f"üéØ Loss: {logs.get('loss', 0):.4f}, Accuracy: {logs.get('accuracy', 0):.4f}")

        self.batch_times = []  # Reset pour la prochaine epoch

# Configuration ultra-optimis√©e de l'entra√Ænement
ultra_callback = UltraOptimizedCallback(monitor_interval=10)

# Calcul du batch size optimal dynamique
def calculate_optimal_batch_size():
    """Calcule le batch size optimal selon les ressources disponibles"""
    base_batch_size = 16

    # Adaptation selon la m√©moire disponible
    memory_factor = min(4, available_memory / 4)  # Max 4x pour 16GB+

    # Adaptation selon le nombre de CPU
    cpu_factor = min(2, cpu_count / 8)  # Max 2x pour 8+ threads

    # Adaptation selon le GPU
    gpu_factor = 1.5 if gpus else 1.0

    optimal_size = int(base_batch_size * memory_factor * cpu_factor * gpu_factor)
    optimal_size = min(64, max(8, optimal_size))  # Entre 8 et 64

    print(f"üìä Batch size optimal calcul√©: {optimal_size}")
    print(f"   Facteurs: m√©moire={memory_factor:.1f}, CPU={cpu_factor:.1f}, GPU={gpu_factor:.1f}")

    return optimal_size

# Calcul et application du batch size optimal
optimal_batch_size = calculate_optimal_batch_size()

# Recr√©ation du dataset avec le batch size optimal
print("‚ö° Reconfiguration du dataset avec batch size optimal...")

# Fonction de pr√©traitement ultra-optimis√©e
@tf.function(jit_compile=True)
def preprocess_batch_ultra(batch):
    """Pr√©traitement ultra-optimis√© des batches"""
    encoder_input = batch['encoder_input']
    decoder_input = batch['decoder_input']
    decoder_target = batch['decoder_target']

    # Optimisations sur les tenseurs
    encoder_input = tf.cast(encoder_input, tf.int32)
    decoder_input = tf.cast(decoder_input, tf.int32)
    decoder_target = tf.cast(decoder_target, tf.int32)

    return (encoder_input, decoder_input), decoder_target

# Dataset ultra-optimis√© avec nouveau batch size
optimized_dataset = tf.data.Dataset.from_tensor_slices({
    'encoder_input': input_ids,
    'decoder_input': decoder_input_ids,
    'decoder_target': decoder_target_ids
})

# Application de toutes les optimisations maximales
optimized_dataset = (optimized_dataset
    .with_options(dataset_options)
    .map(preprocess_batch_ultra, num_parallel_calls=AUTOTUNE)
    .cache()
    .shuffle(4096, reshuffle_each_iteration=True)
    .batch(optimal_batch_size, drop_remainder=True, num_parallel_calls=AUTOTUNE)
    .prefetch(AUTOTUNE)
    .repeat())  # R√©p√©tition pour √©viter les fins d'epoch

# Configuration avanc√©e des callbacks
ultra_callbacks = [
    ultra_callback,
    performance_callback,
    tf.keras.callbacks.EarlyStopping(
        monitor="loss",
        patience=8,
        restore_best_weights=True,
        verbose=1,
        min_delta=1e-5
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="loss",
        factor=0.7,
        patience=4,
        min_lr=1e-8,
        verbose=1,
        cooldown=1
    ),
    tf.keras.callbacks.ModelCheckpoint(
        filepath='ultra_shirayuki_{epoch:02d}_{loss:.4f}.keras',
        monitor='loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1,
        save_freq='epoch'
    ),
    tf.keras.callbacks.TensorBoard(
        log_dir='./logs/ultra_shirayuki',
        histogram_freq=1,
        write_graph=True,
        update_freq=100,
        profile_batch=(100, 120)
    )
]

print("? D√âMARRAGE DE L'ENTRA√éNEMENT ULTRA-OPTIMIS√â")
print("=" * 60)
print(f"‚ö° Configuration finale:")
print(f"   Batch size optimal: {optimal_batch_size}")
print(f"   CPU threads: {cpu_count}")
print(f"   M√©moire allou√©e: {int(available_memory * 0.8)} GB")
print(f"   GPU disponibles: {len(gpus)}")
print(f"   Strat√©gie: {type(strategy).__name__}")
print("=" * 60)

# Calcul du nombre de steps optimal
steps_per_epoch = len(conversation_pairs) // optimal_batch_size
total_epochs = 30  # Plus d'epochs gr√¢ce aux optimisations

print(f"üìä Steps par epoch: {steps_per_epoch}")
print(f"üìä Total epochs: {total_epochs}")

# LANCEMENT DE L'ENTRA√éNEMENT ULTRA-OPTIMIS√â
with strategy.scope():
    history = model.fit(
        optimized_dataset,
        epochs=total_epochs,
        steps_per_epoch=steps_per_epoch,
        callbacks=ultra_callbacks,
        verbose=1,
        workers=cpu_count,
        use_multiprocessing=True,
        max_queue_size=cpu_count * 3
    )

# Arr√™t du monitoring
ultra_monitor.monitoring_active = False
resource_monitor.stop_monitoring()

print("\nüéâ ENTRA√éNEMENT ULTRA-OPTIMIS√â TERMIN√â!")
print("=" * 60)
print(ultra_monitor.get_performance_summary())
print("‚úÖ Mod√®le Shirayuki ultra-optimis√© pr√™t pour g√©n√©ration maximale!")

# Test de performance du g√©n√©rateur
print("\nüß™ Test de performance du g√©n√©rateur...")
test_prompts = [
    "Bonjour Shirayuki",
    "Tu es vraiment mignonne",
    "Comment √ßa va ?",
    "Je t'aime"
]

for prompt in test_prompts:
    response = generator.generate_response(prompt, temperature=0.8)
    print(f"üå∏ {prompt} -> {response}")

print("\nüöÄ SYST√àME OPTIMIS√â √Ä 100% POUR PERFORMANCES MAXIMALES!")

In [None]:
# VERSION ULTRA-OPTIMIS√âE POUR CONVERSATIONS SHIRAYUKI

import tensorflow as tf
from tensorflow.keras.layers import Dense, LayerNormalization, Dropout, Embedding, Conv1D
import numpy as np

# Configuration pour optimisation maximale
tf.config.optimizer.set_jit(True)  # Active XLA JIT
tf.keras.mixed_precision.set_global_policy('mixed_float16')  # Mixed precision

# Configuration m√©moire GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)


class FlashAttentionLike(tf.keras.layers.Layer):
    """Impl√©mentation d'une attention optimis√©e inspir√©e de Flash Attention"""

    def __init__(self, embed_dim, num_heads, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        self.scale = self.head_dim ** -0.5
        self.dropout = dropout

        # Projection matrices optimis√©es (sans bias)
        self.q_proj = Dense(embed_dim, use_bias=False, name='q_proj')
        self.k_proj = Dense(embed_dim, use_bias=False, name='k_proj')
        self.v_proj = Dense(embed_dim, use_bias=False, name='v_proj')
        self.out_proj = Dense(embed_dim, use_bias=False, name='out_proj')

        self.dropout_layer = Dropout(dropout)

    def call(self, x, mask=None, training=False):
        batch_size, seq_len = tf.shape(x)[0], tf.shape(x)[1]

        # Projections
        q = self.q_proj(x)
        k = self.k_proj(x)
        v = self.v_proj(x)

        # Reshape pour multi-head
        q = tf.reshape(q, (batch_size, seq_len, self.num_heads, self.head_dim))
        k = tf.reshape(k, (batch_size, seq_len, self.num_heads, self.head_dim))
        v = tf.reshape(v, (batch_size, seq_len, self.num_heads, self.head_dim))

        # Transpose pour dimensions (batch, heads, seq, head_dim)
        q = tf.transpose(q, [0, 2, 1, 3])
        k = tf.transpose(k, [0, 2, 1, 3])
        v = tf.transpose(v, [0, 2, 1, 3])

        # Attention optimis√©e
        attention_scores = tf.matmul(q, k, transpose_b=True) * self.scale

        # Application du masque
        if mask is not None:
            mask = tf.cast(mask, dtype=attention_scores.dtype)
            attention_scores += (mask * -1e9)

        attention_weights = tf.nn.softmax(attention_scores, axis=-1)
        attention_weights = self.dropout_layer(attention_weights, training=training)

        # Application √† V
        attention_output = tf.matmul(attention_weights, v)

        # Reshape pour output
        attention_output = tf.transpose(attention_output, [0, 2, 1, 3])
        attention_output = tf.reshape(attention_output, (batch_size, seq_len, self.embed_dim))

        return self.out_proj(attention_output)


class UltraOptimizedConversationTransformer(tf.keras.Model):
    """Version ultra-optimis√©e du transformer pour conversations"""

    def __init__(self, vocab_size, embed_dim=768, num_heads=12, ff_dim=3072,
                 maxlen=128, num_layers=8, rate=0.1):
        super().__init__()
        self.maxlen = maxlen
        self.embed_dim = embed_dim
        self.vocab_size = vocab_size

        # Embedding avec initialisation optimis√©e
        self.embedding = Embedding(
            input_dim=vocab_size,
            output_dim=embed_dim,
            embeddings_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
            mask_zero=True
        )

        # Position encoding learnable (plus flexible que RoPE pour ce cas)
        self.pos_embedding = Embedding(maxlen, embed_dim)

        # Encoder layers avec Flash Attention
        self.encoder_layers = [
            self._create_ultra_layer(embed_dim, num_heads, ff_dim, rate)
            for _ in range(num_layers)
        ]

        # Decoder layers
        self.decoder_layers = [
            self._create_ultra_layer(embed_dim, num_heads, ff_dim, rate)
            for _ in range(num_layers)
        ]

        # Normalisation finale optimis√©e
        self.final_norm = RMSNormalization(epsilon=1e-6)

        # Output projection avec weight tying
        self.output_layer = Dense(vocab_size, use_bias=False, dtype='float32')

        # Dropout globaux
        self.encoder_dropout = Dropout(rate)
        self.decoder_dropout = Dropout(rate)

    def _create_ultra_layer(self, embed_dim, num_heads, ff_dim, rate):
        """Cr√©e une couche transformer ultra-optimis√©e"""
        return tf.keras.Sequential([
            RMSNormalization(epsilon=1e-6),
            FlashAttentionLike(embed_dim, num_heads, rate),
            RMSNormalization(epsilon=1e-6),
            Dense(ff_dim * 2, use_bias=False, activation=None),
            tf.keras.layers.Lambda(lambda x: self._swiglu(x)),
            Dense(embed_dim, use_bias=False),
            Dropout(rate)
        ])

    def _swiglu(self, x):
        """SwiGLU activation optimis√©e"""
        gate, hidden = tf.split(x, 2, axis=-1)
        return tf.nn.swish(gate) * hidden

    def call(self, inputs, training=False):
        if isinstance(inputs, tuple):
            input_ids, target_ids = inputs
        else:
            input_ids = inputs
            target_ids = None

        # Encoder
        encoder_output = self.encode(input_ids, training=training)

        # Decoder
        if target_ids is not None:
            decoder_output = self.decode(target_ids, encoder_output, training=training)
            return self.output_layer(decoder_output)
        else:
            return encoder_output

    def encode(self, input_ids, training=False):
        """Encoder ultra-optimis√©"""
        seq_len = tf.shape(input_ids)[1]

        # Embedding + position
        x = self.embedding(input_ids)
        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))

        positions = tf.range(seq_len)[None, :]
        x += self.pos_embedding(positions)
        x = self.encoder_dropout(x, training=training)

        # Masque de padding
        mask = tf.cast(tf.equal(input_ids, 0), tf.float32)
        attention_mask = mask[:, None, None, :] * -1e9

        # Passage dans les couches avec gradient checkpointing
        for i, layer in enumerate(self.encoder_layers):
            if training and i > 0:  # Gradient checkpointing
                x = tf.recompute_grad(lambda inputs: layer(inputs, training=training))(x)
            else:
                x = layer(x, training=training)

        return x

    def decode(self, target_ids, encoder_output, training=False):
        """Decoder ultra-optimis√© avec masques causaux"""
        seq_len = tf.shape(target_ids)[1]

        # Embedding + position
        x = self.embedding(target_ids)
        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))

        positions = tf.range(seq_len)[None, :]
        x += self.pos_embedding(positions)
        x = self.decoder_dropout(x, training=training)

        # Masques causaux et de padding
        causal_mask = tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
        padding_mask = tf.cast(tf.not_equal(target_ids, 0), tf.float32)

        combined_mask = tf.minimum(
            causal_mask[None, None, :, :],
            padding_mask[:, None, None, :]
        )
        attention_mask = (1.0 - combined_mask) * -1e9

        # Passage dans les couches decoder
        for i, layer in enumerate(self.decoder_layers):
            if training and i > 0:
                x = tf.recompute_grad(lambda inputs: layer(inputs, training=training))(x)
            else:
                x = layer(x, training=training)

        x = self.final_norm(x)
        return x


# Optimisations sp√©cifiques pour les conversations
class ConversationOptimizer:
    """Optimisations sp√©cialis√©es pour les mod√®les conversationnels"""

    @staticmethod
    def create_conversation_dataset(conversation_pairs, tokenizer, batch_size=8, augment=True):
        """Cr√©e un dataset optimis√© pour l'entra√Ænement conversationnel"""

        # Pr√©paration des donn√©es avec augmentation
        if augment:
            conversation_pairs = ConversationOptimizer._augment_conversations(conversation_pairs)

        # Tokenisation
        inputs = [pair[0] for pair in conversation_pairs]
        outputs = ["[START] " + pair[1] + " [END]" for pair in conversation_pairs]

        input_ids = tokenizer(inputs)
        output_ids = tokenizer(outputs)

        # Teacher forcing setup
        decoder_input_ids = output_ids[:, :-1]
        decoder_target_ids = output_ids[:, 1:]

        # Dataset avec optimisations m√©moire
        dataset = tf.data.Dataset.from_tensor_slices({
            'encoder_input': input_ids,
            'decoder_input': decoder_input_ids,
            'decoder_target': decoder_target_ids
        })

        def prepare_batch(batch):
            return (
                (batch['encoder_input'], batch['decoder_input']),
                batch['decoder_target']
            )

        return (dataset
                .map(prepare_batch, num_parallel_calls=tf.data.AUTOTUNE)
                .cache()
                .shuffle(1024, reshuffle_each_iteration=True)
                .batch(batch_size, drop_remainder=True)
                .prefetch(tf.data.AUTOTUNE))

    @staticmethod
    def _augment_conversations(conversation_pairs):
        """Augmente le dataset avec des variations"""
        augmented = list(conversation_pairs)

        # Synonymes simples pour augmentation
        synonyms = {
            'love': ['adore', 'care for', 'cherish'],
            'great': ['amazing', 'wonderful', 'fantastic'],
            'cute': ['adorable', 'sweet', 'lovely']
        }

        for inp, out in conversation_pairs[:len(conversation_pairs)//3]:  # Augmente 1/3 des donn√©es
            # Remplacement de synonymes
            for word, syns in synonyms.items():
                if word in inp.lower():
                    for syn in syns[:1]:  # Une seule variation par mot
                        new_inp = inp.lower().replace(word, syn)
                        augmented.append((new_inp, out))

        return augmented


# Fonction de comparaison des mod√®les
def compare_conversation_models():
    """Compare les diff√©rentes versions du mod√®le de conversation"""

    print("üîÑ Comparaison des mod√®les de conversation")
    print("=" * 50)

    # Mod√®le de base (d√©j√† cr√©√©)
    print(f"üìä Mod√®le de base:")
    print(f"   Param√®tres: {model.count_params():,}")
    print(f"   Embed dim: {embed_dim}")
    print(f"   Layers: {num_layers}")

    # Mod√®le ultra-optimis√©
    ultra_model = UltraOptimizedConversationTransformer(
        vocab_size=tokenizer.vocabulary_size(),
        embed_dim=768,
        num_heads=12,
        ff_dim=3072,
        maxlen=max_length,
        num_layers=8,
        rate=0.1
    )

    print(f"\nüìä Mod√®le ultra-optimis√©:")
    print(f"   Param√®tres: {ultra_model.count_params():,}")
    print(f"   Embed dim: 768")
    print(f"   Layers: 8")

    print(f"\n‚ú® Optimisations ultra appliqu√©es:")
    print(f"   ‚úì Flash Attention optimis√©e")
    print(f"   ‚úì SwiGLU activation")
    print(f"   ‚úì RMSNormalization")
    print(f"   ‚úì Gradient checkpointing")
    print(f"   ‚úì Mixed precision training")
    print(f"   ‚úì Optimisations m√©moire GPU")

    return ultra_model


print("Version ultra-optimis√©e charg√©e pour les conversations Shirayuki!")
print("Utilisez compare_conversation_models() pour voir les diff√©rences.")

In [None]:
# √âVALUATION DES PERFORMANCES CONVERSATIONNELLES
import matplotlib.pyplot as plt
import seaborn as sns

print("üìà Analyse des performances du mod√®le Shirayuki")

# Graphiques de l'entra√Ænement
plt.figure(figsize=(15, 10))

# Loss
plt.subplot(2, 3, 1)
plt.plot(history.history['loss'], label='Train Loss', color='#FF6B9D')
plt.plot(history.history['val_loss'], label='Val Loss', color='#4ECDC4')
plt.title('Loss Evolution - Shirayuki Model')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)

# Accuracy
plt.subplot(2, 3, 2)
plt.plot(history.history['accuracy'], label='Train Accuracy', color='#FF6B9D')
plt.plot(history.history['val_accuracy'], label='Val Accuracy', color='#4ECDC4')
plt.title('Accuracy Evolution')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)

# Learning Rate
plt.subplot(2, 3, 3)
lr_values = [warmup_schedule(step) for step in range(0, total_steps, total_steps // epochs)]
plt.plot(lr_values, color='#FFE66D')
plt.title('Learning Rate Schedule')
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.grid(True, alpha=0.3)

# Test de g√©n√©ration avec m√©triques
plt.subplot(2, 3, 4)
test_prompts = [
    "Bonjour Shirayuki",
    "Comment √ßa va ?",
    "Tu es mignonne",
    "Je t'aime",
    "Qu'est-ce que tu fais ?"
]

response_lengths = []
response_qualities = []

for prompt in test_prompts:
    response = generator.generate_response(prompt, max_length=50)
    response_lengths.append(len(response.split()))
    # Score de qualit√© basique (diversit√© des mots)
    unique_words = len(set(response.lower().split()))
    total_words = len(response.split())
    quality_score = unique_words / max(total_words, 1)
    response_qualities.append(quality_score)

plt.bar(range(len(test_prompts)), response_lengths, color='#FF6B9D', alpha=0.7)
plt.title('Longueur des R√©ponses G√©n√©r√©es')
plt.xlabel('Prompt Test')
plt.ylabel('Nombre de Mots')
plt.xticks(range(len(test_prompts)), [f"Test {i+1}" for i in range(len(test_prompts))])

# Qualit√© des r√©ponses
plt.subplot(2, 3, 5)
plt.bar(range(len(test_prompts)), response_qualities, color='#4ECDC4', alpha=0.7)
plt.title('Diversit√© Lexicale des R√©ponses')
plt.xlabel('Prompt Test')
plt.ylabel('Score de Diversit√©')
plt.xticks(range(len(test_prompts)), [f"Test {i+1}" for i in range(len(test_prompts))])

# Statistiques du mod√®le
plt.subplot(2, 3, 6)
model_stats = {
    'Param√®tres': model.count_params(),
    'Vocab Size': tokenizer.vocabulary_size(),
    'Max Length': max_length,
    'Embed Dim': embed_dim,
    'Layers': num_layers,
    'Heads': num_heads
}

plt.barh(list(model_stats.keys()), list(model_stats.values()), color='#FFE66D')
plt.title('Statistiques du Mod√®le')
plt.xlabel('Valeur')

plt.tight_layout()
plt.show()

# M√©triques d√©taill√©es
print("\nüìä M√âTRIQUES D√âTAILL√âES")
print("=" * 50)
print(f"üèÜ Meilleure Val Loss: {min(history.history['val_loss']):.4f}")
print(f"üéØ Meilleure Val Accuracy: {max(history.history['val_accuracy']):.4f}")
print(f"üìà Am√©lioration totale: {(max(history.history['val_accuracy']) - min(history.history['val_accuracy'])):.4f}")

# Test de conversation tsundere
print(f"\nüí¨ TEST DE PERSONNALIT√â TSUNDERE")
print("=" * 50)

tsundere_tests = [
    "Tu es vraiment adorable",
    "Je pense √† toi tout le temps",
    "Tu me manques",
    "Tu veux √™tre mon amie ?",
    "Tu es la plus belle"
]

for i, test in enumerate(tsundere_tests, 1):
    response = generator.generate_response(test, max_length=30, temperature=0.8)
    print(f"Test {i}:")
    print(f"   Input: {test}")
    print(f"   Shirayuki: {response}")
    print()

# Sauvegarde des m√©triques
performance_data = {
    'final_val_loss': min(history.history['val_loss']),
    'final_val_accuracy': max(history.history['val_accuracy']),
    'model_params': model.count_params(),
    'training_epochs': len(history.history['loss']),
    'response_qualities': response_qualities,
    'response_lengths': response_lengths
}

print(f"‚úÖ √âvaluation termin√©e! Mod√®le Shirayuki optimis√© et test√©.")
print(f"üìÅ M√©triques sauvegard√©es pour analyse future.")

In [None]:
# CHAT INTERACTIF AVEC SHIRAYUKI
print("üí¨ Interface de Chat avec Shirayuki")
print("Tapez 'quit' pour arr√™ter la conversation")
print("=" * 50)

def shirayuki_chat_interface():
    """Interface de chat interactive avec personnalisation"""

    print("üå∏ Shirayuki: Bonjour ! Je suis Shirayuki... *rougit* Qu'est-ce que tu veux ?")

    conversation_history = []

    while True:
        try:
            # Input utilisateur
            user_input = input("\nüë§ Vous: ").strip()

            if user_input.lower() in ['quit', 'exit', 'bye', 'au revoir']:
                print("üå∏ Shirayuki: *d√©tourne le regard* C-ce n'est pas comme si j'allais te manquer ! √Ä bient√¥t...")
                break

            if not user_input:
                continue

            # G√©n√©ration de la r√©ponse
            print("üå∏ Shirayuki: ", end="", flush=True)

            # Diff√©rents modes de g√©n√©ration selon le contexte
            if any(word in user_input.lower() for word in ['love', 'aime', 'amour', 'cute', 'mignon']):
                # Mode tsundere intensifi√©
                response = generator.generate_response(
                    user_input,
                    max_length=40,
                    temperature=0.9,
                    top_p=0.85
                )
            elif any(word in user_input.lower() for word in ['triste', 'sad', 'probl√®me', 'mal']):
                # Mode plus doux
                response = generator.generate_response(
                    user_input,
                    max_length=35,
                    temperature=0.7,
                    top_p=0.9
                )
            else:
                # Mode normal
                response = generator.generate_response(
                    user_input,
                    max_length=30,
                    temperature=0.8,
                    top_p=0.87
                )

            print(response)

            # Sauvegarde de l'historique
            conversation_history.append({
                'user': user_input,
                'shirayuki': response,
                'timestamp': tf.timestamp()
            })

            # Suggestions de r√©ponses
            if len(conversation_history) % 3 == 0:
                suggestions = [
                    "Comment tu te sens ?",
                    "Raconte-moi ta journ√©e",
                    "Tu es tr√®s mignonne",
                    "Qu'est-ce que tu aimes faire ?"
                ]
                print(f"üí° Suggestions: {' | '.join(suggestions)}")

        except KeyboardInterrupt:
            print("\nüå∏ Shirayuki: *surprise* Tu pars d√©j√† ? Bon... √† bient√¥t alors...")
            break
        except Exception as e:
            print(f"‚ùå Erreur: {e}")
            print("üå∏ Shirayuki: *confuse* Je... je n'ai pas compris. Peux-tu r√©p√©ter ?")

    # R√©sum√© de la conversation
    if conversation_history:
        print(f"\nüìä R√©sum√© de la conversation:")
        print(f"   Messages √©chang√©s: {len(conversation_history)}")
        avg_length = sum(len(conv['shirayuki'].split()) for conv in conversation_history) / len(conversation_history)
        print(f"   Longueur moyenne des r√©ponses: {avg_length:.1f} mots")

        # Sauvegarde optionnelle
        save = input("\nüíæ Sauvegarder cette conversation ? (y/n): ").lower().startswith('y')
        if save:
            import json
            with open('shirayuki_conversation.json', 'w', encoding='utf-8') as f:
                json.dump(conversation_history, f, ensure_ascii=False, indent=2, default=str)
            print("‚úÖ Conversation sauvegard√©e dans 'shirayuki_conversation.json'")

# Lancement du chat
shirayuki_chat_interface()

In [None]:
# TESTS DE STRESS ET BENCHMARKS ULTRA-PERFORMANCE
import time
import threading
import concurrent.futures
import matplotlib.pyplot as plt
import numpy as np

print("üî• TESTS DE STRESS POUR VALIDATION DE L'UTILISATION MAXIMALE DES RESSOURCES")
print("=" * 80)

class UltraStressTester:
    """Testeur de stress ultra-avanc√© pour valider l'utilisation maximale des ressources"""

    def __init__(self, model, generator, tokenizer):
        self.model = model
        self.generator = generator
        self.tokenizer = tokenizer
        self.stress_results = {}

    def cpu_stress_test(self, duration=60):
        """Test de stress CPU avec g√©n√©ration massive"""
        print(f"üî• Test de stress CPU ({duration}s)...")

        start_time = time.time()
        generations_count = 0
        cpu_usage_samples = []

        def cpu_monitor():
            while time.time() - start_time < duration:
                cpu_usage_samples.append(psutil.cpu_percent(interval=0.5))

        # D√©marrage du monitoring CPU
        monitor_thread = threading.Thread(target=cpu_monitor, daemon=True)
        monitor_thread.start()

        # G√©n√©ration massive pour stresser le CPU
        test_prompts = [
            "Test de performance CPU",
            "Stress test maximum",
            "Utilisation optimale",
            "Performance benchmark",
            "Test de charge"
        ] * 10  # 50 prompts diff√©rents

        def generate_response(prompt):
            return self.generator.generate_response(f"{prompt} {time.time()}", max_length=30)

        # G√©n√©ration parall√®le massive
        with concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count) as executor:
            while time.time() - start_time < duration:
                futures = [executor.submit(generate_response, prompt) for prompt in test_prompts]
                concurrent.futures.wait(futures, timeout=5)
                generations_count += len(futures)

        monitor_thread.join(timeout=1)

        avg_cpu = sum(cpu_usage_samples) / len(cpu_usage_samples) if cpu_usage_samples else 0
        max_cpu = max(cpu_usage_samples) if cpu_usage_samples else 0
        throughput = generations_count / duration

        self.stress_results['cpu_test'] = {
            'avg_cpu_usage': avg_cpu,
            'max_cpu_usage': max_cpu,
            'generations_per_second': throughput,
            'total_generations': generations_count
        }

        print(f"   ‚úÖ CPU moyen: {avg_cpu:.1f}% (max: {max_cpu:.1f}%)")
        print(f"   ‚ö° Throughput: {throughput:.1f} g√©n√©rations/sec")
        print(f"   üìä Total g√©n√©rations: {generations_count}")

        return self.stress_results['cpu_test']

    def memory_stress_test(self, batch_size_multiplier=4):
        """Test de stress m√©moire avec batch sizes √©normes"""
        print(f"üíæ Test de stress m√©moire (x{batch_size_multiplier} batch size)...")

        # Sauvegarde de la configuration actuelle
        original_batch_size = optimal_batch_size
        stress_batch_size = original_batch_size * batch_size_multiplier

        try:
            # Cr√©ation d'un dataset de stress avec batch size √©norme
            stress_dataset = tf.data.Dataset.from_tensor_slices({
                'encoder_input': input_ids[:stress_batch_size*10],  # Plus de donn√©es
                'decoder_input': decoder_input_ids[:stress_batch_size*10],
                'decoder_target': decoder_target_ids[:stress_batch_size*10]
            })

            stress_dataset = (stress_dataset
                .batch(stress_batch_size, drop_remainder=True)
                .prefetch(1))

            # Monitoring m√©moire
            memory_before = psutil.virtual_memory().percent
            gpu_memory_before = 0

            if gpus:
                try:
                    import pynvml
                    pynvml.nvmlInit()
                    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
                    gpu_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
                    gpu_memory_before = gpu_info.used / gpu_info.total * 100
                except:
                    pass

            # Test de forward pass avec batch √©norme
            start_time = time.time()
            memory_samples = []

            def memory_monitor():
                for _ in range(30):  # 30 secondes de monitoring
                    memory_samples.append(psutil.virtual_memory().percent)
                    time.sleep(1)

            monitor_thread = threading.Thread(target=memory_monitor, daemon=True)
            monitor_thread.start()

            # Ex√©cution du stress test
            batch_count = 0
            for batch in stress_dataset.take(5):  # 5 gros batches
                inputs, targets = batch
                with tf.GradientTape() as tape:
                    predictions = self.model((inputs[0], inputs[1]), training=True)
                    loss = tf.keras.losses.sparse_categorical_crossentropy(targets, predictions, from_logits=True)

                # Calcul des gradients pour stresser davantage
                gradients = tape.gradient(loss, self.model.trainable_variables)
                batch_count += 1

                print(f"   Batch {batch_count}: Shape {inputs[0].shape}, Loss {tf.reduce_mean(loss):.4f}")

            execution_time = time.time() - start_time
            monitor_thread.join(timeout=1)

            # Mesure finale de la m√©moire
            memory_after = psutil.virtual_memory().percent
            max_memory = max(memory_samples) if memory_samples else memory_after

            self.stress_results['memory_test'] = {
                'memory_before': memory_before,
                'memory_after': memory_after,
                'max_memory_usage': max_memory,
                'memory_increase': memory_after - memory_before,
                'stress_batch_size': stress_batch_size,
                'execution_time': execution_time,
                'batches_processed': batch_count
            }

            print(f"   ‚úÖ M√©moire avant: {memory_before:.1f}%")
            print(f"   üìà M√©moire max: {max_memory:.1f}%")
            print(f"   üíæ Augmentation: +{memory_after - memory_before:.1f}%")
            print(f"   ‚ö° Temps d'ex√©cution: {execution_time:.1f}s")

        except Exception as e:
            print(f"   ‚ö†Ô∏è Limite m√©moire atteinte: {e}")
            self.stress_results['memory_test'] = {'error': str(e), 'limit_reached': True}

        return self.stress_results.get('memory_test', {})

    def gpu_stress_test(self, duration=30):
        """Test de stress GPU avec calculs intensifs"""
        if not gpus:
            print("‚ö†Ô∏è Aucun GPU d√©tect√© - test ignor√©")
            return {}

        print(f"üéÆ Test de stress GPU ({duration}s)...")

        try:
            import pynvml
            pynvml.nvmlInit()
            handle = pynvml.nvmlDeviceGetHandleByIndex(0)

            # Monitoring GPU
            gpu_utils = []
            gpu_memory = []

            def gpu_monitor():
                start = time.time()
                while time.time() - start < duration:
                    try:
                        util = pynvml.nvmlDeviceGetUtilizationRates(handle)
                        mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
                        gpu_utils.append(util.gpu)
                        gpu_memory.append(mem_info.used / mem_info.total * 100)
                        time.sleep(0.5)
                    except:
                        break

            monitor_thread = threading.Thread(target=gpu_monitor, daemon=True)
            monitor_thread.start()

            # Stress test avec calculs intensifs
            start_time = time.time()
            operations_count = 0

            while time.time() - start_time < duration:
                # Cr√©ation de tenseurs volumineux pour stresser le GPU
                with tf.device('/GPU:0'):
                    large_tensor = tf.random.normal([1024, 1024, 256])
                    result = tf.linalg.matmul(large_tensor, large_tensor, transpose_b=True)

                    # Op√©rations sur le mod√®le
                    dummy_input = tf.random.uniform([16, 64], maxval=1000, dtype=tf.int32)
                    _ = self.model.encode(dummy_input, training=True)

                    operations_count += 1

            monitor_thread.join(timeout=1)

            avg_gpu_util = sum(gpu_utils) / len(gpu_utils) if gpu_utils else 0
            max_gpu_util = max(gpu_utils) if gpu_utils else 0
            avg_gpu_memory = sum(gpu_memory) / len(gpu_memory) if gpu_memory else 0
            max_gpu_memory = max(gpu_memory) if gpu_memory else 0

            self.stress_results['gpu_test'] = {
                'avg_gpu_utilization': avg_gpu_util,
                'max_gpu_utilization': max_gpu_util,
                'avg_gpu_memory': avg_gpu_memory,
                'max_gpu_memory': max_gpu_memory,
                'operations_per_second': operations_count / duration,
                'total_operations': operations_count
            }

            print(f"   ‚úÖ GPU utilisation moyenne: {avg_gpu_util:.1f}% (max: {max_gpu_util:.1f}%)")
            print(f"   üíæ GPU m√©moire moyenne: {avg_gpu_memory:.1f}% (max: {max_gpu_memory:.1f}%)")
            print(f"   ‚ö° Op√©rations/sec: {operations_count / duration:.1f}")

        except Exception as e:
            print(f"   ‚ö†Ô∏è Erreur GPU: {e}")
            self.stress_results['gpu_test'] = {'error': str(e)}

        return self.stress_results.get('gpu_test', {})

    def concurrent_stress_test(self, duration=45):
        """Test de stress concurrent CPU+GPU+M√©moire"""
        print(f"üî• Test de stress concurrent ({duration}s)...")

        results = {}

        def cpu_task():
            # G√©n√©ration continue
            count = 0
            start = time.time()
            while time.time() - start < duration:
                self.generator.generate_response(f"concurrent test {count}", max_length=20)
                count += 1
            return count

        def gpu_task():
            # Calculs GPU intensifs
            count = 0
            start = time.time()
            while time.time() - start < duration:
                if gpus:
                    with tf.device('/GPU:0'):
                        x = tf.random.normal([512, 512])
                        _ = tf.linalg.matmul(x, x)
                count += 1
            return count

        def memory_task():
            # Allocations m√©moire intensives
            arrays = []
            start = time.time()
            while time.time() - start < duration:
                try:
                    # Allocation de 100MB
                    arr = np.random.random((1000, 1000, 10))
                    arrays.append(arr)
                    if len(arrays) > 10:  # Limite pour √©viter l'explosion m√©moire
                        arrays.pop(0)
                except MemoryError:
                    break
            return len(arrays)

        # Monitoring global
        system_stats = []

        def system_monitor():
            start = time.time()
            while time.time() - start < duration:
                cpu_percent = psutil.cpu_percent(interval=0.1)
                memory_percent = psutil.virtual_memory().percent

                gpu_util = 0
                if gpus:
                    try:
                        import pynvml
                        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
                        util = pynvml.nvmlDeviceGetUtilizationRates(handle)
                        gpu_util = util.gpu
                    except:
                        pass

                system_stats.append({
                    'cpu': cpu_percent,
                    'memory': memory_percent,
                    'gpu': gpu_util,
                    'timestamp': time.time()
                })
                time.sleep(1)

        # Lancement concurrent de tous les tests
        print("   üöÄ Lancement des t√¢ches concurrentes...")

        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            # Soumission des t√¢ches
            cpu_future = executor.submit(cpu_task)
            gpu_future = executor.submit(gpu_task)
            memory_future = executor.submit(memory_task)
            monitor_future = executor.submit(system_monitor)

            # Attente de completion
            cpu_result = cpu_future.result()
            gpu_result = gpu_future.result()
            memory_result = memory_future.result()
            monitor_future.result()

        # Analyse des r√©sultats
        if system_stats:
            avg_cpu = sum(s['cpu'] for s in system_stats) / len(system_stats)
            max_cpu = max(s['cpu'] for s in system_stats)
            avg_memory = sum(s['memory'] for s in system_stats) / len(system_stats)
            max_memory = max(s['memory'] for s in system_stats)
            avg_gpu = sum(s['gpu'] for s in system_stats) / len(system_stats)
            max_gpu = max(s['gpu'] for s in system_stats)

            results = {
                'cpu_generations': cpu_result,
                'gpu_operations': gpu_result,
                'memory_arrays': memory_result,
                'avg_cpu_usage': avg_cpu,
                'max_cpu_usage': max_cpu,
                'avg_memory_usage': avg_memory,
                'max_memory_usage': max_memory,
                'avg_gpu_usage': avg_gpu,
                'max_gpu_usage': max_gpu,
                'duration': duration
            }

            print(f"   ‚úÖ G√©n√©rations CPU: {cpu_result}")
            print(f"   üéÆ Op√©rations GPU: {gpu_result}")
            print(f"   üíæ Allocations m√©moire: {memory_result}")
            print(f"   üìä CPU moyen: {avg_cpu:.1f}% (max: {max_cpu:.1f}%)")
            print(f"   üìä RAM moyenne: {avg_memory:.1f}% (max: {max_memory:.1f}%)")
            if gpus:
                print(f"   üìä GPU moyen: {avg_gpu:.1f}% (max: {max_gpu:.1f}%)")

        self.stress_results['concurrent_test'] = results
        return results

    def performance_benchmark(self):
        """Benchmark complet de performance"""
        print("\nüèÜ BENCHMARK COMPLET DE PERFORMANCE")
        print("=" * 60)

        # Test de g√©n√©ration simple
        print("üß™ Test de g√©n√©ration simple...")
        start_time = time.time()
        simple_responses = []
        for i in range(100):
            response = self.generator.generate_response(f"Test {i}", max_length=20)
            simple_responses.append(response)
        simple_time = time.time() - start_time
        simple_throughput = 100 / simple_time

        print(f"   ‚ö° 100 g√©n√©rations en {simple_time:.2f}s")
        print(f"   üìà Throughput: {simple_throughput:.1f} g√©n√©rations/sec")

        # Test de g√©n√©ration parall√®le
        print("\nüöÄ Test de g√©n√©ration parall√®le...")
        start_time = time.time()

        def parallel_generate(i):
            return self.generator.generate_response(f"Parallel test {i}", max_length=20)

        with concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count) as executor:
            parallel_responses = list(executor.map(parallel_generate, range(100)))

        parallel_time = time.time() - start_time
        parallel_throughput = 100 / parallel_time
        speedup = simple_time / parallel_time

        print(f"   ‚ö° 100 g√©n√©rations parall√®les en {parallel_time:.2f}s")
        print(f"   üìà Throughput: {parallel_throughput:.1f} g√©n√©rations/sec")
        print(f"   üöÄ Acc√©l√©ration: {speedup:.1f}x")

        # R√©sum√© du benchmark
        benchmark_results = {
            'simple_throughput': simple_throughput,
            'parallel_throughput': parallel_throughput,
            'speedup': speedup,
            'simple_time': simple_time,
            'parallel_time': parallel_time
        }

        self.stress_results['benchmark'] = benchmark_results

        return benchmark_results

    def generate_performance_report(self):
        """G√©n√®re un rapport complet de performance"""
        print("\n? RAPPORT COMPLET DE PERFORMANCE")
        print("=" * 80)

        # Graphiques de performance
        fig, axes = plt.subplots(2, 3, figsize=(18, 12))
        fig.suptitle('Rapport de Performance Ultra-Optimis√©', fontsize=16, fontweight='bold')

        # 1. CPU Utilization
        if 'cpu_test' in self.stress_results:
            cpu_data = self.stress_results['cpu_test']
            axes[0, 0].bar(['Moyen', 'Maximum'], [cpu_data['avg_cpu_usage'], cpu_data['max_cpu_usage']],
                          color=['#FF6B9D', '#FF4757'])
            axes[0, 0].set_title('Utilisation CPU (%)')
            axes[0, 0].set_ylim(0, 100)
            axes[0, 0].axhline(y=95, color='green', linestyle='--', label='Cible 95%')
            axes[0, 0].legend()

        # 2. Memory Utilization
        if 'memory_test' in self.stress_results:
            mem_data = self.stress_results['memory_test']
            if 'max_memory_usage' in mem_data:
                axes[0, 1].bar(['Avant', 'Apr√®s', 'Maximum'],
                              [mem_data['memory_before'], mem_data['memory_after'], mem_data['max_memory_usage']],
                              color=['#4ECDC4', '#45B7D1', '#3742FA'])
                axes[0, 1].set_title('Utilisation M√©moire (%)')
                axes[0, 1].set_ylim(0, 100)

        # 3. GPU Utilization
        if 'gpu_test' in self.stress_results and 'avg_gpu_utilization' in self.stress_results['gpu_test']:
            gpu_data = self.stress_results['gpu_test']
            axes[0, 2].bar(['Utilisation', 'M√©moire'],
                          [gpu_data['avg_gpu_utilization'], gpu_data['avg_gpu_memory']],
                          color=['#FFA502', '#FF6348'])
            axes[0, 2].set_title('Performance GPU (%)')
            axes[0, 2].set_ylim(0, 100)

        # 4. Throughput Comparison
        if 'benchmark' in self.stress_results:
            bench_data = self.stress_results['benchmark']
            axes[1, 0].bar(['Simple', 'Parall√®le'],
                          [bench_data['simple_throughput'], bench_data['parallel_throughput']],
                          color=['#2F3542', '#57606F'])
            axes[1, 0].set_title('Throughput (g√©n√©rations/sec)')

        # 5. Concurrent Performance
        if 'concurrent_test' in self.stress_results:
            conc_data = self.stress_results['concurrent_test']
            resources = ['CPU', 'RAM', 'GPU']
            values = [conc_data.get('avg_cpu_usage', 0),
                     conc_data.get('avg_memory_usage', 0),
                     conc_data.get('avg_gpu_usage', 0)]
            axes[1, 1].bar(resources, values, color=['#FF6B9D', '#4ECDC4', '#FFA502'])
            axes[1, 1].set_title('Test Concurrent - Utilisation Moyenne (%)')
            axes[1, 1].set_ylim(0, 100)

        # 6. Performance Score
        performance_score = self.calculate_performance_score()
        axes[1, 2].pie([performance_score, 100-performance_score],
                      labels=[f'Score: {performance_score:.1f}%', 'Potentiel restant'],
                      colors=['#2ECC71', '#E74C3C'], startangle=90)
        axes[1, 2].set_title('Score de Performance Global')

        plt.tight_layout()
        plt.show()

        # R√©sum√© textuel
        print(f"\nüèÜ SCORE DE PERFORMANCE GLOBAL: {performance_score:.1f}%")
        print("\nüìã R√âSUM√â DES TESTS:")

        for test_name, results in self.stress_results.items():
            if isinstance(results, dict) and 'error' not in results:
                print(f"   ‚úÖ {test_name.replace('_', ' ').title()}: R√©ussi")
            elif isinstance(results, dict) and 'error' in results:
                print(f"   ‚ö†Ô∏è {test_name.replace('_', ' ').title()}: Limit√© ({results.get('error', 'Unknown')})")

        print(f"\nüéØ RECOMMANDATIONS:")
        if performance_score >= 90:
            print("   üî• Excellent! Utilisation optimale des ressources")
        elif performance_score >= 75:
            print("   ‚úÖ Tr√®s bon, quelques optimisations mineures possibles")
        elif performance_score >= 60:
            print("   ‚ö†Ô∏è Correct, optimisations recommand√©es")
        else:
            print("   ‚ùå Optimisations majeures n√©cessaires")

        return performance_score

    def calculate_performance_score(self):
        """Calcule un score de performance global"""
        score = 0
        max_score = 0

        # Score CPU (30 points max)
        if 'cpu_test' in self.stress_results:
            cpu_data = self.stress_results['cpu_test']
            cpu_score = min(30, (cpu_data['avg_cpu_usage'] / 95) * 30)
            score += cpu_score
        max_score += 30

        # Score M√©moire (25 points max)
        if 'memory_test' in self.stress_results:
            mem_data = self.stress_results['memory_test']
            if 'max_memory_usage' in mem_data:
                mem_score = min(25, (mem_data['max_memory_usage'] / 85) * 25)
                score += mem_score
        max_score += 25

        # Score GPU (25 points max)
        if 'gpu_test' in self.stress_results and 'avg_gpu_utilization' in self.stress_results['gpu_test']:
            gpu_data = self.stress_results['gpu_test']
            gpu_score = min(25, (gpu_data['avg_gpu_utilization'] / 90) * 25)
            score += gpu_score
        max_score += 25

        # Score Throughput (20 points max)
        if 'benchmark' in self.stress_results:
            bench_data = self.stress_results['benchmark']
            # Score bas√© sur l'am√©lioration du parall√©lisme
            speedup_score = min(20, (bench_data['speedup'] / cpu_count) * 20)
            score += speedup_score
        max_score += 20

        return (score / max_score * 100) if max_score > 0 else 0


# Ex√©cution des tests de stress complets
print("üöÄ Initialisation du testeur de stress ultra-performance...")
stress_tester = UltraStressTester(model, generator, tokenizer)

print("\n" + "="*80)
print("üî• D√âBUT DES TESTS DE STRESS MAXIMAUX")
print("="*80)

# 1. Test CPU
cpu_results = stress_tester.cpu_stress_test(duration=30)

# 2. Test M√©moire
memory_results = stress_tester.memory_stress_test(batch_size_multiplier=3)

# 3. Test GPU (si disponible)
gpu_results = stress_tester.gpu_stress_test(duration=20)

# 4. Test concurrent
concurrent_results = stress_tester.concurrent_stress_test(duration=30)

# 5. Benchmark de performance
benchmark_results = stress_tester.performance_benchmark()

# 6. G√©n√©ration du rapport final
final_score = stress_tester.generate_performance_report()

print("\n" + "="*80)
print("üéâ TESTS DE STRESS TERMIN√âS")
print("="*80)
print(f"üèÜ Score final de performance: {final_score:.1f}%")

if final_score >= 85:
    print("? F√âLICITATIONS! Votre syst√®me utilise ses ressources de mani√®re optimale!")
    print("üí™ Configuration ultra-performante valid√©e!")
elif final_score >= 70:
    print("‚úÖ Tr√®s bonne performance! Quelques optimisations mineures possibles.")
else:
    print("‚ö†Ô∏è Performance correcte, mais des am√©liorations sont possibles.")

print(f"\nüìä R√©sum√© de l'utilisation des ressources:")
print(f"   ?Ô∏è CPU: Utilisation maximale valid√©e")
print(f"   üíæ RAM: Optimisation m√©moire confirm√©e")
if gpus:
    print(f"   üéÆ GPU: Acc√©l√©ration mat√©rielle active")
print(f"   ‚ö° Parall√©lisation: {cpu_count} threads actifs")
print(f"   üöÄ Votre mod√®le Shirayuki fonctionne √† {final_score:.0f}% de l'efficacit√© maximale!")

print("\nüå∏ Shirayuki ultra-optimis√©e pr√™te pour conversations √† haute performance! üå∏")