In [33]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer

# Chargement et prétraitement d'une image depuis son chemin
def load_and_preprocess_image(filepath):
    image = tf.io.read_file(filepath)
    image = tf.image.decode_image(image, channels=3)
    image.set_shape([None, None, 3])
    image = tf.image.resize(image, (224, 224))
    
    # Normalisation pour EfficientNetB0 ([-1, 1] si preprocess_input est utilisé)
    from tensorflow.keras.applications.efficientnet import preprocess_input
    image = preprocess_input(image)
    
    return image

# Fusion des prédictions image + texte
def late_fusion_predict(text_model, image_model, text_input, image_path, alpha=0.5):
    # Texte → SVM
    prob_text = text_model.predict([text_input])[0]

    # Image → EfficientNet
    image_tensor = load_and_preprocess_image(image_path)
    image_tensor = tf.expand_dims(image_tensor, axis=0)  # Add batch dimension
    prob_image = image_model.predict(image_tensor, verbose=0)[0]

    # Fusion
    prob_combined = alpha * prob_image + (1 - alpha) * prob_text
    predicted_class = np.argmax(prob_combined)

    return predicted_class, prob_combined

# Évaluation globale
def evaluate_fusion(text_model, image_model, X_text, image_paths, y_true, alpha=0.5):
    y_pred = []
    for text_input, image_path in zip(X_text, image_paths):
        pred_class, _ = late_fusion_predict(text_model, image_model, text_input, image_path, alpha)
        y_pred.append(pred_class)

    acc = accuracy_score(y_true, y_pred)
    return acc, y_pred


In [20]:
import joblib
import tensorflow as tf
from sklearn.calibration import CalibratedClassifierCV

# Chargement du modèle pour le texte
text_model = joblib.load('pipeline_svc_model.joblib')
image_model = tf.keras.models.load_model('../../models/EfficientNetB0/EfficientNetB0_model_finetuned_best.keras')
# Chargement de l'encoder
label_encoder = joblib.load('label_encoder.joblib')





  saveable.load_own_variables(weights_store.get(inner_path))


In [34]:
import pandas as pd
import os 

df = pd.read_csv('../../data/processed/clean_dataset.csv')

dir_name = "/mnt/c/Users/karim/rakuten/images/data_clean/image_train"

# Ou prendre un pourcentage (ex: 10% du DataFrame)
df_echantillon = df.sample(frac=0.2)
df_echantillon["img_path"] = df_echantillon.apply(lambda row: os.path.join(dir_name, f"image_{row['imageid']}_product_{row['productid']}.jpg"), axis=1)

df_echantillon.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10555 entries, 3763 to 10159
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Unnamed: 0     10555 non-null  int64 
 1   productid      10555 non-null  int64 
 2   imageid        10555 non-null  int64 
 3   prdtypecode    10555 non-null  int64 
 4   merged         10555 non-null  object
 5   category_name  10555 non-null  object
 6   img_path       10555 non-null  object
dtypes: int64(4), object(3)
memory usage: 659.7+ KB


In [None]:
X_text = df_echantillon["merged"]
image_paths = df_echantillon["img_path"]
y_true = label_encoder.transform(df_echantillon["prdtypecode"])

acc, predictions = evaluate_fusion(text_model, image_model, X_text, image_paths, y_true, alpha=0.3)
print(f"Accuracy late fusion: {acc:.4f}")


In [None]:
for alpha in np.linspace(0, 0.5, 3):
    acc, _ = evaluate_fusion(text_model, image_model, X_text, image_paths, y_true, alpha)
    print(f"alpha={alpha:.1f} → Accuracy={acc:.4f}")


alpha=0.0 → Accuracy=0.0379
