In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences



In [21]:
data = {
    "nom": [
        "Smartphone Samsung Galaxy A14", "Chaussures Nike Air Max",
        "Machine à café Philips", "Livre Python pour Débutants",
        "Montre connectée Fitbit"
    ],
    "type": ["Électronique", "Mode", "Maison "," Cuisine", "Livres", ""],
    "description": [
        "Téléphone avec grand écran 5G", "Chaussures confortables pour courir",
        "Machine automatique avec moulin", "Guide complet pour apprendre Python",
        "Montre connectée pour suivre votre activité"
    ],
    "prix": [250, 120, 300, 20, 150],
}



In [22]:
# convertir les données en DataFrame
import pandas as pd
df = pd.DataFrame(data)

# Encode labels
label_encoder = LabelEncoder()
df["type_encoded"] = label_encoder.fit_transform(df["type"])
categories = label_encoder.classes_

print(categories)



[' Cuisine' 'Livres' 'Maison ' 'Mode' 'Électronique']


In [23]:
# Tokenize descriptions
tokenizer = Tokenizer(num_words=1000, oov_token="<OOV>")
tokenizer.fit_on_texts(df["description"])
sequences = tokenizer.texts_to_sequences(df["description"])
padded_sequences = pad_sequences(sequences, maxlen=10, padding='post')
print(padded_sequences)

[[ 4  3  5  6  7  0  0  0  0  0]
 [ 8  9  2 10  0  0  0  0  0  0]
 [11 12  3 13  0  0  0  0  0  0]
 [14 15  2 16 17  0  0  0  0  0]
 [18 19  2 20 21 22  0  0  0  0]]


In [24]:
# Normalize prices
df["prix"] = df["prix"] / df["prix"].max()
print(df["prix"])

0    0.833333
1    0.400000
2    1.000000
3    0.066667
4    0.500000
Name: prix, dtype: float64


In [11]:

X = np.hstack((padded_sequences, df["prix"].values.reshape(-1, 1)))
y = df["type_encoded"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=1000, output_dim=16, input_length=10),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(len(categories), activation='softmax')
])

# Compilation
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])



In [25]:
# Entraînement et Evaluation
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print("\nTest accuracy:", test_acc)
new_product = {
    "description": ["le bâtiment est tellement jolie que quand tu entre à l'intérieur tu n'as plus envie de ressortir."],
    "prix": [125]
}

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step - accuracy: 0.5000 - loss: 1.1476 - val_accuracy: 0.0000e+00 - val_loss: 1.7996
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.5000 - loss: 1.1410 - val_accuracy: 0.0000e+00 - val_loss: 1.8148
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.5000 - loss: 1.1344 - val_accuracy: 0.0000e+00 - val_loss: 1.8304
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.5000 - loss: 1.1276 - val_accuracy: 0.0000e+00 - val_loss: 1.8464
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.5000 - loss: 1.1208 - val_accuracy: 0.0000e+00 - val_loss: 1.8629
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.5000 - loss: 1.1140 - val_accuracy: 0.0000e+00 - val_loss: 1.8796
Epoch 7/10
[1m1/1[0

In [26]:
new_sequences = tokenizer.texts_to_sequences(new_product["description"])
new_padded = pad_sequences(new_sequences, maxlen=10, padding='post')
new_price = np.array([new_product["prix"][0] / df["prix"].max()]).reshape(-1, 1)
new_input = np.hstack((new_padded, new_price))

prediction = model.predict(new_input)
predicted_category = categories[np.argmax(prediction)]
print("Catégorie prédite :", predicted_category)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Catégorie prédite : Livres
