In [1]:
import pandas as pd
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModel
import torch.nn as nn
import torch
from torch.utils.data import DataLoader



def status_to_score(status):
    """Convert an item status code into a score between 0 and 1.

    Recognised codes and their scores:
        1.0 -> 1.0  (new)
        2.0 -> 0.7  (very good condition)
        3.0 -> 0.5  (good condition)
        4.0 -> 0.2  (poor condition)

    Any other value (including the 0 used to fill missing statuses)
    yields 0.0.
    """
    score_by_code = dict(
        zip(
            (1.0, 2.0, 3.0, 4.0),
            (1.0, 0.7, 0.5, 0.2),
        )
    )
    try:
        return score_by_code[status]
    except KeyError:
        # Unknown code: fall back to the lowest score.
        return 0.0

class VintedDescriptionStatusDataset(Dataset):
    """Dataset of (description, status string, target score) triples.

    Built from a dataframe exposing a "Description" and an "ItemStatus"
    column. Missing descriptions become "" and missing statuses become 0;
    the target score of each row is ``status_to_score`` of its status.
    """

    def __init__(self, dataframe):
        description_column = dataframe["Description"].fillna("")
        status_column = dataframe["ItemStatus"].fillna(0)

        self.descriptions = description_column.tolist()
        self.statuses = status_column.tolist()
        self.labels = list(map(status_to_score, self.statuses))

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, idx):
        text = self.descriptions[idx]
        # The status is returned as a string so it can be concatenated with the text.
        status_text = str(self.statuses[idx])
        target = torch.tensor(self.labels[idx], dtype=torch.float)
        return text, status_text, target



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/njie/Desktop/Projet_APIA/env/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/User

In [9]:
class DescriptionStatusToScoreModel(nn.Module):
    """Regress a score in (0, 1) from a description and a status string.

    Each (description, status) pair is joined into one text, tokenized,
    encoded by a pretrained transformer, and the hidden state of the first
    token is fed to a small MLP ending in a sigmoid.

    Args:
        model_name: identifier passed to ``AutoTokenizer.from_pretrained``
            and ``AutoModel.from_pretrained``.
    """

    def __init__(self, model_name="xlm-roberta-base"):
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.encoder = AutoModel.from_pretrained(model_name)
        self.regressor = nn.Sequential(
            nn.Linear(self.encoder.config.hidden_size, 128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, 1),
            nn.Sigmoid(),  # keeps the score between 0 and 1
        )

    def forward(self, descriptions, statuses):
        """Score a batch of (description, status) pairs.

        Args:
            descriptions: sequence of description strings.
            statuses: sequence of status strings, aligned with ``descriptions``.

        Returns:
            A 1-D tensor with one score per input pair. The batch axis is
            kept even for a single pair, so the shape is ``(1,)`` rather
            than a 0-d scalar.
        """
        # NOTE(review): the literal "[SEP]" is kept byte-for-byte because saved
        # checkpoints presumably rely on this exact input format. Whether the
        # tokenizer treats it as a special token depends on the chosen model
        # (xlm-roberta's separator is "</s>") -- confirm before changing it.
        inputs = [f"{d} [SEP] {s}" for d, s in zip(descriptions, statuses)]
        tokens = self.tokenizer(
            inputs, padding=True, truncation=True, return_tensors="pt"
        )
        # The tokenizer builds CPU tensors; move them to wherever the weights
        # live so the model also works after ``model.to("cuda")``.
        tokens = tokens.to(next(self.parameters()).device)
        output = self.encoder(**tokens)
        first_token = output.last_hidden_state[:, 0, :]  # first-position embedding
        # squeeze(-1) removes only the feature axis; a bare squeeze() would also
        # drop the batch axis for a batch of one and mismatch the label shape.
        return self.regressor(first_token).squeeze(-1)


In [None]:
# Disabled training loop: the whole cell is a bare triple-quoted string, so it
# is evaluated as a plain expression and none of the code inside it runs.
# As written, the loop prints the loss of the last batch of each epoch only and
# contains no call that saves the trained weights.
'''
# Charger les données
df = pd.read_csv("manteaux_labelise.csv")

# Dataset + Dataloader
dataset = VintedDescriptionStatusDataset(df)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Initialisation
model = DescriptionStatusToScoreModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Entraînement
model.train()
for epoch in range(5):
    for descriptions, statuses, labels in dataloader:
        descriptions = list(descriptions)
        statuses = list(statuses)

        preds = model(descriptions, statuses)
        loss = criterion(preds, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    print(f"Epoch {epoch+1} - Loss: {loss.item():.4f}")
'''

In [10]:
# Read the CSV and drop the "bonne_affaire" column in a single chained
# expression, so `df` is built from the file in one step.
df = pd.read_csv("manteaux_labelise.csv").drop(columns=["bonne_affaire"])

In [11]:
model = DescriptionStatusToScoreModel()
# map_location="cpu" lets a checkpoint that was saved from a GPU deserialize on
# a CPU-only machine; load_state_dict then copies the values into the model.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
state_dict = torch.load("model_description_status.pt", map_location="cpu")
model.load_state_dict(state_dict)

<All keys matched successfully>

In [12]:
def predict_scores_from_dataframe(model, df, desc_col="Description", status_col="ItemStatus", batch_size=1):
    """Score every row of ``df`` with ``model`` and store the result in place.

    Missing values are handled the same way as in
    ``VintedDescriptionStatusDataset``: a missing description becomes ""
    and a missing status becomes 0 before being turned into strings, so the
    model never receives the text "nan".

    Args:
        model: callable taking ``(descriptions, statuses)`` lists of strings
            and returning a tensor of scores; must expose ``eval()``.
        df: dataframe to score. It is modified in place: a
            "PredictedScore" column is added (or overwritten).
        desc_col: name of the description column.
        status_col: name of the status column.
        batch_size: number of rows sent to the model per call. The default
            of 1 scores rows one at a time; larger values reduce the number
            of forward passes.

    Returns:
        The same ``df`` object, with the "PredictedScore" column set.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    model.eval()

    # Read whole columns instead of iterating rows: this keeps each column's
    # own dtype and lets missing values be filled before stringification.
    descriptions = [str(value) for value in df[desc_col].fillna("").tolist()]
    statuses = [str(value) for value in df[status_col].fillna(0).tolist()]

    predicted_scores = []
    with torch.no_grad():
        for start in range(0, len(descriptions), batch_size):
            stop = start + batch_size
            batch_scores = model(descriptions[start:stop], statuses[start:stop])
            # reshape(-1) accepts both a 0-d tensor and a (batch,) tensor.
            predicted_scores.extend(batch_scores.reshape(-1).tolist())

    df["PredictedScore"] = predicted_scores
    return df


# predict_scores_from_dataframe adds "PredictedScore" to the frame it receives
# and returns that same object, so df1 and df refer to the same DataFrame.
df1 = predict_scores_from_dataframe(model, df)

In [14]:
# Bare last expression: the notebook renders the scored frame as a table.
df1

Unnamed: 0,Title,Brand,Price,ItemStatus,NbFavourite,UserRating,NbEvalUser,Description,URL,Type,PredictedScore
0,Carhartt WIP Kilda Jacket,362,110.0,1.0,10,4.8,8.0,"Verkaufe diese Carhartt Jacke in Camel, perfek...",https://www.vinted.fr/items/5989867275-carhart...,1907,0.932054
1,Carhartt Trenchcoat Kurzmantel Grösse XL,362,139.0,2.0,2,5.0,7.0,Ungetragen leider zu gross,https://www.vinted.fr/items/5989107593-carhart...,1907,0.785149
2,"grauer Carhartt Jenison Coat, Wollmischung, Gr. M",362,149.0,3.0,6,5.0,9.0,Kerniger Wollmantel von Carhartt aus Wollmisch...,https://www.vinted.fr/items/5967790373-grauer-...,1907,0.512592
3,Carhartt Trent Parka Gr.S,362,109.0,1.0,19,5.0,206.0,Neu mit Etikett \n\nCarhartt Jacke perfekt gee...,https://www.vinted.fr/items/4221695682-carhart...,1907,0.934116
4,Man coat,362,110.0,3.0,0,5.0,2.0,"In a very good condition, has no damage",https://www.vinted.fr/items/5962711675-man-coat,1907,0.521220
...,...,...,...,...,...,...,...,...,...,...,...
476,Abrigo Carhatt,362,130.0,3.0,4,,,Plumas carhatt como nuevo \nNegociamos en priv...,https://www.vinted.fr/items/4297925544-abrigo-...,1907,0.524128
477,Veste OG Artic Coat Carharrt M,362,140.9,1.0,11,5.0,141.0,Veste OG Artic Coat Carharrt taille M\nNeuve a...,https://www.vinted.fr/items/5464478810-veste-o...,1907,0.934188
478,Chaquetón negro de Carhartt,362,120.0,3.0,6,4.9,78.0,Chaquetón negro de hombre de Carhartt. Cuello ...,https://www.vinted.fr/items/3391678548-chaquet...,1907,0.519748
479,Doudoune Carhartt noire,362,120.0,4.0,6,4.9,71.0,Bon état \nTaille M et tombe sur la taille cor...,https://www.vinted.fr/items/5152070645-doudoun...,1907,0.170200
