In [6]:
# === 1. Rendre le dossier src importable ===
import sys
import os

# Dossier racine du projet (le parent de "notebooks/")
ROOT_DIR = os.path.abspath("..")
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# === 2. Imports ===
import pandas as pd
from src.utils.cleaning import clean_text, detect_language

# === 3. Charger le CSV Yelp ===
# Ton fichier est dans : PROJET_API/data/yelp_reviews_selenium_page1.csv
csv_path = os.path.join(ROOT_DIR, "data", "yelp_reviews_selenium_page1.csv")
df_yelp = pd.read_csv(csv_path)

print("Shape brut :", df_yelp.shape)
df_yelp.head()


Shape brut : (101, 1)


Unnamed: 0,Avis
0,This little bistro on a great street in Paris ...
1,Charming small bistro restaurant with a wonder...
2,What a nice suprise! Get the Croquet Monsieur ...
3,"I, like many, visited for the croque monsieur..."
4,"Nice ambience, good food, and great happy hour..."


In [7]:
# === 4. Nettoyage du texte + d√©tection de langue ===

# S√©cu : s'assurer que la colonne Avis est bien en string
df_yelp["Avis"] = df_yelp["Avis"].astype(str)

# Texte nettoy√©
df_yelp["Avis_clean"] = df_yelp["Avis"].apply(clean_text)

# Langue d√©tect√©e
df_yelp["Langue"] = df_yelp["Avis_clean"].apply(detect_language)

# Infos utiles pour ton projet
df_yelp["platform"] = "Yelp"
df_yelp["brand"] = "Le Petit Cler"

# Aper√ßu
df_yelp.head()


Unnamed: 0,Avis,Avis_clean,Langue,platform,brand
0,This little bistro on a great street in Paris ...,This little bistro on a great street in Paris ...,en,Yelp,Le Petit Cler
1,Charming small bistro restaurant with a wonder...,Charming small bistro restaurant with a wonder...,en,Yelp,Le Petit Cler
2,What a nice suprise! Get the Croquet Monsieur ...,What a nice suprise! Get the Croquet Monsieur ...,en,Yelp,Le Petit Cler
3,"I, like many, visited for the croque monsieur...","I, like many, visited for the croque monsieur/...",en,Yelp,Le Petit Cler
4,"Nice ambience, good food, and great happy hour...","Nice ambience, good food, and great happy hour...",en,Yelp,Le Petit Cler


In [9]:
df_yelp["Titre de l'avis"] = None
df_yelp["Date"] = None
df_yelp["Date_str"] = None


In [10]:
output_path = os.path.join(ROOT_DIR, "data", "yelp_reviews_clean.csv")
df_yelp.to_csv(output_path, index=False)
print("Fichier sauvegard√© :", output_path)


Fichier sauvegard√© : c:\Users\radjema\Desktop\PROJET_API\data\yelp_reviews_clean.csv


In [13]:
import pandas as pd

# Charger tes datasets propres
df_car = pd.read_csv("../data/reviews_trustpilot_carhartt_clean.csv")
df_yelp = pd.read_csv("../data/yelp_reviews_clean.csv")

# Colonnes manquantes pour Yelp
df_yelp["Titre de l'avis"] = None
df_yelp["Date"] = None
df_yelp["Date_str"] = None

# Ajouter colonne len si elle n'existe pas
for df in [df_car, df_yelp]:
    if "len" not in df.columns:
        df["len"] = df["Avis_clean"].astype(str).apply(len)

# Colonnes finales harmonis√©es
cols = [
    "Titre de l'avis",
    "Avis",
    "Date_str",
    "Date",
    "Avis_clean",
    "Langue",
    "platform",
    "brand",
    "len"
]

# S√©lection
df_car = df_car.reindex(columns=cols)
df_yelp = df_yelp.reindex(columns=cols)

# Fusionner
df_all = pd.concat([df_car, df_yelp], ignore_index=True)

# Sauvegarder
df_all.to_csv("../data/reviews_all_clean.csv", index=False)

df_all.head()


Unnamed: 0,Titre de l'avis,Avis,Date_str,Date,Avis_clean,Langue,platform,brand,len
0,Tr√®s grande qualit√© de coton,"Tr√®s grande qualit√© de coton, coupe impeccable...",6 nov. 2025,2025-11-06,"Tr√®s grande qualit√© de coton, coupe impeccable...",fr,Trustpilot,Carhartt WIP,79
1,Un service client√®le g√©nial !,Un service client√®le g√©nial ! Je m‚Äô√©tais total...,29 oct. 2025,2025-10-29,Un service client√®le g√©nial ! Je m‚Äô√©tais total...,fr,Trustpilot,Carhartt WIP,248
2,Un √©change satisfaisant avec Nadia,Un √©change satisfaisant avec Nadia. Merci.,23 oct. 2025,2025-10-23,Un √©change satisfaisant avec Nadia. Merci.,fr,Trustpilot,Carhartt WIP,42
3,Efficacit√©,La personne que j‚Äôai contact√© m‚Äôa r√©pondu rapi...,21 oct. 2025,2025-10-21,La personne que j‚Äôai contact√© m‚Äôa r√©pondu rapi...,fr,Trustpilot,Carhartt WIP,75
4,Demande de facture,Merci beaucoup pour notre r√©activit√© et votre ...,17 oct. 2025,2025-10-17,Merci beaucoup pour notre r√©activit√© et votre ...,fr,Trustpilot,Carhartt WIP,87


In [14]:
df_all.head(70)

Unnamed: 0,Titre de l'avis,Avis,Date_str,Date,Avis_clean,Langue,platform,brand,len
0,Tr√®s grande qualit√© de coton,"Tr√®s grande qualit√© de coton, coupe impeccable...",6 nov. 2025,2025-11-06,"Tr√®s grande qualit√© de coton, coupe impeccable...",fr,Trustpilot,Carhartt WIP,79
1,Un service client√®le g√©nial !,Un service client√®le g√©nial ! Je m‚Äô√©tais total...,29 oct. 2025,2025-10-29,Un service client√®le g√©nial ! Je m‚Äô√©tais total...,fr,Trustpilot,Carhartt WIP,248
2,Un √©change satisfaisant avec Nadia,Un √©change satisfaisant avec Nadia. Merci.,23 oct. 2025,2025-10-23,Un √©change satisfaisant avec Nadia. Merci.,fr,Trustpilot,Carhartt WIP,42
3,Efficacit√©,La personne que j‚Äôai contact√© m‚Äôa r√©pondu rapi...,21 oct. 2025,2025-10-21,La personne que j‚Äôai contact√© m‚Äôa r√©pondu rapi...,fr,Trustpilot,Carhartt WIP,75
4,Demande de facture,Merci beaucoup pour notre r√©activit√© et votre ...,17 oct. 2025,2025-10-17,Merci beaucoup pour notre r√©activit√© et votre ...,fr,Trustpilot,Carhartt WIP,87
...,...,...,...,...,...,...,...,...,...
65,,Bien trop cher pour la qualit√© et les portions...,,,Bien trop cher pour la qualit√© et les portions...,en,Yelp,Le Petit Cler,124
66,,I ate here a couple times because it was acros...,,,I ate here a couple times because it was acros...,en,Yelp,Le Petit Cler,226
67,,"Wanted to get a ""Parisian"" breakfast and this ...",,,"Wanted to get a ""Parisian"" breakfast and this ...",en,Yelp,Le Petit Cler,242
68,,This is really good. I had been disappointed b...,,,This is really good. I had been disappointed b...,en,Yelp,Le Petit Cler,497


In [18]:
import pandas as pd
from src.nlp.sentiment import analyze_sentiment

df_all = pd.read_csv("../data/reviews_all_clean.csv")

df_all["sentiment"] = df_all["Avis_clean"].apply(analyze_sentiment)

df_all["sentiment"].value_counts()


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


sentiment
positive    115
negative     10
neutral       9
Name: count, dtype: int64

In [20]:
df_all.head(100)


Unnamed: 0,Titre de l'avis,Avis,Date_str,Date,Avis_clean,Langue,platform,brand,len,sentiment
0,Tr√®s grande qualit√© de coton,"Tr√®s grande qualit√© de coton, coupe impeccable...",6 nov. 2025,2025-11-06,"Tr√®s grande qualit√© de coton, coupe impeccable...",fr,Trustpilot,Carhartt WIP,79,positive
1,Un service client√®le g√©nial !,Un service client√®le g√©nial ! Je m‚Äô√©tais total...,29 oct. 2025,2025-10-29,Un service client√®le g√©nial ! Je m‚Äô√©tais total...,fr,Trustpilot,Carhartt WIP,248,positive
2,Un √©change satisfaisant avec Nadia,Un √©change satisfaisant avec Nadia. Merci.,23 oct. 2025,2025-10-23,Un √©change satisfaisant avec Nadia. Merci.,fr,Trustpilot,Carhartt WIP,42,positive
3,Efficacit√©,La personne que j‚Äôai contact√© m‚Äôa r√©pondu rapi...,21 oct. 2025,2025-10-21,La personne que j‚Äôai contact√© m‚Äôa r√©pondu rapi...,fr,Trustpilot,Carhartt WIP,75,positive
4,Demande de facture,Merci beaucoup pour notre r√©activit√© et votre ...,17 oct. 2025,2025-10-17,Merci beaucoup pour notre r√©activit√© et votre ...,fr,Trustpilot,Carhartt WIP,87,positive
...,...,...,...,...,...,...,...,...,...,...
95,,The onion soup is the best I've ever had anywh...,,,The onion soup is the best I've ever had anywh...,en,Yelp,Le Petit Cler,634,positive
96,,"We had a fabulous, fun early dinner on our fir...",,,"We had a fabulous, fun early dinner on our fir...",en,Yelp,Le Petit Cler,269,positive
97,,Terrific from Happy Hour through dinner. Night...,,,Terrific from Happy Hour through dinner. Night...,en,Yelp,Le Petit Cler,270,positive
98,,My sister and I were in Paris for the day. We ...,,,My sister and I were in Paris for the day. We ...,en,Yelp,Le Petit Cler,845,neutral


In [21]:
df_all.to_csv("../data/reviews_all_with_sentiment.csv", index=False)


In [22]:
df_all["sentiment"].value_counts(normalize=True)


sentiment
positive    0.858209
negative    0.074627
neutral     0.067164
Name: proportion, dtype: float64

In [23]:
df_all.groupby("platform")["sentiment"].value_counts(normalize=True)


platform    sentiment
Trustpilot  positive     0.848485
            negative     0.121212
            neutral      0.030303
Yelp        positive     0.861386
            neutral      0.079208
            negative     0.059406
Name: proportion, dtype: float64

In [24]:
import pandas as pd
from src.nlp.response_generator import generate_reply

# Charger le dataset avec sentiments
df_all = pd.read_csv("../data/reviews_all_with_sentiment.csv")

# Exemple : prendre 3 avis au hasard
sample = df_all.sample(3, random_state=42)

for i, row in sample.iterrows():
    print("=" * 80)
    print("Avis :", row["Avis_clean"])
    print("Sentiment :", row["sentiment"], "| Langue :", row["Langue"], "| Platform :", row["platform"])
    print("\nR√©ponse g√©n√©r√©e :\n")
    reply = generate_reply(
        review_text=row["Avis_clean"],
        sentiment=row["sentiment"],
        platform=row["platform"],
        brand=row["brand"],
        lang=row["Langue"],
        tone="empathique",       # essaie aussi "formel" ou "amical"
        agent_name="L'√©quipe Carhartt WIP",
    )
    print(reply)
    print()


Avis : I came here for one thing, and one thing only: Snails. Better known by you French folk as escargots. No word of mouth, I just picked a spot and help for the escargots, and voila. I must say, the outdoor seating here gives you a real taste of non-touristy Paris. Which is welcomed after going to the overly walked tourist attractions. Sit down, eat slow, and enjoy a taste of the parisean cafes. Back to the snails, truly tasty. I've had good escargot back in the states, but the water must be different because these were what all escargot should be. Succulent, sauce to die for, and the bread to sop it all up. Definitely worth trying if you've never had it before. Would eat any weird mollusc from here/10
Sentiment : positive | Langue : en | Platform : Yelp

R√©ponse g√©n√©r√©e :

Hello, and thank you for taking the time to share your feedback.

Your positive feedback about Le Petit Cler really means a lot to us. We're glad we could contribute to a good experience.

We hope to see you 

In [26]:
from src.nlp.response_generator import generate_reply


avis_test = "Service tr√®s lent, je suis vraiment d√©√ßu. J'attends toujours ma commande."
sentiment = "negative"
langue = "fr"
platform = "Trustpilot"
brand = "Carhartt WIP"

print(generate_reply(avis_test, sentiment, langue, platform, brand))


Bonjour üòä

Navr√©s d'apprendre que votre exp√©rience avec Trustpilot n'a pas √©t√© au top üòî Merci d'avoir pris le temps de nous expliquer ce qui s'est pass√©.

Si vous le souhaitez, vous pouvez nous √©crire via le service client ou en message priv√© sur fr pour que l'on regarde cela ensemble üëç

Cordialement,
L'√©quipe Service Client


In [27]:
row = df_all[df_all["Langue"] == "fr"].iloc[0]

avis = row["Avis_clean"]
sentiment = row["sentiment"]
langue = row["Langue"]
platform = row["platform"]
brand = row["brand"]

print("Avis :", avis)
print("Sentiment :", sentiment)
print("Langue :", langue)
print("Platform :", platform)
print("\nR√©ponse g√©n√©r√©e :\n")
print(generate_reply(avis, sentiment, langue, platform, brand))


Avis : Tr√®s grande qualit√© de coton, coupe impeccable ! Sweat bien chaud pour l'hiver.
Sentiment : positive
Langue : fr
Platform : Trustpilot

R√©ponse g√©n√©r√©e :

Bonjour üòä

Un grand merci pour votre super avis sur Trustpilot ! Nous sommes vraiment contents que tout se soit bien pass√© üôè

Au plaisir de vous revoir bient√¥t üòä

Cordialement,
L'√©quipe Service Client
