In [3]:
from utils.clean_data import clean_and_structure_data
import pandas as pd
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer

# df = pd.read_csv("../Tesla_Trustpilot_Reviews.csv", delimiter=',')
df = clean_and_structure_data("Tesla_Trustpilot_Reviews.csv", "output")

# Fonction d'analyse de sentiment avec TextBlob
def analyze_sentiment_textblob(text):
    try:
        # Utilisation de TextBlob pour obtenir la polarité (positif, négatif, neutre)
        analysis = TextBlob(text)
        return analysis.sentiment.polarity
    except Exception as e:
        print(f"Erreur dans l'analyse avec TextBlob: {e}")
        return None

# Fonction d'analyse de sentiment avec VADER
def analyze_sentiment_vader(text):
    analyzer = SentimentIntensityAnalyzer()
    try:
        # Analyser le texte et renvoyer le score de sentiment
        sentiment_score = analyzer.polarity_scores(text)
        return sentiment_score['compound']  # Renvoie un score global de sentiment
    except Exception as e:
        print(f"Erreur dans l'analyse avec VADER: {e}")
        return None

# Appliquer TF-IDF sur le contenu pour extraire les mots-clés
def extract_keywords(df, n_keywords=10):
    vectorizer = TfidfVectorizer(stop_words='english', max_features=50)
    tfidf_matrix = vectorizer.fit_transform(df['Content'])

    # Récupérer les mots-clés en fonction de leur score TF-IDF
    feature_names = vectorizer.get_feature_names_out()

    docterm = pd.DataFrame(tfidf_matrix.todense(), columns=feature_names)

    sorted_indices = tfidf_matrix.sum(axis=0).argsort()[0, ::-1]  # Tri des indices des mots-clés
    # Récupérer les meilleurs mots-clés
    keywords = [feature_names[i] for i in sorted_indices[:n_keywords]]
    return keywords, docterm

# Extraire les 10 mots-clés les plus importants
keywords = extract_keywords(df, n_keywords=10)

# Afficher les mots-clés
print("Mots-clés les plus importants:", keywords)


# Appliquer les fonctions d'analyse de sentiment sur les données
df['Sentiment_TextBlob'] = df['Content'].apply(analyze_sentiment_textblob)
df['Sentiment_VADER'] = df['Content'].apply(analyze_sentiment_vader)

# Afficher les résultats de l'analyse de sentiment
print(df[['Username', 'Title', 'Sentiment_TextBlob', 'Sentiment_VADER']])

# Extraire les mots-clés
keywords = extract_keywords(df, n_keywords=10)
print("Mots-clés les plus importants:", keywords)
display(
    df, 
    extract_keywords(df, n_keywords=5)
)


Fichier nettoyé et structuré sauvegardé sous : output

Mots-clés les plus importants: ([array([['tesla', 'car', 'service', 'customer', 'model', 'company', 'new',
        'just', 'experience', 'don', 'time', 'delivery', 'told', 'worst',
        'vehicle', 'buy', 'cars', 'like', 've', 'months', 'days',
        'phone', 'app', 'day', 'good', 'great', 'order', 'bad', 'bought',
        'said', 'got', 'issues', 'years', 'weeks', 'issue', 'drive',
        'hours', 'people', 'need', 'problem', 'way', 'miles', 'money',
        'know', 'appointment', 'customers', 'make', 'work', 'center',
        'did']], dtype=object)],      app  appointment       bad  bought  buy       car      cars    center  \
0    0.0          0.0  0.000000     0.0  0.0  0.000000  0.407825  0.000000   
1    0.0          0.0  0.000000     0.0  0.0  0.390858  0.372437  0.000000   
2    0.0          0.0  0.000000     0.0  0.0  0.000000  0.290051  0.000000   
3    0.0          0.0  0.000000     0.0  0.0  0.071848  0.000000  0.0

Unnamed: 0,Username,Title,Content,Rating,Date,Sentiment_TextBlob,Sentiment_VADER
0,John McLean,Utter Rubbish,Utter Rubbish 3 owner for the last 3 years Goo...,1,2024-11-19,-0.078704,-0.6249
1,Angry from Manchester,5 5 for the car 0 5 for the dealer,The car model 3 is excellent though feels a li...,3,2024-11-18,0.179167,0.9279
2,Kevin O,Musk is the most embarrassing man that,Musk is the most embarrassing man that has eve...,1,2024-11-21,-0.100000,-0.7778
3,Andrew Haworth,Put a deposit on a cybertruck 4 years,Put a deposit on a cybertruck 4 years ago For ...,1,2024-11-20,-0.283333,-0.8856
4,Ahmed Radm,Thanks for the office tomorrow so I can,Thanks for the office tomorrow so I can do it ...,5,2024-11-23,0.200000,0.4404
...,...,...,...,...,...,...,...
995,Jacques W,Worst customer service in Austin Research Blvd,Disappointing customer service experience Whil...,1,2021-09-11,-0.220000,-0.9705
996,Hamid Naraghi,I am only giving one star because that,I am only giving one star because that is the ...,1,2021-09-09,-0.325000,-0.7538
997,ppr,Tesla Service is extremely poor,This is an email I sent to a Mr Walker who I f...,1,2021-09-07,0.074899,0.9856
998,Steven Girouard,Tesla is hands down the worst,Tesla is hands down the worst customer service...,1,2021-09-07,-0.048569,-0.9720


([array([['tesla', 'car', 'service', 'customer', 'model', 'company', 'new',
          'just', 'experience', 'don', 'time', 'delivery', 'told', 'worst',
          'vehicle', 'buy', 'cars', 'like', 've', 'months', 'days',
          'phone', 'app', 'day', 'good', 'great', 'order', 'bad', 'bought',
          'said', 'got', 'issues', 'years', 'weeks', 'issue', 'drive',
          'hours', 'people', 'need', 'problem', 'way', 'miles', 'money',
          'know', 'appointment', 'customers', 'make', 'work', 'center',
          'did']], dtype=object)],
      app  appointment       bad  bought  buy       car      cars    center  \
 0    0.0          0.0  0.000000     0.0  0.0  0.000000  0.407825  0.000000   
 1    0.0          0.0  0.000000     0.0  0.0  0.390858  0.372437  0.000000   
 2    0.0          0.0  0.000000     0.0  0.0  0.000000  0.290051  0.000000   
 3    0.0          0.0  0.000000     0.0  0.0  0.071848  0.000000  0.000000   
 4    0.0          0.0  0.000000     0.0  0.0  0.000000  0