In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import ComplementNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Données d'entrée (critiques) et étiquettes (émotions)
reviews = [
    "J'ai adoré ce film, c'était incroyable !",
    "Ce film était correct, rien de spécial.",
    "Je n'ai pas du tout aimé ce film, c'était nul.",
    "Ce film était génial, je le recommande vivement.",
    "J'ai trouvé ce film assez ennuyeux.",
    "Ce film était une déception totale."
]

emotions = ['Positif', 'Neutre', 'Négatif', 'Positif', 'Neutre', 'Négatif']

In [2]:
#Vectorisation des données textuelles
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(reviews)

In [4]:
vectorizer

In [5]:
# Séparation des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, emotions, test_size=0.2, random_state=42)

# Entraînement du modèle ComplementNB
clf = ComplementNB()
clf.fit(X_train, y_train)

# Prédictions sur l'ensemble de test
y_pred = clf.predict(X_test)

# Calcul de l'accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.5


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Définition du vecteur TF-IDF
vectorizer = TfidfVectorizer()

# Apprentissage du vocabulaire et transformation des critiques en une matrice TF-IDF
X = vectorizer.fit_transform(reviews)

# Affichage de la matrice TF-IDF résultante
print("Matrice TF-IDF :\n", X.toarray())

# Affichage des noms des caractéristiques (mots)
print("Noms des caractéristiques (mots) :\n", vectorizer.get_feature_names_out())


Matrice TF-IDF :
 [[0.56470429 0.39095179 0.         0.         0.25067186 0.
  0.         0.         0.         0.         0.25067186 0.
  0.56470429 0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.2893131 ]
 [0.         0.         0.         0.         0.20570808 0.46341154
  0.46341154 0.         0.         0.         0.20570808 0.
  0.         0.         0.         0.         0.         0.
  0.46341154 0.46341154 0.         0.         0.         0.
  0.         0.23741812]
 [0.         0.26532797 0.3832489  0.         0.17012393 0.
  0.         0.3832489  0.         0.         0.17012393 0.
  0.         0.31426958 0.         0.3832489  0.3832489  0.
  0.         0.         0.         0.3832489  0.         0.
  0.         0.19634865]
 [0.         0.         0.         0.         0.19229227 0.
  0.         0.         0.         0.         0.19229227 0.43318891
  0.         0.3552211  0.43318891 0.         0.   

In [2]:
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')

# Example function to handle negations in text
def process_negations(text):
    # Tokenize the words in the text
    tokens = word_tokenize(text)
    # List of negation words
    negation_words = set(["not", "no", "never", "none"])
    # Initialize the list of processed words
    processed_tokens = []
    # Initialize the negation flag
    negation_flag = False

    # Iterate through the tokens in the text
    for token in tokens:
        # If the token is a negation
        if token.lower() in negation_words:
            # Activate the negation flag
            negation_flag = True
            # Do not add the negation itself to the list of processed words
            continue
        # If the negation flag is active and the token is not punctuation
        if negation_flag and token not in ["'", '"', ".", ",", "!", "?", ":", ";"]:
            # Add a prefix to indicate negation
            processed_tokens.append("NOT_" + token)
            # Reset the negation flag
            negation_flag = False
        else:
            # Add the token as is to the list of processed words
            processed_tokens.append(token)

    # Return the processed text
    return " ".join(processed_tokens)

# Example usage on a text
text = "I don't like this movie at all, but it's not so bad."
processed_text = process_negations(text)
print("Original Text:", text)
print("Processed Text:", processed_text)


Original Text: I don't like this movie at all, but it's not so bad.
Processed Text: I do n't like this movie at all , but it 's NOT_so bad .


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Example of POS tagging function
def pos_tagging(text):
    # Tokenize the words in the text
    tokens = word_tokenize(text)
    # Perform POS tagging
    tagged_tokens = nltk.pos_tag(tokens)
    return tagged_tokens

# Example usage on a text
text = "I love natural language processing."
tagged_text = pos_tagging(text)
print("Tagged Text:", tagged_text)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


Tagged Text: [('I', 'PRP'), ('love', 'VBP'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.')]
