**STEP 1 : INSTALL DEPENDENCIES**

In [1]:
!pip install nltk scikit-learn pandas




**STEP 2 : IMPORT LIBRARIES**

In [3]:
import pandas as pd
import nltk
import string

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


**STEP 3 : DOWNLOAD NLTK RESOURCES**

In [11]:
nltk.download('stopwords')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

**STEP 4 : CREATE SAMPLE DATASET**

In [5]:
data = {
    "review": [
        "This movie was amazing and very enjoyable",
        "I hated the movie, it was terrible",
        "Fantastic acting and great story",
        "Worst movie I have ever watched",
        "The film was okay but a bit boring",
        "Absolutely loved it, highly recommend",
        "Not good, very disappointing"
    ],
    "sentiment": ["positive", "negative", "positive", "negative", "negative", "positive", "negative"]
}

df = pd.DataFrame(data)
df

Unnamed: 0,review,sentiment
0,This movie was amazing and very enjoyable,positive
1,"I hated the movie, it was terrible",negative
2,Fantastic acting and great story,positive
3,Worst movie I have ever watched,negative
4,The film was okay but a bit boring,negative
5,"Absolutely loved it, highly recommend",positive
6,"Not good, very disappointing",negative


**STEP 5 : CLEANING FUNCTION**

In [6]:
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(text)
    tokens = [word for word in tokens if word not in stop_words]
    return " ".join(tokens)


**STEP 6 : APPLY PREPROCESSING**

In [12]:
df["cleaned_review"] = df["review"].apply(clean_text)
df

Unnamed: 0,review,sentiment,cleaned_review
0,This movie was amazing and very enjoyable,positive,movie amazing enjoyable
1,"I hated the movie, it was terrible",negative,hated movie terrible
2,Fantastic acting and great story,positive,fantastic acting great story
3,Worst movie I have ever watched,negative,worst movie ever watched
4,The film was okay but a bit boring,negative,film okay bit boring
5,"Absolutely loved it, highly recommend",positive,absolutely loved highly recommend
6,"Not good, very disappointing",negative,good disappointing


**STEP 7 : FEATURE EXTRACTION (TF-IDF)**

In [13]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["cleaned_review"])
y = df["sentiment"]


**STEP 8 : TRAIN-TEST SPLIT**

In [14]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)


**STEP 9 : TRAIN MODEL (NAIVE BAYES)**

In [15]:
model = MultinomialNB()
model.fit(X_train, y_train)


**STEP 10 : MODEL EVALUATION**

In [16]:
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.3333333333333333

Classification Report:
               precision    recall  f1-score   support

    negative       0.33      1.00      0.50         1
    positive       0.00      0.00      0.00         2

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**STEP 11 : TEST WITH NEW SENTENCE (DEMO READYðŸŽ¤)**

In [17]:
def predict_sentiment(text):
    cleaned = clean_text(text)
    vector = vectorizer.transform([cleaned])
    return model.predict(vector)[0]

predict_sentiment("The movie was fantastic and fun")


np.str_('negative')