In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the tweets and keep a reproducible 10% random subsample.
df = pd.read_csv("Tweets.csv").sample(frac=0.1, random_state=42)

print(df.head())

# Encode the textual sentiment as an integer class id.
SENTIMENT_TO_LABEL = {
    'negative': 0,
    'neutral': 1,
    'positive': 2,
}
df["label"] = df["airline_sentiment"].map(SENTIMENT_TO_LABEL)

# Hold out 20% of the sampled tweets for evaluation (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.2,
    random_state=42,
)

# The vocabulary is learned from the training text only; the test text is
# merely projected onto that vocabulary.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# ``fit`` returns the estimator itself, so construction and training chain.
model = MultinomialNB().fit(X_train_vectorized, y_train)

y_pred = model.predict(X_test_vectorized)

# Evaluate the held-out predictions.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{conf_matrix}')
print(f'Classification Report:\n{class_report}')


                 tweet_id airline_sentiment  airline_sentiment_confidence  \
4794   569731104070115329          positive                        1.0000   
10480  569263373092823040          negative                        1.0000   
8067   568818669024907264          negative                        1.0000   
8880   567775864679456768           neutral                        1.0000   
8292   568526521910079488          negative                        0.6625   

               negativereason  negativereason_confidence     airline  \
4794                      NaN                        NaN   Southwest   
10480        Cancelled Flight                     1.0000  US Airways   
8067              Late Flight                     0.6770       Delta   
8880                      NaN                        NaN       Delta   
8292   Customer Service Issue                     0.3394       Delta   

      airline_sentiment_gold             name negativereason_gold  \
4794                     NaN       