In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm

# Load your dataset
df = pd.read_csv('labelled_data.csv')

# Prepare text data
texts = df['Comment Text']
labels = df['Sentiment Label']

# Split data
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Create TF-IDF vectorizer and transform the text data
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Initialize and train the Random Forest classifier with progress bar
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
for _ in tqdm(range(1), desc="Training Progress"):
    classifier.fit(X_train_tfidf, y_train)

# Predict and evaluate
y_pred = classifier.predict(X_test_tfidf)
print(classification_report(y_test, y_pred))


Training Progress: 100%|██████████| 1/1 [04:06<00:00, 246.76s/it]


              precision    recall  f1-score   support

         NEG       0.87      0.39      0.53      1045
         NEU       0.89      0.96      0.92     11197
         POS       0.88      0.81      0.85      4339

    accuracy                           0.88     16581
   macro avg       0.88      0.72      0.77     16581
weighted avg       0.88      0.88      0.88     16581

