In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm

# Load your dataset
df = pd.read_csv('labelled_data.csv')

# Prepare text data
texts = df['Comment Text']
labels = df['Sentiment Label']

# Split data
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Create TF-IDF vectorizer and SVM pipeline
pipeline = make_pipeline(TfidfVectorizer(), SVC(kernel='linear', probability=True))

# Train the model with progress bar
with tqdm(total=len(X_train), desc="Training SVM") as pbar:
    pipeline.fit(X_train, y_train)
    pbar.update(len(X_train))

# Predict and evaluate
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))


Training SVM: 100%|██████████| 66324/66324 [28:24<00:00, 38.92it/s]


              precision    recall  f1-score   support

         NEG       0.77      0.54      0.64      1045
         NEU       0.91      0.95      0.93     11197
         POS       0.88      0.84      0.86      4339

    accuracy                           0.89     16581
   macro avg       0.85      0.78      0.81     16581
weighted avg       0.89      0.89      0.89     16581



In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm

# Load your dataset
df = pd.read_csv('labelled_data.csv')

# Prepare text data
texts = df['Comment Text']
labels = df['Sentiment Label']

# Map the sentiment labels to numerical values
label_mapping = {'NEG': 0, 'NEU': 1, 'POS': 2}
labels = labels.map(label_mapping)

# Split data
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Create TF-IDF vectorizer and SVM pipeline
pipeline = make_pipeline(TfidfVectorizer(), SVC(kernel='linear', probability=True))

# Train the model with progress bar
with tqdm(total=len(X_train), desc="Training SVM") as pbar:
    pipeline.fit(X_train, y_train)
    pbar.update(len(X_train))

# Predict and evaluate
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))


Training SVM: 100%|██████████| 66324/66324 [26:43<00:00, 41.36it/s]


              precision    recall  f1-score   support

           0       0.77      0.54      0.64      1045
           1       0.91      0.95      0.93     11197
           2       0.88      0.84      0.86      4339

    accuracy                           0.89     16581
   macro avg       0.85      0.78      0.81     16581
weighted avg       0.89      0.89      0.89     16581

