In [None]:
import pandas as pd
import numpy as np
import string
import re

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Load the labelled sentiment dataset. The path is a raw string so the Windows
# backslashes are taken literally: in a normal string "\S" and "\y" are invalid
# escape sequences, which Python reports with a warning (and is slated to
# reject). The resulting path is identical to the one used before.
df = pd.read_excel(r"D:\Sentiment\yashi_sentiment1.xlsx")

def clean_text(text):
    """Normalize a piece of raw text before it is vectorized.

    The value is converted to a lowercase string, then URLs (``http...``),
    ``@mentions``, ``#hashtags``, any character that is neither a word
    character nor whitespace, and digit runs are removed, in that order.
    Leading and trailing whitespace is stripped; interior whitespace left
    behind by the removals is kept as-is.

    Args:
        text: The value to clean. Non-string values are converted with
            ``str()``. ``None`` and float NaN are treated as empty text.

    Returns:
        str: The cleaned text (possibly empty).
    """
    # Without this guard str() would turn a missing value into the literal
    # word "none" / "nan", which would then be treated as real text.
    # `text != text` is True only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    # Order matters: mentions/hashtags must be removed while their "@"/"#"
    # prefix is still present, i.e. before punctuation is stripped.
    for pattern in (
        r"http\S+",   # URLs
        r"@\w+",      # @mentions
        r"#\w+",      # hashtags
        r"[^\w\s]",   # remaining punctuation / symbols
        r"\d+",       # digits
    ):
        text = re.sub(pattern, "", text)
    return text.strip()

# --- Prepare features and labels ---------------------------------------------
# The "Text" column is overwritten with its cleaned version.
df["Text"] = df["Text"].apply(clean_text)
X, y = df["Text"], df["Sentiment"]

# Hold out 20% of the rows for the final evaluation; the seed is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model: TF-IDF features feeding a support vector classifier --------------
pipeline = Pipeline(steps=[
    ("tfidf", TfidfVectorizer(stop_words="english")),
    ("clf", SVC()),
])

# Hyper-parameters searched for the "clf" step of the pipeline.
params = {
    "clf__C": [0.1, 1, 10],
    "clf__kernel": ["linear", "rbf"],
}

# 3-fold cross-validated grid search on the training split, scored by accuracy.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=params,
    scoring="accuracy",
    cv=3,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

best_model = grid.best_estimator_

# --- Report -------------------------------------------------------------------
print("\n Best Model Parameters:", grid.best_params_)
print(f" Best Cross-Validation Accuracy: {grid.best_score_:.2f}")

# Per-class metrics on the held-out test split.
print("\n Classification Report on Test Data:")
y_pred = best_model.predict(X_test)
print(classification_report(y_test, y_pred))

def predict_from_file(file_path):
    """Predict the sentiment of the text stored in a file.

    The file is read as UTF-8, passed through ``clean_text`` and classified
    with the module-level ``best_model``. The result is printed and also
    returned, so callers can capture it (previously the function returned
    ``None``, leaving ``result = predict_from_file(...)`` empty).

    Args:
        file_path: Path of the text file to classify.

    Returns:
        The predicted label, i.e. the first element of
        ``best_model.predict([cleaned_text])``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()
    cleaned = clean_text(text)
    # predict() takes a collection of documents; wrap the single text and
    # take its only prediction.
    prediction = best_model.predict([cleaned])[0]
    print(f"\n The sentiment of the text in '{file_path}' is: {prediction}")
    return prediction


  df = pd.read_excel("D:\Sentiment\yashi_sentiment1.xlsx")



✅ Best Model Parameters: {'clf__C': 10, 'clf__kernel': 'rbf'}
📈 Best Cross-Validation Accuracy: 0.81

📊 Classification Report on Test Data:
              precision    recall  f1-score   support

    Negative       0.85      0.86      0.85        71
     Neutral       0.78      0.78      0.78        63
    Positive       0.95      0.94      0.95        65

    accuracy                           0.86       199
   macro avg       0.86      0.86      0.86       199
weighted avg       0.86      0.86      0.86       199



In [133]:
# Store the result under its own name: assigning it to `y` would overwrite the
# training labels defined earlier and break a re-run of the split/fit cell.
predicted_sentiment = predict_from_file("D:/Sentiment/txxt.txt")
predicted_sentiment


📁 The sentiment of the text in 'D:/Sentiment/txxt.txt' is: Positive


In [134]:
import joblib
# Persist the selected pipeline to disk in the current working directory.
joblib.dump(value=best_model, filename="best_svm_model.pkl")

['best_svm_model.pkl']