In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Read the question/category table from disk.
data = pd.read_csv("Movie_Questions_Base.csv")

# Inputs are the 'question' column; targets are the 'category' column.
X, y = data['question'], data['category']

# Hold out 20% of the rows for testing. The split is seeded for repeatability
# and stratified on the labels so both partitions keep the same class mix.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# TF-IDF features: English stop words are dropped and the vocabulary is capped
# at 1000 terms. The vocabulary is learned from the training split only and
# then re-used, unchanged, to encode the test split.
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Linear-kernel SVM fitted on the training features.
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train_tfidf, y_train)

# Score the held-out split.
y_pred = svm_model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Emit the overall accuracy followed by the per-class report, one per line.
print(f"Accuracy: {accuracy}", "Classification Report:", report, sep="\n")


Accuracy: 0.9615384615384616
Classification Report:
                precision    recall  f1-score   support

       Factual       0.91      1.00      0.95        50
    Multimedia       0.97      0.97      0.97        40
Recommendation       1.00      1.00      1.00        42
     Unrelated       0.98      0.88      0.93        50

      accuracy                           0.96       182
     macro avg       0.97      0.96      0.96       182
  weighted avg       0.96      0.96      0.96       182



In [14]:
import torch

# Persist the fitted classifier together with its TF-IDF vectorizer in a single
# file, so the two are always restored as a matching pair.
# NOTE(review): these are scikit-learn objects, not tensors; torch.save is used
# here only as a generic object serializer.
torch.save(
    obj={'svm_model': svm_model, 'vectorizer': vectorizer},
    f="svm_question_classifier.pth",
)






In [15]:
import torch

# Restore the classifier and vectorizer that were saved together in one file.
#
# The checkpoint contains pickled scikit-learn objects rather than tensors, so
# it cannot be read by torch's restricted "weights only" unpickler. Newer
# PyTorch releases use that restricted mode by default, so full unpickling is
# requested explicitly here to keep this cell working across versions.
#
# SECURITY: full unpickling can execute arbitrary code embedded in the file.
# Only load checkpoints that you created yourself or otherwise trust.
checkpoint = torch.load("svm_question_classifier.pth", weights_only=False)
loaded_model = checkpoint['svm_model']
loaded_vectorizer = checkpoint['vectorizer']

  checkpoint = torch.load("svm_question_classifier.pth")


In [18]:
# Classify unseen questions with the restored model. The text must be encoded
# by the restored vectorizer (transform only, no refitting) so the features
# line up with the ones the classifier was trained on.
new_questions = [
    "What movie do I like if I like inception",
]
new_questions_tfidf = loaded_vectorizer.transform(new_questions)
predictions = loaded_model.predict(new_questions_tfidf)

# One predicted category per input question.
print(predictions)

['Recommendation']
