In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from joblib import dump

# Load data
df = pd.read_csv("tasks_preprocessed.csv")

# Combine text
texts = df["lemmas"].fillna("")

# ✅ TF-IDF for SVM model (can be different feature size)
tfidf_svm = TfidfVectorizer(max_features=3000, ngram_range=(1, 2))
X_text = tfidf_svm.fit_transform(texts)

# Target variable (you can change this column)
y = df["priority"]  # e.g., Bug, Feature, Improvement

# Split
X_train, X_test, y_train, y_test = train_test_split(X_text, y, test_size=0.2, random_state=42)

# Train SVM
svm = LinearSVC(random_state=42, max_iter=10000)
svm.fit(X_train, y_train)

# Evaluate
preds = svm.predict(X_test)
print("Task Classifier Accuracy:", accuracy_score(y_test, preds))
print(classification_report(y_test, preds))

# Save model + vectorizer
dump(svm, "model_svm.joblib")
dump(tfidf_svm, "tfidf_vectorizer_svm.joblib")
print("✅ SVM model & vectorizer saved successfully.")


Task Classifier Accuracy: 0.73
              precision    recall  f1-score   support

           0       0.70      0.97      0.81       116
           1       0.50      0.04      0.08        24
           2       0.89      0.53      0.67        60

    accuracy                           0.73       200
   macro avg       0.70      0.52      0.52       200
weighted avg       0.73      0.73      0.68       200

✅ SVM model & vectorizer saved successfully.


