In [1]:
%pip install pandas scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [2]:
from flask import Flask, request, jsonify
import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Flask application object; the /classify route is registered on it in a later cell.
app = Flask(__name__)

# Load the labelled questions and keep only rows where both columns used for
# training are present. A bare dropna() would also throw away usable rows whose
# only missing value sits in a column the model never reads. Chaining onto the
# read keeps this cell idempotent when it is re-run.
df = pd.read_csv('data.csv').dropna(subset=['question', 'category'])

# Features are the question text (coerced to str for the vectorizer);
# labels are the category column.
X = df['question'].astype(str)
y = df['category']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# shuffled split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

# Fit the TF-IDF vocabulary on the training split only, then apply that same
# fitted vectorizer to the test split.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Train a Support Vector Machine (SVM) classifier with a linear kernel
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train_tfidf, y_train)

# Persist the classifier and the fitted vectorizer; the /classify endpoint
# loads both files by these names.
joblib.dump(svm_classifier, 'question_classifier_model.pkl')
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')


['tfidf_vectorizer.pkl']

In [3]:
from sklearn.metrics import accuracy_score

# Reload the persisted classifier so the evaluation runs against the saved artifact
svm_classifier = joblib.load('question_classifier_model.pkl')

# Score the held-out TF-IDF vectors and compare the predictions with the true labels
y_pred = svm_classifier.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy:", accuracy)


Accuracy: 0.9882352941176471


In [4]:
# Optional: write the classifier currently bound to `svm_classifier` back to
# the same pickle file.
import joblib
joblib.dump(value=svm_classifier, filename='question_classifier_model.pkl')

['question_classifier_model.pkl']

In [5]:
# Process-wide cache so the pickled artifacts are read from disk once instead
# of on every request. It lives until this cell is re-executed or the process
# exits, so newly saved pickles are not picked up before then.
_classifier_artifacts = {}


def _load_artifacts():
    """Return ``(classifier, vectorizer)``, loading both pickles on first use."""
    if not _classifier_artifacts:
        # NOTE(review): joblib.load unpickles arbitrary objects; these files
        # must only ever come from a trusted source.
        model = joblib.load('question_classifier_model.pkl')
        vectorizer = joblib.load('tfidf_vectorizer.pkl')
        # Populate the cache only after both loads succeeded, so a failed load
        # never leaves a half-filled cache behind.
        _classifier_artifacts.update(model=model, vectorizer=vectorizer)
    return _classifier_artifacts['model'], _classifier_artifacts['vectorizer']


@app.route('/classify', methods=['POST'])
def classify_question():
    """Predict the category of the question sent in a JSON POST body.

    Expects a JSON object with a string ``question`` field and responds with
    ``{"predicted_category": <label>}``.

    Requests without a parseable JSON object, without a ``question`` key, or
    with a non-string ``question`` receive a 400 response carrying an
    ``error`` message instead of triggering an unhandled exception.
    """
    # silent=True yields None for a missing or malformed JSON body rather than raising
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'question' not in data:
        return jsonify({'error': "JSON body with a 'question' field is required"}), 400

    new_question = data['question']
    if not isinstance(new_question, str):
        # The vectorizer works on text; reject other JSON types up front.
        return jsonify({'error': "'question' must be a string"}), 400

    svm_classifier, tfidf_vectorizer = _load_artifacts()

    # Vectorise the single question with the fitted TF-IDF vocabulary and predict
    new_question_tfidf = tfidf_vectorizer.transform([new_question])
    predicted_category = svm_classifier.predict(new_question_tfidf)[0]

    # The label may be a NumPy scalar (e.g. for numeric categories), which
    # jsonify cannot serialise; unwrap it to the native Python value.
    if hasattr(predicted_category, 'item'):
        predicted_category = predicted_category.item()

    return jsonify({'predicted_category': predicted_category})


# Start Flask's built-in server when this code runs as the main module,
# listening on every network interface at port 8002.
if __name__ == "__main__":
    app.run(port=8002, host="0.0.0.0")


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8002
 * Running on http://192.168.18.66:8002
Press CTRL+C to quit
