In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, classification_report, accuracy_score


In [6]:
# Read the training and test splits from their CSV files.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/train.csv', '../data/test.csv')
)

In [7]:

# Separate the model input from the target labels.
# `data` is the text the model learns from: the 'titre' and 'ingredients'
# columns joined into a single string per row. Missing values are replaced
# with an empty string first, because concatenating a string with NaN yields
# NaN and the TF-IDF vectorizer rejects NaN documents.
data = train_data['titre'].fillna('') + " " + train_data['ingredients'].fillna('')
# `labels` is what the model should predict: the 'type' column.
labels = train_data['type']

In [8]:
# Build the classifier as a two-step pipeline: TF-IDF features computed from
# the raw text, fed into a support-vector classifier with a linear kernel.
tfidf_step = ('tfidf', TfidfVectorizer())
svm_step = ('svm', SVC(kernel='linear'))
pipeline = Pipeline(steps=[tfidf_step, svm_step])

# Train both steps on the training text and its labels.
pipeline.fit(data, labels)

In [9]:
# Combine title and ingredients into a single text feature for the test data,
# mirroring how the training text is built. Missing values are replaced with
# an empty string first, because concatenating a string with NaN yields NaN
# and the TF-IDF vectorizer rejects NaN documents at prediction time.
test_data_combined = test_data['titre'].fillna('') + " " + test_data['ingredients'].fillna('')
# Ground-truth labels used to score the predictions.
test_labels = test_data['type']

In [10]:
# Run the fitted pipeline on the combined test text to get one predicted
# label per test row.
test_predictions = pipeline.predict(X=test_data_combined)

In [11]:
# Per-class breakdowns: confusion matrix and the text classification report.
conf_matrix = confusion_matrix(test_labels, test_predictions)
class_report = classification_report(test_labels, test_predictions)

# Macro-averaged precision / recall / F1 (unweighted mean over classes);
# the fourth returned value (support) is not used.
precision, recall, f1, _ = precision_recall_fscore_support(
    test_labels,
    test_predictions,
    average='macro',
)

# Overall accuracy: fraction of test rows whose prediction matches the label.
accuracy = accuracy_score(test_labels, test_predictions)

# Print every evaluation metric, one heading/value pair per line.
for metric_heading, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-Score (Macro):", f1),
    ("Confusion Matrix:\n", conf_matrix),
    ("Classification Report:\n", class_report),
):
    print(metric_heading, metric_value)

Accuracy: 0.8523054755043228
Precision: 0.8466432722734796
Recall: 0.8348208884591287
F1-Score (Macro): 0.8391646904253504
Confusion Matrix:
 [[405   1   1]
 [  5 213 119]
 [  3  76 565]]
Classification Report:
                 precision    recall  f1-score   support

       Dessert       0.98      1.00      0.99       407
        Entrée       0.73      0.63      0.68       337
Plat principal       0.82      0.88      0.85       644

      accuracy                           0.85      1388
     macro avg       0.85      0.83      0.84      1388
  weighted avg       0.85      0.85      0.85      1388

