In [None]:
  # Benchmark several classifiers on TF-IDF features, once labelled "without"
  # and once labelled "with" lemmatization, then plot every metric per
  # classifier for both runs on a single figure.
  #
  # NOTE(review): both runs below build the same TfidfVectorizer and call
  # train_model / evaluate_model with identical arguments. Nothing visible in
  # this cell switches lemmatization on, so unless those helpers read some
  # external state the two runs measure the same pipeline. Wire the
  # lemmatization switch (e.g. a lemmatizing tokenizer/preprocessor on the
  # vectorizer, or a helper argument) into the "with" run before trusting the
  # comparison.
  classifiers = {
      'Naive Bayes': MultinomialNB(),
      'Logistic Regression': LogisticRegression(max_iter=1000),
      'Support Vector Machine': SVC(probability=True),
      'Decision Tree': DecisionTreeClassifier(),
      'Random Forest': RandomForestClassifier(),
      'Gradient Boosting': GradientBoostingClassifier(),
  }

  # Metric names are used as dictionary keys for collecting results.
  # Presumably evaluate_model returns exactly these keys -- confirm against
  # its definition; an unknown key raises KeyError in the loop below.
  metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-score', 'ROC-AUC']
  metrics_without_lemmatization = {metric: [] for metric in metric_names}
  metrics_with_lemmatization = {metric: [] for metric in metric_names}

  # (mode used in the progress message, dict that collects that run's scores)
  runs = (
      ('without', metrics_without_lemmatization),
      ('with', metrics_with_lemmatization),
  )

  for name, classifier in classifiers.items():
      for mode, collected in runs:
          print(f"Evaluating {name} {mode} lemmatization...")
          # A new vectorizer instance is created for every run.
          vectorizer = TfidfVectorizer(max_features=1000)
          trained_classifier, _, authors = train_model(classifier, vectorizer)
          evaluation_results = evaluate_model(trained_classifier, vectorizer, authors)
          for metric, value in evaluation_results.items():
              collected[metric].append(value)

  # Plotting
  plt.figure(figsize=(10, 6))
  # Materialise the x-axis labels once instead of passing a dict view to
  # every plot call.
  classifier_names = list(classifiers)
  for metric in metric_names:
      plt.plot(classifier_names, metrics_without_lemmatization[metric],
               marker='o', label=f'{metric} without Lemmatization')
      # Dashed line so the paired series stay distinguishable when they overlap.
      plt.plot(classifier_names, metrics_with_lemmatization[metric],
               marker='o', linestyle='--', label=f'{metric} with Lemmatization')
  plt.title('Performance Comparison with and without Lemmatization')
  plt.xlabel('Classifier')
  plt.ylabel('Score')
  plt.xticks(rotation=45)
  plt.legend()
  plt.grid(True)
  plt.tight_layout()
  plt.show()