In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import cross_val_score
# Load the cleaned dataset
file_path = './cleaned_tweets.csv'
tweets_data = pd.read_csv(file_path)

# Split the data into training and testing sets
train_data, test_data, train_labels, test_labels = train_test_split(
    tweets_data['cleaned_text'], 
    tweets_data['airline_sentiment'], 
    test_size=0.1, 
    random_state=3, 
    stratify=tweets_data['airline_sentiment']  # Ensure class distribution is preserved
)

# Print class distribution
print("Training set class distribution:")
print(train_labels.value_counts())
print("\nTest set class distribution:")
print(test_labels.value_counts())


# Initialize TF-IDF vectorizer
tfidf = TfidfVectorizer(max_features=5000)

# Fit and transform the training data
X_train = tfidf.fit_transform(train_data)
X_test = tfidf.transform(test_data)


# Initialize the model
svm_model = SGDClassifier(
    loss="hinge",
    penalty="l2",
    alpha=1e-4,
    max_iter=100,
    tol=None,
    shuffle=True,
    random_state=3
)

# Train the model
svm_model.fit(X_train, train_labels)


# Perform 10-fold cross-validation
# cv_scores = cross_val_score(svm_model, X_train, train_labels, cv=10, scoring="accuracy")
# Perform 10-fold cross-validation
cv_scores = cross_val_score(svm_model, X_train, train_labels, cv=10, scoring="accuracy")

# Calculate fold statistics
mean_accuracy = cv_scores.mean()
min_accuracy = cv_scores.min()
max_accuracy = cv_scores.max()
std_accuracy = cv_scores.std()

# Output cross-validation statistics
print("10-Fold Cross-Validation Results:")
print(f"Mean Accuracy: {mean_accuracy:.4f}")
print(f"Minimum Accuracy: {min_accuracy:.4f}")
print(f"Maximum Accuracy: {max_accuracy:.4f}")
print(f"Standard Deviation: {std_accuracy:.4f}")

# Calculate mean validation accuracy
validation_accuracy = cv_scores.mean()
print(f"10-Fold Cross-Validation Accuracy: {validation_accuracy:.4f}")

# Calculate mean validation accuracy
validation_accuracy = cv_scores.mean()
print(f"10-Fold Cross-Validation Accuracy: {validation_accuracy:.4f}")
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Predict on test data
test_predictions = svm_model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f"Test Set Accuracy: {test_accuracy:.4f}")

# Print classification report
print("\nClassification Report:")
print(classification_report(test_labels, test_predictions))

# Print confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(test_labels, test_predictions))


Training set class distribution:
negative    8170
neutral     2663
positive    1995
Name: airline_sentiment, dtype: int64

Test set class distribution:
negative    908
neutral     296
positive    222
Name: airline_sentiment, dtype: int64
10-Fold Cross-Validation Results:
Mean Accuracy: 0.8009
Minimum Accuracy: 0.7847
Maximum Accuracy: 0.8168
Standard Deviation: 0.0088
10-Fold Cross-Validation Accuracy: 0.8009
10-Fold Cross-Validation Accuracy: 0.8009
Test Set Accuracy: 0.8114

Classification Report:
              precision    recall  f1-score   support

    negative       0.84      0.94      0.89       908
     neutral       0.68      0.51      0.58       296
    positive       0.81      0.69      0.75       222

    accuracy                           0.81      1426
   macro avg       0.78      0.71      0.74      1426
weighted avg       0.80      0.81      0.80      1426


Confusion Matrix:
[[851  44  13]
 [120 152  24]
 [ 40  28 154]]


# Optuna Find the Best Combination 

In [3]:
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')  # 確保多語言支持


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [4]:
import optuna
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import re
from nltk.stem import WordNetLemmatizer
import pandas as pd

# Initialize lemmatizer and contractions
lemmatizer = WordNetLemmatizer()
contractions = {"don't": "do not", "can't": "cannot", "i'm": "i am"}

# Define a cleaning function with toggleable options
def clean_text_variant(
    text, remove_mentions=True, remove_urls=True, lemmatize=True, expand_contractions=True
):
    text = re.sub(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", "", text)  # Remove emails
    text = re.sub(r"\$\d+(?:\.\d{2})?", "", text)  # Remove currency
    text = re.sub(r"[^\w\s,]", "", text, flags=re.UNICODE)  # Remove emojis
    text = re.sub(r"&[a-z]+;", "", text)  # Remove HTML escaped chars
    text = re.sub(r"[^\w\s]", "", text)  # Remove punctuation
    text = re.sub(r"[\u4e00-\u9fff]", "", text)  # Remove Chinese characters
    if remove_mentions:
        text = re.sub(r"@\w+", "", text)  # Remove mentions
    if remove_urls:
        text = re.sub(r"http[s]?://\S+", "", text)  # Remove URLs
    if expand_contractions:
        text = " ".join([contractions[word] if word in contractions else word for word in text.split()])
    text = re.sub(r"(.)\1{2,}", r"\1", text)  # Remove repeated characters
    text = " ".join([word for word in text.split() if 2 <= len(word) <= 15])  # Limit word length
    if lemmatize:
        text = " ".join([lemmatizer.lemmatize(word) for word in text.split()])  # Lemmatize
    text = re.sub(r"\s+", " ", text).strip()  # Remove extra spaces
    return text

# Load dataset
file_path = './cleaned_tweets.csv'
tweets_data = pd.read_csv(file_path)

# Store detailed results for all combinations
detailed_results = []

def objective(trial):
    # Suggest cleaning steps
    remove_mentions = trial.suggest_categorical("remove_mentions", [True, False])
    remove_urls = trial.suggest_categorical("remove_urls", [True, False])
    lemmatize = trial.suggest_categorical("lemmatize", [True, False])
    expand_contractions = trial.suggest_categorical("expand_contractions", [True, False])
    
    # Apply cleaning with the suggested configuration
    tweets_data['variant_cleaned_text'] = tweets_data['text'].apply(
        lambda x: clean_text_variant(
            x,
            remove_mentions=remove_mentions,
            remove_urls=remove_urls,
            lemmatize=lemmatize,
            expand_contractions=expand_contractions,
        )
    )
    
    # TF-IDF vectorization
    tfidf = TfidfVectorizer(max_features=5000)
    X = tfidf.fit_transform(tweets_data['variant_cleaned_text'])
    y = tweets_data['airline_sentiment']
    
    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=3, stratify=y
    )
    
    # Train a model
    svm_model = SGDClassifier(
        loss="hinge", penalty="l2", alpha=1e-4, max_iter=100, tol=None, shuffle=True, random_state=3
    )
    svm_model.fit(X_train, y_train)
    
    # Perform 10-fold cross-validation
    cv_scores = cross_val_score(svm_model, X_train, y_train, cv=10, scoring="accuracy")
    mean_accuracy = cv_scores.mean()
    min_accuracy = cv_scores.min()
    max_accuracy = cv_scores.max()
    std_accuracy = cv_scores.std()

    # Print cross-validation details (optional for debugging)
    print(f"Cross-Validation Accuracy - Mean: {mean_accuracy:.4f}, Min: {min_accuracy:.4f}, Max: {max_accuracy:.4f}, Std: {std_accuracy:.4f}")
    
    # Evaluate on the test set
    y_pred = svm_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    confusion = confusion_matrix(y_test, y_pred)
    
    # Store detailed results
    detailed_results.append({
        "trial_params": trial.params,
        "cross_val_accuracy": mean_accuracy,
        "min_accuracy": min_accuracy,
        "max_accuracy": max_accuracy,
        "std_accuracy": std_accuracy,
        "test_accuracy": test_accuracy,
        "classification_report": report,
        "confusion_matrix": confusion,
    })
    
    return mean_accuracy

# Use Optuna to optimize
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=256, n_jobs=-1)

# Display the best results
best_trial = study.best_trial
best_params = best_trial.params

print("\nBest Cleaning Steps:")
print(best_params)
print(f"Best Cross-Validation Accuracy: {study.best_value:.4f}")

# Extract the best trial's detailed results
best_result = next(
    res for res in detailed_results if res["trial_params"] == best_params
)

# Add statistics for cross-validation accuracy range
print("\nCross-Validation Accuracy Details for Best Cleaning Steps:")
print(f"Mean Accuracy: {best_result['cross_val_accuracy']:.4f}")
print(f"Min Accuracy: {best_result['min_accuracy']:.4f}")
print(f"Max Accuracy: {best_result['max_accuracy']:.4f}")
print(f"Std Dev of Accuracy: {best_result['std_accuracy']:.4f}")

# Display the best test set results
print("\nBest Test Set Accuracy:")
print(best_result["test_accuracy"])
print("\nBest Classification Report:")
print(pd.DataFrame(best_result["classification_report"]).T)
print("\nBest Confusion Matrix:")
print(best_result["confusion_matrix"])



[I 2024-12-06 17:10:27,819] A new study created in memory with name: no-name-fc3f7395-7707-47c1-ad71-9f69829f5125


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:11:41,412] Trial 1 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 1 with value: 0.7992660532609925.
[I 2024-12-06 17:11:42,679] Trial 3 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': False}. Best is trial 1 with value: 0.7992660532609925.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:11:47,910] Trial 2 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': True}. Best is trial 1 with value: 0.7992660532609925.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:11:48,887] Trial 6 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': True}. Best is trial 1 with value: 0.7992660532609925.
[I 2024-12-06 17:11:49,252] Trial 0 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': False}. Best is trial 1 with value: 0.7992660532609925.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077

Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:11:57,113] Trial 7 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': True}. Best is trial 1 with value: 0.7992660532609925.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:11:59,637] Trial 10 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': False}. Best is trial 1 with value: 0.7992660532609925.
[I 2024-12-06 17:12:00,283] Trial 12 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': False}. Best is trial 1 with value: 0.7992660532609925.
[I 2024-12-06 17:12:00,290] Trial 14 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 1 with value: 0.7992660532609925.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:12:02,578] Trial 18 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': False}. Best is trial 1 with value: 0.7992660532609925.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091



[I 2024-12-06 17:12:03,700] Trial 5 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:12:04,327] Trial 4 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:12:10,799] Trial 19 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:12:16,820] Trial 8 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:12:17,260] Trial 9 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:12:19,000] Trial 11 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:12:19,965] Trial 13 finished 

Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:12:54,056] Trial 21 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:13:03,114] Trial 22 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:06,916] Trial 20 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077



[I 2024-12-06 17:13:18,653] Trial 23 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:20,198] Trial 24 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:20,460] Trial 26 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:21,933] Trial 27 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:13:30,343] Trial 31 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:31,449] Trial 32 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:32,532] Trial 30 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:13:36,132] Trial 25 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:13:43,603] Trial 29 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091



[I 2024-12-06 17:13:43,923] Trial 28 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:13:52,301] Trial 35 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:13:56,332] Trial 34 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:56,784] Trial 39 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:57,103] Trial 37 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:13:57,277] Trial 33 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:14:00,083] Trial 38 finished with v

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:14:01,466] Trial 36 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:14:04,684] Trial 40 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:14:17,168] Trial 41 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:14:17,615] Trial 42 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:14:50,866] Trial 43 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:14:53,139] Trial 44 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:14:59,047] Trial 46 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:15:02,888] Trial 45 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:15:07,349] Trial 47 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:08,360] Trial 48 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:08,366] Trial 49 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091



[I 2024-12-06 17:15:22,291] Trial 50 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:15:27,699] Trial 53 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:28,046] Trial 60 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:28,251] Trial 51 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:28,770] Trial 61 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:15:29,787] Trial 52 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:31,528] Trial 54 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:31,749] Trial 62 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:33,683] Trial 55 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:34,508] Trial 57 finish

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:15:58,844] Trial 64 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:15:59,500] Trial 63 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:16:08,158] Trial 67 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:16:10,593] Trial 65 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:16:12,715] Trial 66 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:13,215] Trial 69 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:13,559] Trial 68 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:16:26,825] Trial 77 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091



[I 2024-12-06 17:16:34,993] Trial 75 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:16:36,563] Trial 78 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:36,808] Trial 70 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:16:37,358] Trial 74 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:16:37,564] Trial 79 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:37,860] Trial 80 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:38,217] Trial 81 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:38,949] Trial 72 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:39,708] Trial 76 fi

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:16:43,144] Trial 84 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:44,351] Trial 83 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:16:44,704] Trial 71 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:17:07,978] Trial 73 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:17:15,563] Trial 85 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077
Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:17:44,321] Trial 88 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:17:46,264] Trial 89 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:17:55,832] Trial 86 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:17:57,491] Trial 87 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:18:09,644] Trial 90 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.7993, Min: 0.7902, Max: 0.8137, Std: 0.0077


[I 2024-12-06 17:18:29,610] Trial 91 finished with value: 0.7992660532609925 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': False, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:34,579] Trial 94 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:38,460] Trial 98 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:42,046] Trial 102 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:43,269] Trial 93 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:18:43,742] Trial 101 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:44,643] Trial 92 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:18:45,096] Trial 97 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:46,308] Trial 99 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:50,968] Trial 96 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:18:52,312] Trial 103 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:18:54,249] Trial 100 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:18:54,983] Trial 95 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:18:55,030] Trial 105 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:19:01,750] Trial 107 finished wi

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:19:28,347] Trial 110 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:19:39,653] Trial 108 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:19:44,448] Trial 109 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:20:11,210] Trial 111 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:20:11,880] Trial 112 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:20:18,302] Trial 113 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:20:23,228] Trial 114 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:20:27,944] Trial 116 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:20:41,271] Trial 115 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:20:42,295] Trial 119 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:20:43,037] Trial 124 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:20:45,483] Trial 118 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:20:45,660] Trial 117 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:20:45,683] Trial 125 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:20:48,233] Trial 126 finished

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:21:37,532] Trial 127 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:21:43,246] Trial 128 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:22:16,492] Trial 129 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:22:24,558] Trial 130 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:22:37,073] Trial 133 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:22:38,184] Trial 132 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:22:45,889] Trial 135 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:22:46,977] Trial 134 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:22:48,685] Trial 131 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:23:06,456] Trial 138 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:23:10,402] Trial 141 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:23:12,513] Trial 136 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:23:16,027] Trial 142 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:23:18,106] Trial 140 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:23:18,756] Trial 144 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:23:20,240] Trial 137 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:23:21,592] Trial 139 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:23:22,670] Trial 143 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:23:25,035] Trial 145 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:23:33,991] Trial 146 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:23:37,424] Trial 148 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:05,112] Trial 147 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:15,271] Trial 150 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:15,491] Trial 149 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:18,468] Trial 152 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:20,078] Trial 151 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:26,337] Trial 153 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:28,667] Trial 154 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:35,569] Trial 155 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:37,604] Trial 157 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:41,518] Trial 156 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:47,274] Trial 159 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:55,110] Trial 163 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:55,243] Trial 158 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:55,669] Trial 167 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:55,774] Trial 161 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:57,499] Trial 164 fini

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:24:57,719] Trial 162 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:58,454] Trial 165 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:24:59,679] Trial 160 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:25:09,969] Trial 166 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:25:38,085] Trial 168 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:25:40,234] Trial 171 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:25:41,537] Trial 172 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:25:42,184] Trial 169 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:25:42,906] Trial 170 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:25:45,070] Trial 173 finish

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:25:53,069] Trial 174 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:25:56,899] Trial 176 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:26:09,118] Trial 177 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:26:14,415] Trial 178 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:26:14,972] Trial 175 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:26:25,695] Trial 181 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:26:26,678] Trial 179 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:26:30,446] Trial 180 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:26:33,133] Trial 184 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:26:33,282] Trial 183 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:26:34,938] Trial 182 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:26:35,290] Trial 186 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': False, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:26:35,642] Trial 185 finish

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:26:55,515] Trial 189 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:26:55,561] Trial 190 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:27:05,956] Trial 193 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:27:13,736] Trial 188 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:27:16,394] Trial 194 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:27:16,890] Trial 195 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:27:21,676] Trial 191 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:27:23,504] Trial 192 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:27:23,909] Trial 196 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:27:33,572] Trial 197 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:27:48,653] Trial 199 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:27:53,257] Trial 198 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:28:02,844] Trial 200 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:28:09,053] Trial 202 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:28:09,892] Trial 203 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:28:09,959] Trial 206 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:28:10,571] Trial 205 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:28:11,355] Trial 209 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:28:11,401] Trial 204 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:28:12,755] Trial 201 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:28:16,533] Trial 210 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:28:17,915] Trial 207 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:28:21,883] Trial 208 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091



[I 2024-12-06 17:28:43,494] Trial 213 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:28:45,464] Trial 212 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:29:11,347] Trial 216 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.



Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:29:19,868] Trial 211 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:29:28,020] Trial 214 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:29:28,432] Trial 215 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:29:28,540] Trial 217 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:29:51,815] Trial 218 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:08,809] Trial 225 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:09,029] Trial 219 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:30:09,094] Trial 220 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:30:09,304] Trial 228 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:11,830] Trial 222 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:17,196] Trial 223 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:19,623] Trial 230 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:20,648] Trial 227 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:30:20,695] Trial 221 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:30:21,493] Trial 224 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:30:23,691] Trial 226 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:30:24,372] Trial 232 fini

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:32,984] Trial 229 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': False}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:30:43,508] Trial 237 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:30:46,074] Trial 233 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091

Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:01,676] Trial 234 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:02,627] Trial 238 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:07,879] Trial 235 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:08,624] Trial 236 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:25,872] Trial 239 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:27,121] Trial 242 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:30,592] Trial 241 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:32,179] Trial 240 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:32,831] Trial 243 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:33,466] Trial 245 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:35,214] Trial 244 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:35,847] Trial 250 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:36,388] Trial 251 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:36,879] Trial 246 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:36,924] Trial 247 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:37,034] Trial 249 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:37,227] Trial 248 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:37,547] Trial 252 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091
Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091


[I 2024-12-06 17:31:37,718] Trial 253 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:37,772] Trial 254 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.
[I 2024-12-06 17:31:39,421] Trial 255 finished with value: 0.8025395700161599 and parameters: {'remove_mentions': True, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}. Best is trial 5 with value: 0.8025395700161599.


Cross-Validation Accuracy - Mean: 0.8025, Min: 0.7894, Max: 0.8168, Std: 0.0091

Best Cleaning Steps:
{'remove_mentions': False, 'remove_urls': True, 'lemmatize': True, 'expand_contractions': True}
Best Cross-Validation Accuracy: 0.8025

Cross-Validation Accuracy Details for Best Cleaning Steps:
Mean Accuracy: 0.8025
Min Accuracy: 0.7894
Max Accuracy: 0.8168
Std Dev of Accuracy: 0.0091

Best Test Set Accuracy:
0.8092566619915849

Best Classification Report:
              precision    recall  f1-score      support
negative       0.841223  0.939427  0.887617   908.000000
neutral        0.672566  0.513514  0.582375   296.000000
positive       0.801075  0.671171  0.730392   222.000000
accuracy       0.809257  0.809257  0.809257     0.809257
macro avg      0.771622  0.708037  0.733462  1426.000000
weighted avg   0.799964  0.809257  0.799780  1426.000000

Best Confusion Matrix:
[[853  43  12]
 [119 152  25]
 [ 42  31 149]]


# Best Combination 

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd
import re
from nltk.stem import WordNetLemmatizer

# Initialize lemmatizer and contractions
lemmatizer = WordNetLemmatizer()
contractions = {"don't": "do not", "can't": "cannot", "i'm": "i am"}

# Define the cleaning function with the best parameters
def clean_text_variant(
    text, remove_mentions=False, remove_urls=True, lemmatize=True, expand_contractions=False
):
    if remove_mentions:
        text = re.sub(r"@\w+", "", text)  # Remove mentions
    if remove_urls:
        text = re.sub(r"http[s]?://\S+", "", text)  # Remove URLs
    text = re.sub(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", "", text)  # Remove emails
    text = re.sub(r"\$\d+(?:\.\d{2})?", "", text)  # Remove currency
    text = re.sub(r"[^\w\s,]", "", text, flags=re.UNICODE)  # Remove emojis
    text = re.sub(r"&[a-z]+;", "", text)  # Remove HTML escaped chars
    text = re.sub(r"[^\w\s]", "", text)  # Remove punctuation
    text = re.sub(r"[\u4e00-\u9fff]", "", text)  # Remove Chinese characters
    if expand_contractions:
        text = " ".join([contractions[word] if word in contractions else word for word in text.split()])
    text = re.sub(r"(.)\1{2,}", r"\1", text)  # Remove repeated characters
    text = " ".join([word for word in text.split() if 2 <= len(word) <= 15])  # Limit word length
    if lemmatize:
        text = " ".join([lemmatizer.lemmatize(word) for word in text.split()])  # Lemmatize
    text = re.sub(r"\s+", " ", text).strip()  # Remove extra spaces
    return text

# Load dataset
file_path = './cleaned_tweets.csv'
tweets_data = pd.read_csv(file_path)

# Apply the best cleaning steps
tweets_data['cleaned_text'] = tweets_data['text'].apply(clean_text_variant)

# TF-IDF vectorization
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(tweets_data['cleaned_text'])
y = tweets_data['airline_sentiment']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=3, stratify=y
)

# Train a model
svm_model = SGDClassifier(
    loss="hinge", penalty="l2", alpha=1e-4, max_iter=100, tol=None, shuffle=True, random_state=3
)
svm_model.fit(X_train, y_train)

# Evaluate on the test set
y_pred = svm_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=False)
confusion = confusion_matrix(y_test, y_pred)

# Output results
print(f"Test Set Accuracy: {test_accuracy:.6f}\n")
print("Classification Report:")
print(report)
print("\nConfusion Matrix:")
print(confusion)


Test Set Accuracy: 0.809958

Classification Report:
              precision    recall  f1-score   support

    negative       0.84      0.94      0.89       908
     neutral       0.67      0.51      0.58       296
    positive       0.81      0.68      0.74       222

    accuracy                           0.81      1426
   macro avg       0.77      0.71      0.73      1426
weighted avg       0.80      0.81      0.80      1426


Confusion Matrix:
[[853  44  11]
 [119 152  25]
 [ 41  31 150]]
