# **ML Classifiers with Tf-Idf Vectorization & Oversampling**

In [None]:
try:
    from google.colab import drive
    drive.mount('/content/drive/')
    %cd 'drive/My Drive/WEB_MINING_PROJECT/ML_CLASSIFIERS'
except ImportError as e:
    pass

Mounted at /content/drive/
/content/drive/.shortcut-targets-by-id/1EZ3t7HMzSmWcvRl80eFHiMW9UP3zMZxI/WEB_MINING_PROJECT/ML_CLASSIFIERS


In [20]:
! pip install optuna
! pip install imblearn



In [21]:
import pandas as pd
import nltk
import random
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer

# Download NLTK resources
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score, make_scorer
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler, ADASYN, SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import optuna

#not needed

from sklearn.utils.multiclass import unique_labels
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_predict, GridSearchCV
from sklearn.linear_model import LogisticRegression





[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\saman\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\saman\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\saman\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Loading Downsampled Dataset

In [22]:
# Load the dataset
df = pd.read_csv("downsampled_dataset_10k.csv")

## **Text Preprocessing**

In [23]:
# Actually we only need the content(i.e. review) & the sentiment for classification
df = df[["review_body", "sentiment"]]
df.head(10)

Unnamed: 0,review_body,sentiment
0,Great tv signal very good buy.I like it,Positive
1,Good sound. Fine Material. Simply perfect!,Positive
2,looks good as shown lots of base. BTW fast shi...,Positive
3,This player is totally awesome! I has all the ...,Positive
4,Very quick delivery and high quality. Sound is...,Positive
5,Awesome,Positive
6,"Extremely nice quality, especially for the pri...",Positive
7,Identical to the one from Verizon that we some...,Positive
8,These are so handy and sound really awesome! P...,Positive
9,Works great!,Positive


In [24]:
# Text normalization
def normalize_text(text):
    # Convert text to lowercase
    text = text.lower()
    return text

# Removing punctuation (not used)
def remove_punctuation(text):
    text = str(text)
    punctuations = string.punctuation
    return text.translate(str.maketrans('', '', punctuations))

# Removing special characters (not used)
def remove_spec_char(text):
    text = str(text)
    text = re.sub('[^a-zA-Z0-9]', ' ', text)
    text = re.sub('\s+', ' ', text)
    return text

# Tokenization & Stopword removal
def remove_stopwords(text):
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    return ' '.join([word for word in tokens if word not in stop_words])

# Lemmatization (not used)
def lemmatize_text(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()
    return ' '.join([lemmatizer.lemmatize(word) for word in tokens])
    #return lemmatized_tokens

# Stemming
def stem_words(text):
    ps = PorterStemmer()
    return ' '.join([ps.stem(word) for word in text.split()])


In [25]:
# Apply preprocessing steps

# lowercase words, remove punctuation & special characters
df["review_body"] = df["review_body"].apply(normalize_text)
#df["review_body"] = df["review_body"].apply(remove_punctuation)
#df["review_body"] = df["review_body"].apply(remove_spec_char)

#tokenization and stopwords removal, stemming or lemmatization
df["review_body"] = df["review_body"].apply(remove_stopwords)
#df["review_body"] = df["review_body"].apply(lemmatize_text)
df["review_body"] = df["review_body"].apply(stem_words)

In [26]:
df.head(10)

Unnamed: 0,review_body,sentiment
0,great tv signal good buy.i like,Positive
1,good sound . fine materi . simpli perfect !,Positive
2,look good shown lot base . btw fast ship recei...,Positive
3,"player total awesom ! featur need , ( somehow ...",Positive
4,quick deliveri high qualiti . sound great cabl...,Positive
5,awesom,Positive
6,"extrem nice qualiti , especi price . boyfriend...",Positive
7,"ident one verizon somehow misplac , fraction c...",Positive
8,handi sound realli awesom ! perfect phone game...,Positive
9,work great !,Positive


## Train/Validation/Test Split

In [27]:
# split into train, val and test data
X_train, X_test = train_test_split(df, test_size=0.2, random_state = 42, stratify=df['sentiment'])
X_val, X_test = train_test_split(X_test, test_size=0.5, random_state = 42, stratify=X_test['sentiment'])


y_train = X_train['sentiment']
y_val = X_val['sentiment']
y_test = X_test['sentiment']


X_train.drop(columns = ['sentiment'], inplace= True)
X_val.drop(columns = ['sentiment'], inplace= True)
X_test.drop(columns = ['sentiment'], inplace= True)
X_train = X_train['review_body']
X_val = X_val['review_body']
X_test = X_test['review_body']


print (f"y_train: {y_train.shape}/ x_train: {X_train.shape}")
print (f"y_train: {y_val.shape}/ x_train: {X_val.shape}")
print (f"y_test: {y_test.shape}/ x_test: {X_test.shape}")



y_train: (7999,)/ x_train: (7999,)
y_train: (1000,)/ x_train: (1000,)
y_test: (1000,)/ x_test: (1000,)


In [29]:

# Baseline for comparison: predict positive for every review
majority_class = "Positive"  
y_pred_baseline = [majority_class for _ in range(len(y_test))]

# Calculate F1-score for the baseline
f1_weighted_baseline = f1_score(y_test, y_pred_baseline, average='weighted')
f1_macro_baseline = f1_score(y_test, y_pred_baseline, average='macro')

print(f"Weighted F1-score baseline: {f1_weighted_baseline}")
print(f"Macro F1-score baseline: {f1_macro_baseline}")


Weighted F1-score baseline: 0.6604299489506523
Macro F1-score baseline: 0.2885233503497826


## Tf-Idf Vectorization  

In [None]:
# Vectorize the text data using TF-IDF
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_val_tfidf = tfidf_vectorizer.transform(X_val)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


## Balancing Methods (used in Pipeline)

In [None]:
# Define oversampling and undersampling techniques
oversampler = RandomOverSampler(random_state=42)
undersampler = RandomUnderSampler(random_state=42)
balancing = undersampler

## Naive Bayes

In [None]:
# Define the objective function for hyperparameter optimization
def objective_nb(trial):
    alpha = trial.suggest_float('alpha', 1e-3, 1e3, log = True)
    clf = MultinomialNB(alpha=alpha)
    clf.fit(X_train_tfidf_resampled, y_train_resampled)
    f1 = f1_score(y_val, clf.predict(X_val_tfidf), average="weighted")
    return f1


# Define the hyperparameter optimization study
study_nb = optuna.create_study(direction='maximize')

# Define Naive Bayes pipeline with oversampling
nb_pipeline = Pipeline([
    ('balancing', balancing ),
    ('classifier', MultinomialNB())
])


# Fit and optimize Naive Bayes pipeline
nb_pipeline.fit(X_train_tfidf, y_train)
X_train_tfidf_resampled, y_train_resampled = balancing.fit_resample(X_train_tfidf, y_train)
study_nb.optimize(objective_nb, n_trials=100)



# Print the best hyperparameters and corresponding accuracies
print("Best Naive Bayes Hyperparameters:", study_nb.best_params)
print("Naive Bayes F1 Score:", study_nb.best_value)



[I 2024-05-17 01:36:54,042] A new study created in memory with name: no-name-41685eb1-0534-44d1-9469-fdc246e041fb
[I 2024-05-17 01:36:54,132] Trial 0 finished with value: 0.703905504212597 and parameters: {'alpha': 10.029664438753695}. Best is trial 0 with value: 0.703905504212597.
[I 2024-05-17 01:36:54,148] Trial 1 finished with value: 0.7258671528459518 and parameters: {'alpha': 652.5640778399356}. Best is trial 1 with value: 0.7258671528459518.
[I 2024-05-17 01:36:54,166] Trial 2 finished with value: 0.6695934210526315 and parameters: {'alpha': 0.05098742194981309}. Best is trial 1 with value: 0.7258671528459518.
[I 2024-05-17 01:36:54,186] Trial 3 finished with value: 0.7262660496383572 and parameters: {'alpha': 524.1392540483456}. Best is trial 3 with value: 0.7262660496383572.
[I 2024-05-17 01:36:54,205] Trial 4 finished with value: 0.6646316737151898 and parameters: {'alpha': 0.03664979670855345}. Best is trial 3 with value: 0.7262660496383572.
[I 2024-05-17 01:36:54,220] Trial

[I 2024-05-17 01:36:55,183] Trial 48 finished with value: 0.7248714442159483 and parameters: {'alpha': 150.37284565756605}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:55,202] Trial 49 finished with value: 0.6524317356958662 and parameters: {'alpha': 0.016384693199358955}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:55,232] Trial 50 finished with value: 0.7251350224994982 and parameters: {'alpha': 980.156035043917}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:55,249] Trial 51 finished with value: 0.726562274575382 and parameters: {'alpha': 280.4218621339425}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:55,273] Trial 52 finished with value: 0.7261719136867282 and parameters: {'alpha': 429.28470026387646}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:55,300] Trial 53 finished with value: 0.7194750890114234 and parameters: {'alpha': 72.56973157889675}. Best is trial 

[I 2024-05-17 01:36:56,325] Trial 96 finished with value: 0.7248714442159483 and parameters: {'alpha': 192.9500306543533}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:56,365] Trial 97 finished with value: 0.7262660496383572 and parameters: {'alpha': 548.511035180162}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:56,386] Trial 98 finished with value: 0.7251350224994982 and parameters: {'alpha': 977.7407150828973}. Best is trial 21 with value: 0.7265712133185832.
[I 2024-05-17 01:36:56,416] Trial 99 finished with value: 0.726562274575382 and parameters: {'alpha': 253.4294598643823}. Best is trial 21 with value: 0.7265712133185832.


Best Naive Bayes Hyperparameters: {'alpha': 362.085767198368}
Naive Bayes F1 Score: 0.7265712133185832


### Evaluating model on Test set with best parameters

In [None]:
# Get the best hyperparameters
best_alpha = study_nb.best_params['alpha']

# Train the model with the best hyperparameters
nb_model = MultinomialNB(alpha=best_alpha)
nb_model.fit(X_train_tfidf_resampled, y_train_resampled)

# Evaluate the final model on the test set
nb_pred = nb_model.predict(X_test_tfidf)

f1_test = f1_score(y_test, nb_pred, average='weighted')
print("Naive Bayes F1 score on test set:", f1_test)
print("\nNB Classification Report:")
print(classification_report(y_test, nb_pred))

Naive Bayes F1 score on test set: 0.7223285974416853

NB Classification Report:
              precision    recall  f1-score   support

    Negative       0.53      0.65      0.58       168
     Neutral       0.16      0.54      0.25        69
    Positive       0.93      0.69      0.80       763

    accuracy                           0.68      1000
   macro avg       0.54      0.63      0.54      1000
weighted avg       0.81      0.68      0.72      1000



## SVM

In [None]:
# Define the objective function for hyperparameter optimization
def objective_svm(trial):
    C = trial.suggest_float('C', 1e-2, 1e3, log = True)
    gamma = trial.suggest_float('gamma', 1e-2, 1e3, log = True)
    clf = SVC(C=C, gamma=gamma, random_state=42)
    clf.fit(X_train_tfidf_resampled, y_train_resampled)
    f1 = f1_score(y_val, clf.predict(X_val_tfidf), average="weighted")
    return f1

# Define the hyperparameter optimization study
study_svm = optuna.create_study(direction='maximize')

# Define SVM pipeline with oversampling
svm_pipeline = Pipeline([
    ('balancing', balancing),
    ('classifier', SVC(random_state=42))
])

# Fit and optimize SVM pipeline
svm_pipeline.fit(X_train_tfidf, y_train)
X_train_tfidf_resampled, y_train_resampled = balancing.fit_resample(X_train_tfidf, y_train)
study_svm.optimize(objective_svm, n_trials=100)


# Print the best hyperparameters and corresponding accuracies
print("Best SVM Hyperparameters:", study_svm.best_params)
print("SVM F1 Score:", study_svm.best_value)



[I 2024-05-17 01:37:44,851] A new study created in memory with name: no-name-dab9881c-d21a-4237-ad8a-820228b8ffbd
[I 2024-05-17 01:37:47,980] Trial 0 finished with value: 0.10167357192217474 and parameters: {'C': 0.2417124620110078, 'gamma': 7.284492352331535}. Best is trial 0 with value: 0.10167357192217474.
[I 2024-05-17 01:37:49,772] Trial 1 finished with value: 0.14502938739917154 and parameters: {'C': 4.225113027888637, 'gamma': 191.85844859256895}. Best is trial 1 with value: 0.14502938739917154.
[I 2024-05-17 01:37:51,495] Trial 2 finished with value: 0.10167357192217474 and parameters: {'C': 0.21318647594739587, 'gamma': 15.945659438348027}. Best is trial 1 with value: 0.14502938739917154.
[I 2024-05-17 01:37:53,267] Trial 3 finished with value: 0.6946415272917568 and parameters: {'C': 407.50319594661806, 'gamma': 0.021096389957872014}. Best is trial 3 with value: 0.6946415272917568.
[I 2024-05-17 01:37:55,025] Trial 4 finished with value: 0.6963924030292153 and parameters: {'C

[I 2024-05-17 01:38:58,362] Trial 41 finished with value: 0.7175412000689477 and parameters: {'C': 20.65515391742699, 'gamma': 0.08515745589658238}. Best is trial 16 with value: 0.7180119881366218.
[I 2024-05-17 01:39:00,141] Trial 42 finished with value: 0.15025439036071642 and parameters: {'C': 5.86042490982766, 'gamma': 32.05589095699013}. Best is trial 16 with value: 0.7180119881366218.
[I 2024-05-17 01:39:01,808] Trial 43 finished with value: 0.7031357584568804 and parameters: {'C': 84.12602860349212, 'gamma': 0.05331030185453462}. Best is trial 16 with value: 0.7180119881366218.
[I 2024-05-17 01:39:03,484] Trial 44 finished with value: 0.7117615209054995 and parameters: {'C': 18.08537635213769, 'gamma': 0.15334605343028357}. Best is trial 16 with value: 0.7180119881366218.
[I 2024-05-17 01:39:05,536] Trial 45 finished with value: 0.14502938739917154 and parameters: {'C': 38.45404625283643, 'gamma': 966.5339017855636}. Best is trial 16 with value: 0.7180119881366218.
[I 2024-05-17

[I 2024-05-17 01:40:10,339] Trial 83 finished with value: 0.7231435562099351 and parameters: {'C': 3.244943670359999, 'gamma': 0.32275268372223187}. Best is trial 83 with value: 0.7231435562099351.
[I 2024-05-17 01:40:12,029] Trial 84 finished with value: 0.7182871896686763 and parameters: {'C': 4.204239116490919, 'gamma': 0.3533330203396009}. Best is trial 83 with value: 0.7231435562099351.
[I 2024-05-17 01:40:13,657] Trial 85 finished with value: 0.721199539913134 and parameters: {'C': 3.2571596076152294, 'gamma': 0.3010947255414965}. Best is trial 83 with value: 0.7231435562099351.
[I 2024-05-17 01:40:15,306] Trial 86 finished with value: 0.7179363701316671 and parameters: {'C': 3.052589277735507, 'gamma': 0.29953832247819995}. Best is trial 83 with value: 0.7231435562099351.
[I 2024-05-17 01:40:17,051] Trial 87 finished with value: 0.7163171438906627 and parameters: {'C': 2.033428996379969, 'gamma': 0.8164948136799991}. Best is trial 83 with value: 0.7231435562099351.
[I 2024-05-17

Best SVM Hyperparameters: {'C': 3.244943670359999, 'gamma': 0.32275268372223187}
SVM F1 Score: 0.7231435562099351


### Evaluating model on Test set with best parameters

In [None]:
# Get the best hyperparameters and retrain the model on the training set
best_svm_params = study_svm.best_params
svm_model = SVC(**best_svm_params, random_state=42)
svm_model.fit(X_train_tfidf_resampled, y_train_resampled)

# Evaluate the model on the test set
svm_pred = svm_model.predict(X_test_tfidf)

f1_test = f1_score(y_test, svm_pred, average='weighted')
print("SVM F1 score on test set:", f1_test)
print("\nSVM Classification Report:")
print(classification_report(y_test, svm_pred))

SVM F1 score on test set: 0.7167626169819719

SVM Classification Report:
              precision    recall  f1-score   support

    Negative       0.59      0.69      0.64       168
     Neutral       0.14      0.57      0.23        69
    Positive       0.95      0.66      0.78       763

    accuracy                           0.66      1000
   macro avg       0.56      0.64      0.55      1000
weighted avg       0.83      0.66      0.72      1000



## Random Forest

In [None]:

# Define the objective function for hyperparameter optimization

def objective_rf(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 5, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                  min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf,
                                  random_state=42)
    clf.fit(X_train_tfidf_resampled, y_train_resampled)
    f1 = f1_score(y_val, clf.predict(X_val_tfidf), average="weighted")
    return f1

# Define the hyperparameter optimization study
study_rf = optuna.create_study(direction='maximize')


# Define Random Forest pipeline with oversampling
rf_pipeline = Pipeline([
    ('balancing', balancing),
    ('classifier', RandomForestClassifier(random_state=42))
])


# Fit and optimize Random Forest pipeline
rf_pipeline.fit(X_train_tfidf, y_train)
X_train_tfidf_resampled, y_train_resampled = balancing.fit_resample(X_train_tfidf, y_train)
study_rf.optimize(objective_rf, n_trials=100)

# Print the best hyperparameters and corresponding accuracies
print("Best Random Forest Hyperparameters:", study_rf.best_params)
print("Random Forest F1 Score:", study_rf.best_value)


[I 2024-05-17 01:44:30,742] A new study created in memory with name: no-name-e20d8bd6-59cb-472d-b460-d0704f995f12
[I 2024-05-17 01:44:46,876] Trial 0 finished with value: 0.7187586755007453 and parameters: {'n_estimators': 63, 'max_depth': 40, 'min_samples_split': 14, 'min_samples_leaf': 8}. Best is trial 0 with value: 0.7187586755007453.
[I 2024-05-17 01:44:48,595] Trial 1 finished with value: 0.7241253603427285 and parameters: {'n_estimators': 137, 'max_depth': 15, 'min_samples_split': 16, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.7241253603427285.
[I 2024-05-17 01:44:49,784] Trial 2 finished with value: 0.7139346767238228 and parameters: {'n_estimators': 120, 'max_depth': 26, 'min_samples_split': 19, 'min_samples_leaf': 10}. Best is trial 1 with value: 0.7241253603427285.
[I 2024-05-17 01:44:52,662] Trial 3 finished with value: 0.7277510559687963 and parameters: {'n_estimators': 157, 'max_depth': 49, 'min_samples_split': 14, 'min_samples_leaf': 4}. Best is trial 3 with v

[I 2024-05-17 01:45:42,327] Trial 36 finished with value: 0.7282200188367852 and parameters: {'n_estimators': 190, 'max_depth': 15, 'min_samples_split': 13, 'min_samples_leaf': 1}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:45:43,652] Trial 37 finished with value: 0.7287086527234755 and parameters: {'n_estimators': 173, 'max_depth': 9, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:45:44,815] Trial 38 finished with value: 0.7152432726947215 and parameters: {'n_estimators': 185, 'max_depth': 5, 'min_samples_split': 14, 'min_samples_leaf': 3}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:45:46,990] Trial 39 finished with value: 0.7353335888501741 and parameters: {'n_estimators': 199, 'max_depth': 17, 'min_samples_split': 17, 'min_samples_leaf': 5}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:45:49,510] Trial 40 finished with value: 0.7238848167292755 and param

[I 2024-05-17 01:47:06,767] Trial 72 finished with value: 0.7264266863067997 and parameters: {'n_estimators': 152, 'max_depth': 6, 'min_samples_split': 14, 'min_samples_leaf': 3}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:47:09,260] Trial 73 finished with value: 0.7296672291604501 and parameters: {'n_estimators': 135, 'max_depth': 33, 'min_samples_split': 12, 'min_samples_leaf': 4}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:47:12,055] Trial 74 finished with value: 0.7379883870862652 and parameters: {'n_estimators': 160, 'max_depth': 30, 'min_samples_split': 14, 'min_samples_leaf': 3}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:47:15,447] Trial 75 finished with value: 0.7296567845147411 and parameters: {'n_estimators': 171, 'max_depth': 29, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 7 with value: 0.7430127833933715.
[I 2024-05-17 01:47:17,224] Trial 76 finished with value: 0.7243458585927323 and para

Best Random Forest Hyperparameters: {'n_estimators': 158, 'max_depth': 30, 'min_samples_split': 13, 'min_samples_leaf': 3}
Random Forest F1 Score: 0.7430456474111872


### Evaluating model on Test set with best parameters

In [None]:
# Get the best hyperparameters and retrain the model on the training set
best_rf_params = study_rf.best_params
rf_model = RandomForestClassifier(**best_rf_params, random_state=42)
rf_model.fit(X_train_tfidf_resampled, y_train_resampled)

# Evaluate the model on the test set
rf_pred = rf_model.predict(X_test_tfidf)

f1_test = f1_score(y_test, rf_pred, average='weighted')
print("Random Forest F1 score on test set:", f1_test)
print("\nRandom Forest Classification Report:")
print(classification_report(y_test, rf_pred))

Random Forest F1 score on test set: 0.7474046589852413

Random Forest Classification Report:
              precision    recall  f1-score   support

    Negative       0.59      0.62      0.60       168
     Neutral       0.16      0.46      0.24        69
    Positive       0.92      0.75      0.83       763

    accuracy                           0.71      1000
   macro avg       0.56      0.61      0.56      1000
weighted avg       0.81      0.71      0.75      1000

