In [1]:
#Import our dependencies
import pandas as pd
import numpy as np
import gradio as gr
import utils as up
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.datasets import make_blobs 
from sklearn.metrics import classification_report
from sklearn.compose import ColumnTransformer
from sentence_transformers import SentenceTransformer
from imblearn.under_sampling import RandomUnderSampler
import matplotlib.pyplot as plt

#Models to use in our pipeline
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier


In [2]:
#Import and read news articles
articles_df = pd.read_csv("news_articles.csv")

In [3]:
articles_df.head()

Unnamed: 0,author,published,title,text,language,site_url,main_img_url,type,label,title_without_stopwords,text_without_stopwords,hasImage
0,Barracuda Brigade,2016-10-26T21:41:00.000+03:00,muslims busted they stole millions in govt ben...,print they should pay all the back all the mon...,english,100percentfedup.com,http://bb4sp.com/wp-content/uploads/2016/10/Fu...,bias,Real,muslims busted stole millions govt benefits,print pay back money plus interest entire fami...,1.0
1,reasoning with facts,2016-10-29T08:47:11.259+03:00,re why did attorney general loretta lynch plea...,why did attorney general loretta lynch plead t...,english,100percentfedup.com,http://bb4sp.com/wp-content/uploads/2016/10/Fu...,bias,Real,attorney general loretta lynch plead fifth,attorney general loretta lynch plead fifth bar...,1.0
2,Barracuda Brigade,2016-10-31T01:41:49.479+02:00,breaking weiner cooperating with fbi on hillar...,red state \nfox news sunday reported this mor...,english,100percentfedup.com,http://bb4sp.com/wp-content/uploads/2016/10/Fu...,bias,Real,breaking weiner cooperating fbi hillary email ...,red state fox news sunday reported morning ant...,1.0
3,Fed Up,2016-11-01T05:22:00.000+02:00,pin drop speech by father of daughter kidnappe...,email kayla mueller was a prisoner and torture...,english,100percentfedup.com,http://100percentfedup.com/wp-content/uploads/...,bias,Real,pin drop speech father daughter kidnapped kill...,email kayla mueller prisoner tortured isis cha...,1.0
4,Fed Up,2016-11-01T21:56:00.000+02:00,fantastic trumps point plan to reform healthc...,email healthcare reform to make america great ...,english,100percentfedup.com,http://100percentfedup.com/wp-content/uploads/...,bias,Real,fantastic trumps point plan reform healthcare ...,email healthcare reform make america great sin...,1.0


In [4]:
articles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2096 entries, 0 to 2095
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   author                   2096 non-null   object 
 1   published                2096 non-null   object 
 2   title                    2096 non-null   object 
 3   text                     2050 non-null   object 
 4   language                 2095 non-null   object 
 5   site_url                 2095 non-null   object 
 6   main_img_url             2095 non-null   object 
 7   type                     2095 non-null   object 
 8   label                    2095 non-null   object 
 9   title_without_stopwords  2094 non-null   object 
 10  text_without_stopwords   2046 non-null   object 
 11  hasImage                 2095 non-null   float64
dtypes: float64(1), object(11)
memory usage: 196.6+ KB


In [5]:
articles_df.nunique()

author                      491
published                  2006
title                      1784
text                       1941
language                      5
site_url                     68
main_img_url               1229
type                          8
label                         2
title_without_stopwords    1780
text_without_stopwords     1937
hasImage                      2
dtype: int64

In [6]:
articles_df["label"].value_counts()

label
Fake    1294
Real     801
Name: count, dtype: int64

In [7]:
# Convert the "title" column from the news articles DataFrame to a list.
title_list = articles_df["title"].tolist()
title_list

['muslims busted they stole millions in govt benefits',
 're why did attorney general loretta lynch plead the fifth',
 'breaking weiner cooperating with fbi on hillary email investigation',
 'pin drop speech by father of daughter kidnapped and killed by isis i have voted for donald j trump  percentfedupcom',
 'fantastic trumps  point plan to reform healthcare begins with a bombshell  percentfedupcom',
 'hillary goes absolutely berserk on protester at rally video',
 'breaking nypd ready to make arrests in weiner casehillary visited pedophile island at least  timesmoney laundering underage sex payforplayproof of inappropriate handling classified information  percentfedupcom',
 'wow whistleblower tells chilling story of massive voter fraud trump campaign readies lawsuit against fl sec of elections in critical district video  percentfedupcom',
 'breaking clinton clearedwas this a coordinated last minute trick to energize hillarys base  percentfedupcom',
 'evil hillary supporters yell fck t

In [8]:
# Convert the "text" column from the news articles DataFrame to a list.
text_list = articles_df["text"].tolist()
text_list

['print they should pay all the back all the money plus interest the entire family and everyone who came in with them need to be deported asap why did it take two years to bust them \nhere we go again another group stealing from the government and taxpayers a group of somalis stole over four million in government benefits over just  months \nweve reported on numerous cases like this one where the muslim refugeesimmigrants commit fraud by scamming our systemits way out of control more related',
 'why did attorney general loretta lynch plead the fifth barracuda brigade  print the administration is blocking congressional probe into cash payments to iran of course she needs to plead the th she either cant recall refuses to answer or just plain deflects the question straight up corruption at its finest \npercentfedupcom  talk about covering your ass loretta lynch did just that when she plead the fifth to avoid incriminating herself over payments to irancorrupt to the core attorney general l

In [9]:
# Convert the "title_without_stopwords" column from the news articles DataFrame to a list.
title_without_stopwords_list = articles_df["title_without_stopwords"].tolist()
title_without_stopwords_list

['muslims busted stole millions govt benefits',
 'attorney general loretta lynch plead fifth',
 'breaking weiner cooperating fbi hillary email investigation',
 'pin drop speech father daughter kidnapped killed isis voted donald j trump percentfedupcom',
 'fantastic trumps point plan reform healthcare begins bombshell percentfedupcom',
 'hillary goes absolutely berserk protester rally video',
 'breaking nypd ready make arrests weiner casehillary visited pedophile island least timesmoney laundering underage sex payforplayproof inappropriate handling classified information percentfedupcom',
 'wow whistleblower tells chilling story massive voter fraud trump campaign readies lawsuit fl sec elections critical district video percentfedupcom',
 'breaking clinton clearedwas coordinated last minute trick energize hillarys base percentfedupcom',
 'evil hillary supporters yell fck trumpburn truck daddy fishing yr son trump bumperstickers video percentfedupcom',
 'yikes hillary goes railspulls howa

In [10]:
# Convert the "text_without_stopwords" column from the news articles DataFrame to a list.
text_without_stopwords_list = articles_df["text_without_stopwords"].tolist()
text_without_stopwords_list

['print pay back money plus interest entire family everyone came need deported asap take two years bust go another group stealing government taxpayers group somalis stole four million government benefits months weve reported numerous cases like one muslim refugeesimmigrants commit fraud scamming systemits way control related',
 'attorney general loretta lynch plead fifth barracuda brigade print administration blocking congressional probe cash payments iran course needs plead th either cant recall refuses answer plain deflects question straight corruption finest percentfedupcom talk covering ass loretta lynch plead fifth avoid incriminating payments irancorrupt core attorney general loretta lynch declining comply investigation leading members congress obama administrations secret efforts send iran billion cash earlier year prompting accusations lynch pleaded fifth amendment avoid incriminating payments according lawmakers communications exclusively obtained washington free beacon sen ma

In [11]:
#Create an instance of the label encoder
le = LabelEncoder()

#Copy datafram
encoded_articles_df = articles_df.copy()
LabelEncoder().fit_transform

# Fit and transform the label encoder for each column
for column in encoded_articles_df:
    encoded_articles_df[column] = le.fit_transform(encoded_articles_df[column])

encoded_articles_df.head()

Unnamed: 0,author,published,title,text,language,site_url,main_img_url,type,label,title_without_stopwords,text_without_stopwords,hasImage
0,39,77,958,1445,0,0,282,0,1,1028,1391,1
1,472,929,1213,1877,0,0,283,0,1,51,69,1
2,39,1039,130,1477,0,0,284,0,1,131,1433,1
3,131,1146,1119,686,0,0,3,0,1,1194,455,1
4,131,1217,466,679,0,0,10,0,1,517,442,1


In [12]:
encoded_articles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2096 entries, 0 to 2095
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype
---  ------                   --------------  -----
 0   author                   2096 non-null   int32
 1   published                2096 non-null   int32
 2   title                    2096 non-null   int32
 3   text                     2096 non-null   int32
 4   language                 2096 non-null   int32
 5   site_url                 2096 non-null   int32
 6   main_img_url             2096 non-null   int32
 7   type                     2096 non-null   int32
 8   label                    2096 non-null   int32
 9   title_without_stopwords  2096 non-null   int32
 10  text_without_stopwords   2096 non-null   int32
 11  hasImage                 2096 non-null   int64
dtypes: int32(11), int64(1)
memory usage: 106.6 KB


In [13]:
encoded_articles_df.shape

(2096, 12)

In [14]:
encoded_articles_df["label"].value_counts()

label
0    1294
1     801
2       1
Name: count, dtype: int64

In [15]:
encoded_articles_df["label"].unique()

array([1, 0, 2])

In [16]:
y_encoded_df = encoded_articles_df["label"]
y_encoded_df.head()

0    1
1    1
2    1
3    1
4    1
Name: label, dtype: int32

In [17]:
X_encoded_df = encoded_articles_df.drop(["label"], axis = 1)
X_encoded_df.head()

Unnamed: 0,author,published,title,text,language,site_url,main_img_url,type,title_without_stopwords,text_without_stopwords,hasImage
0,39,77,958,1445,0,0,282,0,1028,1391,1
1,472,929,1213,1877,0,0,283,0,51,69,1
2,39,1039,130,1477,0,0,284,0,131,1433,1
3,131,1146,1119,686,0,0,3,0,1194,455,1
4,131,1217,466,679,0,0,10,0,517,442,1


In [18]:
#Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded_df, y_encoded_df, random_state=1)

In [19]:
#Create the model
model = LogisticRegression()

In [20]:
#Fit the model to the training data
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
# Calculate the mean_squared_error and the r-squared value
# for the testing data

# Use our model to make predictions
predicted = model.predict(X_test)

# Score the predictions with mse and r2
mse = mean_squared_error(y_test, predicted)
r2 = r2_score(y_test, predicted)
rmse = np.sqrt(mean_squared_error(y_test, predicted))

print(f"mean squared error (MSE): {mse}")
print(f"R-squared (R2): {r2}")
print(f"Root mean squarted error (RMSE): {rmse}")

mean squared error (MSE): 0.30343511450381677
R-squared (R2): -0.3128900094547742
Root mean squarted error (RMSE): 0.550849448128812


In [22]:
# Call the `score()` method on the model to show the R2 score
model.score(X_test, y_test)

0.6965648854961832

In [23]:
def text_classification(articles_df):
   
    # Set the features variable to the title message column.
    articles_df = articles_df.dropna()
    X = articles_df['text']  

    # Set the target variable to the "label" column.
    y = articles_df['label']

    # Split data into training and testing and set the test_size = 33%
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    print(y_train.info())

    # Build a pipeline to transform the test set to compare to the training set.
    text_classification = Pipeline([('tfidf', TfidfVectorizer(stop_words='english')),
                     ('clf', LinearSVC()),
])

    # Fit the model to the transformed training data and return model.

    model = text_classification.fit(X_train, y_train)
    return model 

In [24]:
# Call the title_classification function with the DataFrame and set the result to the "title_clf" variable
text_classification = text_classification(articles_df)
text_classification

<class 'pandas.core.series.Series'>
Index: 1370 entries, 1638 to 1127
Series name: label
Non-Null Count  Dtype 
--------------  ----- 
1370 non-null   object
dtypes: object(1)
memory usage: 21.4+ KB
None




In [25]:
# Create a function called `text_prediction` that takes in the text and predicts the whether the text is "fake" or "real". 
# The function should return the Text, and say whether the text is "fake" or "real".
def text_prediction(text):

    # Create a variable that will hold the prediction of a new text.
    text_predictions = text_classification.predict([text])

    # Using a conditional if the prediction is "real" return the message:
    # f'The text message: "{text}", is fake.' Else, return f'The text message: "{text}", is real.'

    if text_predictions[0] == 'real':
        return f'The text: "{text}", is real.'
    else:
        return f'The text: "{text}", is fake.'

In [26]:
articles_df['label'].value_counts()

label
Fake    1294
Real     801
Name: count, dtype: int64

In [27]:
# Create a title_app that takes a textbox for the inputs and has a textbox for the output.  
# Povide labels for each textbox. 

app = gr.Interface(
        fn=text_prediction,
inputs = [
gr.Textbox(label="What is the text you want to test?")], 
outputs=gr.Textbox(lines=10, label="Our app has determined: ", show_copy_button=True))


# Launch the app.
#app.launch(show_error=True)

app.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://a8f181040ae0b44bac.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [28]:
# Create the random forest classifier model


#randomforest_model = RandomForestClassifier(n_estimators=128, random_state=1)
randomforest_model = RandomForestClassifier(max_depth=5)

In [29]:
# Fit the model to the training data
randomforest_model.fit(X_train, y_train)

In [30]:
# Validate the model by checking the model accuracy with model.score
print(f"Training Data Score: {randomforest_model.score(X_train, y_train)}")
print(f"Testing Data Score: {randomforest_model.score(X_test, y_test)}")

Training Data Score: 0.9930025445292621
Testing Data Score: 0.9790076335877863


In [31]:
# Make predictions and produce the classification report for the randome forest model
predictions = randomforest_model.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98       334
           1       1.00      0.94      0.97       190

    accuracy                           0.98       524
   macro avg       0.98      0.97      0.98       524
weighted avg       0.98      0.98      0.98       524



In [32]:
articles_df.dtypes

author                      object
published                   object
title                       object
text                        object
language                    object
site_url                    object
main_img_url                object
type                        object
label                       object
title_without_stopwords     object
text_without_stopwords      object
hasImage                   float64
dtype: object

In [33]:
def read_process(articles_df, features, target):
    
    # Drop missing values
    articles_df = articles_df.dropna()
    X = articles_df[features]
    y = articles_df[target]
    
    # Check for categorical variables
    categorical_columns = X.select_dtypes(include=['object', 'category']).columns
    numerical_columns = X.select_dtypes(exclude=['object', 'category']).columns

    # Handle categorical columns variables (if needed)
    X = pd.get_dummies(X, columns=categorical_columns, dtype=float)  # Binary classification assumed here
    
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # # Return data with both numerical and categorical columns separated
    # return X, y, numerical_columns, categorical_columns
    return X, y_encoded

def model_generator(articles_df, features, target):
    X, y = read_process(articles_df, features, target)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Column transformer to handle scaling and encoding
    # preprocessor = ColumnTransformer(
    #     transformers=[
    #         ('num', StandardScaler(), numerical_columns),
    #         ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    #     ]
    # )


    models = {
        "Logistic Regression": LogisticRegression(),
        "SVR": SVR(),
        "Random Forest": RandomForestClassifier(),
        "Gradient Boosting": GradientBoostingClassifier(),
        "Decision Tree": DecisionTreeClassifier()
    }

    results = {}



    for name, model in models.items():
        pipeline = Pipeline([
            ("scale", StandardScaler()),  # Apply preprocessor to handle encoding and scaling
            (name, model)
        ])
        
        pipeline.fit(X_train, y_train)
        y_predictions = pipeline.predict(X_test).reshape(-1, 1)
        score = pipeline.score(X_test, y_test)
        results[name] = score
        print(f"{name} Score: {score}")
    
    return results


In [37]:
features = articles_df.drop(["label", "type"], axis=1).columns.to_list()
target = "label"

In [38]:
articles_df['label'].value_counts()

label
Fake    1294
Real     801
Name: count, dtype: int64

In [39]:
read_process(articles_df, features, target)

(      hasImage  author_# 1 NWO Hatr  author_-NO AUTHOR-  \
 0          1.0                  0.0                 0.0   
 1          1.0                  0.0                 0.0   
 2          1.0                  0.0                 0.0   
 3          1.0                  0.0                 0.0   
 4          1.0                  0.0                 0.0   
 ...        ...                  ...                 ...   
 2041       0.0                  0.0                 0.0   
 2042       1.0                  0.0                 0.0   
 2043       1.0                  0.0                 0.0   
 2044       1.0                  0.0                 0.0   
 2045       0.0                  0.0                 0.0   
 
       author_4 Goals For The Neomasculinity Movement During Trumps First Term  \
 0                                                   0.0                          
 1                                                   0.0                          
 2                           

In [40]:
#Generate accuracy score by model
model_generator(articles_df, features, target)

Logistic Regression Score: 0.87890625
SVR Score: 0.36108573844194336
Random Forest Score: 0.998046875
Gradient Boosting Score: 0.998046875
Decision Tree Score: 0.998046875


{'Logistic Regression': 0.87890625,
 'SVR': 0.36108573844194336,
 'Random Forest': 0.998046875,
 'Gradient Boosting': 0.998046875,
 'Decision Tree': 0.998046875}

OPTIMIZATION - HYPERPARAMETERS

In [41]:
# Create KNN classifier
from sklearn.neighbors import KNeighborsClassifier
random_tuned_model = KNeighborsClassifier()

In [42]:
# Create the parameter object for the randomized search estimator.
# Try adjusting n_neighbors with values of 1 through 19. 
# Adjust leaf_size by using a range from 1 to 500.
# Include both uniform and distance options for weights.

param_grid = {
     'n_neighbors': np.arange(1,20,2),
     'weights': ['uniform', 'distance'],
     'leaf_size': np.arange(1, 500)
 }
param_grid


{'n_neighbors': array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19]),
 'weights': ['uniform', 'distance'],
 'leaf_size': array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
         27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
         53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
         66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
         79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
         92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
        105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
        118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
        144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
        1

In [43]:
# Create the randomized search estimator
from sklearn.model_selection import RandomizedSearchCV
random_clf = RandomizedSearchCV(random_tuned_model, param_grid, random_state=0, verbose=3)

In [44]:
# Fit the model by using the randomized search estimator.
random_clf.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.737 total time=   0.0s
[CV 2/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.721 total time=   0.0s
[CV 3/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.697 total time=   0.0s
[CV 4/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.704 total time=   0.0s
[CV 5/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.704 total time=   0.0s
[CV 1/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.778 total time=   0.0s
[CV 2/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.717 total time=   0.0s
[CV 3/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.748 total time=   0.0s
[CV 4/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.701 total time=   0.0s
[CV 5/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.739 total time=   0.0s
[CV 1/5] 



[CV 4/5] END leaf_size=164, n_neighbors=5, weights=uniform;, score=0.691 total time=   0.0s
[CV 5/5] END leaf_size=164, n_neighbors=5, weights=uniform;, score=0.739 total time=   0.0s
[CV 1/5] END leaf_size=243, n_neighbors=19, weights=distance;, score=0.746 total time=   0.0s
[CV 2/5] END leaf_size=243, n_neighbors=19, weights=distance;, score=0.721 total time=   0.0s
[CV 3/5] END leaf_size=243, n_neighbors=19, weights=distance;, score=0.736 total time=   0.0s
[CV 4/5] END leaf_size=243, n_neighbors=19, weights=distance;, score=0.697 total time=   0.0s
[CV 5/5] END leaf_size=243, n_neighbors=19, weights=distance;, score=0.729 total time=   0.0s
[CV 1/5] END leaf_size=462, n_neighbors=5, weights=distance;, score=0.778 total time=   0.0s
[CV 2/5] END leaf_size=462, n_neighbors=5, weights=distance;, score=0.717 total time=   0.0s
[CV 3/5] END leaf_size=462, n_neighbors=5, weights=distance;, score=0.748 total time=   0.0s
[CV 4/5] END leaf_size=462, n_neighbors=5, weights=distance;, score

In [45]:
# List the best parameters for this dataset
print(random_clf.best_params_)

{'weights': 'distance', 'n_neighbors': 5, 'leaf_size': 493}


In [46]:
# Print the classification report for the best model
grid_y_pred = random_clf.predict(X_test)
print(classification_report(y_test, grid_y_pred))

              precision    recall  f1-score   support

           0       0.78      0.78      0.78       334
           1       0.61      0.60      0.61       190

    accuracy                           0.72       524
   macro avg       0.69      0.69      0.69       524
weighted avg       0.72      0.72      0.72       524



In [47]:
# Make predictions with the hypertuned model
random_tuned_pred = random_clf.predict(X_test)

In [48]:
# Calculate the classification report
print(classification_report(y_test, random_tuned_pred))

              precision    recall  f1-score   support

           0       0.78      0.78      0.78       334
           1       0.61      0.60      0.61       190

    accuracy                           0.72       524
   macro avg       0.69      0.69      0.69       524
weighted avg       0.72      0.72      0.72       524

