# BeigeSage

This notebook uses the BeigeSage model to assign a sentiment label (Negative, Mixed, or Positive) to every Beige Book chunk in the corpus.

### Reading in data

In [None]:
# Import necessary libraries
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import shutil

# Load the sentiment scores CSV (manual_sentiment.csv) into a DataFrame.
# NOTE: despite its name, `excel_path` points at a .csv file and is read with pd.read_csv.
# NOTE(review): this is an absolute, machine-specific path; it must be edited before the
# notebook can run on another machine. `sentiment_data` is not referenced again in the
# cells visible here — confirm whether later cells still need it.
excel_path = r"C:/Users/MCOB PHD 14/Dropbox/Charlie's Dissertation/Beige Books/manual_sentiment.csv"
sentiment_data = pd.read_csv(excel_path)

# Define the label function
def label_sentiment(score, negative_max=-0.3, mixed_max=0.2):
    """Convert a numeric sentiment score into a three-way class label.

    Args:
        score: Numeric sentiment score.
        negative_max: Largest score (inclusive) still labelled Negative.
        mixed_max: Largest score (inclusive) still labelled Mixed; anything
            above it is labelled Positive.

    Returns:
        int: 0 (Negative), 1 (Mixed) or 2 (Positive).

    Raises:
        ValueError: If ``negative_max`` is greater than ``mixed_max``, which
            would leave the Mixed class empty.

    Note:
        A NaN score fails both ``<=`` comparisons and is therefore labelled
        2 (Positive). Filter out missing scores before calling if that is
        not what you want.
    """
    if negative_max > mixed_max:
        raise ValueError("negative_max must not be greater than mixed_max")

    if score <= negative_max:
        return 0  # Negative
    if score <= mixed_max:
        return 1  # Mixed
    return 2  # Positive

# Define path where text files are stored
# NOTE(review): absolute, machine-specific path; edit it before running elsewhere.
text_files_dir = r"C:/Users/MCOB PHD 14/Dropbox/Charlie's Dissertation/Beige Books/chunks"

# Load every .txt file in the directory into a {file name: file contents} mapping.
# os.listdir() returns entries in arbitrary order, so the names are sorted to make
# the row order of text_df (and of anything derived from it) reproducible.
text_data = {}
for filename in sorted(os.listdir(text_files_dir)):
    if filename.endswith('.txt'):
        file_path = os.path.join(text_files_dir, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            text_data[filename] = file.read()

# One row per file: its name and its full text
text_df = pd.DataFrame(list(text_data.items()), columns=['file_names', 'text'])

### Load BeigeSage

In [6]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch

# Define the path where the model and tokenizer are saved
# NOTE(review): absolute, machine-specific path; edit it before running elsewhere.
saved_model_path = 'C:/Users/MCOB PHD 14/Desktop/bbFinal/Notebooks/BeigeSage'

# Load the saved tokenizer
tokenizer = RobertaTokenizer.from_pretrained(saved_model_path)

# Load the saved model
# No device is selected here, so the model stays wherever from_pretrained puts it.
model = RobertaForSequenceClassification.from_pretrained(saved_model_path)

# Set the model to evaluation mode
# model.eval() is the last expression in the cell, so the notebook displays the
# repr of the module it returns as the cell output.
model.eval()


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [None]:
# Define a function to tokenize and predict sentiment
def predict_sentiment_with_status(texts, batch_size=1, status_every=100):
    """Predict a class index for each text using the notebook's loaded model.

    Relies on the notebook-level names ``tokenizer``, ``model``, ``torch`` and
    ``tqdm`` being defined before this function is called.

    Args:
        texts: Iterable of strings to classify.
        batch_size: Number of texts sent through the model per forward pass.
            Defaults to 1 (one text at a time).
        status_every: Print a "Processed i/N texts" line each time another
            ``status_every`` texts have been processed, and once at the end.
            Pass ``None`` or 0 to disable these lines; the tqdm progress bar
            is shown either way.

    Returns:
        list[int]: The argmax class index for each input text, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    texts = list(texts)
    total_texts = len(texts)
    predictions = []

    # Feed inputs to whichever device the model currently lives on.
    device = next(model.parameters()).device

    with tqdm(total=total_texts, desc="Predicting Sentiment", ncols=100) as progress_bar:
        for start in range(0, total_texts, batch_size):
            batch = texts[start:start + batch_size]

            # Pad only to the longest text in the batch rather than always to
            # 512 tokens. Padded positions are excluded by the attention mask,
            # so predictions are expected to match up to floating-point noise
            # while short texts become much cheaper to run.
            inputs = tokenizer(
                batch,
                return_tensors="pt",       # Return as PyTorch tensors
                truncation=True,           # Truncate longer sequences
                padding=True,              # Pad to the longest text in the batch
                max_length=512             # Upper bound on sequence length
            )
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

            # Inference only: no gradients needed
            with torch.no_grad():
                logits = model(**inputs).logits

            predictions.extend(torch.argmax(logits, dim=1).tolist())

            done = start + len(batch)
            progress_bar.update(len(batch))

            # Status line whenever a multiple of status_every has been crossed,
            # and once when everything has been processed.
            if status_every and (
                done // status_every > start // status_every or done == total_texts
            ):
                print(f"Processed {done}/{total_texts} texts")

    return predictions

In [11]:
from tqdm import tqdm

# Apply the prediction function to the 'text' column of the text_df DataFrame.
# Predictions are returned in the same order as the input list, so they can be
# assigned directly as a new column. This adds columns to text_df in place.
test_texts = text_df['text'].tolist()  # Convert text column to list
text_df['predicted_label'] = predict_sentiment_with_status(test_texts)

# Map the numerical labels back to the class names (optional);
# the codes match those returned by label_sentiment (0 Negative, 1 Mixed, 2 Positive).
label_map = {0: "Negative", 1: "Mixed", 2: "Positive"}
text_df['predicted_class'] = text_df['predicted_label'].map(label_map)

# Display the DataFrame with predictions
print(text_df[['text', 'predicted_label', 'predicted_class']])

Predicting Sentiment:  87%|████████████████████████████▋    | 25650/29521 [3:22:04<29:19,  2.20it/s]

Processed 25650/29521 texts


Predicting Sentiment:  87%|████████████████████████████▋    | 25660/29521 [3:22:09<28:12,  2.28it/s]

Processed 25660/29521 texts


Predicting Sentiment:  87%|████████████████████████████▋    | 25670/29521 [3:22:13<28:16,  2.27it/s]

Processed 25670/29521 texts


Predicting Sentiment:  87%|████████████████████████████▋    | 25680/29521 [3:22:18<29:18,  2.18it/s]

Processed 25680/29521 texts


Predicting Sentiment:  87%|████████████████████████████▋    | 25690/29521 [3:22:22<28:43,  2.22it/s]

Processed 25690/29521 texts


Predicting Sentiment:  87%|████████████████████████████▋    | 25700/29521 [3:22:27<28:48,  2.21it/s]

Processed 25700/29521 texts


Predicting Sentiment:  87%|████████████████████████████▋    | 25710/29521 [3:22:31<28:52,  2.20it/s]

Processed 25710/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25720/29521 [3:22:36<29:51,  2.12it/s]

Processed 25720/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25730/29521 [3:22:40<28:02,  2.25it/s]

Processed 25730/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25740/29521 [3:22:45<28:25,  2.22it/s]

Processed 25740/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25750/29521 [3:22:49<28:15,  2.22it/s]

Processed 25750/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25760/29521 [3:22:54<28:30,  2.20it/s]

Processed 25760/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25770/29521 [3:22:58<28:04,  2.23it/s]

Processed 25770/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25780/29521 [3:23:03<27:50,  2.24it/s]

Processed 25780/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25790/29521 [3:23:07<27:48,  2.24it/s]

Processed 25790/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25800/29521 [3:23:12<28:05,  2.21it/s]

Processed 25800/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25810/29521 [3:23:16<27:41,  2.23it/s]

Processed 25810/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25820/29521 [3:23:21<28:34,  2.16it/s]

Processed 25820/29521 texts


Predicting Sentiment:  87%|████████████████████████████▊    | 25830/29521 [3:23:26<29:08,  2.11it/s]

Processed 25830/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25840/29521 [3:23:30<28:16,  2.17it/s]

Processed 25840/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25850/29521 [3:23:35<27:05,  2.26it/s]

Processed 25850/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25860/29521 [3:23:39<27:49,  2.19it/s]

Processed 25860/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25870/29521 [3:23:44<27:13,  2.23it/s]

Processed 25870/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25880/29521 [3:23:48<27:14,  2.23it/s]

Processed 25880/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25890/29521 [3:23:53<27:19,  2.21it/s]

Processed 25890/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25900/29521 [3:23:57<27:20,  2.21it/s]

Processed 25900/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25910/29521 [3:24:02<27:20,  2.20it/s]

Processed 25910/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25920/29521 [3:24:06<27:12,  2.21it/s]

Processed 25920/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25930/29521 [3:24:11<26:59,  2.22it/s]

Processed 25930/29521 texts


Predicting Sentiment:  88%|████████████████████████████▉    | 25940/29521 [3:24:15<26:42,  2.23it/s]

Processed 25940/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 25950/29521 [3:24:20<26:58,  2.21it/s]

Processed 25950/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 25960/29521 [3:24:25<27:19,  2.17it/s]

Processed 25960/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 25970/29521 [3:24:29<26:46,  2.21it/s]

Processed 25970/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 25980/29521 [3:24:34<26:37,  2.22it/s]

Processed 25980/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 25990/29521 [3:24:38<26:54,  2.19it/s]

Processed 25990/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 26000/29521 [3:24:43<27:42,  2.12it/s]

Processed 26000/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 26010/29521 [3:24:48<30:59,  1.89it/s]

Processed 26010/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 26020/29521 [3:24:54<32:27,  1.80it/s]

Processed 26020/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 26030/29521 [3:24:59<31:34,  1.84it/s]

Processed 26030/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 26040/29521 [3:25:05<30:30,  1.90it/s]

Processed 26040/29521 texts


Predicting Sentiment:  88%|█████████████████████████████    | 26050/29521 [3:25:10<30:44,  1.88it/s]

Processed 26050/29521 texts


Predicting Sentiment:  88%|█████████████████████████████▏   | 26060/29521 [3:25:15<29:26,  1.96it/s]

Processed 26060/29521 texts


Predicting Sentiment:  88%|█████████████████████████████▏   | 26070/29521 [3:25:20<28:47,  2.00it/s]

Processed 26070/29521 texts


Predicting Sentiment:  88%|█████████████████████████████▏   | 26080/29521 [3:25:25<29:24,  1.95it/s]

Processed 26080/29521 texts


Predicting Sentiment:  88%|█████████████████████████████▏   | 26090/29521 [3:25:30<29:25,  1.94it/s]

Processed 26090/29521 texts


Predicting Sentiment:  88%|█████████████████████████████▏   | 26100/29521 [3:25:35<28:18,  2.01it/s]

Processed 26100/29521 texts


Predicting Sentiment:  88%|█████████████████████████████▏   | 26110/29521 [3:25:40<29:51,  1.90it/s]

Processed 26110/29521 texts


Predicting Sentiment:  88%|█████████████████████████████▏   | 26120/29521 [3:25:45<27:47,  2.04it/s]

Processed 26120/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▏   | 26130/29521 [3:25:50<26:50,  2.11it/s]

Processed 26130/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▏   | 26140/29521 [3:25:55<27:05,  2.08it/s]

Processed 26140/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▏   | 26150/29521 [3:26:00<27:02,  2.08it/s]

Processed 26150/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▏   | 26160/29521 [3:26:04<26:49,  2.09it/s]

Processed 26160/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26170/29521 [3:26:09<26:42,  2.09it/s]

Processed 26170/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26180/29521 [3:26:14<26:21,  2.11it/s]

Processed 26180/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26190/29521 [3:26:19<26:37,  2.08it/s]

Processed 26190/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26200/29521 [3:26:24<26:22,  2.10it/s]

Processed 26200/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26210/29521 [3:26:28<26:20,  2.10it/s]

Processed 26210/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26220/29521 [3:26:33<26:01,  2.11it/s]

Processed 26220/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26230/29521 [3:26:38<26:43,  2.05it/s]

Processed 26230/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26240/29521 [3:26:43<26:50,  2.04it/s]

Processed 26240/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26250/29521 [3:26:48<26:31,  2.06it/s]

Processed 26250/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26260/29521 [3:26:53<26:10,  2.08it/s]

Processed 26260/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▎   | 26270/29521 [3:26:57<26:14,  2.06it/s]

Processed 26270/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26280/29521 [3:27:02<26:03,  2.07it/s]

Processed 26280/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26290/29521 [3:27:07<26:04,  2.07it/s]

Processed 26290/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26300/29521 [3:27:12<25:58,  2.07it/s]

Processed 26300/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26310/29521 [3:27:17<25:40,  2.08it/s]

Processed 26310/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26320/29521 [3:27:22<25:29,  2.09it/s]

Processed 26320/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26330/29521 [3:27:26<25:04,  2.12it/s]

Processed 26330/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26340/29521 [3:27:31<24:54,  2.13it/s]

Processed 26340/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26350/29521 [3:27:36<25:55,  2.04it/s]

Processed 26350/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26360/29521 [3:27:41<25:47,  2.04it/s]

Processed 26360/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26370/29521 [3:27:46<25:09,  2.09it/s]

Processed 26370/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▍   | 26380/29521 [3:27:50<25:16,  2.07it/s]

Processed 26380/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▌   | 26390/29521 [3:27:55<25:01,  2.08it/s]

Processed 26390/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▌   | 26400/29521 [3:28:00<25:20,  2.05it/s]

Processed 26400/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▌   | 26410/29521 [3:28:05<25:49,  2.01it/s]

Processed 26410/29521 texts


Predicting Sentiment:  89%|█████████████████████████████▌   | 26420/29521 [3:28:10<24:45,  2.09it/s]

Processed 26420/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26430/29521 [3:28:15<24:43,  2.08it/s]

Processed 26430/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26440/29521 [3:28:19<24:03,  2.13it/s]

Processed 26440/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26450/29521 [3:28:24<24:21,  2.10it/s]

Processed 26450/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26460/29521 [3:28:29<24:13,  2.11it/s]

Processed 26460/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26470/29521 [3:28:34<23:51,  2.13it/s]

Processed 26470/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26480/29521 [3:28:38<24:00,  2.11it/s]

Processed 26480/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26490/29521 [3:28:43<23:24,  2.16it/s]

Processed 26490/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▌   | 26500/29521 [3:28:48<24:18,  2.07it/s]

Processed 26500/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26510/29521 [3:28:53<23:47,  2.11it/s]

Processed 26510/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26520/29521 [3:28:58<24:05,  2.08it/s]

Processed 26520/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26530/29521 [3:29:03<23:33,  2.12it/s]

Processed 26530/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26540/29521 [3:29:07<24:09,  2.06it/s]

Processed 26540/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26550/29521 [3:29:12<23:18,  2.12it/s]

Processed 26550/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26560/29521 [3:29:17<22:30,  2.19it/s]

Processed 26560/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26570/29521 [3:29:21<23:27,  2.10it/s]

Processed 26570/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26580/29521 [3:29:26<23:42,  2.07it/s]

Processed 26580/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26590/29521 [3:29:31<23:32,  2.08it/s]

Processed 26590/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26600/29521 [3:29:36<23:05,  2.11it/s]

Processed 26600/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▋   | 26610/29521 [3:29:41<23:20,  2.08it/s]

Processed 26610/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26620/29521 [3:29:45<22:13,  2.18it/s]

Processed 26620/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26630/29521 [3:29:50<22:37,  2.13it/s]

Processed 26630/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26640/29521 [3:29:55<22:35,  2.13it/s]

Processed 26640/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26650/29521 [3:30:00<22:39,  2.11it/s]

Processed 26650/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26660/29521 [3:30:04<22:25,  2.13it/s]

Processed 26660/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26670/29521 [3:30:09<22:13,  2.14it/s]

Processed 26670/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26680/29521 [3:30:14<22:39,  2.09it/s]

Processed 26680/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26690/29521 [3:30:19<22:30,  2.10it/s]

Processed 26690/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26700/29521 [3:30:23<22:08,  2.12it/s]

Processed 26700/29521 texts


Predicting Sentiment:  90%|█████████████████████████████▊   | 26710/29521 [3:30:28<22:16,  2.10it/s]

Processed 26710/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▊   | 26720/29521 [3:30:33<22:08,  2.11it/s]

Processed 26720/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26730/29521 [3:30:38<22:01,  2.11it/s]

Processed 26730/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26740/29521 [3:30:43<21:58,  2.11it/s]

Processed 26740/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26750/29521 [3:30:48<22:21,  2.07it/s]

Processed 26750/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26760/29521 [3:30:52<22:18,  2.06it/s]

Processed 26760/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26770/29521 [3:30:57<21:28,  2.13it/s]

Processed 26770/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26780/29521 [3:31:02<21:47,  2.10it/s]

Processed 26780/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26790/29521 [3:31:07<21:47,  2.09it/s]

Processed 26790/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26800/29521 [3:31:11<21:28,  2.11it/s]

Processed 26800/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26810/29521 [3:31:16<21:22,  2.11it/s]

Processed 26810/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26820/29521 [3:31:21<21:26,  2.10it/s]

Processed 26820/29521 texts


Predicting Sentiment:  91%|█████████████████████████████▉   | 26830/29521 [3:31:26<21:08,  2.12it/s]

Processed 26830/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26840/29521 [3:31:30<20:40,  2.16it/s]

Processed 26840/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26850/29521 [3:31:35<21:06,  2.11it/s]

Processed 26850/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26860/29521 [3:31:40<20:49,  2.13it/s]

Processed 26860/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26870/29521 [3:31:45<20:42,  2.13it/s]

Processed 26870/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26880/29521 [3:31:49<20:58,  2.10it/s]

Processed 26880/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26890/29521 [3:31:54<20:56,  2.09it/s]

Processed 26890/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26900/29521 [3:31:59<20:51,  2.09it/s]

Processed 26900/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26910/29521 [3:32:04<21:12,  2.05it/s]

Processed 26910/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26920/29521 [3:32:09<21:43,  2.00it/s]

Processed 26920/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26930/29521 [3:32:14<21:38,  2.00it/s]

Processed 26930/29521 texts


Predicting Sentiment:  91%|██████████████████████████████   | 26940/29521 [3:32:19<22:24,  1.92it/s]

Processed 26940/29521 texts


Predicting Sentiment:  91%|██████████████████████████████▏  | 26950/29521 [3:32:24<20:23,  2.10it/s]

Processed 26950/29521 texts


Predicting Sentiment:  91%|██████████████████████████████▏  | 26960/29521 [3:32:29<21:17,  2.01it/s]

Processed 26960/29521 texts


Predicting Sentiment:  91%|██████████████████████████████▏  | 26970/29521 [3:32:34<21:30,  1.98it/s]

Processed 26970/29521 texts


Predicting Sentiment:  91%|██████████████████████████████▏  | 26980/29521 [3:32:39<20:39,  2.05it/s]

Processed 26980/29521 texts


Predicting Sentiment:  91%|██████████████████████████████▏  | 26990/29521 [3:32:44<20:23,  2.07it/s]

Processed 26990/29521 texts


Predicting Sentiment:  91%|██████████████████████████████▏  | 27000/29521 [3:32:49<20:26,  2.06it/s]

Processed 27000/29521 texts


Predicting Sentiment:  91%|██████████████████████████████▏  | 27010/29521 [3:32:54<20:27,  2.05it/s]

Processed 27010/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▏  | 27020/29521 [3:32:58<19:55,  2.09it/s]

Processed 27020/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▏  | 27030/29521 [3:33:03<19:31,  2.13it/s]

Processed 27030/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▏  | 27040/29521 [3:33:08<19:54,  2.08it/s]

Processed 27040/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▏  | 27050/29521 [3:33:13<19:51,  2.07it/s]

Processed 27050/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▏  | 27060/29521 [3:33:18<20:11,  2.03it/s]

Processed 27060/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27070/29521 [3:33:23<20:25,  2.00it/s]

Processed 27070/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27080/29521 [3:33:28<19:59,  2.04it/s]

Processed 27080/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27090/29521 [3:33:33<20:11,  2.01it/s]

Processed 27090/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27100/29521 [3:33:38<19:40,  2.05it/s]

Processed 27100/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27110/29521 [3:33:43<19:24,  2.07it/s]

Processed 27110/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27120/29521 [3:33:47<20:30,  1.95it/s]

Processed 27120/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27130/29521 [3:33:53<20:20,  1.96it/s]

Processed 27130/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27140/29521 [3:33:58<19:20,  2.05it/s]

Processed 27140/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27150/29521 [3:34:02<19:05,  2.07it/s]

Processed 27150/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27160/29521 [3:34:07<19:00,  2.07it/s]

Processed 27160/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▎  | 27170/29521 [3:34:12<18:31,  2.11it/s]

Processed 27170/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27180/29521 [3:34:17<18:46,  2.08it/s]

Processed 27180/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27190/29521 [3:34:22<18:38,  2.08it/s]

Processed 27190/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27200/29521 [3:34:26<18:28,  2.09it/s]

Processed 27200/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27210/29521 [3:34:31<17:44,  2.17it/s]

Processed 27210/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27220/29521 [3:34:36<17:59,  2.13it/s]

Processed 27220/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27230/29521 [3:34:41<18:06,  2.11it/s]

Processed 27230/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27240/29521 [3:34:45<17:44,  2.14it/s]

Processed 27240/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27250/29521 [3:34:50<17:41,  2.14it/s]

Processed 27250/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27260/29521 [3:34:55<18:03,  2.09it/s]

Processed 27260/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27270/29521 [3:35:00<17:43,  2.12it/s]

Processed 27270/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▍  | 27280/29521 [3:35:04<17:36,  2.12it/s]

Processed 27280/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▌  | 27290/29521 [3:35:09<17:18,  2.15it/s]

Processed 27290/29521 texts


Predicting Sentiment:  92%|██████████████████████████████▌  | 27300/29521 [3:35:14<17:41,  2.09it/s]

Processed 27300/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27310/29521 [3:35:19<17:38,  2.09it/s]

Processed 27310/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27320/29521 [3:35:23<17:28,  2.10it/s]

Processed 27320/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27330/29521 [3:35:28<16:52,  2.16it/s]

Processed 27330/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27340/29521 [3:35:33<17:01,  2.14it/s]

Processed 27340/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27350/29521 [3:35:38<17:37,  2.05it/s]

Processed 27350/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27360/29521 [3:35:42<17:10,  2.10it/s]

Processed 27360/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27370/29521 [3:35:47<17:58,  1.99it/s]

Processed 27370/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27380/29521 [3:35:52<16:56,  2.11it/s]

Processed 27380/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▌  | 27390/29521 [3:35:57<17:07,  2.07it/s]

Processed 27390/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27400/29521 [3:36:02<16:50,  2.10it/s]

Processed 27400/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27410/29521 [3:36:06<16:32,  2.13it/s]

Processed 27410/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27420/29521 [3:36:11<16:45,  2.09it/s]

Processed 27420/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27430/29521 [3:36:16<16:33,  2.10it/s]

Processed 27430/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27440/29521 [3:36:21<16:12,  2.14it/s]

Processed 27440/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27450/29521 [3:36:26<16:35,  2.08it/s]

Processed 27450/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27460/29521 [3:36:30<16:14,  2.11it/s]

Processed 27460/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27470/29521 [3:36:35<15:47,  2.16it/s]

Processed 27470/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27480/29521 [3:36:40<16:48,  2.02it/s]

Processed 27480/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27490/29521 [3:36:45<16:11,  2.09it/s]

Processed 27490/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▋  | 27500/29521 [3:36:50<16:08,  2.09it/s]

Processed 27500/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27510/29521 [3:36:54<16:04,  2.08it/s]

Processed 27510/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27520/29521 [3:36:59<15:41,  2.12it/s]

Processed 27520/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27530/29521 [3:37:04<15:34,  2.13it/s]

Processed 27530/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27540/29521 [3:37:08<15:17,  2.16it/s]

Processed 27540/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27550/29521 [3:37:13<15:25,  2.13it/s]

Processed 27550/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27560/29521 [3:37:18<15:21,  2.13it/s]

Processed 27560/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27570/29521 [3:37:23<15:37,  2.08it/s]

Processed 27570/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27580/29521 [3:37:28<15:27,  2.09it/s]

Processed 27580/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27590/29521 [3:37:32<14:53,  2.16it/s]

Processed 27590/29521 texts


Predicting Sentiment:  93%|██████████████████████████████▊  | 27600/29521 [3:37:37<14:58,  2.14it/s]

Processed 27600/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▊  | 27610/29521 [3:37:42<15:33,  2.05it/s]

Processed 27610/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▊  | 27620/29521 [3:37:46<15:13,  2.08it/s]

Processed 27620/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27630/29521 [3:37:51<15:15,  2.07it/s]

Processed 27630/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27640/29521 [3:37:56<14:45,  2.12it/s]

Processed 27640/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27650/29521 [3:38:01<14:46,  2.11it/s]

Processed 27650/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27660/29521 [3:38:06<14:53,  2.08it/s]

Processed 27660/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27670/29521 [3:38:10<14:34,  2.12it/s]

Processed 27670/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27680/29521 [3:38:15<14:42,  2.09it/s]

Processed 27680/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27690/29521 [3:38:20<14:36,  2.09it/s]

Processed 27690/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27700/29521 [3:38:25<14:42,  2.06it/s]

Processed 27700/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27710/29521 [3:38:30<14:31,  2.08it/s]

Processed 27710/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27720/29521 [3:38:35<14:17,  2.10it/s]

Processed 27720/29521 texts


Predicting Sentiment:  94%|██████████████████████████████▉  | 27730/29521 [3:38:40<14:26,  2.07it/s]

Processed 27730/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27740/29521 [3:38:44<14:32,  2.04it/s]

Processed 27740/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27750/29521 [3:38:49<14:25,  2.05it/s]

Processed 27750/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27760/29521 [3:38:54<14:03,  2.09it/s]

Processed 27760/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27770/29521 [3:38:59<14:11,  2.06it/s]

Processed 27770/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27780/29521 [3:39:04<14:11,  2.04it/s]

Processed 27780/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27790/29521 [3:39:09<13:46,  2.09it/s]

Processed 27790/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27800/29521 [3:39:13<13:59,  2.05it/s]

Processed 27800/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27810/29521 [3:39:18<13:36,  2.10it/s]

Processed 27810/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27820/29521 [3:39:23<13:21,  2.12it/s]

Processed 27820/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27830/29521 [3:39:28<13:28,  2.09it/s]

Processed 27830/29521 texts


Predicting Sentiment:  94%|███████████████████████████████  | 27840/29521 [3:39:32<13:06,  2.14it/s]

Processed 27840/29521 texts


Predicting Sentiment:  94%|███████████████████████████████▏ | 27850/29521 [3:39:37<13:04,  2.13it/s]

Processed 27850/29521 texts


Predicting Sentiment:  94%|███████████████████████████████▏ | 27860/29521 [3:39:42<12:52,  2.15it/s]

Processed 27860/29521 texts


Predicting Sentiment:  94%|███████████████████████████████▏ | 27870/29521 [3:39:47<13:05,  2.10it/s]

Processed 27870/29521 texts


Predicting Sentiment:  94%|███████████████████████████████▏ | 27880/29521 [3:39:51<12:37,  2.17it/s]

Processed 27880/29521 texts


Predicting Sentiment:  94%|███████████████████████████████▏ | 27890/29521 [3:39:56<12:44,  2.13it/s]

Processed 27890/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▏ | 27900/29521 [3:40:01<12:51,  2.10it/s]

Processed 27900/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▏ | 27910/29521 [3:40:06<12:43,  2.11it/s]

Processed 27910/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▏ | 27920/29521 [3:40:10<12:20,  2.16it/s]

Processed 27920/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▏ | 27930/29521 [3:40:15<12:30,  2.12it/s]

Processed 27930/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▏ | 27940/29521 [3:40:20<12:40,  2.08it/s]

Processed 27940/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▏ | 27950/29521 [3:40:25<11:57,  2.19it/s]

Processed 27950/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 27960/29521 [3:40:30<13:23,  1.94it/s]

Processed 27960/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 27970/29521 [3:40:34<12:21,  2.09it/s]

Processed 27970/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 27980/29521 [3:40:39<11:55,  2.15it/s]

Processed 27980/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 27990/29521 [3:40:44<12:10,  2.10it/s]

Processed 27990/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 28000/29521 [3:40:49<12:25,  2.04it/s]

Processed 28000/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 28010/29521 [3:40:53<11:59,  2.10it/s]

Processed 28010/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 28020/29521 [3:40:58<12:03,  2.08it/s]

Processed 28020/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 28030/29521 [3:41:03<11:42,  2.12it/s]

Processed 28030/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 28040/29521 [3:41:08<11:26,  2.16it/s]

Processed 28040/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 28050/29521 [3:41:12<11:19,  2.17it/s]

Processed 28050/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▎ | 28060/29521 [3:41:17<11:36,  2.10it/s]

Processed 28060/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28070/29521 [3:41:22<11:10,  2.16it/s]

Processed 28070/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28080/29521 [3:41:27<11:22,  2.11it/s]

Processed 28080/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28090/29521 [3:41:31<11:17,  2.11it/s]

Processed 28090/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28100/29521 [3:41:36<11:21,  2.08it/s]

Processed 28100/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28110/29521 [3:41:41<11:09,  2.11it/s]

Processed 28110/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28120/29521 [3:41:46<11:10,  2.09it/s]

Processed 28120/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28130/29521 [3:41:50<10:47,  2.15it/s]

Processed 28130/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28140/29521 [3:41:55<10:37,  2.17it/s]

Processed 28140/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28150/29521 [3:42:00<10:49,  2.11it/s]

Processed 28150/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28160/29521 [3:42:04<10:47,  2.10it/s]

Processed 28160/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▍ | 28170/29521 [3:42:09<10:20,  2.18it/s]

Processed 28170/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▌ | 28180/29521 [3:42:14<10:22,  2.16it/s]

Processed 28180/29521 texts


Predicting Sentiment:  95%|███████████████████████████████▌ | 28190/29521 [3:42:18<10:24,  2.13it/s]

Processed 28190/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28200/29521 [3:42:23<10:26,  2.11it/s]

Processed 28200/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28210/29521 [3:42:28<10:23,  2.10it/s]

Processed 28210/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28220/29521 [3:42:32<09:55,  2.19it/s]

Processed 28220/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28230/29521 [3:42:37<09:48,  2.19it/s]

Processed 28230/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28240/29521 [3:42:42<09:48,  2.18it/s]

Processed 28240/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28250/29521 [3:42:46<09:51,  2.15it/s]

Processed 28250/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28260/29521 [3:42:51<09:46,  2.15it/s]

Processed 28260/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28270/29521 [3:42:56<09:53,  2.11it/s]

Processed 28270/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28280/29521 [3:43:00<10:00,  2.07it/s]

Processed 28280/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▌ | 28290/29521 [3:43:05<09:42,  2.11it/s]

Processed 28290/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28300/29521 [3:43:10<09:29,  2.14it/s]

Processed 28300/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28310/29521 [3:43:15<10:05,  2.00it/s]

Processed 28310/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28320/29521 [3:43:20<10:21,  1.93it/s]

Processed 28320/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28330/29521 [3:43:25<09:31,  2.09it/s]

Processed 28330/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28340/29521 [3:43:31<15:57,  1.23it/s]

Processed 28340/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28350/29521 [3:43:36<09:28,  2.06it/s]

Processed 28350/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28360/29521 [3:43:41<09:08,  2.12it/s]

Processed 28360/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28370/29521 [3:43:46<09:11,  2.09it/s]

Processed 28370/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28380/29521 [3:43:51<08:58,  2.12it/s]

Processed 28380/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28390/29521 [3:43:55<08:49,  2.14it/s]

Processed 28390/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▋ | 28400/29521 [3:44:00<09:07,  2.05it/s]

Processed 28400/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28410/29521 [3:44:05<08:38,  2.14it/s]

Processed 28410/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28420/29521 [3:44:10<08:43,  2.10it/s]

Processed 28420/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28430/29521 [3:44:14<08:42,  2.09it/s]

Processed 28430/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28440/29521 [3:44:19<08:16,  2.18it/s]

Processed 28440/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28450/29521 [3:44:24<08:01,  2.23it/s]

Processed 28450/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28460/29521 [3:44:28<08:06,  2.18it/s]

Processed 28460/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28470/29521 [3:44:33<08:07,  2.16it/s]

Processed 28470/29521 texts


Predicting Sentiment:  96%|███████████████████████████████▊ | 28480/29521 [3:44:38<08:02,  2.16it/s]

Processed 28480/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▊ | 28490/29521 [3:44:42<08:10,  2.10it/s]

Processed 28490/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▊ | 28500/29521 [3:44:47<07:51,  2.16it/s]

Processed 28500/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▊ | 28510/29521 [3:44:52<07:54,  2.13it/s]

Processed 28510/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28520/29521 [3:44:56<07:47,  2.14it/s]

Processed 28520/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28530/29521 [3:45:01<07:43,  2.14it/s]

Processed 28530/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28540/29521 [3:45:06<07:38,  2.14it/s]

Processed 28540/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28550/29521 [3:45:11<07:41,  2.10it/s]

Processed 28550/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28560/29521 [3:45:15<07:34,  2.12it/s]

Processed 28560/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28570/29521 [3:45:20<07:29,  2.12it/s]

Processed 28570/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28580/29521 [3:45:25<07:32,  2.08it/s]

Processed 28580/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28590/29521 [3:45:30<07:30,  2.07it/s]

Processed 28590/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28600/29521 [3:45:34<07:21,  2.09it/s]

Processed 28600/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28610/29521 [3:45:39<07:07,  2.13it/s]

Processed 28610/29521 texts


Predicting Sentiment:  97%|███████████████████████████████▉ | 28620/29521 [3:45:44<07:08,  2.10it/s]

Processed 28620/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28630/29521 [3:45:49<07:03,  2.10it/s]

Processed 28630/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28640/29521 [3:45:54<07:00,  2.10it/s]

Processed 28640/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28650/29521 [3:45:58<07:08,  2.03it/s]

Processed 28650/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28660/29521 [3:46:03<06:39,  2.16it/s]

Processed 28660/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28670/29521 [3:46:08<06:39,  2.13it/s]

Processed 28670/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28680/29521 [3:46:12<06:36,  2.12it/s]

Processed 28680/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28690/29521 [3:46:17<06:14,  2.22it/s]

Processed 28690/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28700/29521 [3:46:22<06:23,  2.14it/s]

Processed 28700/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28710/29521 [3:46:26<06:19,  2.13it/s]

Processed 28710/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28720/29521 [3:46:31<06:15,  2.13it/s]

Processed 28720/29521 texts


Predicting Sentiment:  97%|████████████████████████████████ | 28730/29521 [3:46:36<06:18,  2.09it/s]

Processed 28730/29521 texts


Predicting Sentiment:  97%|████████████████████████████████▏| 28740/29521 [3:46:40<05:53,  2.21it/s]

Processed 28740/29521 texts


Predicting Sentiment:  97%|████████████████████████████████▏| 28750/29521 [3:46:45<06:04,  2.12it/s]

Processed 28750/29521 texts


Predicting Sentiment:  97%|████████████████████████████████▏| 28760/29521 [3:46:50<06:36,  1.92it/s]

Processed 28760/29521 texts


Predicting Sentiment:  97%|████████████████████████████████▏| 28770/29521 [3:46:55<06:31,  1.92it/s]

Processed 28770/29521 texts


Predicting Sentiment:  97%|████████████████████████████████▏| 28780/29521 [3:47:00<06:19,  1.95it/s]

Processed 28780/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▏| 28790/29521 [3:47:05<06:01,  2.02it/s]

Processed 28790/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▏| 28800/29521 [3:47:12<07:28,  1.61it/s]

Processed 28800/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▏| 28810/29521 [3:47:17<05:48,  2.04it/s]

Processed 28810/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▏| 28820/29521 [3:47:22<05:32,  2.11it/s]

Processed 28820/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▏| 28830/29521 [3:47:27<05:36,  2.06it/s]

Processed 28830/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▏| 28840/29521 [3:47:32<05:27,  2.08it/s]

Processed 28840/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▏| 28850/29521 [3:47:36<05:20,  2.09it/s]

Processed 28850/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28860/29521 [3:47:41<05:21,  2.05it/s]

Processed 28860/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28870/29521 [3:47:46<05:05,  2.13it/s]

Processed 28870/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28880/29521 [3:47:51<05:06,  2.09it/s]

Processed 28880/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28890/29521 [3:47:56<04:56,  2.13it/s]

Processed 28890/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28900/29521 [3:48:01<04:56,  2.10it/s]

Processed 28900/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28910/29521 [3:48:05<04:54,  2.08it/s]

Processed 28910/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28920/29521 [3:48:10<04:48,  2.09it/s]

Processed 28920/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28930/29521 [3:48:15<04:42,  2.09it/s]

Processed 28930/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28940/29521 [3:48:20<04:32,  2.13it/s]

Processed 28940/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28950/29521 [3:48:25<04:28,  2.13it/s]

Processed 28950/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▎| 28960/29521 [3:48:29<04:30,  2.07it/s]

Processed 28960/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 28970/29521 [3:48:34<04:18,  2.13it/s]

Processed 28970/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 28980/29521 [3:48:39<04:26,  2.03it/s]

Processed 28980/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 28990/29521 [3:48:44<04:01,  2.20it/s]

Processed 28990/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29000/29521 [3:48:48<03:56,  2.20it/s]

Processed 29000/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29010/29521 [3:48:53<03:59,  2.14it/s]

Processed 29010/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29020/29521 [3:48:58<03:49,  2.18it/s]

Processed 29020/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29030/29521 [3:49:02<03:48,  2.15it/s]

Processed 29030/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29040/29521 [3:49:07<04:16,  1.87it/s]

Processed 29040/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29050/29521 [3:49:12<03:27,  2.27it/s]

Processed 29050/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29060/29521 [3:49:16<03:28,  2.21it/s]

Processed 29060/29521 texts


Predicting Sentiment:  98%|████████████████████████████████▍| 29070/29521 [3:49:21<03:27,  2.18it/s]

Processed 29070/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29080/29521 [3:49:26<03:28,  2.12it/s]

Processed 29080/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29090/29521 [3:49:30<03:16,  2.19it/s]

Processed 29090/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29100/29521 [3:49:35<03:01,  2.32it/s]

Processed 29100/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29110/29521 [3:49:39<03:08,  2.18it/s]

Processed 29110/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29120/29521 [3:49:44<03:04,  2.17it/s]

Processed 29120/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29130/29521 [3:49:48<02:59,  2.18it/s]

Processed 29130/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29140/29521 [3:49:53<02:54,  2.18it/s]

Processed 29140/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29150/29521 [3:49:57<02:52,  2.16it/s]

Processed 29150/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29160/29521 [3:50:02<02:43,  2.21it/s]

Processed 29160/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29170/29521 [3:50:07<02:47,  2.09it/s]

Processed 29170/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▌| 29180/29521 [3:50:12<02:39,  2.13it/s]

Processed 29180/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29190/29521 [3:50:16<02:35,  2.13it/s]

Processed 29190/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29200/29521 [3:50:21<02:33,  2.09it/s]

Processed 29200/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29210/29521 [3:50:26<02:26,  2.12it/s]

Processed 29210/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29220/29521 [3:50:30<02:20,  2.14it/s]

Processed 29220/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29230/29521 [3:50:35<02:15,  2.15it/s]

Processed 29230/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29240/29521 [3:50:40<02:09,  2.17it/s]

Processed 29240/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29250/29521 [3:50:44<02:03,  2.20it/s]

Processed 29250/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29260/29521 [3:50:49<02:00,  2.17it/s]

Processed 29260/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29270/29521 [3:50:54<01:54,  2.19it/s]

Processed 29270/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29280/29521 [3:50:58<01:52,  2.15it/s]

Processed 29280/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▋| 29290/29521 [3:51:03<01:47,  2.15it/s]

Processed 29290/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29300/29521 [3:51:08<01:41,  2.17it/s]

Processed 29300/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29310/29521 [3:51:12<01:40,  2.10it/s]

Processed 29310/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29320/29521 [3:51:17<01:33,  2.15it/s]

Processed 29320/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29330/29521 [3:51:22<01:31,  2.10it/s]

Processed 29330/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29340/29521 [3:51:27<01:26,  2.08it/s]

Processed 29340/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29350/29521 [3:51:31<01:18,  2.17it/s]

Processed 29350/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29360/29521 [3:51:36<01:14,  2.15it/s]

Processed 29360/29521 texts


Predicting Sentiment:  99%|████████████████████████████████▊| 29370/29521 [3:51:41<01:09,  2.17it/s]

Processed 29370/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▊| 29380/29521 [3:51:45<01:04,  2.18it/s]

Processed 29380/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▊| 29390/29521 [3:51:50<01:03,  2.08it/s]

Processed 29390/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▊| 29400/29521 [3:51:55<00:58,  2.07it/s]

Processed 29400/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29410/29521 [3:51:59<00:52,  2.10it/s]

Processed 29410/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29420/29521 [3:52:04<00:47,  2.11it/s]

Processed 29420/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29430/29521 [3:52:09<00:43,  2.08it/s]

Processed 29430/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29440/29521 [3:52:14<00:38,  2.09it/s]

Processed 29440/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29450/29521 [3:52:18<00:33,  2.10it/s]

Processed 29450/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29460/29521 [3:52:23<00:29,  2.10it/s]

Processed 29460/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29470/29521 [3:52:28<00:24,  2.07it/s]

Processed 29470/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29480/29521 [3:52:33<00:20,  2.03it/s]

Processed 29480/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29490/29521 [3:52:38<00:14,  2.11it/s]

Processed 29490/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29500/29521 [3:52:42<00:09,  2.15it/s]

Processed 29500/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29510/29521 [3:52:47<00:05,  2.16it/s]

Processed 29510/29521 texts


Predicting Sentiment: 100%|████████████████████████████████▉| 29520/29521 [3:52:52<00:00,  2.11it/s]

Processed 29520/29521 texts


Predicting Sentiment: 100%|█████████████████████████████████| 29521/29521 [3:52:52<00:00,  2.11it/s]

Processed 29521/29521 texts
                                                    text  predicted_label  \
0      December 9 , 1970 The economy of the Southeast...                1   
1      be dropping 300 employees by the end of the ye...                0   
2      percent above the year-ago level . This prompt...                1   
3      lowering of interest rates '' , '' improving c...                1   
4      November 11 , 1970 Our directors report a weak...                0   
...                                                  ...              ...   
29516  Beige Book Report : St Louis January 17 , 2024...                1   
29517  modestly since our previous report . Multiple ...                1   
29518  numbers in early 2024 . Manufacturing Manufact...                2   
29519  Rock metros . In the Louisville metro area , y...                1   
29520  expanded significantly compared with 2022 , re...                0   

      predicted_class  
0               Mixed  




In [None]:
# Keep only the file names and predictions; the raw chunk text is dropped so
# the labelled table stays small.
df_no_text = text_df.drop(columns="text")
# Optional export of the labelled table (left disabled):
#df_no_text.to_csv('sentiment_all_Beige_Book_chunks.csv', index=False)

Unnamed: 0,file_names,predicted_label,predicted_class
0,1970_at (1)_chunk_1.txt,1,Mixed
1,1970_at (1)_chunk_2.txt,0,Negative
2,1970_at (1)_chunk_3.txt,1,Mixed
3,1970_at (1)_chunk_4.txt,1,Mixed
4,1970_at (2)_chunk_1.txt,0,Negative
...,...,...,...
29516,2024_sl (8)_chunk_1.txt,1,Mixed
29517,2024_sl (8)_chunk_2.txt,1,Mixed
29518,2024_sl (8)_chunk_3.txt,2,Positive
29519,2024_sl (8)_chunk_4.txt,1,Mixed


In [17]:
# Aggregate by month

# Step 1: Parse the year and the report number out of each file name.
# File names look like "1970_at (1)_chunk_1.txt": the first run of four digits
# is taken as the year and the digits inside the parentheses as the report number.
df_no_text["Year"] = (
    df_no_text["file_names"]
    .str.extract(r"(\d{4})", expand=False)
    .astype(int)
)
df_no_text["Report_Number"] = (
    df_no_text["file_names"]
    .str.extract(r"\((\d+)\)", expand=False)
    .astype(int)
)

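# Step 2: Map Report Numbers to Months
# The publication months were taken from the Fed website and recorded in a spreadsheet.
# Note that report numbers run backwards through the year (report 1 is the latest release).
# See C:\Users\MCOB PHD 14\Desktop\bbFinal\Notebooks\BB_publication_months.xlsx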
def get_month(year, report_number):
    """Return the publication month of a report as a zero-padded string.

    Within a year the report numbers count backwards through the calendar
    (report 1 maps to the latest month), and the set of publication months
    differs between years, so each group of years has its own schedule.

    Parameters
    ----------
    year : int
        Calendar year of the report.
    report_number : int
        1-based report number within that year.

    Returns
    -------
    str
        Two-digit month such as "03", or "Unknown" when the report number
        is not part of the schedule that applies to ``year``.
    """
    def as_lookup(months):
        # "12 11 09" -> {1: "12", 2: "11", 3: "09"}
        return dict(enumerate(months.split(), start=1))

    if 1970 <= year <= 1978:
        # Twelve reports per year, one for every month.
        months = "12 11 10 09 08 07 06 05 04 03 02 01"
    else:
        # (years sharing a schedule, months listed for report 1, 2, 3, ...)
        schedules = (
            ({1979}, "11 10 09 08 07 05 04 03 01"),
            ({1980}, "12 11 10 09 08 07 05 04 03 01"),
            ({1981, 2017, 2023}, "11 10 09 07 05 04 03 01"),
            ({2012}, "11 10 08 07 06 04 02 01"),
            ({2018, 2020, 2024}, "12 10 09 07 05 04 03 01"),
            ({2002, 2003, 2005, 2006, 2007, 2011, 2016, 2019, 2022}, "11 10 09 07 06 04 03 01"),
            ({2004, 2008, 2009, 2010, 2013, 2014, 2015, 2021}, "12 10 09 07 06 04 03 01"),
            ({1988, 2001}, "11 10 09 08 06 05 03 01"),
            ({1983}, "12 11 09 08 07 05 03 02"),
            ({1984, 1985, 1986, 1987, 1990, 1991, 1996, 1997}, "12 10 09 08 06 05 03 01"),
        )
        # Schedule used for every year not listed above.
        fallback = "12 11 09 08 06 05 03 01"
        months = next(
            (listed for years, listed in schedules if year in years),
            fallback,
        )

    return as_lookup(months).get(report_number, "Unknown")

# Look up the publication month for every chunk. Iterating over the two columns
# directly avoids the much slower row-wise DataFrame.apply(axis=1).
df_no_text["Month"] = [
    get_month(year, report_number)
    for year, report_number in zip(df_no_text["Year"], df_no_text["Report_Number"])
]

# Step 3: Convert Year and Month to Date-Time Format
# Every date is pinned to the first day of the month. A Month of "Unknown"
# cannot be parsed and becomes NaT because of errors="coerce".
df_no_text["DATE"] = pd.to_datetime(
    df_no_text["Year"].astype(str) + "-" + df_no_text["Month"] + "-01",
    errors="coerce",
)

# Rows with a NaT date are excluded by groupby's default handling of missing
# keys, so report them here instead of letting them vanish silently later on.
n_unmapped = int(df_no_text["DATE"].isna().sum())
if n_unmapped:
    print(
        f"Warning: {n_unmapped} chunk(s) have no publication month mapping "
        "and will be missing from date-based aggregates."
    )

df_no_text.head()


Unnamed: 0,file_names,predicted_label,predicted_class,Year,Report_Number,Month,DATE
0,1970_at (1)_chunk_1.txt,1,Mixed,1970,1,12,1970-12-01
1,1970_at (1)_chunk_2.txt,0,Negative,1970,1,12,1970-12-01
2,1970_at (1)_chunk_3.txt,1,Mixed,1970,1,12,1970-12-01
3,1970_at (1)_chunk_4.txt,1,Mixed,1970,1,12,1970-12-01
4,1970_at (2)_chunk_1.txt,0,Negative,1970,2,11,1970-11-01


In [24]:
# Re-display the first rows to check the derived Year, Report_Number, Month and DATE columns.
df_no_text.head()

Unnamed: 0,file_names,predicted_label,predicted_class,Year,Report_Number,Month,DATE
0,1970_at (1)_chunk_1.txt,1,Mixed,1970,1,12,1970-12-01
1,1970_at (1)_chunk_2.txt,0,Negative,1970,1,12,1970-12-01
2,1970_at (1)_chunk_3.txt,1,Mixed,1970,1,12,1970-12-01
3,1970_at (1)_chunk_4.txt,1,Mixed,1970,1,12,1970-12-01
4,1970_at (2)_chunk_1.txt,0,Negative,1970,2,11,1970-11-01


In [33]:
# Monthly sentiment aggregation: one row per publication month holding the
# number of chunks in each predicted class, each class's share of the month's
# chunks, and two summary sentiment scores.
month_keys = ['Year', 'Month', 'DATE']

# Count chunks per month and predicted class
aggregated = (
    df_no_text.groupby(month_keys + ['predicted_class'])
    .size()
    .reset_index(name='class_count')
)

# Count all chunks per month
total_counts = (
    df_no_text.groupby(month_keys)
    .size()
    .reset_index(name='total_count')
)

# Attach the monthly total to every per-class count
merged = pd.merge(aggregated, total_counts, on=month_keys)

# Spread the classes into separate columns. Each (month, class) pair occurs
# exactly once in `merged`, so pivot_table's default mean aggregation simply
# passes the count through (as a float).
class_table = merged.pivot_table(
    index=month_keys + ['total_count'],  # Keep total_count alongside the month keys
    columns='predicted_class',
    values='class_count',
    fill_value=0
)

# A class that was never predicted has no column after the pivot; add it with
# zero counts so the calculations below cannot fail with a KeyError.
for sentiment in ('Mixed', 'Negative', 'Positive'):
    if sentiment not in class_table.columns:
        class_table[sentiment] = 0

# Rename columns for clarity
pivoted = class_table.reset_index().rename(columns={
    'Negative': 'count_negative',
    'Positive': 'count_positive',
    'Mixed': 'count_mixed'
})

# Fractions (proportions) of each sentiment class within the month
pivoted['fraction_negative'] = pivoted['count_negative'] / pivoted['total_count']
pivoted['fraction_positive'] = pivoted['count_positive'] / pivoted['total_count']
pivoted['fraction_mixed'] = pivoted['count_mixed'] / pivoted['total_count']

# Sentiment scores
# 1. Fraction of negative instances divided by fraction of positive instances.
#    A month without positive chunks divides by zero; store NaN instead of
#    +/-inf so summary statistics stay finite.
negative_to_positive = pivoted['fraction_negative'] / pivoted['fraction_positive']
pivoted['negative_positive_ratio'] = negative_to_positive.replace(
    [float('inf'), -float('inf')], float('nan')
)

# 2. Normalized Sentiment Index: (positive - negative) over the sum of all three fractions
pivoted['normalized_sentiment'] = (
    pivoted['fraction_positive'] - pivoted['fraction_negative']
) / (pivoted['fraction_positive'] + pivoted['fraction_negative'] + pivoted['fraction_mixed'])

# Display the final DataFrame
pivoted


predicted_class,Year,Month,DATE,total_count,count_mixed,count_negative,count_positive,fraction_negative,fraction_positive,fraction_mixed,negative_positive_ratio,normalized_sentiment
0,1970,05,1970-05-01,42,10.0,30.0,2.0,0.714286,0.047619,0.238095,15.000000,-0.666667
1,1970,06,1970-06-01,49,12.0,31.0,6.0,0.632653,0.122449,0.244898,5.166667,-0.510204
2,1970,07,1970-07-01,45,16.0,27.0,2.0,0.600000,0.044444,0.355556,13.500000,-0.555556
3,1970,08,1970-08-01,43,15.0,24.0,4.0,0.558140,0.093023,0.348837,6.000000,-0.465116
4,1970,09,1970-09-01,42,18.0,13.0,11.0,0.309524,0.261905,0.428571,1.181818,-0.047619
...,...,...,...,...,...,...,...,...,...,...,...,...
465,2023,11,2023-11-01,74,35.0,32.0,7.0,0.432432,0.094595,0.472973,4.571429,-0.337838
466,2024,01,2024-01-01,74,36.0,18.0,20.0,0.243243,0.270270,0.486486,0.900000,0.027027
467,2024,03,2024-03-01,75,46.0,13.0,16.0,0.173333,0.213333,0.613333,0.812500,0.040000
468,2024,04,2024-04-01,78,44.0,12.0,22.0,0.153846,0.282051,0.564103,0.545455,0.128205


In [41]:
# Drop the Year and Month helper columns; DATE stays as the period identifier.
# ("predicted_class" appears in the displayed table header, but it is not among
# the columns passed to drop() here.)
final_df = pivoted.drop(columns=['Year', 'Month'])

# Save the final DataFrame to a CSV file. The path is relative, so the file is
# written to the current working directory; index=False leaves out the row index.
# NOTE(review): the file name says "chunks", but the displayed table has one row
# per month - confirm the name is intended.
final_df.to_csv('sentiment_all_Beige_Book_chunks.csv', index=False)


In [42]:
# Summary statistics of the monthly table as a sanity check on counts, fractions and scores.
final_df.describe()

predicted_class,DATE,total_count,count_mixed,count_negative,count_positive,fraction_negative,fraction_positive,fraction_mixed,negative_positive_ratio,normalized_sentiment
count,470,470.0,470.0,470.0,470.0,470.0,470.0,470.0,468.0,470.0
mean,1995-08-24 14:27:03.829787264,62.810638,24.065957,12.861702,25.882979,0.21217,0.408085,0.379744,2.121734,0.195915
min,1970-05-01 00:00:00,40.0,4.0,0.0,0.0,0.0,0.0,0.086957,0.0,-0.805195
25%,1980-08-08 18:00:00,53.0,17.0,5.0,13.25,0.084746,0.245283,0.303628,0.153263,-0.045995
50%,1995-01-30 12:00:00,60.0,22.0,9.0,26.0,0.150342,0.438262,0.37037,0.348913,0.284367
75%,2009-09-23 12:00:00,74.0,29.0,17.0,36.75,0.285714,0.581208,0.460526,1.181818,0.490296
max,2024-05-01 00:00:00,90.0,50.0,63.0,62.0,0.818182,0.847222,0.666667,63.0,0.805556
std,,12.272505,9.113002,11.222124,14.972652,0.179236,0.208894,0.108998,6.805527,0.373689
