In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline
from tqdm.auto import tqdm
import json
import os
from huggingface_hub import login
from joblib import Parallel, delayed
import multiprocessing


In [8]:
data_cleaned = pd.read_csv('df1.csv')

In [9]:
data_cleaned['clean_text'] = data_cleaned['clean_text'].astype(str) 

In [10]:
data_cleaned['review_full'] = data_cleaned['review_full'].astype(str) 

In [11]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm
import concurrent.futures
import psutil
import os

# Define the zero-shot classification pipeline using RoBERTa
classifier = pipeline("zero-shot-classification", model="roberta-large-mnli")

# Define the labels for classification
labels = ["very bad review", "bad review", "average review", "good review", "very good review"]

# Function to classify a single piece of text
def classify_text(text):
    result = classifier(text, labels)
    return result['labels'][0]  # The label with the highest score

# Check available memory
def get_available_memory():
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    return mem_info.rss / (1024 ** 2)  # Return memory usage in MB

# Assuming `data_cleaned` is your DataFrame with a column 'clean_text'
# Define chunk size
chunk_size = 1000  # Adjust as needed

# Number of threads to use (adjust as needed)
num_threads = 4  # Use a small number of threads to limit CPU usage

# Wrap your pandas apply with tqdm for a progress bar
tqdm.pandas()

# Function to classify text in parallel
def parallel_classify_texts(texts, num_threads):
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        results = list(tqdm(executor.map(classify_text, texts), total=len(texts)))
    return results

# Process the DataFrame in chunks and save each chunk to a CSV
def process_and_save_chunks(data, chunk_size, num_threads, output_dir):
    os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist
    chunks = [data[i:i + chunk_size] for i in range(0, data.shape[0], chunk_size)]

    for i, chunk in enumerate(chunks):
        chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)

        # Optionally, convert labels to ratings
        def convert_label_to_rating(label):
            label_to_rating = {
                "very bad review": 1,
                "bad review": 2,
                "average review": 3,
                "good review": 4,
                "very good review": 5
            }
            return label_to_rating[label]

        chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)

        # Save the chunk to a CSV file
        chunk.to_csv(os.path.join(output_dir, f'chunk_{i}.csv'), index=False)

        # Print progress
        print(f'Saved chunk {i} to CSV.')

# Assuming `data_cleaned` is your DataFrame with a column 'clean_text'
process_and_save_chunks(data_cleaned, chunk_size, num_threads, output_dir='processed_chunks_RoBERTa_lde')

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1000/1000 [09:00<00:00,  1.85it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Saved chunk 0 to CSV.


100%|██████████| 1000/1000 [08:47<00:00,  1.90it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 1 to CSV.


100%|██████████| 1000/1000 [08:36<00:00,  1.94it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 2 to CSV.


100%|██████████| 1000/1000 [08:09<00:00,  2.04it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 3 to CSV.


100%|██████████| 1000/1000 [07:53<00:00,  2.11it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 4 to CSV.


100%|██████████| 1000/1000 [07:42<00:00,  2.16it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 5 to CSV.


100%|██████████| 1000/1000 [07:52<00:00,  2.12it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 6 to CSV.


100%|██████████| 1000/1000 [08:08<00:00,  2.05it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 7 to CSV.


100%|██████████| 1000/1000 [08:00<00:00,  2.08it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 8 to CSV.


100%|██████████| 1000/1000 [08:50<00:00,  1.89it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 9 to CSV.


100%|██████████| 1000/1000 [08:26<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 10 to CSV.


100%|██████████| 1000/1000 [08:15<00:00,  2.02it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 11 to CSV.


100%|██████████| 1000/1000 [08:04<00:00,  2.06it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 12 to CSV.


100%|██████████| 1000/1000 [07:59<00:00,  2.09it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 13 to CSV.


100%|██████████| 1000/1000 [07:35<00:00,  2.19it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 14 to CSV.


100%|██████████| 1000/1000 [07:53<00:00,  2.11it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 15 to CSV.


100%|██████████| 1000/1000 [08:09<00:00,  2.04it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 16 to CSV.


100%|██████████| 1000/1000 [07:41<00:00,  2.16it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 17 to CSV.


100%|██████████| 1000/1000 [08:10<00:00,  2.04it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 18 to CSV.


100%|██████████| 1000/1000 [08:38<00:00,  1.93it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 19 to CSV.


100%|██████████| 1000/1000 [08:23<00:00,  1.99it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 20 to CSV.


100%|██████████| 1000/1000 [09:20<00:00,  1.79it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 21 to CSV.


100%|██████████| 1000/1000 [08:01<00:00,  2.08it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 22 to CSV.


100%|██████████| 1000/1000 [08:27<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 23 to CSV.


100%|██████████| 1000/1000 [08:17<00:00,  2.01it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 24 to CSV.


100%|██████████| 1000/1000 [08:43<00:00,  1.91it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 25 to CSV.


100%|██████████| 1000/1000 [09:01<00:00,  1.85it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 26 to CSV.


100%|██████████| 1000/1000 [08:53<00:00,  1.88it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 27 to CSV.


100%|██████████| 1000/1000 [08:36<00:00,  1.93it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 28 to CSV.


100%|██████████| 1000/1000 [08:51<00:00,  1.88it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 29 to CSV.


100%|██████████| 1000/1000 [08:45<00:00,  1.90it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 30 to CSV.


100%|██████████| 1000/1000 [08:57<00:00,  1.86it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 31 to CSV.


100%|██████████| 1000/1000 [08:20<00:00,  2.00it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 32 to CSV.


100%|██████████| 1000/1000 [08:25<00:00,  1.98it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 33 to CSV.


100%|██████████| 1000/1000 [08:52<00:00,  1.88it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 34 to CSV.


100%|██████████| 1000/1000 [08:21<00:00,  1.99it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 35 to CSV.


100%|██████████| 1000/1000 [08:39<00:00,  1.92it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 36 to CSV.


100%|██████████| 1000/1000 [08:18<00:00,  2.01it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 37 to CSV.


100%|██████████| 1000/1000 [08:34<00:00,  1.95it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 38 to CSV.


100%|██████████| 1000/1000 [08:11<00:00,  2.03it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 39 to CSV.


100%|██████████| 1000/1000 [08:28<00:00,  1.96it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 40 to CSV.


100%|██████████| 1000/1000 [08:46<00:00,  1.90it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 41 to CSV.


100%|██████████| 1000/1000 [09:18<00:00,  1.79it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 42 to CSV.


100%|██████████| 1000/1000 [08:43<00:00,  1.91it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 43 to CSV.


100%|██████████| 1000/1000 [08:40<00:00,  1.92it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 44 to CSV.


100%|██████████| 1000/1000 [08:23<00:00,  1.99it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 45 to CSV.


100%|██████████| 1000/1000 [08:38<00:00,  1.93it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 46 to CSV.


100%|██████████| 1000/1000 [08:48<00:00,  1.89it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 47 to CSV.


100%|██████████| 1000/1000 [08:30<00:00,  1.96it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 48 to CSV.


100%|██████████| 1000/1000 [08:08<00:00,  2.05it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 49 to CSV.


100%|██████████| 1000/1000 [08:26<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 50 to CSV.


100%|██████████| 1000/1000 [08:26<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 51 to CSV.


100%|██████████| 1000/1000 [08:16<00:00,  2.01it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 52 to CSV.


100%|██████████| 1000/1000 [08:03<00:00,  2.07it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 53 to CSV.


100%|██████████| 1000/1000 [08:05<00:00,  2.06it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 54 to CSV.


100%|██████████| 1000/1000 [07:55<00:00,  2.10it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 55 to CSV.


100%|██████████| 1000/1000 [08:26<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 56 to CSV.


100%|██████████| 1000/1000 [08:31<00:00,  1.96it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 57 to CSV.


100%|██████████| 1000/1000 [08:17<00:00,  2.01it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 58 to CSV.


100%|██████████| 1000/1000 [08:46<00:00,  1.90it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 59 to CSV.


100%|██████████| 1000/1000 [08:23<00:00,  1.99it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 60 to CSV.


100%|██████████| 1000/1000 [08:19<00:00,  2.00it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 61 to CSV.


100%|██████████| 1000/1000 [08:27<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 62 to CSV.


100%|██████████| 1000/1000 [08:25<00:00,  1.98it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 63 to CSV.


100%|██████████| 1000/1000 [08:28<00:00,  1.96it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 64 to CSV.


100%|██████████| 1000/1000 [08:26<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 65 to CSV.


100%|██████████| 1000/1000 [08:31<00:00,  1.95it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 66 to CSV.


100%|██████████| 1000/1000 [08:46<00:00,  1.90it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 67 to CSV.


100%|██████████| 1000/1000 [08:45<00:00,  1.90it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 68 to CSV.


100%|██████████| 1000/1000 [08:21<00:00,  1.99it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 69 to CSV.


100%|██████████| 1000/1000 [08:34<00:00,  1.94it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 70 to CSV.


100%|██████████| 1000/1000 [08:38<00:00,  1.93it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 71 to CSV.


100%|██████████| 1000/1000 [08:34<00:00,  1.94it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 72 to CSV.


100%|██████████| 1000/1000 [08:43<00:00,  1.91it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 73 to CSV.


100%|██████████| 1000/1000 [08:27<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 74 to CSV.


100%|██████████| 1000/1000 [08:43<00:00,  1.91it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 75 to CSV.


100%|██████████| 1000/1000 [08:51<00:00,  1.88it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 76 to CSV.


100%|██████████| 1000/1000 [08:42<00:00,  1.91it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 77 to CSV.


100%|██████████| 1000/1000 [08:22<00:00,  1.99it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 78 to CSV.


100%|██████████| 1000/1000 [08:57<00:00,  1.86it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 79 to CSV.


100%|██████████| 1000/1000 [08:35<00:00,  1.94it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 80 to CSV.


100%|██████████| 1000/1000 [08:28<00:00,  1.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 81 to CSV.


100%|██████████| 1000/1000 [08:41<00:00,  1.92it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 82 to CSV.


100%|██████████| 1000/1000 [08:35<00:00,  1.94it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 83 to CSV.


100%|██████████| 1000/1000 [08:24<00:00,  1.98it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 84 to CSV.


100%|██████████| 1000/1000 [08:25<00:00,  1.98it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 85 to CSV.


100%|██████████| 1000/1000 [08:24<00:00,  1.98it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 86 to CSV.


100%|██████████| 1000/1000 [08:19<00:00,  2.00it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


Saved chunk 87 to CSV.


100%|██████████| 424/424 [03:42<00:00,  1.90it/s]

Saved chunk 88 to CSV.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_label'] = parallel_classify_texts(chunk['review_full'], num_threads)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_rating'] = chunk['predicted_label'].apply(convert_label_to_rating)


In [13]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm
import concurrent.futures
import os

# Define the zero-shot classification pipeline using RoBERTa
classifier = pipeline("zero-shot-classification", model="roberta-large-mnli")

# Define the cuisine labels for classification
cuisine_labels = ["Italian", "Chinese", "Mexican", "Indian", "French", "Japanese", "American", "Thai", "Spanish", "Greek"]

# Function to classify a single piece of text
def classify_text(row):
    text = f"Restaurant Name: {row['restaurant_name']} - Review Title: {row['title_review']} - Review Full: {row['review_full']}"
    result = classifier(text, cuisine_labels)
    return result['labels'][0]  # The label with the highest score

# Check available memory
def get_available_memory():
    import psutil
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    return mem_info.rss / (1024 ** 2)  # Return memory usage in MB

# Define chunk size
chunk_size = 1000  # Adjust as needed

# Number of threads to use (adjust as needed)
num_threads = 4  # Use a small number of threads to limit CPU usage

# Wrap your pandas apply with tqdm for a progress bar
tqdm.pandas()

# Function to classify text in parallel
def parallel_classify_texts(df, num_threads):
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        results = list(tqdm(executor.map(classify_text, [row for _, row in df.iterrows()]), total=len(df)))
    return results

# Process the DataFrame in chunks and save each chunk to a CSV
def process_and_save_chunks(data, chunk_size, num_threads, output_dir, file_name):
    os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist
    chunks = [data[i:i + chunk_size] for i in range(0, data.shape[0], chunk_size)]

    for i, chunk in enumerate(chunks):
        chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)
        output_file_path = os.path.join(output_dir, f'{os.path.splitext(file_name)[0]}_chunk_{i}.csv')
        # Save the chunk to a CSV file
        chunk.to_csv(output_file_path, index=False)
        # Print progress
        print(f'Saved chunk {i} of {file_name} to CSV.')

# Process all CSV files in a directory
def process_all_csvs(input_dir, output_dir):
    for file_name in os.listdir(input_dir):
        if file_name.endswith('.csv'):
            file_path = os.path.join(input_dir, file_name)
            print(f'Processing file: {file_path}')
            data = pd.read_csv(file_path)
            process_and_save_chunks(data, chunk_size, num_threads, output_dir, file_name)

# Define input and output directories
input_dir = 'processed_chunks_RoBERTa_lde'
output_dir = 'processed_chunks_RoBERTa_cuisine'

# Process all CSV files in the input directory
process_all_csvs(input_dir, output_dir)

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Processing file: processed_chunks_RoBERTa_lde/chunk_49.csv


100%|██████████| 1000/1000 [17:54<00:00,  1.07s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_49.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_17.csv


100%|██████████| 1000/1000 [16:27<00:00,  1.01it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_17.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_16.csv


100%|██████████| 1000/1000 [17:29<00:00,  1.05s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_16.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_28.csv


100%|██████████| 1000/1000 [18:34<00:00,  1.11s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_28.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_14.csv


100%|██████████| 1000/1000 [16:41<00:00,  1.00s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_14.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_15.csv


100%|██████████| 1000/1000 [17:29<00:00,  1.05s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_15.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_29.csv


100%|██████████| 1000/1000 [19:08<00:00,  1.15s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_29.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_11.csv


100%|██████████| 1000/1000 [18:13<00:00,  1.09s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_11.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_39.csv


100%|██████████| 1000/1000 [18:05<00:00,  1.09s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_39.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_38.csv


100%|██████████| 1000/1000 [18:26<00:00,  1.11s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_38.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_10.csv


100%|██████████| 1000/1000 [18:29<00:00,  1.11s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_10.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_12.csv


100%|██████████| 1000/1000 [17:50<00:00,  1.07s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_12.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_13.csv


100%|██████████| 1000/1000 [17:41<00:00,  1.06s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['predicted_cuisine'] = parallel_classify_texts(chunk, num_threads)


Saved chunk 0 of chunk_13.csv to CSV.
Processing file: processed_chunks_RoBERTa_lde/chunk_36.csv


 33%|███▎      | 327/1000 [06:09<12:40,  1.13s/it]


KeyboardInterrupt: 

In [14]:
def read_all_csvs_in_folder(folder_path):
    # List to hold the individual DataFrames
    df_list = []

    # Iterate over all files in the folder
    for filename in os.listdir(folder_path):
        # Check if the file is a CSV file
        if filename.endswith('.csv'):
            # Construct the full file path
            file_path = os.path.join(folder_path, filename)
            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)
            # Append the DataFrame to the list
            df_list.append(df)

    # Concatenate all DataFrames in the list into a single DataFrame
    combined_df = pd.concat(df_list, ignore_index=True)

    return combined_df

In [15]:
lde_roberta = read_all_csvs_in_folder('processed_chunks_RoBERTa_lde')
cuisine_roberta = read_all_csvs_in_folder('processed_chunks_RoBERTa_cuisine')

lde_deberta = read_all_csvs_in_folder('processed_chunks_DeBERTa_lde')
cuisine_deberta = read_all_csvs_in_folder('processed_chunks_DeBERTa_cuisine')

lde_roberta['model_used']='RoBERTa'
cuisine_roberta['model_used']='RoBERTa'

lde_deberta['model_used']='DeBERTa'
cuisine_deberta['model_used']='DeBERTa'

pd.concat([lde_roberta, lde_deberta]).to_csv('data_final_no_cuisine.csv', index=False)
pd.concat([cuisine_roberta, cuisine_deberta]).to_csv('data_final_cuisine.csv', index=False)