In [1]:
import torch
from torch.utils.data import DataLoader
import pandas as pd
import os
import gc
import importlib
import multiprocessing

import sentiment_utils
importlib.reload(sentiment_utils)
from sentiment_utils import SentimentAnalysisModel, CommentsDataset, process_all_batches

In [2]:
# Process-level configuration; run once, before any DataLoader is created.
# Force the "fork" start method for multiprocessing, overriding any method that
# was already set (force=True).
# NOTE(review): presumably needed so the DataLoader workers (num_workers=12 below)
# can be started from a notebook kernel — confirm.
multiprocessing.set_start_method("fork", force=True)
# Set TOKENIZERS_PARALLELISM to "false" for this process and its children.
# NOTE(review): presumably disables the Hugging Face tokenizers' own thread pool so
# it does not clash with forked worker processes — confirm that sentiment_utils
# actually uses such a tokenizer.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [7]:
# The first and last files are processed outside the loop, as a manual check
# that the pipeline works before running it on the remaining files.
# NOTE(review): the preview below shows an `Unnamed: 0` column, which suggests the
# input CSV was written with its index; that column is carried through to the
# output file. Consider `index_col=0` if it is not wanted — confirm with downstream users.

input_file_path = '../data/2_beer_reviews_with_ids_1.csv'

# Read the reviews of file 1 and preview the first rows.
df = pd.read_csv(input_file_path)
df.head()

Unnamed: 0,id,text
0,9126242,"A - dark brown/black with crimson highlights, ..."
1,9126243,"12oz bottle into mug.Clear, but very dark brow..."
2,9126244,Looks more like a stout than a bock. But still...
3,9126245,Date on the bottle 6BNPours mahogany with 4 fi...
4,9126246,Pours out a rich looking purplish ruby colored...


In [8]:
# Instantiate the sentiment model from its saved location.
sentiment_model = SentimentAnalysisModel(model_path="../../models/sentiment_model_1")

# Wrap the review texts (cast to str) in a dataset and serve them in their
# original order, so the scores can be attached back to the frame as a column.
batch_size = 16
dataset = CommentsDataset(df["text"].astype(str).tolist())
loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=12,
    pin_memory=True,
    persistent_workers=True,
)
data_loader = DataLoader(dataset, **loader_kwargs)

# Score every batch and store the result next to the text it came from.
# Assumes process_all_batches returns one value per input text, in order — confirm
# in sentiment_utils.
df["sentiment_bert"] = process_all_batches(data_loader, sentiment_model)
df.head()

Processing Batches: 100%|██████████| 8200/8200 [1:05:17<00:00,  2.09batch/s]


Unnamed: 0,id,text,sentiment_bert
0,9126242,"A - dark brown/black with crimson highlights, ...",0.559397
1,9126243,"12oz bottle into mug.Clear, but very dark brow...",0.217033
2,9126244,Looks more like a stout than a bock. But still...,0.591588
3,9126245,Date on the bottle 6BNPours mahogany with 4 fi...,-0.294586
4,9126246,Pours out a rich looking purplish ruby colored...,0.187019


In [9]:
# Release memory left over from scoring.
# Collect Python garbage first: tensors that are only kept alive by reference
# cycles must be freed before the allocator can hand their blocks back.
gc.collect()
# Then return the now-unoccupied cached blocks of the MPS allocator. Guarded so the
# cell also runs on machines without an MPS (Apple GPU) backend.
if torch.backends.mps.is_available():
    torch.mps.empty_cache()

213

In [10]:
# Save the frame for file 1 to CSV; index=False keeps the row index out of the file.
output_file_path = '../data/2_beer_reviews_with_sent_1.csv'
df.to_csv(output_file_path, index=False)

In [None]:
# Ad-hoc inspection of the first 6000 rows of df.
# NOTE(review): this cell was left unexecuted; consider removing it or reducing it
# to df.head() / df.sample(5) before sharing the notebook.
df.head(6000)

In [3]:
# Score review files 10 through 14: read each input CSV, add a `sentiment_bert`
# column and write the result to the matching output CSV.

# The model is the same for every file, so load it once instead of once per file.
sentiment_model = SentimentAnalysisModel(model_path="../../models/sentiment_model_1")
batch_size = 16

for i in range(10, 15):
    input_file_path = f'../data/2_beer_reviews_with_ids_{i}.csv'
    output_file_path = f'../data/2_beer_reviews_with_sent_{i}.csv'

    df = pd.read_csv(input_file_path)

    # Texts are cast to str and served in their original order (shuffle=False),
    # so the scores can be assigned back to the frame as a column.
    dataset = CommentsDataset(df["text"].astype(str).tolist())
    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=12,
        pin_memory=True,
        persistent_workers=True
    )

    # Assumes process_all_batches returns one value per input text, in order —
    # confirm in sentiment_utils.
    df["sentiment_bert"] = process_all_batches(data_loader, sentiment_model)

    # Persist the scores before any cleanup, so a failure while freeing memory
    # cannot cost the results of a long scoring run.
    df.to_csv(output_file_path, index=False)

    # Drop the loader and dataset references first; otherwise the collection
    # below cannot reclaim them (or the loader's persistent workers) until the
    # names are rebound in the next iteration.
    del data_loader, dataset
    gc.collect()
    # Return the allocator's unoccupied cached blocks; guarded so the loop also
    # runs on machines without an MPS (Apple GPU) backend.
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()

Processing Batches: 100%|██████████| 8200/8200 [1:33:11<00:00,  1.47batch/s]
Processing Batches: 100%|██████████| 8200/8200 [1:33:52<00:00,  1.46batch/s]
Processing Batches: 100%|██████████| 8200/8200 [1:30:17<00:00,  1.51batch/s]
Processing Batches: 100%|██████████| 8200/8200 [1:31:37<00:00,  1.49batch/s]
Processing Batches: 100%|██████████| 8199/8199 [1:33:16<00:00,  1.47batch/s]


In [None]:
# Load the last file (15) for processing outside the loop.
# NOTE(review): this path has no `2_` prefix, unlike every other input file in this
# notebook (`../data/2_beer_reviews_with_ids_{i}.csv`) — confirm the file name
# before running; this cell has not been executed.
input_file_path = '../data/beer_reviews_with_ids_15.csv'

# Read the reviews and preview the first rows.
df = pd.read_csv(input_file_path)
df.head()

In [None]:
# Instantiate the sentiment model from its saved location.
sentiment_model = SentimentAnalysisModel(model_path="../../models/sentiment_model_1")

# Wrap the review texts (cast to str) in a dataset and serve them in their
# original order, so the scores can be attached back to the frame as a column.
batch_size = 16
dataset = CommentsDataset(df["text"].astype(str).tolist())
loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=12,
    pin_memory=True,
    persistent_workers=True,
)
data_loader = DataLoader(dataset, **loader_kwargs)

# Score every batch and store the result next to the text it came from.
# Assumes process_all_batches returns one value per input text, in order — confirm
# in sentiment_utils.
df["sentiment_bert"] = process_all_batches(data_loader, sentiment_model)
df.head()

In [None]:
# Save the frame for file 15 to CSV; index=False keeps the row index out of the file.
# NOTE(review): this path has no `2_` prefix, unlike the other output files in this
# notebook (`../data/2_beer_reviews_with_sent_{i}.csv`) — confirm the intended name.
output_file_path = '../data/beer_reviews_with_sent_15.csv'
df.to_csv(output_file_path, index=False)

In [None]:
# Release memory left over from scoring.
# Collect Python garbage first: tensors that are only kept alive by reference
# cycles must be freed before the allocator can hand their blocks back.
gc.collect()
# Then return the now-unoccupied cached blocks of the MPS allocator. Guarded so the
# cell also runs on machines without an MPS (Apple GPU) backend.
if torch.backends.mps.is_available():
    torch.mps.empty_cache()