# Movie Ratings Sentiment Analysis

### **Import Libraries**

In [15]:
import random
import numpy as np
import pandas as pd
import torch
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm

from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
from nltk.tokenize import TweetTokenizer

In [16]:
###
# common functions
###
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    The same value is applied to Python's ``random`` module, NumPy's global
    generator, and PyTorch (CPU and all CUDA devices).

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every GPU rather than only the current device and
    # is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)

def count_param(module, trainable=False):
    """Return the total number of scalar parameters held by ``module``.

    Args:
        module: Object exposing ``parameters()``, each item providing
            ``numel()`` and ``requires_grad``.
        trainable: When truthy, only parameters with ``requires_grad`` set
            are counted; otherwise every parameter is counted.

    Returns:
        The summed element count as an int.
    """
    params = module.parameters()
    if trainable:
        # Keep only the parameters that will receive gradients.
        params = (param for param in params if param.requires_grad)
    return sum(param.numel() for param in params)

def get_lr(optimizer):
    """Return the ``'lr'`` entry of the optimizer's first parameter group.

    Args:
        optimizer: Object exposing an iterable ``param_groups`` of dict-like
            groups.

    Returns:
        The first group's ``'lr'`` value, or ``None`` when there are no
        parameter groups.
    """
    # Only the first group is ever inspected; later groups are ignored.
    return next((group['lr'] for group in optimizer.param_groups), None)

def metrics_to_string(metric_dict):
    """Format a metrics mapping as a single space-separated string.

    Each entry is rendered as ``key:value`` with the value shown to two
    decimal places, in the mapping's iteration order.

    Args:
        metric_dict: Mapping from metric name to a numeric value.

    Returns:
        The joined string; empty when the mapping is empty.
    """
    template = '{}:{:.2f}'
    return ' '.join(
        template.format(name, score) for name, score in metric_dict.items()
    )

In [17]:
# Set random seed
# Keep the seed in a named constant so later cells can reuse the same value
# instead of repeating the literal.
SEED = 25012025
set_seed(SEED)

In [19]:
# Load Tokenizer and Config
# One checkpoint id is shared by the tokenizer, config and model so they
# cannot drift apart.
MODEL_NAME = 'google-bert/bert-base-uncased'
# Number of target classes. NOTE(review): the dataset preview shows labels
# 0 and 1, so two classes are assumed -- confirm against the full label column.
NUM_LABELS = 2

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
config = BertConfig.from_pretrained(MODEL_NAME, num_labels=NUM_LABELS)

# Instantiate model
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, config=config)

Error while downloading from https://cdn-lfs.hf.co/bert-base-uncased/68d45e234eb4a928074dfd868cead0219ab85354cc53d20e772753c6bb9169d3?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1737888650&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczNzg4ODY1MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9iZXJ0LWJhc2UtdW5jYXNlZC82OGQ0NWUyMzRlYjRhOTI4MDc0ZGZkODY4Y2VhZDAyMTlhYjg1MzU0Y2M1M2QyMGU3NzI3NTNjNmJiOTE2OWQzP3Jlc3BvbnNlLWNvbnRlbnQtZGlzcG9zaXRpb249KiJ9XX0_&Signature=prBykTquNIRJLuXuTUZSXKXGwgX-3c3u5tTfgXesiUc2wDcMq-pRhXZmpp%7ELF-7xI5lgDj9e1zMlbgQR7dbXEwaFtzo9IJNEl25kZeDxd4WlpU2Fo7boRjA1HFN6kIuqU0A%7EmNdwIp8oNFQwe12PlXlBTp115bn2zPlQwa42fP5MHjHD1sshCzFFh%7EpQC4CFWK2-CH9f2wLz8VCqrfE20k9Y1hQpB1PubzidXGgMD2NA3nEWgTKGevuBoxCD6LfdT4wcokErAY6T%7E31Umu9sKf7YnP0nqddcByLciVORnwn3GLPA6OYCwQwHQa%7EkREHlHDnun9EOmcUv2AJ8KWdPDA__&Key-Pair-Id=K3RPWS32NSSJCE: HTTPSConnectionPool(host=

KeyboardInterrupt: 

## Data Acquisition

### Load Dataset

In [1]:
import kagglehub

# Fetch the dataset through kagglehub; `path` is the local directory that
# holds the downloaded files and is reused by later cells.
DATASET_HANDLE = "yasserh/imdb-movie-ratings-sentiment-analysis"
path = kagglehub.dataset_download(DATASET_HANDLE)

print(f"Path to dataset files: {path}")

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/yasserh/imdb-movie-ratings-sentiment-analysis?dataset_version_number=1...


100%|██████████| 20.6M/20.6M [00:17<00:00, 1.22MB/s]

Extracting files...





Path to dataset files: /home/ardi/.cache/kagglehub/datasets/yasserh/imdb-movie-ratings-sentiment-analysis/versions/1


In [13]:
# Locate the CSV inside the downloaded dataset directory and load it.
DATASET_FILE = "movie.csv"
dataset_path = f"{path}/{DATASET_FILE}"
df = pd.read_csv(dataset_path)

# Preview the first rows as the cell's displayed result.
df.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1
