In [1]:
!pip install rouge-score
!pip install --upgrade openpyxl
!pip install pandas openpyxl



In [2]:
import os
import random

import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Hub identifier of the instruction-tuned checkpoint used for every prompt below.
model_name = "Qwen/Qwen2.5-7B-Instruct"

# Load the causal-LM weights; dtype and device placement are both left on "auto".
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Load the tokenizer published under the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.66s/it]
Some parameters are on the meta device because they were offloaded to the disk.


In [4]:
# CSV holding the comments to label (a "Comment" column is read from it later).
# Set the IPL_COMMENTS_CSV environment variable to point at a different file or
# machine; the original local path stays as the fallback so existing runs are unchanged.
COMMENTS_CSV = os.environ.get(
    "IPL_COMMENTS_CSV",
    "/Users/darshan/Documents/GitHub/ipl-sentiment-trader/reddit/2024/1.csv",
)
df = pd.read_csv(COMMENTS_CSV)

In [5]:
def generate_model_response(system_prompt, user_prompt, max_new_tokens=512, **generate_kwargs):
    """Send one system + user chat turn to the loaded model and return its reply.

    Uses the notebook-level ``model`` and ``tokenizer`` objects.

    Args:
        system_prompt: Text placed in the ``system`` message.
        user_prompt: Text placed in the ``user`` message.
        max_new_tokens: Upper bound on the number of tokens generated for the
            reply. Defaults to 512, the value that used to be hard-coded.
        **generate_kwargs: Extra keyword arguments forwarded unchanged to
            ``model.generate`` (for example ``do_sample=False``).

    Returns:
        The decoded reply with special tokens skipped, surrounding whitespace
        stripped, and converted to lower case.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Render the conversation as plain text (not token ids) and append the
    # generation prompt so the model continues with its own turn.
    prompt_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    model_inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        **generate_kwargs,
    )

    # Keep only what follows the prompt: slice each output sequence past the
    # length of its corresponding input sequence.
    completion_ids = [
        full_ids[len(prompt_ids):]
        for prompt_ids, full_ids in zip(model_inputs.input_ids, output_ids)
    ]

    replies = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)
    return replies[0].strip().lower()

In [6]:
def analyze_comment_sentiments(dataset, sample_size=50, seed=None):
    """Label comments from ``dataset["Comment"]`` with a model-predicted sentiment.

    Missing comments are dropped. Each remaining (optionally sampled) comment is
    sent to ``generate_model_response``; a comment whose call raises is reported
    on stdout and skipped, so one failure does not abort the run.

    Args:
        dataset: DataFrame with a ``"Comment"`` column.
        sample_size: Number of comments to draw at random. A falsy value
            (``None`` or ``0``) processes every comment in order.
        seed: Optional seed for the sampling step. When given, the same
            comments are drawn on every call; when ``None`` the module-level
            ``random`` generator is used, as before.

    Returns:
        DataFrame with columns ``comment`` and ``sentiment``, one row per
        successfully labelled comment. The columns are present even when no
        comment was labelled, so downstream ``results['sentiment']`` lookups
        do not fail on an empty result.
    """
    comments_list = dataset["Comment"].dropna().tolist()

    if sample_size:
        # A dedicated generator keeps seeded runs reproducible without touching
        # the global random state.
        rng = random.Random(seed) if seed is not None else random
        comments_list = rng.sample(comments_list, min(sample_size, len(comments_list)))

    # Identical for every comment, so it is built once outside the loop.
    system_prompt = (
        "You will be analyzing sentiment from Indian Premier League (IPL) 2024 "
        "cricket match comments from the same Reddit thread. "
        "The comments are timestamped. "
        "Some comments may be made by trolls and not connected to the game."
    )

    sentiments = []
    exceptions = 0

    for comment in comments_list:
        try:
            # Built from adjacent literals so source indentation does not leak
            # into the text sent to the model.
            user_prompt = (
                "What is the sentiment expressed in the following IPL match thread comment?\n"
                "Select sentiment value from positive, negative, neutral, or irrelevant. "
                "Return only the sentiment value.\n"
                f"Comment: {comment}"
            )
            sentiment = generate_model_response(system_prompt, user_prompt)
        except Exception as e:
            # Best-effort: report the failure and move on to the next comment.
            exceptions += 1
            print("===================")
            print("Exception occurred:", e)
            print("Total exception count:", exceptions)
            continue

        sentiments.append({
            'comment': comment,
            'sentiment': sentiment
        })
        print(f"Processed comment {len(sentiments)}: {sentiment}")

    return pd.DataFrame(sentiments, columns=['comment', 'sentiment'])

In [7]:
# Label a random sample of 50 comments from the loaded frame.
results = analyze_comment_sentiments(df, sample_size=50)

# Count each label once and reuse the counts for both summaries.
sentiment_counts = results['sentiment'].value_counts()
print("\nSentiment Distribution:")
print(sentiment_counts)

# Share of each label among all labelled comments, as a percentage with two decimals.
sentiment_percentages = (sentiment_counts / len(results) * 100).round(2)
print("\nSentiment Distribution (%):")
print(sentiment_percentages)

Processed comment 1: negative
Processed comment 2: negative
Processed comment 3: neutral
Processed comment 4: irrelevant
Processed comment 5: negative
Processed comment 6: negative
Processed comment 7: irrelevant
Processed comment 8: irrelevant
Processed comment 9: neutral
Processed comment 10: irrelevant
Processed comment 11: negative
Processed comment 12: negative
Processed comment 13: negative
Processed comment 14: negative
Processed comment 15: negative
Processed comment 16: neutral
Processed comment 17: negative
Processed comment 18: neutral
Processed comment 19: positive
Processed comment 20: neutral
Processed comment 21: negative
Processed comment 22: negative
Processed comment 23: positive
Processed comment 24: neutral
Processed comment 25: irrelevant
Processed comment 26: negative
Processed comment 27: neutral
Processed comment 28: negative
Processed comment 29: positive
Processed comment 30: neutral
Processed comment 31: neutral
Processed comment 32: neutral
Processed comment

In [8]:
# Preview the first rows of the labelled comments (comment text and predicted sentiment).
results.head()

Unnamed: 0,comment,sentiment
0,Why not santner bruh seriously,negative
1,Deserved mf had no other plans for him,negative
2,Shastri with his mandatory 2 pegs,neutral
3,Where is the skip button??,irrelevant
4,Rawat was flabbergasted,negative
