In [38]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import datetime as dt
import warnings
import torch
from datasets import load_from_disk
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import (LoraConfig, AutoPeftModelForCausalLM, PeftModel)
from trl import SFTTrainer
import os

In [39]:
## Load the two curated comment tiers from their pickle files
# (pickle executes code while loading, so only point these at trusted files)
Tier_1 = pd.read_pickle('./curated_datasets/Tier_1.pickle')
Tier_2 = pd.read_pickle('./curated_datasets/Tier_2.pickle')

# Drop every Tier 1 row whose 'sub' value is 'politics'.
# Open idea from the original note: the politics comments could be kept and the
# users randomly sampled instead to get what is needed.
Tier_1 = Tier_1.loc[Tier_1['sub'].ne('politics')]

In [40]:
# Optional extra filter: keep only Tier 1 comments written by authors who have
# at least MIN_COMMENTS_PER_AUTHOR comments in Tier_1.
MIN_COMMENTS_PER_AUTHOR = 10

# Per-author comment counts as a two-column frame: 'author', 'count'.
# The column names are pinned explicitly via rename_axis / reset_index(name=...)
# because the labels produced by a bare value_counts().reset_index() differ
# between pandas versions (pandas 2.0 changed them), which would make the
# ['count'] lookup below raise KeyError on older installs.
Tier_1_10plus = (
    Tier_1['author']
    .value_counts()
    .rename_axis('author')
    .reset_index(name='count')
)
Tier_1_10plus = Tier_1_10plus[Tier_1_10plus['count'] >= MIN_COMMENTS_PER_AUTHOR]

# Tier 1 comments restricted to the authors that passed the threshold.
Tier_1_a10plus = Tier_1.loc[Tier_1['author'].isin(Tier_1_10plus['author'])]
Tier_1_a10plus

Unnamed: 0,Index,id,author,created_utc,parent_id,link_id,body,Y,YM,sub,subreddit_id,Bias,Cred,submission_body
948718,948718,cyi64u7,ultimis,1451638295,t3_3yz5f1,3yz5f1,I read recently on this subject that in politi...,2016,2016-01,Conservative,t5_2qh6p,0.674677,0.384738,Donald Trump Is Smart To Remind Voters Of Clin...
948757,948757,cyi9cdm,Abakala,1451654458,t3_3z0cfe,3z0cfe,This is what I've been saying for a long time....,2016,2016-01,Conservative,t5_2qh6p,0.504332,0.588008,Krauthammer's Take: Trump Promising 'Success W...
948775,948775,cyiau4n,MiyegomboBayartsogt,1451660278,t3_3yzt2a,3yzt2a,"Seemingly without effort, Donald Trump's presi...",2016,2016-01,Conservative,t5_2qh6p,0.468367,0.503747,Black Power Extremist Louis Farrakhan Warns Hi...
948826,948826,cyigkcw,VirginWizard69,1451673375,t3_3z0z09,3z0z09,I loves me some Whittle. \n\n,2016,2016-01,Conservative,t5_2qh6p,0.706065,0.293663,WATCH: Bill Whittle’s last monologue of the ye...
948830,948830,cyih1q0,VirginWizard69,1451674231,t3_3yzaci,3yzaci,Good read.,2016,2016-01,Conservative,t5_2qh6p,0.635442,0.446968,What You Need to Know About “The Big Short” (m...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394169,394169,ecz1x66,jackgoffigen,1546293621,t3_abb6fg,abb6fg,"Silly sheriff, stealing federal money is for p...",2018,2018-12,Republican,,-0.538336,0.557574,Alabama sherif steals $1.5 million dollars of ...
394267,394267,eczrshg,Stephanstewart101,1546317127,t3_abb6fg,abb6fg,Is it really stealing if you are taking it fro...,2019,2019-01,Republican,,-0.538336,0.557574,Alabama sherif steals $1.5 million dollars of ...
394467,394467,ed1n60n,jackgoffigen,1546386807,t3_abc35u,abc35u,Would you trump hating liberals stop commentin...,2019,2019-01,Republican,,0.480137,0.589825,Republican Congressman Calls on Trump to Use O...
394489,394489,ed2d2fa,wowokletstalkabit,1546411599,t3_abb6fg,abb6fg,What he did was legal..,2019,2019-01,Republican,,-0.538336,0.557574,Alabama sherif steals $1.5 million dollars of ...


In [41]:
### Pin the notebook to one physical GPU
# CUDA_VISIBLE_DEVICES has to be in place before torch (or a library that asks
# torch for the GPU count) first looks at the GPUs. In this notebook the
# variable is assigned *after* `import torch`, and the traceback saved with this
# cell (device=1 rejected during lazy CUDA initialisation) is consistent with
# torch holding a stale GPU count. The durable fix is to move the assignment to
# the very top of the notebook, above `import torch`; the checks below make the
# failure explicit instead of cryptic when that has not been done.
CUDA_DEVICE_ID = "5"

if os.environ.get("CUDA_VISIBLE_DEVICES") != CUDA_DEVICE_ID:
    if torch.cuda.is_initialized():
        # Once the CUDA context exists, changing the variable has no effect.
        raise RuntimeError(
            "CUDA is already initialised in this kernel, so CUDA_VISIBLE_DEVICES can "
            f"no longer be switched to {CUDA_DEVICE_ID!r}. Restart the kernel and set "
            "the variable before importing torch."
        )
    os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_DEVICE_ID

# With a single id exported, torch must see exactly one GPU. Any other number
# means the count was taken before the variable was set, or the id is invalid.
visible_gpu_count = torch.cuda.device_count()
if visible_gpu_count != 1:
    raise RuntimeError(
        f"Expected exactly 1 visible GPU with CUDA_VISIBLE_DEVICES={CUDA_DEVICE_ID!r}, "
        f"but torch reports {visible_gpu_count}. Restart the kernel, set "
        "CUDA_VISIBLE_DEVICES before importing torch, and check that the GPU id exists."
    )

print(f'running on cuda device {torch.cuda.current_device()}')

### Loading the Llama2 7b - chat model
base_model_name = "NousResearch/Llama-2-7b-chat-hf"

# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally; it is probably unnecessary for a stock Llama tokenizer - confirm
# and drop it if so.
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
# Reuse the end-of-sequence token for padding and pad on the right.
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"

# 4-bit NF4 quantisation with float16 compute and no double quantisation.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map='sequential'
    #max_memory=max_memory,
)
# NOTE(review): use_cache=False / pretraining_tp=1 look like fine-tuning
# settings; this cell only builds an inference pipeline - confirm they are wanted.
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

# Show which device each part of the model was placed on.
print(base_model.hf_device_map)

# Text-generation pipeline limited to 5 new tokens per call.
base_text_gen = pipeline(task="text-generation", model=base_model, tokenizer=llama_tokenizer, max_new_tokens=5)

# System prompt for stance detection.
# NOTE(review): the instructions ask for the stance towards the *parent*, while
# the option descriptions talk about the stance towards the *post* - confirm
# which one is intended before relying on the labels.
query_head = "You are a helpful, respectful, and honest assistant that detects the stance of a comment with respect to its parent. Stance detection is the process of determining whether the author of a comment is in support of or against a given parent. You are provided with:\n post: the text you that is the root of discussion.\n parent:  the text which the comment is a reply towards.\n comment: text that you identify the stance from.\n\nYou will return the stance of the comment against the parent. Only return the stance against the parent and not the original post. Always answer from the possible options given below: \n support: The comment has a positive or supportive attitude towards the post, either explicitly or implicitly. \n against: The comment opposes or criticizes the post, either explicitly or implicitly. \n none: The comment is neutral or does not have a stance towards the post. \n unsure: It is not possible to make a decision based on the information at hand."
#query = "<SYS> query_head </SYS>" + "\n\n" + "post: " + row['submission_text'] + "\n" + "parent: " + row['body_parent'] + "\n" + "comment: " + row['body_child'] + "\n" + "stance: "
#query = "[INST] " + query + "[/INST]"

# Disabled smoke test of the base model, kept for reference. It was previously
# parked inside a bare triple-quoted string, which the notebook would echo as
# the cell's output value.
#
# query_tail="""post: Trump praises very smart Putin
# comment: Maybe Trump is what democrats and republicans need to heal their relationship and start acting like adults again. We have a common cause: keeping this guy from doing permanent damage to a country we love.  The extremism that has entered US politics is stopping us from actually finding any common ground. \n\nIt's a big MAYBE, but it's possible.
# stance:"""
#
# query_head = query_head.strip()
#
# output = base_text_gen(f"<s>[INST] {query_head + query_tail} [/INST]")
# print(output[0]['generated_text'])

DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1702400430266/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=1, num_gpus=

CUDA call was originally invoked at:

  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/traitlets/config/application.py", line 1077, in launch_instance
    app.start()
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 737, in start
    self.io_loop.start()
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
    self._run_once()
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
    handle._run()
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 524, in dispatch_queue
    await self.process_one()
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 513, in process_one
    await dispatch(*args)
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 418, in dispatch_shell
    await result
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 758, in execute_request
    reply_content = await reply_content
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 426, in do_execute
    res = shell.run_cell(
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
    return super().run_cell(*args, **kwargs)
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell
    result = self._run_cell(
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell
    result = runner(coro)
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_131853/1238825697.py", line 7, in <module>
    import torch
  File "<frozen importlib._bootstrap>", line 1176, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1147, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/torch/__init__.py", line 1332, in <module>
    _C._initExtension(manager_path())
  File "<frozen importlib._bootstrap>", line 1176, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1147, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/torch/cuda/__init__.py", line 244, in <module>
    _lazy_call(_check_capability)
  File "/home/yeb96/reddit-misinformation/reddit-misinformation/.conda/lib/python3.11/site-packages/torch/cuda/__init__.py", line 241, in _lazy_call
    _queued_calls.append((callable, traceback.format_stack()))
