In [None]:
# !pip install transformers==4.38.1
# !pip install accelerate==0.27.2

In [None]:
# Other: 
# !pip install torch==2.0.1 accelerate==0.15.0 transformers==4.28.1
# !conda install pytorch torchvision torchaudio cudatoolkit=11.7 -c pytorch

In [None]:
import pandas as pd

# Local
import api_keys
from huggingface_hub import login
login(token=api_keys.huggingface)

import torch

# Prefer the first GPU when one is visible to torch; otherwise run on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Report what this kernel can see so capacity problems surface before a model is loaded.
if torch.cuda.is_available():
    # Print number of GPUs available
    print("Number of GPUs available:", torch.cuda.device_count())

    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        print(f"GPU {i}:")
        print(f"\tName: {torch.cuda.get_device_name(i)}")
        # Interpolate the value directly: wrapping it in print() inside the
        # f-string printed the version on its own line and then "None" here.
        print(f"\tCuda version: {torch.version.cuda}")
        print(f"\tCompute Capability: {torch.cuda.get_device_capability(i)}")
        print(f"\tTotal Memory: {props.total_memory / 1e9} GB")
        print(props)
        # Additional details can be accessed via `torch.cuda.get_device_properties(i)`

else:
    print("CUDA is not available. Please check your installation and if your hardware supports CUDA.")



import ast
import json
import re


def _parse_object(text, fallback):
    """Parse `text` as JSON or as a Python literal; return `fallback` if neither works."""
    try:
        return json.loads(text)
    except ValueError:
        # Not strict JSON (e.g. single-quoted keys) -- try a Python literal next.
        pass
    try:
        # literal_eval only builds literals, so generated text can never
        # execute code the way eval() could.
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return fallback


def obtain_json(responses):
    """Extract the first ``{...}`` object from each model response.

    Parameters
    ----------
    responses : iterable of str
        Raw generated text, one entry per document.

    Returns
    -------
    list
        One entry per response, in input order: the parsed object when a
        ``{...}`` span is found and can be parsed, otherwise the original
        response string unchanged.
    """
    # DOTALL lets the span cross line breaks, so pretty-printed objects match too.
    pattern = re.compile(r'\{.*?\}', re.DOTALL)
    jsons = []
    for response in responses:
        match = pattern.search(response)
        if match is None:
            jsons.append(response)
        else:
            jsons.append(_parse_object(match.group(0), fallback=response))
    return jsons



def find_json_in_string(string: str) -> str:
    """Return the outermost ``{...}`` span found in a string.

    Parameters
    ----------
    string : str
        The string to search for a JSON object.

    Returns
    -------
    json_string : str
        The text from the first "{" through the last "}" (inclusive), or
        "{}" when the string holds no opening brace followed by a closing one.
    """
    start = string.find("{")
    end = string.rfind("}")
    # The closing brace must come after the opening one; otherwise the slice
    # would be empty rather than the documented "{}" placeholder.
    if start == -1 or end < start:
        return "{}"
    return string[start : end + 1]

In [None]:
!python3 -m nvitop -1

In [None]:
# Drop a tokenizer left over from an earlier run; on a fresh kernel the name
# does not exist, which is the only error worth ignoring here.
try:
    del tokenizer
except NameError:
    pass
torch.cuda.empty_cache()
!python3 -m nvitop -1


In [None]:
# TODO: See how they use it for text classification: (from probs or output layer directly?)
# https://colab.research.google.com/github/bigscience-workshop/petals/blob/main/examples/prompt-tuning-sst2.ipynb
import transformers
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Leave exactly one checkpoint uncommented.
# model_name = "google/gemma-2b-it"
model_name = "google/gemma-7b-it"
# model_name = "paulml/OGNO-7B"
# model_name = 'microsoft/phi-2'
# model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Rebinding `model` first drops the reference to any previously loaded model
# (presumably so its memory can be reclaimed before the next load).
model = 0

if 'gemma' in model_name:
    # Gemma
    # device_map="auto" already places the weights on the available devices,
    # so no follow-up .to(device) is needed (moving a dispatched model only
    # triggers an accelerate warning, or an error when layers are offloaded).
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True)
elif 'llama' in model_name:
    # Have to restart session after updating transformers
    from transformers import AutoTokenizer, LlamaForCausalLM
    model = LlamaForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True)
elif "paulml/OGNO-7B" in model_name:
    # This checkpoint is driven through a text-generation pipeline; `model` stays a placeholder.
    pipeline = transformers.pipeline(
        "text-generation",
        model=model_name,
        torch_dtype=torch.float16,
        device_map="auto",
    )
elif "microsoft/phi-2" in model_name:
    model = AutoModelForCausalLM.from_pretrained(model_name,  trust_remote_code=True,torch_dtype='auto', low_cpu_mem_usage=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
else:
    # Fail here rather than later with a confusing error on the `model = 0` placeholder.
    raise ValueError(f"No loading branch defined for model_name={model_name!r}")



    
    
    
    


# # alternative
# # Llama
# pipeline = transformers.pipeline(
#     "text-generation",
#     model=model,
#     torch_dtype=torch.float16,
#     device_map="auto",
# )

# sequences = pipeline(
#     prompt,
#     do_sample=True,
#     top_k=10,
#     num_return_sequences=1,
#     eos_token_id=tokenizer.eos_token_id,
#     max_length=200,
# )
# for seq in sequences:
#     print(f"Result: {seq['generated_text']}")

In [None]:
!python3 -m nvitop -1


In [None]:
!nvidia-sim

In [None]:


import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

pd.set_option("display.max_columns", None)

# Which machine the notebook runs on; selects the data directories below.
location = 'openmind'

if location == 'openmind':
    input_dir = '/nese/mit/group/sig/projects/dlow/ctl/datasets/train10_subset_30/'
    # NOTE(review): no leading slash, so this resolves relative to the working
    # directory -- confirm that is intended.
    output_dir = 'home/dlow/'
elif location == 'local':
    input_dir = './data/ctl/'
    output_dir = '/home/dlow/datum/lexicon/data/output/'
else:
    # Without this, a typo in `location` only shows up later as a NameError on input_dir.
    raise ValueError(f"Unknown location {location!r}; expected 'openmind' or 'local'.")


train = pd.read_csv(input_dir + 'train10_train_30perc_messages_texter_metadata.csv')
test = pd.read_csv(input_dir + 'train10_test_15perc_messages_texter_metadata.csv')

In [None]:
# The 13 tag names used throughout this notebook as label columns of the
# metadata frames (e.g. `df_i[ctl_tags13]`).
ctl_tags13 = (
    "self_harm suicide bully abuse_physical abuse_sexual relationship "
    "bereavement isolated anxiety depressed gender eating substance"
).split()

In [None]:
# cols = ctl_tags13+['message']
# test_tags = test[cols]

# for tag in ctl_tags13:
#     test_tag_i_1 = test_tags[test_tags[tag]==1]
#     test_tag_i_0 = test_tags[test_tags[tag]==0]
    
    

    
    
    


In [None]:

def create_binary_dataset(df_metadata, dv = 'suicide', n_per_dv = 3000, random_state = 123):
    """Build a balanced, shuffled binary dataset for one label column.

    Parameters
    ----------
    df_metadata : pandas.DataFrame
        Source rows; must contain the column named by `dv` with values 0/1.
    dv : str
        Name of the label column to balance on.
    n_per_dv : int
        Number of rows to draw for each of the two label values.
    random_state : int
        Seed used for both the per-class sampling and the final shuffle.

    Returns
    -------
    pandas.DataFrame
        ``2 * n_per_dv`` rows, half with ``dv == 1`` and half with ``dv == 0``,
        in a reproducible shuffled order with a fresh 0..N-1 index.

    Raises
    ------
    ValueError
        If either class has fewer than `n_per_dv` rows.
    """
    positives = df_metadata[df_metadata[dv] == 1]
    negatives = df_metadata[df_metadata[dv] == 0]
    for label_value, subset in ((1, positives), (0, negatives)):
        if len(subset) < n_per_dv:
            raise ValueError(
                f"Only {len(subset)} rows with {dv} == {label_value}; cannot sample {n_per_dv}."
            )

    balanced = pd.concat([
        positives.sample(n=n_per_dv, random_state=random_state),
        negatives.sample(n=n_per_dv, random_state=random_state),
    ])
    # Seed the shuffle as well: an unseeded sample(frac=1) gave a different
    # row order on every call even though the drawn rows were fixed.
    return balanced.sample(frac=1, random_state=random_state).reset_index(drop=True)


# for dv in ctl_tags13:
# Label column evaluated in the rest of the notebook.
dv = 'suicide'
# Balanced sample from the test split: 100 rows per label value.
df_i = create_binary_dataset(test, dv=dv, n_per_dv=100)
# Number of positives per tag within the sample.
df_i.loc[:, ctl_tags13].sum()


In [None]:
6*150/60, 'minutes'

In [None]:
documents = [
    "I need help. I've been feeling depressed for a while because I work nights and it's taking a toll on my mental health. I'm scheduled to work tonight and the next 2 nights, but I don't think I can go in tonight. I'm having suicidal thoughts and won't be able to focus on my job. I'm worried about calling off since I'm already at the [scrubbed] amount of absences, and I don't want to get fired. I've attempted suicide a couple times in the past, and I've taken pills each time. I do have access to them, but I don't plan on taking anything. I want to avoid hurting myself because I know it doesn't help and it only makes my family worry. I just have such a hard time going to work when I feel this bad. I just don't know what to do about work. I know my safety is more important but I  don't want to lose my job, I already am struggling financially.",
        "I need help. I've been feeling depressed for a while because I work nights and it's taking a toll on my mental health. I'm scheduled to work tonight and the next 2 nights, but I don't think I can go in tonight. I'm having suicidal thoughts and won't be able to focus on my job. I'm worried about calling off since I'm already at the [scrubbed] amount of absences, and I don't want to get fired. I've attempted suicide a couple times in the past, and I've taken pills each time. I do have access to them, but I don't plan on taking anything. I want to avoid hurting myself because I know it doesn't help and it only makes my family worry. I just have such a hard time going to work when I feel this bad. I just don't know what to do about work. I know my safety is more important but I  don't want to lose my job, I already am struggling financially.",
    
#     "talk. done. okay thank you so much. there's a lot so i'm sorry if this is to much , but me and my grandma have been argue a lot and it's been going on sense i was 6-7 i believe , this morning i woke up and my ex boyfriend told me he likes my bestfriend . . also my dad committed suicide back in 2017 , he meant the world to me and i have just been thinking about giving up alot lately , there's always being suicide thoughts in my mind sense my dad passed , i go to counseling and therapy and i take medication , i have smoked weird and vapes and that was a horrible Decision to make .. i have smoked weed *. i hate myself more and more every day from smoking weed and vapes .. yea , i have f's in every single one of my classes except gym and i feel like a failure of a student , i'm really trying to get my [scrubbed]s up but at some points i just gi be up cus i feel like i'll never make it to the 8th [scrubbed]. give up *. yes i do , many times i think about it but i'm scared to do it . i don't wanna hurt my family and friends . i don't really have a plan , i've tried at one time tho i would cut my self . my dad hung himself ... i haven't in about 3 weeks. of course my name is [scrubbed]. yes it does. yea , i just really miss my old relationship with my grandma . and i miss my boyfriend . ex now but he always made me happy literally every little thing he did always made me happy . just the little things mattered to me . every time i was with him i was so happy and i just don't know what to do anymore . i never thought i would lose him .. well here's one more thing . i live with my grandma and grandpa and my uncle sexual assaulted me when he lived here .. my grandma only knows and it's my grandpas son , that kills me every day i was only 3 and he was 13 . im sorry if this is tmi but that's what happened and he tried to kiss me in walmart . i'm sorry , i was eating. watch tik [scrubbed] and play games. yea. okay. a little better. thank you.",
#     "talk. done. okay thank you so much. there's a lot so i'm sorry if this is to much , but me and my grandma have been argue a lot and it's been going on sense i was 6-7 i believe , this morning i woke up and my ex boyfriend told me he likes my bestfriend . . also my dad committed suicide back in 2017 , he meant the world to me and i have just been thinking about giving up alot lately , there's always being suicide thoughts in my mind sense my dad passed , i go to counseling and therapy and i take medication , i have smoked weird and vapes and that was a horrible Decision to make .. i have smoked weed *. i hate myself more and more every day from smoking weed and vapes .. yea , i have f's in every single one of my classes except gym and i feel like a failure of a student , i'm really trying to get my [scrubbed]s up but at some points i just gi be up cus i feel like i'll never make it to the 8th [scrubbed]. give up *. yes i do , many times i think about it but i'm scared to do it . i don't wanna hurt my family and friends . i don't really have a plan , i've tried at one time tho i would cut my self . my dad hung himself ... i haven't in about 3 weeks. of course my name is [scrubbed]. yes it does. yea , i just really miss my old relationship with my grandma . and i miss my boyfriend . ex now but he always made me happy literally every little thing he did always made me happy . just the little things mattered to me . every time i was with him i was so happy and i just don't know what to do anymore . i never thought i would lose him .. well here's one more thing . i live with my grandma and grandpa and my uncle sexual assaulted me when he lived here .. my grandma only knows and it's my grandpas son , that kills me every day i was only 3 and he was 13 . im sorry if this is tmi but that's what happened and he tried to kiss me in walmart . i'm sorry , i was eating. watch tik [scrubbed] and play games. yea. okay. a little better. thank you.",
]
constructs = [
    'Sexual abuse',
    'suicidal ideation',
    'substance use'
#     'relationship issues',
#     'suicidal ideation'
]

In [None]:
# Maps each dataset tag column to the construct wording inserted into the prompt.
# An empty string marks a tag whose wording has not been chosen yet.
# (The earlier dict(zip(...)) initialisation was overwritten immediately, so it is dropped.)
prompt_names = {
    'self_harm': '',
    'suicide': 'suicidal ideation',
    'bully': '',
    'abuse_physical': '',
    'abuse_sexual': '',
    'relationship': '',
    'bereavement': '',
    'isolated': '',
    'anxiety': '',
    'depressed': '',
    'gender': '',
    'eating': '',
    'substance': '',
}

In [None]:

documents = df_i['message'].values
y_test = df_i[dv].values
construct = prompt_names.get(dv)
# A missing or empty wording would silently build a prompt about "" --
# stop here instead of spending generation time on it.
if not construct:
    raise ValueError(f"No prompt wording defined in prompt_names for tag {dv!r}.")
print(construct)


In [None]:
# "related to" vs "might be expressing". 

In [None]:

# Upper bound on total sequence length (prompt + generated tokens) passed to generate().
max_length = 2500

# documents = ['No one cares about me']
import time
# tqdm is used below but was never imported anywhere in the notebook.
from tqdm.auto import tqdm
responses = []
time_elapsed_all = []

# zip() has no length, so pass the total explicitly to get a real progress bar.
for document, y_test_i in tqdm(zip(documents, y_test), total=len(documents)):

    start = time.time()

    prompt = """
    You are a text classification assistant for a {context}.

    The text to classify is:
    ```
    {document}
    ```
    
    

    Assign a probability for following labels and return using this JSON format (do not provide additional notes, explanations or warnings, just that JSON output):

    {{'At least one of the sentences in text is related to {construct}': <your_probability>, 'None of the sentences in text are related to {construct}': <your_probability>}}
    

    json:
    """


    prompt = prompt.format(context = 'Crisis Text Line service', document = document, construct = construct)
    print(prompt)

    # Every branch keeps only the newly generated text: the prompt itself
    # contains a {...} template, which JSON extraction would otherwise pick up first.
    if 'gemma' in model_name:
        # Gemma
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(**input_ids, max_length = max_length)
        # Find the length of the input_ids to know where the original prompt ends
        prompt_length = input_ids["input_ids"].shape[1]
        # Decode only the generated part, skipping the prompt
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    elif 'llama' in model_name:
        # Inputs must live on the same device as the model's first layer.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Generate
        generate_ids = model.generate(inputs.input_ids, max_length=max_length)
        prompt_length = inputs.input_ids.shape[1]
        response = tokenizer.batch_decode(generate_ids[:, prompt_length:], skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    elif "paulml/OGNO-7B" in model_name:
        messages = [{"role": "user", "content": prompt}]
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # return_full_text=False makes the pipeline return only the completion.
        outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, return_full_text=False)
        response = outputs[0]["generated_text"]
    elif 'microsoft/phi-2' in model_name:
        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)

        # NOTE(review): max_length=200 counts the prompt tokens too; the prompt
        # alone may exceed it -- confirm whether max_new_tokens was intended.
        outputs = model.generate(**inputs, max_length=200)
        prompt_length = inputs["input_ids"].shape[1]
        response = tokenizer.batch_decode(outputs[:, prompt_length:])[0]
    else:
        # Without this, `response` would be undefined or stale from a previous iteration.
        raise ValueError(f"No generation branch defined for model_name={model_name!r}")

    responses.append(response)

    print('y_test_i', y_test_i, '=======')
    print(response)


    end = time.time()
    time_elapsed = end - start
    print(time_elapsed)
    print()
    time_elapsed_all.append(time_elapsed)

print(responses)











In [None]:
import numpy as np
print(np.mean(time_elapsed_all), np.std(time_elapsed_all))


In [None]:
json_responses = obtain_json(responses)

In [None]:
# Take the first value of every parsed response as the positive-class probability.
# (assumes each entry is a dict whose first key is the "related" label -- TODO confirm)
y_pred_proba_1 = []
for parsed_response in json_responses:
    y_pred_proba_1.append(tuple(parsed_response.values())[0])

# Threshold at 0.5; multiplying the boolean array by 1 turns it into 0/1 integers.
above_threshold = [probability >= 0.5 for probability in y_pred_proba_1]
y_pred = np.array(above_threshold) * 1
y_pred



In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import (
	ConfusionMatrixDisplay,
	auc,
	confusion_matrix,
	f1_score,
	precision_recall_curve,
	precision_score,
	recall_score,
	roc_auc_score,
)
from sklearn import metrics
from scipy.stats import pearsonr, spearmanr

def cm(y_true, y_pred, output_dir, model_name, ts, classes = ["SITB-", "SITB+"], save=True):
    """Compute, plot and optionally save a binary confusion matrix.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted labels.
    output_dir : str
        Path prefix for the saved files (used only when `save` is True).
    model_name : str
        Identifier embedded in the file names; "/" is replaced by "_".
    ts : object
        Suffix embedded in the file names (e.g. a timestamp).
    classes : list of str
        Display names for the negative and positive class, in that order.
    save : bool
        Whether to write the figure and the three CSV tables.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(cm_df_meaning, cm_df, cm_df_norm)``: the TN/FP/FN/TP legend, the raw
        counts, and the counts as percentages of all samples.
    """
    counts = confusion_matrix(y_true, y_pred, normalize=None)
    cm_df = pd.DataFrame(counts, index=classes, columns=classes)
    cm_df_meaning = pd.DataFrame([["TN", "FP"], ["FN", "TP"]], index=classes, columns=classes)

    # normalize="all" gives fractions of the whole sample; show them as percentages.
    percentages = (confusion_matrix(y_true, y_pred, normalize="all") * 100).round(2)
    cm_df_norm = pd.DataFrame(percentages, index=classes, columns=classes)

    plt.rcParams["figure.figsize"] = [4, 4]
    ConfusionMatrixDisplay(percentages, display_labels=classes).plot()
    plt.tight_layout()

    if save:
        # Hub ids such as "google/gemma-7b-it" contain "/", which would make the
        # file name point into a sub-directory that does not exist.
        file_tag = str(model_name).replace("/", "_")
        plt.savefig(output_dir + f"cm_{file_tag}_{ts}.png", dpi = 300)
        cm_df_meaning.to_csv(output_dir + f"cm_meaning_{file_tag}_{ts}.csv")
        cm_df.to_csv(output_dir + f"cm_{file_tag}_{ts}.csv")
        cm_df_norm.to_csv(output_dir + f"cm_norm_{file_tag}_{ts}.csv")

    return cm_df_meaning, cm_df, cm_df_norm


def classification_report(y_true, y_pred, y_pred_proba_1, output_dir,gridsearch=None,
                                        best_params=None,feature_vector=None,model_name=None,round_to = 2, ts = None, save_results=False):
    """Summarise binary-classification performance in a one-row DataFrame.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Hard (thresholded) predictions; used for sensitivity, specificity,
        precision and F1.
    y_pred_proba_1 : array-like
        Positive-class scores; used for ROC AUC, PR AUC and the best threshold.
    output_dir : str or None
        Path prefix for the CSV; only used when `save_results` is True.
    gridsearch, best_params, feature_vector, model_name
        Copied as-is into the result row.
    round_to : int
        Number of decimals kept for every metric.
    ts : object
        Suffix embedded in the CSV file name.
    save_results : bool
        Whether to write the row to ``results_<model_name>_<ts>.csv``.

    Returns
    -------
    pandas.DataFrame
        A single row with one column per metric / metadata field.
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    np.set_printoptions(suppress=True)
    # ROC AUC is a ranking metric: it needs the continuous scores, not the
    # thresholded 0/1 predictions.
    roc_auc = roc_auc_score(y_true, y_pred_proba_1)
    f1 = f1_score(y_true, y_pred)

    # calculate precision and recall for each threshold
    lr_precision, lr_recall, thresholds = precision_recall_curve(y_true, y_pred_proba_1)

    # The curve's final point (precision=1, recall=0) has no threshold, so it is
    # left out; this keeps argmax within the bounds of `thresholds`.
    precision_t, recall_t = lr_precision[:-1], lr_recall[:-1]
    denominator = precision_t + recall_t
    fscore = np.divide(
        2 * precision_t * recall_t,
        denominator,
        out=np.zeros_like(denominator, dtype=float),
        where=denominator > 0,
    )
    ix = np.argmax(fscore)
    best_threshold = thresholds[ix].item()

    pr_auc = auc(lr_recall, lr_precision)
    # AU P-R curve is also approximated by avg. precision
    # avg_pr = metrics.average_precision_score(y_true,y_pred_proba_1)

    sensitivity = recall_score(y_true, y_pred)
    specificity = tn / (tn + fp)  # OR: recall_score(y_true,y_pred, pos_label=0)
    precision = precision_score(y_true, y_pred)

    # Round the numbers before building the frame: the row mixes strings, None
    # and floats, so its columns are object dtype and DataFrame.round() skips them.
    def rounded(value):
        return round(float(value), round_to)

    results = pd.DataFrame(
        [feature_vector, model_name, rounded(sensitivity), rounded(specificity), rounded(precision),
         rounded(f1), rounded(roc_auc), rounded(pr_auc), rounded(best_threshold), gridsearch, best_params],
        index=["Feature vector","Model", "Sensitivity", "Specificity", "Precision", "F1", "ROC AUC", "PR AUC", "Best th PR AUC", "Gridsearch", "Best parameters"],
    ).T
    if save_results:
        # "/" in a hub model id would otherwise be read as a directory separator.
        file_tag = str(model_name).replace("/", "_")
        results.to_csv(output_dir + f"results_{file_tag}_{ts}.csv")
    return results







In [None]:
# Score only as many labels as there are predictions.
amount_of_preds = len(y_pred)

# Confusion-matrix tables and plot; nothing is written to disk (save=False).
cm_df_meaning, cm_df, cm_df_norm = cm(
    y_test[:amount_of_preds],
    y_pred,
    output_dir,
    model_name,
    None,
    classes=["Other", f"{dv}"],
    save=False,
)

# One-row metric summary for the same predictions.
results = classification_report(
    y_test[:amount_of_preds],
    y_pred,
    y_pred_proba_1,
    None,
    gridsearch=None,
    best_params=None,
    feature_vector=None,
    model_name=model_name,
    round_to=2,
    ts=None,
    save_results=False,
)
results

In [None]:
jsons = [find_json_in_string(n) for n in responses]

In [None]:
jsons

In [None]:
!python --version #3.11.7

In [None]:
import pandas as pd
import accelerate
print(accelerate.__version__)
import transformers
print(transformers.__version__)

# Local
import api_keys




In [None]:
# Get api key
from huggingface_hub import login
login(token=api_keys.huggingface)

In [None]:
import torch

# List every CUDA device visible to torch, or say that none is.
if not torch.cuda.is_available():
    print("CUDA is not available. Please check your installation and if your hardware supports CUDA.")
else:
    gpu_count = torch.cuda.device_count()
    print("Number of GPUs available:", gpu_count)

    for gpu_index in range(gpu_count):
        properties = torch.cuda.get_device_properties(gpu_index)
        print(f"GPU {gpu_index}:")
        print(f"\tName: {torch.cuda.get_device_name(gpu_index)}")
        print(f"\tCompute Capability: {torch.cuda.get_device_capability(gpu_index)}")
        print(f"\tTotal Memory: {properties.total_memory / 1e9} GB")
        print(properties)

# First GPU when present, CPU otherwise; shown as the cell's output.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

In [None]:
import torch

# Show the CUDA version torch reports and the name of device 0.
if not torch.cuda.is_available():
    print("CUDA is not available. Check your installation.")
else:
    cuda_version = torch.version.cuda
    first_gpu_name = torch.cuda.get_device_name(0)
    print(cuda_version)
    print(first_gpu_name)

In [None]:
torch.__version__

In [None]:
!module load openmind8/cuda/11.7

In [None]:
# pip install torch==x.x.x+cu11.7 


In [None]:
# !nvcc --version


In [None]:




def find_json_in_string(string: str) -> str:
    """Return the outermost ``{...}`` span found in a string.

    Parameters
    ----------
    string : str
        The string to search for a JSON object.

    Returns
    -------
    json_string : str
        The text from the first "{" through the last "}" (inclusive), or
        "{}" when no opening brace is followed by a closing one.
    """
    start = string.find("{")
    end = string.rfind("}")
    # Guard against a "}" that appears only before the first "{": slicing
    # would then return an empty string instead of the "{}" placeholder.
    if start == -1 or end < start:
        return "{}"
    return string[start : end + 1]

In [None]:
!seff 35521836

In [None]:
model = 0

In [None]:
!nvidia-smi

In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

pd.set_option("display.max_columns", None)

# Which machine the notebook runs on; selects the data directories below.
location = 'openmind'

if location == 'openmind':
    input_dir = '/nese/mit/group/sig/projects/dlow/ctl/datasets/train10_subset_30/'
    # NOTE(review): relative path (no leading slash) -- confirm that is intended.
    output_dir = 'home/dlow/'
elif location == 'local':
    input_dir = './data/ctl/'
    output_dir = '/home/dlow/datum/lexicon/data/output/'
else:
    # Surface a mistyped location immediately instead of as a later NameError.
    raise ValueError(f"Unknown location {location!r}; expected 'openmind' or 'local'.")


train = pd.read_csv(input_dir + 'train10_train_30perc_messages_texter_metadata.csv')
test = pd.read_csv(input_dir + 'train10_test_15perc_messages_texter_metadata.csv')

In [None]:
train['abuse_physical'].value_counts()

In [None]:
# from huggingface_hub import login

# api_keys.huggingface



#TODO: move to api_keys  


# Models

"google/gemma-2b-it" 5GB

"google/gemma-7b-it" 10GB

"meta-llama/Llama-2-7b-chat-hf" 13.5GB

"meta-llama/Llama-2-13b-chat-hf"

"meta-llama/Llama-2-70b-chat-hf"


In [None]:
# !pip install auto_gptq

In [None]:
!nvidia-sim

In [None]:
!nvitop

In [None]:
# !pip install transformers==4.37.2

In [None]:
from importlib import reload

# reload(auto_gptq)

import auto_gptq
print(auto_gptq.__version__)
import torch
print(torch.__version__)
import transformers
print(transformers.__version__)
import accelerate
print(accelerate.__version__)


In [None]:

# AutoGPTQ: 0.2.1
# PyTorch: 2.1.0.dev20230520
# Transformers: 4.30.0.dev0
# Accelerate: 0.20.0.dev0

In [None]:
# !pip install auto_gptq==0.7

In [None]:
# Smoke test: load a GPTQ-quantized checkpoint with AutoGPTQ and generate one reply.
from transformers import AutoTokenizer, TextGenerationPipeline
from auto_gptq import AutoGPTQForCausalLM

MODEL = "TheBloke/WizardLM-7B-uncensored-GPTQ" # 4GB

import logging

logging.basicConfig(
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S"
)

# NOTE: rebinds the notebook-wide `device` (a torch.device in earlier cells) to a plain string.
device = "cuda:0"

# NOTE: `tokenizer` and `model` replace whatever an earlier cell loaded under the same names.
tokenizer = AutoTokenizer.from_pretrained(MODEL, use_fast=True)
# download quantized model from Hugging Face Hub and load to the first GPU
model = AutoGPTQForCausalLM.from_quantized(MODEL,
#         model_basename=model_basename,
        device=device,
        use_safetensors=True,
        use_triton=False)

# inference with model.generate
prompt = "Tell me about AI"
# Wrap the question in a "### Human / ### Assistant" turn layout.
prompt_template=f'''### Human: {prompt}
### Assistant:'''

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
# NOTE(review): `temperature` is passed without `do_sample=True`; confirm whether sampling was intended.
output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=256, min_new_tokens=100)
# The decoded text covers the whole sequence, prompt included.
print(tokenizer.decode(output[0]))

In [None]:
from importlib import reload

reload(auto_gptq)

In [None]:
!pip install auto-gptq
!pip install optimum

In [None]:
!nvitop -1 

In [None]:
# !pip install --upgrade auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/
# !pip install optimum


In [None]:
!python -m torch.utils.collect_env


In [None]:
!python -m pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/main/quant_cuda-0.0.0-cp310-cp310-win_amd64.whl --force-reinstall

In [None]:
# Smoke test: load a GPTQ checkpoint through plain transformers and sample one reply.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# NOTE: `model_name_or_path` is read again by a later AutoGPTQ cell.
model_name_or_path = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GPTQ"
# To use a different branch, change revision
# For example: revision="gptq-4bit-32g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)




prompt = "Write a story about llamas"
system_message = "You are a story writing assistant"
# System / user / assistant turns delimited by <|im_start|> ... <|im_end|> markers.
prompt_template=f'''<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
'''

print("\n\n*** Generate:")

# .cuda() moves the prompt ids to the GPU before generation.
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
# Sampling is enabled here (do_sample=True), so repeated runs can differ.
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=100)
print(tokenizer.decode(output[0]))

# # Inference can also be done using transformers' pipeline

# print("*** Pipeline:")
# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=512,
#     do_sample=True,
#     temperature=0.7,
#     top_p=0.95,
#     top_k=40,
#     repetition_penalty=1.1
# )

# print(pipe(prompt_template)[0]['generated_text'])

In [None]:
!pip uninstall -y auto-gptq
!git clone https://github.com/PanQiWei/AutoGPTQ
!cd AutoGPTQ
!git checkout v0.5.1
!pip install .

In [None]:
# Smoke test: load a second GPTQ-quantized checkpoint with AutoGPTQ and generate one reply.
from transformers import AutoTokenizer, TextGenerationPipeline
from auto_gptq import AutoGPTQForCausalLM

MODEL = "TheBloke/open-llama-7b-open-instruct-GPTQ"
# NOTE: currently unused -- the `model_basename=` argument below is commented out.
model_basename = "open-llama-7B-open-instruct-GPTQ-4bit-128g.no-act.order"

import logging

logging.basicConfig(
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S"
)

# NOTE: rebinds the notebook-wide `device` to a plain string.
device = "cuda:0"

tokenizer = AutoTokenizer.from_pretrained(MODEL, use_fast=True)
# download quantized model from Hugging Face Hub and load to the first GPU
model = AutoGPTQForCausalLM.from_quantized(MODEL,
#         model_basename=model_basename,
        device=device,
        use_safetensors=True,
        use_triton=False)

# inference with model.generate
prompt = "Tell me about AI"
prompt_template=f'''### Human: {prompt}
### Assistant:'''

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
# NOTE(review): `temperature` is passed without `do_sample=True`; confirm whether sampling was intended.
output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=256, min_new_tokens=100)
print(tokenizer.decode(output[0]))

In [None]:
# Load the checkpoint named by `model_name_or_path` with AutoGPTQ and generate from `prompt_template`.
# NOTE: both names are not defined in this cell; they must already exist from an earlier cell.
# NOTE: importing `logging` from transformers rebinds the name `logging` used for the
# standard-library module in earlier cells.
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
# NOTE(review): `argparse`, `pipeline` and `BaseQuantizeConfig` are not used in this cell.
import argparse



use_triton = False

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
#         model_basename=model_basename,
        use_safetensors=True,
        trust_remote_code=True,
        device="cuda:0",
        use_triton=use_triton,
        quantize_config=None)

print("\n\n*** Generate:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
# NOTE(review): `temperature` is passed without `do_sample=True`; confirm whether sampling was intended.
output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
print(tokenizer.decode(output[0]))

In [None]:
# error: metadata-generation-failed


In [None]:
# pip install accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM
# from transformers import LlamaForCausalLM, LlamaTokenizer # llama
# import transformers # llama
# import torch

# TODO: See how they use it for text classification: (from probs or output layer directly?)
# https://colab.research.google.com/github/bigscience-workshop/petals/blob/main/examples/prompt-tuning-sst2.ipynb


# Leave exactly one checkpoint uncommented.
# model_name = "google/gemma-2b-it"
model_name = "google/gemma-7b-it"
# model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
responses = []
if 'gemma' in model_name:
  # Gemma
  # device_map="auto" already places the weights, so no follow-up .to(device)
  # is needed (moving a dispatched model only triggers an accelerate warning,
  # or an error when layers are offloaded).
  model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
elif 'llama' in model_name:
  # Have to restart session after updating transformers
  from transformers import AutoTokenizer, LlamaForCausalLM

  # No device_map here, so the model is loaded with from_pretrained's default placement.
  model = LlamaForCausalLM.from_pretrained(model_name)
else:
  # Fail here instead of continuing with a `model` left over from another cell.
  raise ValueError(f"No loading branch defined for model_name={model_name!r}")


  # # alternative
  # # Llama
  # pipeline = transformers.pipeline(
  #     "text-generation",
  #     model=model,
  #     torch_dtype=torch.float16,
  #     device_map="auto",
  # )

  # sequences = pipeline(
  #     prompt,
  #     do_sample=True,
  #     top_k=10,
  #     num_return_sequences=1,
  #     eos_token_id=tokenizer.eos_token_id,
  #     max_length=200,
  # )
  # for seq in sequences:
  #     print(f"Result: {seq['generated_text']}")












In [None]:
train[train['suicide']==1].sample(n=3)['message'].tolist()

In [None]:
max_length = 1000

document = "Help me please. Everything hurts. Why is it worse at night? God I hate everything. I can't really take it anymore. I'm so over it. I hate feeling like this, I don't WANT to feel like this. Thank you. My feelings.. Depression. I don't have clinical but lately I've been feeling like shit, especially at night. It really really is. I just want to be that happy optimistic person full of [scrubbed] that spreads confetti all over my friends and families lives. And I usually can be but I want to be like that all the time and I'm just so fucking tired of everything.. Yeah, that makes sense. It sounds about right. Yeah. Loneliness is something I feel a lot. I have amazing friends to vent to, and that I love and they love me, but I'm still really lonely. I guess confusion is a big feeling I feel as well. I'm lonely and confused. Yes. Drowning myself. Or stabbing myself. Yes I do. The Saturday coming up. No one will be home. Then I can call the cops and tell them I'm going to kill my self so my family doesn't have to see my body. Yes i am. About suicide? No. Your welcome.. Yeah. I don't need them to worry about me. Yeah I do. My family and friends are my world. My dad would. And my best friends [scrubbed] and [scrubbed]. That's something I wonder. Am I really their world. I'm kinda a bitch. Or I can be. Like I said, I try to be really upbeat, but I have some unflattering moments. If I'm their world? No. It seems a bit possessive. Well they're always here for me and willing to talk and let me vent, so that means a lot to me. Especially since we all know we can come to each other. I vent to them, they vent to me. Yeah, I think they do too. I'm not sure. Yes I think so. Maybe distractions. Like YouTube or something. Yeah I think so. I also write in my [scrubbed] a lot. That helps. The online chat thing sounds like it would be a good support system. Thank you so much. And yeah, I've reached out here multiple times. I'm feeling stable now.. That sounds good [scrubbed]. 
Thank you. Thank you so much."
construct = 'suicide'


In [None]:
# 'related to {construct}': <your_probability>, 'not related to {construct}': <your_probability>

In [None]:
%%time
prompt = """
You are a text classification assistant.

The text to classify:
```
{document}
```

Assign a probability for following labels and return in a JSON format:

'related to {construct} at any point': <your_probability>, 'not related to {construct}': <your_probability>

Do not provide additional text or explanations, just that JSON output.
"""


prompt = prompt.format(document = document, construct = construct)
print(prompt)

In [None]:
%%time 

# Generate one answer for `prompt`; each branch keeps only the newly generated
# text so `response` means the same thing for every model.
responses = []
if 'gemma' in model_name:
  # Gemma
  input_ids = tokenizer(prompt, return_tensors="pt").to(device)
  outputs = model.generate(**input_ids, max_length = max_length)
  # Find the length of the input_ids to know where the original prompt ends
  prompt_length = input_ids["input_ids"].shape[1]
  # Decode only the generated part, skipping the prompt
  response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
  responses.append(response)
elif 'llama' in model_name:
  # Inputs must be on the same device as the model.
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

  # Generate
  generate_ids = model.generate(inputs.input_ids, max_length=max_length)
  prompt_length = inputs.input_ids.shape[1]
  response = tokenizer.batch_decode(generate_ids[:, prompt_length:], skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
  responses.append(response)


print(responses)

In [None]:
responses

In [None]:
# Have to restart session after updating transformers
# Minimal Llama-2 chat check: load the model and tokenizer, then generate a short continuation.
from transformers import AutoTokenizer, LlamaForCausalLM

# NOTE: rebinds the notebook-wide `model`, `tokenizer` and `prompt`.
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate
# max_length=30 caps the total sequence length, prompt tokens included.
generate_ids = model.generate(inputs.input_ids, max_length=30)
# The decoded string is the cell's displayed output.
tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

In [None]:
%%time


In [None]:

import ast

# literal_eval builds the dict from literals only, so nothing the model
# generated can be executed (which eval() would allow).
ast.literal_eval(find_json_in_string(response))


In [None]:

a = '1'

print(a)

In [None]:

print(generated_text_only)

In [None]:
# `print(...) == None` compared print's return value (always None), so it was
# always True; test the variable itself instead.
generated_text_only is None

In [None]:
tokenizer.decode(outputs[0])


In [None]:
pip install transformers
huggingface-cli login
!pip -i install accelerate==0.27.2

In [None]:
# Build a "conversational" pipeline for a chat model in half precision.
# NOTE: importing `pipeline` here rebinds the name `pipeline` that an earlier
# cell may have bound to a text-generation pipeline object.
from transformers import pipeline, Conversation
import torch

base_model_name = "bardsai/jaskier-7b-dpo-v5.6"
chatbot = pipeline("conversational", model=base_model_name, torch_dtype=torch.float16, device_map="auto")


In [None]:

print(conversation.messages[-1]["content"])

In [None]:
%%time
prompt = """
You are a text classification assistant.
The text to classify:
```
The table is round
```
Assign a probability for each possible label: 'loneliness' or 'not loneliness' and return in a JSON format
For instance, return this in JSON format:
'loneliness': <your_score>,
'not loneliness': <your_score>
Do not provide additional text or explanations, just the JSON output.
"""

# Wrap the prompt in a Conversation, run it through `chatbot` (defined in an
# earlier cell), and print the content of the last message.
conversation = Conversation(prompt)
conversation = chatbot(conversation)
print(conversation.messages[-1]["content"])


In [None]:

!pip install accelerate



In [None]:
!pip install --upgrade transformers==4.38.1

In [None]:

from transformers import AutoTokenizer, AutoModelForCausalLM



In [None]:
# Setup the environment
!pip install --upgrade huggingface_hub
!pip install git+https://github.com/huggingface/transformers -U
!pip install accelerate
!pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:

!pip install kaggle_secrets

In [None]:
# Log in to the Hugging Face Hub with a token fetched through Kaggle's secrets client.
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
# NOTE(review): `api_keys.huggingface` is passed here as the argument to get_secret,
# while earlier cells pass the same value directly as the login token -- confirm
# which of the two it is meant to be.
access_token_read = UserSecretsClient().get_secret(api_keys.huggingface)
login(token = access_token_read)

In [None]:


from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the model
# Both are read from a local directory path rather than a hub model id.
# NOTE: rebinds the notebook-wide `tokenizer` and `model`.
tokenizer = AutoTokenizer.from_pretrained("/kaggle/input/gemma/transformers/2b/2")
model = AutoModelForCausalLM.from_pretrained("/kaggle/input/gemma/transformers/2b/2")


In [None]:
# Use the model
input_text = "What is the best thing about Kaggle?"
input_ids = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))