In [14]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
from accelerate import Accelerator
from peft import prepare_model_for_kbit_training, prepare_model_for_int8_training


# Initialize the tokenizer and model
#model_id = "mistralai/Mistral-7B-Instruct-v0.2"
model_id = "teknium/OpenHermes-2-Mistral-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
precision = "fp4"
path=f"N:\\AI\\text-generation-webui-main\\models\\teknium_OpenHermes-2-Mistral-7B\\"
#path=f"N:\\AI\\mistral-7B-instruct\\"

# if the model variable exists, delete it to free up memory before loading the new model
if 'model' in locals():
    model = None

if (precision == "fp16"):
    model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16).to("cuda")
elif (precision == "fp8"):
    model = AutoModelForCausalLM.from_pretrained(path, load_in_8bit=True, device_map='cuda')
elif (precision == "fp4"):
    model = AutoModelForCausalLM.from_pretrained(path, load_in_4bit=True, device_map='cuda')

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [01:20<00:00, 40.27s/it]


In [7]:
# Example input text
additional_context = "[INST] The following is a conversation with an AI assistant. The assistant is helpful and concise. The assistant does not respond to the question, and only does as the question says. [/INST]"
input_text = "Please generate a single sentence with only the \"Mental Filters\" cognitive distortion, from the perspective of the person with the distortion."

# Generate outputs
def generate_response(text):
    try:
        # Append a prompt to the user's input
        inputs = tokenizer(text, return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, use_cache=True, top_k=40, top_p=0.1, temperature=0.7, repetition_penalty=1.2, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id, bos_token_id=tokenizer.bos_token_id)
        responseIn = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        #text = responseIn + "\n[INST] Please generate another sentence. [/INST]"
        #inputs = tokenizer(text, return_tensors="pt").to("cuda")
        #outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, use_cache=True, top_k=40, top_p=0.1, temperature=0.7, repetition_penalty=1.2, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id, bos_token_id=tokenizer.bos_token_id)
        #responseIn = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Remove the prompt from the start of the response
        response = responseIn[len(text):]
    except Exception as e:
        response = "Sorry, I encountered an error. Please try again."
        print(e)
    return response

# Generate a response
#outputs = generate_response(additional_context+"\n"+"[INST] " + input_text + " [/INST]")

#for i in range(5):
#    outputs = generate_response(outputs+"\n"+"[INST] " + "Please generate another sentence with only the \"Mental Filters\" cognitive distortion." + " [/INST]")

# Decode the generated outputs
#print(outputs)

In [21]:
import pandas as pd

generate_data = [
                    ("Magnification", 5), 
                    ("Labeling", 5)
                ]

# Create a DataFrame from the history list
df = pd.DataFrame([], columns=["Distorted part","Dominant Distortion"])

for c in range(len(generate_data)):

    # Generate a response
    main_input = additional_context+"\n"+"[INST] " + input_text + " [/INST]"
    output = generate_response(main_input)


    # Create a list to store inputs and outputs
    history = []

    # Add the initial input and output to the history
    history.append((main_input, output))

    for i in range(generate_data[c][1]):
        # Generate a new response
        input_string = ""
        for j in range(len(history)):
            input_string += history[j][0] + "\n" + history[j][1] + "\n"
        input_string = input_string + "[INST] " + "Please generate another completely new sentence with only the \"" + generate_data[c][0] + "\" cognitive distortion." + " [/INST]\n"

        if input_string.__len__() > 8192:
            # Remove the string from the start of the end of the first string
            input_string = input_string[0:main_input.__len__()] + input_string[input_string.find("[INST]", main_input.__len__()+(input_string.__len__() - 8192)):]


        new_output = generate_response(input_string)

        if new_output[0] == '\n':
            new_output = new_output[1:]


        # Add the new input and output to the history
        history.append(("[INST] Please generate another completely new sentence with only the \"" + generate_data[c][0] + "\" cognitive distortion. [/INST]", new_output))

        # Concatenate the new row to the DataFrame
        df = pd.concat([df, pd.DataFrame({"Distorted part": [new_output], "Dominant Distortion": [generate_data[c][0]]})], ignore_index=True)

# save the dataframe to a csv file
df.to_csv("distorted_parts.csv", index=False)

In [33]:
import numpy as np

# function to generate a record based on 3 random records from the original data
def generate_record(original_data, input_text, distortion):
    distorion_data = original_data[original_data["Dominant Distortion"] == distortion]
    random_indices = np.random.choice(len(distorion_data), 3, replace=False)

    # Get the 3 random records
    rand_records = []
    for index in random_indices:
        rand_records.append(distorion_data.iloc[index]["Distorted part"])

    # Generate a response
    main_input = additional_context+"\n"+"[INST] " + input_text + " [/INST]\n\n" + rand_records[0] + "\n\n" + rand_records[1] + "\n\n" + rand_records[2] + "\n\n"

    output = generate_response(main_input)

    if len(output) == 0:
        output = "Sorry, I encountered an error. Please try again."

    if output[0] == '\n':
        output = output[1:]

    return generate_response(main_input)
    

In [2]:
pasta = [8+5+3.5+2+2+0.16+8+6.5+4]
pasta_people = [("me", 8), 
                ("josh f", 5), 
                ("josh a", 3.5), 
                ("rachel", 2), 
                ("emma", 0.16), 
                ("daniel", 8), 
                ("her", 2), 
                ("katrina", 6.5),
                ("nick", 4)]

print(sum(pasta)/len(pasta_people))

#me, josh f, josh a, her, rachel, emma, daniel, katrina
#                ("her", 2), 

4.351111111111111


In [34]:
import pandas as pd

data = pd.read_csv('Annotated_data.csv')
data = data.dropna()

# print the distortion types
print(data["Dominant Distortion"].unique())

inputText = "Please generate four similar sentences with only the \"Mental Filters\" cognitive distortion within it, from the perspective of the person with the distortion."
distortion = "Mental filter"

print(generate_record(data, inputText, distortion) + "\nEOF")

['Labeling' 'Fortune-telling' 'Mind Reading' 'Magnification'
 'Overgeneralization' 'Mental filter' 'Emotional Reasoning'
 'Personalization' 'All-or-nothing thinking' 'Should statements']
[INST] The following is a conversation with an AI assistant. The assistant is helpful and concise. The assistant does not respond to the question, and only does as the question says. [/INST]
[INST] Please generate four similar sentences with only the "Mental Filters" cognitive distortion within it, from the perspective of the person with the distortion. [/INST]

My current bf went out of town & kept blatantly choosing things other than me that were hurtful.oo drunk & the ex fingered me. I didn’t even kiss him. I stopped him before things went to far & had him get me an uber ride home.

I just started sophomore year and things suck. My parents get mad at me for every little detail of my life. I have okay grades and shitty friends. Worst of all I hate everything that is breathing, heck I hate object too.