In [None]:
!pip install -U transformers
!pip install bitsandbytes
!pip install accelerate



In [None]:
# Root folder under which Model stores its Hugging Face cache sub-folders
# ("mistral_data" / "mixtral_data"). Relative path -- presumably a mounted
# Google Drive in Colab; confirm the mount exists before loading a model.
# NOTE: this name shadows the built-in dir(); it is kept because Model reads it.
dir = "drive/MyDrive/state"

In [None]:
import torch

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [None]:
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

In [None]:
class Model:
    """Load a quantized Mistral-family causal LM and generate text with it.

    Two short names are supported:

    * ``"mistral"`` -> ``mistralai/Mistral-7B-v0.1`` loaded in 8-bit.
    * ``"mixtral"`` -> ``mistralai/Mixtral-8x7B-Instruct-v0.1`` loaded in 4-bit.

    Tokenizer and weights are cached under the notebook-level ``dir`` folder.

    Attributes:
        model_name: The short name the instance was created with.
        tokenizer: Tokenizer returned by ``AutoTokenizer.from_pretrained``.
        model: Model returned by ``AutoModelForCausalLM.from_pretrained``.
        bnb_config: ``BitsAndBytesConfig`` used for quantized loading.
    """

    # short name -> (Hugging Face repo id, cache sub-folder below `dir`)
    _CHECKPOINTS = {
        "mistral": ("mistralai/Mistral-7B-v0.1", "mistral_data"),
        "mixtral": ("mistralai/Mixtral-8x7B-Instruct-v0.1", "mixtral_data"),
    }

    def __init__(self, model_name="mistral"):
        """Build the quantization config and load the requested checkpoint.

        Args:
            model_name: ``"mistral"`` or ``"mixtral"``.

        Raises:
            ValueError: If ``model_name`` is not one of the supported names.
                (Previously any other value silently loaded the full,
                unquantized Mixtral checkpoint.)
        """
        if model_name not in self._CHECKPOINTS:
            supported = ", ".join(repr(name) for name in self._CHECKPOINTS)
            raise ValueError(
                f"Unknown model_name {model_name!r}; expected one of: {supported}"
            )

        self.model_name = model_name
        self.tokenizer = None
        self.model = None

        # Quantized loading keeps the memory footprint low: Mixtral is stored in
        # 4-bit, Mistral in 8-bit. The float16 compute dtype is the setting for
        # the 4-bit path.
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=(model_name == "mixtral"),
            load_in_8bit=(model_name == "mistral"),
            bnb_4bit_compute_dtype=torch.float16,
        )

        if model_name == "mistral":
            self.get_mistral_model()
        else:
            self.get_mixtral_model()

    def _load_checkpoint(self, key):
        """Load tokenizer and model for ``key`` into ``self.tokenizer`` / ``self.model``."""
        repo_id, cache_subdir = self._CHECKPOINTS[key]
        cache_dir = os.path.join(dir, cache_subdir)

        self.tokenizer = AutoTokenizer.from_pretrained(repo_id, cache_dir=cache_dir)
        self.model = AutoModelForCausalLM.from_pretrained(
            repo_id,
            quantization_config=self.bnb_config,
            device_map="auto",
            cache_dir=cache_dir,
        )

    def get_mistral_model(self):
        """Load ``mistralai/Mistral-7B-v0.1`` (cache folder ``mistral_data``)."""
        self._load_checkpoint("mistral")

    def get_mixtral_model(self):
        """Load ``mistralai/Mixtral-8x7B-Instruct-v0.1`` (cache folder ``mixtral_data``)."""
        self._load_checkpoint("mixtral")

    def generate(self, prompt, max_new_tokens=50):
        """Run generation on ``prompt`` and return the decoded text.

        Args:
            prompt: Text to feed to the model.
            max_new_tokens: Upper bound on the number of generated tokens
                (defaults to the previously hard-coded 50).

        Returns:
            The decoded first sequence returned by ``model.generate``. Special
            tokens are stripped only when ``model_name == "mixtral"``.
        """
        # Send the inputs to wherever the model was placed by device_map="auto"
        # instead of depending on a notebook-level `device` variable.
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        output = self.model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            # Explicitly use EOS as the pad token; this is the fallback the
            # library otherwise applies with a warning on every call.
            pad_token_id=self.tokenizer.eos_token_id,
        )

        return self.tokenizer.decode(
            output[0], skip_special_tokens=(self.model_name == "mixtral")
        )

In [None]:
import re

In [None]:
def get_state(output, words):
    """Extract the category lines that follow the last occurrence of ``words``.

    The whole ``output`` is lower-cased, everything up to and including the
    last occurrence of ``words`` is discarded, and from the remainder every
    line containing one of the substrings "thought", "feeling", "action" or
    "other" is kept. Scanning stops after the first kept line that contains
    "other".

    Args:
        output: Full decoded model output (prompt echo plus completion).
        words: The input word list as it was inserted into the prompt. It is
            matched case-insensitively; an empty string means "scan the whole
            output".

    Returns:
        The kept lines, lower-cased, each prefixed with a newline; ``""`` when
        no line matched.

    Note:
        Matching is by substring, so a line containing e.g. "mother" also
        counts as an "other" line and ends the scan.
    """
    lowered = output.lower()
    # `output` is lower-cased, so the marker must be too, otherwise any
    # upper-case letter in `words` makes the split miss and the prompt's own
    # examples get parsed instead of the model's answer.
    marker = words.lower()
    # str.split("") raises ValueError, so an empty marker scans everything.
    tail = lowered.split(marker)[-1] if marker else lowered

    keywords = ("thought", "feeling", "action", "other")
    kept = []

    for line in tail.split("\n"):
        if any(keyword in line for keyword in keywords):
            kept.append(line)

            if "other" in line:
                break

    return "".join("\n" + line for line in kept)

In [None]:
def get_categories(output, words):
    """Parse the model output into per-category phrase lists.

    Args:
        output: Full decoded model output.
        words: The input word list; forwarded to ``get_state`` to locate the
            answer inside ``output``.

    Returns:
        A ``(categories, state)`` tuple. ``categories`` maps each of
        'thought', 'feeling', 'action' and 'other' to its own list of phrases
        (empty when the model answered "none" or nothing). ``state`` is the
        raw string returned by ``get_state``.
    """
    state = get_state(output, words)

    # One fresh list per key: dict.fromkeys(keys, []) would make all four keys
    # share the same list object.
    categories = {key: [] for key in ("thought", "feeling", "action", "other")}

    # With a single capturing group re.split yields
    # [prefix, label, value, label, value, ...]. The \b keeps words such as
    # "mother" or "another" from being mistaken for the "other" label.
    pieces = re.split(r"\s?\b(thought|feeling|action|other)s?\W{2,}", state)

    # Walk only the captured labels (odd positions) and the value right after
    # each one, so a value that merely contains a keyword ("thoughtful") is
    # never treated as a label and no out-of-range lookup can happen.
    for label, raw_value in zip(pieces[1::2], pieces[2::2]):
        value = raw_value.strip()
        if not value or value == "none":
            categories[label] = []
        else:
            categories[label] = re.split(r"\s*,\s*", value)

    return categories, state

In [None]:
def get_sentence(output, state, start_datetime, end_datetime):
    """Pull the generated sentence out of ``output`` and prefix it with timestamps.

    The text after the last case-insensitive occurrence of ``state`` is
    scanned line by line; the first line containing "output"/"Output" has its
    "output" label removed and is returned.

    Args:
        output: Full decoded model output (prompt echo plus completion).
        state: The state string that was inserted into the prompt. It is
            matched literally; when empty or ``None`` the whole output is
            scanned.
        start_datetime: Text placed first in the result.
        end_datetime: Optional text placed second; skipped when falsy.

    Returns:
        ``"<start_datetime>, [<end_datetime>, ]<sentence>"``, or ``None`` when
        no line containing "output" is found.
    """
    if state:
        # `flags` must be passed by keyword: the third positional parameter of
        # re.split is `maxsplit`. `state` is arbitrary text, so it is escaped
        # before being used as a pattern.
        tail = re.split(re.escape(state), output, flags=re.IGNORECASE)[-1]
    else:
        tail = output

    for line in tail.split("\n"):
        if "output" in line or "Output" in line:
            sentence = re.sub(r"[oO]utput\W+", '', line).strip()
            prefix = f"{start_datetime}, " + (f"{end_datetime}, " if end_datetime else '')

            return prefix + sentence

    return None

In [None]:
from datetime import datetime

In [None]:
class Task(Model):
    """Two-step pipeline on top of ``Model``: classify words, then narrate them.

    ``semantic_classification`` sorts a word list into thought / feeling /
    action / other and remembers the parsed state; ``sent_generation`` turns
    that remembered state into one timestamped sentence.

    Attributes:
        start_datetime: Timestamp text used as the first part of the sentence.
        end_datetime: Optional second timestamp text.
        state: Raw category lines from the last classification, or ``None``
            before the first call.
    """

    # Few-shot prompt for the classification step; `{}` receives the word list.
    _CLASSIFICATION_PROMPT = """You will be given a list of unordered words that describe various aspects of a person's state in their daily life. Your first objective is to review the words and connect them into coherent phrases that accurately represent daily life activities or states. Once you have identified potential phrases, categorize each phrase and any remaining individual words STRICTLY into ONE of the following categories with a focus on daily life context: "Thoughts," "Feelings," "Actions," or "Others." Follow these contextual guidelines:

1. Phrase Formation in Daily Life Context:
Look for connections between words that naturally fit together to describe daily life scenarios, states, or activities.
Create phrases that reflect these daily life experiences before categorization.

2. Categories Defined with Daily Life Focus:
Thoughts: Cognitive states or mental activities related to daily life.
Feelings: Emotions or moods one may experience throughout the day.
Actions: Physical activities or tasks are done as part of a routine.
Others: Words or phrases that do not represent thoughts, feelings, or actions in the context of daily life.

3. Categorizing with Context:
Determine the best category for each word or phrase, grounded in the context of an individual's day-to-day experience.

4. Contextual Examples:
Given "morning," "refreshed," and "yoga," create a phrase reflecting a morning routine, then categorize:
"refreshed" captures a feeling, categorized under "Feelings."
"morning yoga" is a daily activity, so it belongs in "Actions."

5. Ambiguity and Contextual Judgement:
For ambiguous situations, consider the context of daily life to guide the categorization; otherwise, use the "Others" category.

For Example:

1. Input: water, joy, plant, weeds, think, summer
Output:
Thoughts: think
Feelings: joy
Actions: water plants, remove weeds
Others: summer

2. Input: deadline, teamwork, report, finalize, schedule, results, feedback, analyze, implement, coordinate
Output:
Thoughts: analyze results
Feelings: none
Actions: finalize report by deadline, schedule and coordinate teamwork, and feedback to implement
Others: none

Based on the above provided explanation and examples find the categories for the following without creating any additional examples:

Input: {}
Output:"""

    # Few-shot prompt for the sentence step; `{}` receives the stored state.
    _SENTENCE_PROMPT = """Imagine you have a snapshot of a person's past moment, encompassing all the elements from their experience, which comprises their "thoughts", "feelings", "actions", and "environment/surroundings (others)". Your task is to weave together these elements into a single, coherent sentence in the past continuous tense. This sentence should reflect the essence of the moment, conveying the atmosphere and the emotions present. It is vital to include every provided category in your sentence to ensure a complete and vivid depiction of the scene. Your ability to understand nuanced human behavior and context should help you craft a sentence that is both engaging and true to life.

Input:
thoughts: reminiscing about childhood
feelings: nostalgic, content
actions: none
others: none
Output: You were feeling nostalgic and content, reminiscing about your childhood.

Input:
thoughts: outlining a presentation
feelings: focused, calm
actions: creating slides
others: soft jazz music, dim desk lamp
Output: Calmly focused, you were outlining a presentation, creating slides under dim light with jazz music softly playing.

Based on the above-provided explanation and examples create the sentence for the following without creating any additional examples:

Input:{}
Output:"""

    def __init__(self, model_name="mistral"):
        """Load the model via ``Model.__init__`` and reset the per-task fields."""
        super().__init__(model_name)
        self.start_datetime = ''
        self.end_datetime = ''
        self.state = None

    def semantic_classification(self, words, start_datetime='', end_datetime=''):
        """Classify ``words`` into categories and remember the parsed state.

        Args:
            words: Comma-separated word list inserted into the prompt.
            start_datetime: Timestamp text to remember; when empty, the current
                local time formatted as ``"%H:%M %d/%m/%Y"`` is used.
            end_datetime: Optional end timestamp text to remember.

        Returns:
            The categories dict produced by ``get_categories``.
        """
        self.start_datetime = start_datetime or datetime.now().strftime("%H:%M %d/%m/%Y")
        self.end_datetime = end_datetime

        prompt = self._CLASSIFICATION_PROMPT.format(words)
        categories, self.state = get_categories(self.generate(prompt), words)

        return categories

    def sent_generation(self):
        """Generate one sentence describing the last classified state.

        Returns:
            The value returned by ``get_sentence``: the timestamp-prefixed
            sentence, or ``None`` when no output line could be located.

        Raises:
            RuntimeError: If there is no stored state, i.e.
                ``semantic_classification`` has not been called yet or its
                result contained no category lines. Without this guard the
                prompt would be built from ``None`` / an empty string and the
                parsing step would fail or return a prompt example.
        """
        if not self.state:
            raise RuntimeError(
                "No classified state available: call semantic_classification() "
                "first and make sure it produced category lines."
            )

        prompt = self._SENTENCE_PROMPT.format(self.state)

        return get_sentence(self.generate(prompt), self.state, self.start_datetime, self.end_datetime)

In [None]:
from pprint import pprint

In [None]:
# Demo input: a comma-separated list of words to be classified.
words = "happy, cheese, lunch"

In [None]:
# Constructing a Task loads the tokenizer and model weights right away
# (through Model.__init__), using the cache folders below `dir`.
task = Task(model_name="mistral") # supported names: "mistral" and "mixtral"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Step 1: sort the words into thought / feeling / action / other. The parsed
# state is also stored on `task` for the sentence-generation step.
task.semantic_classification(words)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'thought': [],
 'feeling': ['happy'],
 'action': ['eat lunch'],
 'other': ['cheese']}

In [None]:
# Step 2: turn the state stored by semantic_classification() into a single
# sentence prefixed with the remembered timestamp(s).
task.sent_generation()

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'10:21 11/03/2024, You were happy, eating lunch with cheese.'