In [1]:
import re
import torch
import nltk
import pandas as pd
import transformers
from tqdm import tqdm
from datasets import Dataset
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from peft import LoraConfig, get_peft_model
from optimum.quanto import qint4, quantize
from transformers import TrainingArguments, Trainer
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, QuantoConfig, AutoModelForCausalLM, set_seed, AutoConfig
import warnings
warnings.filterwarnings('ignore')

In [2]:
import llama_cpp
import ollama_python

In [3]:
from llama_cpp import Llama

# Llama

In [166]:
# Load the Q4_0-quantized GGUF build of Llama-2 7B through llama-cpp-python.
# NOTE(review): n_ctx=512 is well below the checkpoint's own context length of 4096
# (see llama.context_length in the load log below) — confirm the small window is intentional,
# since the generation cell further down requests max_tokens=1024.
model = Llama(
    model_path='llama/llama-2-7b.Q4_0.gguf',
    n_ctx=512,
)

llama_model_load_from_file_impl: using device Metal (AMD Radeon Pro 555X) - 4087 MiB free
llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama/llama-2-7b.Q4_0.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
lla

In [36]:
# Grab the tokenizer object exposed by the loaded Llama model.
tokenizer = model.tokenizer()

In [44]:
# Sample question and its UTF-8 byte form; the bytes are what gets passed to
# tokenizer.tokenize() in the generation cell.
s = "What is Artificial Intelligence?"
enc_s = bytes(s, "utf-8")

In [85]:
# Display the encoded bytes to confirm the encoding step.
enc_s

b'What is Artificial Intelligence?'

In [169]:
# Token ids for the sample question; decoded again in a later cell as a round-trip check.
token = tokenizer.tokenize(enc_s)

# One-shot prompt: an example post followed by the context for the post to generate.
prompt = """Generate a social media post in a descriptive format for a business in 1000 words just like this example
            Example - {Smart farming is here! With IoT, farmers can boost crop yields, save water, cut labor costs, and even monitor 
            livestock health—all in real-time. \nDiscover how technology is transforming agriculture for a more sustainable future.}
            Context - {Platform : Facebook
                      Theme : Product launch of a fitness software
                      Target Audience : Health-conscious people
                      Tone : Friendly
                      Language : English}
            """

# Sampling configuration, gathered in one place so it is easy to tweak.
generation_kwargs = dict(
    top_k=85,
    top_p=0.95,
    temperature=0.5,
    max_tokens=1024,
    stop=["Q:"],
    seed=42,
)

# Run a single completion; the result is a dict whose text lives under
# output['choices'][0]['text'] (printed in a later cell).
output = model(prompt=prompt, **generation_kwargs)



Llama.generate: 22 prefix-match hit, remaining 122 prompt tokens to eval
llama_perf_context_print:        load time =    6590.65 ms
llama_perf_context_print: prompt eval time =    4026.45 ms /   122 tokens (   33.00 ms per token,    30.30 tokens per second)
llama_perf_context_print:        eval time =   54173.74 ms /   367 runs   (  147.61 ms per token,     6.77 tokens per second)
llama_perf_context_print:       total time =   58404.50 ms /   489 tokens


In [67]:
# Decode the token ids back to text to verify the tokenize/decode round trip.
tokenizer.decode(token)

'What is Artificial Intelligence?'

In [170]:
# Print the generated text of the first completion choice.
print(output['choices'][0]['text'])


            </p>
            <p>
            Example - {Smart farming is here! With IoT, farmers can boost crop yields, save water, cut labor costs, and even monitor 
            livestock health—all in real-time. 
Discover how technology is transforming agriculture for a more sustainable future.}
            Context - {Platform : Facebook
                      Theme : Product launch of a fitness software
                      Target Audience : Health-conscious people
                      Tone : Friendly
                      Language : English}
            
            </p>
            <p>
            Example - {Smart farming is here! With IoT, farmers can boost crop yields, save water, cut labor costs, and even monitor 
            livestock health—all in real-time. 
Discover how technology is transforming agriculture for a more sustainable future.}
            Context - {Platform : Facebook
                      Theme : Product launch of a fitness software
                      Ta

In [142]:
# Print the generated text of the first completion choice.
# NOTE(review): same statement as the print cell above; the execution count (142 vs 170)
# and the Instagram context echoed in the recorded output suggest it was captured from an
# earlier run with a different prompt — confirm before relying on it.
print(output["choices"][0]["text"])


            [Facebook Post]
            Hello, {FirstName} {LastName}!
            We’re excited to announce the launch of our new product, {ProductName}. It’s a {ProductType} that helps {AudienceGoal} achieve their health and fitness goals.
            {ProductName} is available now on {PlatformName} for {Price}.
            {Link}
            [Link]
            [Facebook Post]
            Hello, {FirstName} {LastName}!
            We’re excited to announce the launch of our new product, {ProductName}. It’s a {ProductType} that helps {AudienceGoal} achieve their health and fitness goals.
            {ProductName} is available now on {PlatformName} for {Price}.
            {Link}
            [Link]
            Generate social media post in a paragraph
            Context - {Platform : Instagram
                      Theme : Product launch of a fitness software
                      Target Audience : Health-conscious people
                      Tone : Friendly
                      La

# Using Falcon-gguf

In [4]:
# Load the Q4_0 GGUF build of Falcon3-1B-Instruct. This rebinds `model`, so the
# later cells that call `model(...)` run against this checkpoint.
model = Llama(
    model_path='falcon/Falcon3-1B-Instruct-q4_0.gguf',
    n_ctx=512,
)

llama_model_load_from_file_impl: using device Metal (AMD Radeon Pro 555X) - 4096 MiB free
llama_model_loader: loaded meta data with 38 key-value pairs and 165 tensors from falcon/Falcon3-1B-Instruct-q4_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Falcon3 1B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Falcon3
llama_model_loader: - kv   5:                         general.size_label str              = 1B
llama_model_loader: - kv   6:                            gener

In [57]:
# Second handle, loaded from a GGUF file in the working directory (the load log
# reports general.name = "Falcon3 1B Instruct").
model1 = Llama(
    model_path='falcon3-1b.gguf',
    n_ctx=512,
)

llama_model_load_from_file_impl: using device Metal (AMD Radeon Pro 555X) - 17592186041920 MiB free
llama_model_loader: loaded meta data with 31 key-value pairs and 165 tensors from falcon3-1b.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Falcon3 1B Instruct
llama_model_loader: - kv   3:                       general.organization str              = Tiiuae
llama_model_loader: - kv   4:                           general.finetune str              = Instruct
llama_model_loader: - kv   5:                           general.basename str              = Falcon3
llama_model_loader: - kv   6:                         general.size_la

In [54]:
# Persist model1 into the local "falcon3" directory.
# NOTE(review): read top to bottom, model1 is the llama_cpp Llama object created in the
# previous cell, whereas save_pretrained looks like the Hugging Face-style API. The
# execution count (54, i.e. before the load at 57) suggests this ran against a different
# object — confirm, or drop this cell so Restart & Run All works.
model1.save_pretrained("falcon3")

In [56]:
# Write the tokenizer files into "falcon3"; the recorded output lists the three JSON files.
# NOTE(review): the only `tokenizer` assignment visible in this notebook is
# model.tokenizer() from the Llama section; the files produced here look like a
# Hugging Face tokenizer's, so presumably a different object was bound at run time — verify.
tokenizer.save_pretrained("falcon3")

('falcon3/tokenizer_config.json',
 'falcon3/special_tokens_map.json',
 'falcon3/tokenizer.json')

In [58]:
import time

# Instruction prompt for the Falcon3 GGUF model held in `model1`.
# NOTE(review): the first sentence ends with "just like this example" but, unlike the
# Llama-2 prompt earlier in the notebook, no Example section follows — confirm it was
# dropped on purpose.
prompt = """Generate a social media post in a descriptive format for a business in 500 words just like this example
            Context - {Platform : Facebook
                      Theme : Product launch of a fitness software
                      Target Audience : Health-conscious people
                      Tone : Friendly
                      Language : English}
            """

# Sampling configuration; built before the timer starts so it is not part of the measurement.
sampling_params = dict(
    top_k=85,
    top_p=0.95,
    temperature=0.5,
    max_tokens=1024,
    stop=["Q:"],
    seed=42,
)

# Wall-clock timing around a single completion call.
start_tim = time.time()
output = model1(prompt=prompt, **sampling_params)
end_tim = time.time()

print(f"total time : {end_tim-start_tim}")
print(output['choices'][0]['text'])

llama_perf_context_print:        load time =    4180.84 ms
llama_perf_context_print: prompt eval time =    4178.58 ms /    70 tokens (   59.69 ms per token,    16.75 tokens per second)
llama_perf_context_print:        eval time =   41407.00 ms /   381 runs   (  108.68 ms per token,     9.20 tokens per second)
llama_perf_context_print:       total time =   46035.47 ms /   451 tokens


total time : 46.04177784919739
<|assistant|>
**Title: Join the Movement with Our New Fitness Software! 🚀✨

Hey health-conscious friends, 🌟 It's time to embark on a journey towards a healthier you with our latest product launch: FitLifeFit! 🌟

Introducing FitLifeFit, the ultimate companion for your fitness journey. Designed to make staying active a breeze, this innovative fitness software offers personalized workouts, nutrition plans, and motivational support tailored to your unique goals. Whether you're a seasoned athlete or just starting your fitness adventure, FitLifeFit is here to guide you every step of the way.

**Why Choose FitLifeFit?**

- **Personalized Workouts:** Our advanced algorithms analyze your fitness level and preferences to create a workout plan that's just for you.
- **Nutrition Guidance:** Effortlessly track your calorie intake and macronutrient ratios to fuel your workouts and achieve your fitness goals.
- **Motivation and Accountability:** Join our community of li

In [12]:
import time

# Structured prompt: task statement, a bulleted context, explicit requirements,
# then the final instruction to generate the post.
prompt = """Generate a high-quality, engaging social media post for a business in a descriptive format. Follow the example structure and ensure clarity, creativity, and audience engagement.

Context:
- Platform: Facebook  
- Theme: Product Launch of a fitness software called FitTune
- Target Audience: fitness enthusiasts  
- Tone: Friendly
- Language: English  
- Word Limit: 500 words  

Requirements:
- Craft a compelling opening that grabs attention.  
- Highlight key details about the business or occasion.  
- Maintain a consistent and engaging tone throughout.  
- Use persuasive language and storytelling where applicable.  
- Include a strong call to action (CTA) to encourage engagement.  


Now, generate a social media post using the provided context.
"""

# Sampling configuration (no stop sequence for this run); built before the timer
# starts so it is not part of the measurement.
sampling_params = dict(
    top_k=85,
    top_p=0.95,
    temperature=0.5,
    max_tokens=1024,
    seed=42,
)

# Wall-clock timing around a single completion call on `model`.
start_tim = time.time()
output = model(prompt=prompt, **sampling_params)
end_tim = time.time()

print(f"total time : {end_tim-start_tim}")
print(output['choices'][0]['text'])

Llama.generate: 53 prefix-match hit, remaining 115 prompt tokens to eval
llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     705.39 ms /   115 tokens (    6.13 ms per token,   163.03 tokens per second)
llama_perf_context_print:        eval time =   11703.61 ms /   343 runs   (   34.12 ms per token,    29.31 tokens per second)
llama_perf_context_print:       total time =   12691.54 ms /   458 tokens


total time : 12.694891929626465
<|assistant|>
**Title: "Unleash the Power of FitTune: Your New Fitness Companion in a Digital Age!"**

**Post:**

🌟 Introducing FitTune, your new digital fitness companion! 🚀

Say goodbye to gym fatigue and hello to a personalized fitness journey with FitTune. This innovative software is designed to make your fitness routine a seamless part of your daily life. 

**Key Features:**

- **Personalized Workouts:** Tailor your fitness routine to your unique needs and goals.
- **Track Progress:** Monitor your progress with real-time data and insights.
- **Interactive Challenges:** Engage with a community of like-minded fitness enthusiasts.
- **Easy Integration:** Seamlessly integrate FitTune into your existing fitness routine.

**Why Choose FitTune?**

- **Convenience:** Access your workouts from anywhere at any time.
- **Expert Guidance:** Expert trainers create workouts for you, tailored to your schedule.
- **Community Support:** Connect with a community of f

In [41]:
import time


def _extract_sentiment_label(completion_text):
    """Return the model's one-word answer from a raw completion string.

    The recorded completions sometimes start with a blank line and always carry an
    ``<|assistant|>`` header before the answer. Instead of relying on a fixed line
    position (which raises IndexError on a single-line completion and picks the wrong
    line when extra lines are emitted), this returns the first non-empty line that is
    not the header.

    Args:
        completion_text: Text of one completion, i.e. ``output['choices'][0]['text']``.

    Returns:
        The stripped answer line, or an empty string when the completion holds no
        answer line, so ``predictions`` stays aligned with the input rows.
    """
    for raw_line in completion_text.splitlines():
        candidate = raw_line.strip()
        if candidate and candidate != "<|assistant|>":
            return candidate
    return ""


# Zero-shot sentiment classification: one completion per row of the test set.
df = pd.read_csv('../Sentiment_Analysis/data/test_data.csv')
predictions = []

for sentence in df['Text']:
    # NOTE(review): "weather" below is presumably a typo for "whether"; it is left
    # untouched so results stay comparable with earlier runs — fix and re-run deliberately.
    prompt = f"""State weather sentiment of the given sentence is positive or negative:
    Sentence - "{sentence}"
    Constraints - Answer in 1 word (Positive or Negative)
    """

    # Wall-clock timing around a single completion call.
    start_tim = time.time()
    output = model(prompt=prompt,
                   top_k=85,
                   top_p=0.95,
                   temperature=0.5,
                   max_tokens=1024,
                   seed=42)
    end_tim = time.time()

    completion_text = output['choices'][0]['text']
    print(f"total time : {end_tim-start_tim}")
    print(completion_text)
    predictions.append(_extract_sentiment_label(completion_text))

Llama.generate: 20 prefix-match hit, remaining 36 prompt tokens to eval
llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     278.07 ms /    36 tokens (    7.72 ms per token,   129.47 tokens per second)
llama_perf_context_print:        eval time =     228.58 ms /     7 runs   (   32.65 ms per token,    30.62 tokens per second)
llama_perf_context_print:       total time =     510.57 ms /    43 tokens
Llama.generate: 20 prefix-match hit, remaining 35 prompt tokens to eval


total time : 0.5134577751159668

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     252.95 ms /    35 tokens (    7.23 ms per token,   138.37 tokens per second)
llama_perf_context_print:        eval time =     214.28 ms /     6 runs   (   35.71 ms per token,    28.00 tokens per second)
llama_perf_context_print:       total time =     470.87 ms /    41 tokens
Llama.generate: 20 prefix-match hit, remaining 34 prompt tokens to eval


total time : 0.47275280952453613
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     240.75 ms /    34 tokens (    7.08 ms per token,   141.22 tokens per second)
llama_perf_context_print:        eval time =     242.29 ms /     7 runs   (   34.61 ms per token,    28.89 tokens per second)
llama_perf_context_print:       total time =     486.83 ms /    41 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.4887418746948242

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     192.33 ms /    32 tokens (    6.01 ms per token,   166.38 tokens per second)
llama_perf_context_print:        eval time =     260.30 ms /     7 runs   (   37.19 ms per token,    26.89 tokens per second)
llama_perf_context_print:       total time =     456.60 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.45879197120666504

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     214.65 ms /    32 tokens (    6.71 ms per token,   149.08 tokens per second)
llama_perf_context_print:        eval time =     254.92 ms /     7 runs   (   36.42 ms per token,    27.46 tokens per second)
llama_perf_context_print:       total time =     473.46 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 34 prompt tokens to eval


total time : 0.47634291648864746

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     255.60 ms /    34 tokens (    7.52 ms per token,   133.02 tokens per second)
llama_perf_context_print:        eval time =     318.82 ms /     6 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_perf_context_print:       total time =     579.47 ms /    40 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.5819852352142334
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     262.52 ms /    30 tokens (    8.75 ms per token,   114.28 tokens per second)
llama_perf_context_print:        eval time =     294.47 ms /     7 runs   (   42.07 ms per token,    23.77 tokens per second)
llama_perf_context_print:       total time =     561.99 ms /    37 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.5644137859344482

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     184.87 ms /    32 tokens (    5.78 ms per token,   173.09 tokens per second)
llama_perf_context_print:        eval time =     246.01 ms /     7 runs   (   35.14 ms per token,    28.45 tokens per second)
llama_perf_context_print:       total time =     434.91 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 31 prompt tokens to eval


total time : 0.43726611137390137

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     376.58 ms /    31 tokens (   12.15 ms per token,    82.32 tokens per second)
llama_perf_context_print:        eval time =     356.47 ms /     7 runs   (   50.92 ms per token,    19.64 tokens per second)
llama_perf_context_print:       total time =     738.77 ms /    38 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.7411069869995117

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     329.23 ms /    30 tokens (   10.97 ms per token,    91.12 tokens per second)
llama_perf_context_print:        eval time =     294.21 ms /     6 runs   (   49.03 ms per token,    20.39 tokens per second)
llama_perf_context_print:       total time =     627.15 ms /    36 tokens
Llama.generate: 20 prefix-match hit, remaining 28 prompt tokens to eval


total time : 0.6297709941864014
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     261.46 ms /    28 tokens (    9.34 ms per token,   107.09 tokens per second)
llama_perf_context_print:        eval time =     247.96 ms /     7 runs   (   35.42 ms per token,    28.23 tokens per second)
llama_perf_context_print:       total time =     514.33 ms /    35 tokens
Llama.generate: 20 prefix-match hit, remaining 27 prompt tokens to eval


total time : 0.5162529945373535

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     243.45 ms /    27 tokens (    9.02 ms per token,   110.90 tokens per second)
llama_perf_context_print:        eval time =     217.38 ms /     6 runs   (   36.23 ms per token,    27.60 tokens per second)
llama_perf_context_print:       total time =     464.45 ms /    33 tokens
Llama.generate: 20 prefix-match hit, remaining 29 prompt tokens to eval


total time : 0.46715807914733887
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     194.12 ms /    29 tokens (    6.69 ms per token,   149.40 tokens per second)
llama_perf_context_print:        eval time =     231.88 ms /     6 runs   (   38.65 ms per token,    25.88 tokens per second)
llama_perf_context_print:       total time =     429.88 ms /    35 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.43175220489501953
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     154.48 ms /    32 tokens (    4.83 ms per token,   207.15 tokens per second)
llama_perf_context_print:        eval time =     194.66 ms /     6 runs   (   32.44 ms per token,    30.82 tokens per second)
llama_perf_context_print:       total time =     352.32 ms /    38 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.35472726821899414
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     154.28 ms /    32 tokens (    4.82 ms per token,   207.41 tokens per second)
llama_perf_context_print:        eval time =     195.97 ms /     6 runs   (   32.66 ms per token,    30.62 tokens per second)
llama_perf_context_print:       total time =     353.81 ms /    38 tokens
Llama.generate: 20 prefix-match hit, remaining 29 prompt tokens to eval


total time : 0.3559260368347168
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     256.35 ms /    29 tokens (    8.84 ms per token,   113.13 tokens per second)
llama_perf_context_print:        eval time =     305.44 ms /     7 runs   (   43.63 ms per token,    22.92 tokens per second)
llama_perf_context_print:       total time =     566.94 ms /    36 tokens
Llama.generate: 20 prefix-match hit, remaining 27 prompt tokens to eval


total time : 0.5711259841918945

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     433.64 ms /    27 tokens (   16.06 ms per token,    62.26 tokens per second)
llama_perf_context_print:        eval time =     265.12 ms /     6 runs   (   44.19 ms per token,    22.63 tokens per second)
llama_perf_context_print:       total time =     703.89 ms /    33 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.706244945526123
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     186.73 ms /    30 tokens (    6.22 ms per token,   160.66 tokens per second)
llama_perf_context_print:        eval time =     294.26 ms /     7 runs   (   42.04 ms per token,    23.79 tokens per second)
llama_perf_context_print:       total time =     485.34 ms /    37 tokens
Llama.generate: 20 prefix-match hit, remaining 31 prompt tokens to eval


total time : 0.48726415634155273

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     280.19 ms /    31 tokens (    9.04 ms per token,   110.64 tokens per second)
llama_perf_context_print:        eval time =     266.94 ms /     6 runs   (   44.49 ms per token,    22.48 tokens per second)
llama_perf_context_print:       total time =     551.26 ms /    37 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.5542430877685547
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     279.11 ms /    30 tokens (    9.30 ms per token,   107.49 tokens per second)
llama_perf_context_print:        eval time =     236.26 ms /     6 runs   (   39.38 ms per token,    25.40 tokens per second)
llama_perf_context_print:       total time =     519.13 ms /    36 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.5214440822601318
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     280.52 ms /    30 tokens (    9.35 ms per token,   106.95 tokens per second)
llama_perf_context_print:        eval time =     248.09 ms /     7 runs   (   35.44 ms per token,    28.22 tokens per second)
llama_perf_context_print:       total time =     532.67 ms /    37 tokens
Llama.generate: 20 prefix-match hit, remaining 28 prompt tokens to eval


total time : 0.5345220565795898

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     191.04 ms /    28 tokens (    6.82 ms per token,   146.57 tokens per second)
llama_perf_context_print:        eval time =     270.88 ms /     7 runs   (   38.70 ms per token,    25.84 tokens per second)
llama_perf_context_print:       total time =     466.23 ms /    35 tokens
Llama.generate: 20 prefix-match hit, remaining 35 prompt tokens to eval


total time : 0.4684150218963623

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     251.03 ms /    35 tokens (    7.17 ms per token,   139.42 tokens per second)
llama_perf_context_print:        eval time =     257.75 ms /     7 runs   (   36.82 ms per token,    27.16 tokens per second)
llama_perf_context_print:       total time =     512.61 ms /    42 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.5147252082824707

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     195.59 ms /    30 tokens (    6.52 ms per token,   153.38 tokens per second)
llama_perf_context_print:        eval time =     205.42 ms /     6 runs   (   34.24 ms per token,    29.21 tokens per second)
llama_perf_context_print:       total time =     404.38 ms /    36 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.4062037467956543
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     185.10 ms /    32 tokens (    5.78 ms per token,   172.88 tokens per second)
llama_perf_context_print:        eval time =     264.91 ms /     7 runs   (   37.84 ms per token,    26.42 tokens per second)
llama_perf_context_print:       total time =     453.92 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 34 prompt tokens to eval


total time : 0.45629024505615234

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     222.70 ms /    34 tokens (    6.55 ms per token,   152.67 tokens per second)
llama_perf_context_print:        eval time =     203.95 ms /     6 runs   (   33.99 ms per token,    29.42 tokens per second)
llama_perf_context_print:       total time =     430.17 ms /    40 tokens
Llama.generate: 20 prefix-match hit, remaining 33 prompt tokens to eval


total time : 0.4327661991119385
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     205.33 ms /    33 tokens (    6.22 ms per token,   160.72 tokens per second)
llama_perf_context_print:        eval time =     198.51 ms /     6 runs   (   33.09 ms per token,    30.23 tokens per second)
llama_perf_context_print:       total time =     407.07 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 26 prompt tokens to eval


total time : 0.40944719314575195
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     191.98 ms /    26 tokens (    7.38 ms per token,   135.43 tokens per second)
llama_perf_context_print:        eval time =     211.65 ms /     6 runs   (   35.27 ms per token,    28.35 tokens per second)
llama_perf_context_print:       total time =     407.50 ms /    32 tokens
Llama.generate: 20 prefix-match hit, remaining 28 prompt tokens to eval


total time : 0.40999484062194824
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     191.86 ms /    28 tokens (    6.85 ms per token,   145.94 tokens per second)
llama_perf_context_print:        eval time =     230.61 ms /     6 runs   (   38.43 ms per token,    26.02 tokens per second)
llama_perf_context_print:       total time =     425.81 ms /    34 tokens
Llama.generate: 20 prefix-match hit, remaining 31 prompt tokens to eval


total time : 0.4281191825866699
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     233.81 ms /    31 tokens (    7.54 ms per token,   132.59 tokens per second)
llama_perf_context_print:        eval time =     206.89 ms /     6 runs   (   34.48 ms per token,    29.00 tokens per second)
llama_perf_context_print:       total time =     444.24 ms /    37 tokens
Llama.generate: 20 prefix-match hit, remaining 29 prompt tokens to eval


total time : 0.4461989402770996
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     192.89 ms /    29 tokens (    6.65 ms per token,   150.34 tokens per second)
llama_perf_context_print:        eval time =     265.02 ms /     6 runs   (   44.17 ms per token,    22.64 tokens per second)
llama_perf_context_print:       total time =     461.89 ms /    35 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.4637451171875
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     191.09 ms /    32 tokens (    5.97 ms per token,   167.46 tokens per second)
llama_perf_context_print:        eval time =     226.72 ms /     6 runs   (   37.79 ms per token,    26.46 tokens per second)
llama_perf_context_print:       total time =     421.46 ms /    38 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.4237391948699951
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     191.36 ms /    32 tokens (    5.98 ms per token,   167.23 tokens per second)
llama_perf_context_print:        eval time =     239.24 ms /     7 runs   (   34.18 ms per token,    29.26 tokens per second)
llama_perf_context_print:       total time =     434.13 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 33 prompt tokens to eval


total time : 0.43601512908935547

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     198.82 ms /    33 tokens (    6.02 ms per token,   165.98 tokens per second)
llama_perf_context_print:        eval time =     263.93 ms /     7 runs   (   37.70 ms per token,    26.52 tokens per second)
llama_perf_context_print:       total time =     466.76 ms /    40 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.4691588878631592

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     226.80 ms /    30 tokens (    7.56 ms per token,   132.28 tokens per second)
llama_perf_context_print:        eval time =     209.75 ms /     6 runs   (   34.96 ms per token,    28.61 tokens per second)
llama_perf_context_print:       total time =     439.82 ms /    36 tokens
Llama.generate: 20 prefix-match hit, remaining 27 prompt tokens to eval


total time : 0.4421529769897461
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     189.09 ms /    27 tokens (    7.00 ms per token,   142.79 tokens per second)
llama_perf_context_print:        eval time =     218.52 ms /     6 runs   (   36.42 ms per token,    27.46 tokens per second)
llama_perf_context_print:       total time =     410.97 ms /    33 tokens
Llama.generate: 20 prefix-match hit, remaining 27 prompt tokens to eval


total time : 0.4128990173339844
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     186.46 ms /    27 tokens (    6.91 ms per token,   144.80 tokens per second)
llama_perf_context_print:        eval time =     206.34 ms /     6 runs   (   34.39 ms per token,    29.08 tokens per second)
llama_perf_context_print:       total time =     396.04 ms /    33 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.3982357978820801
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     221.63 ms /    32 tokens (    6.93 ms per token,   144.38 tokens per second)
llama_perf_context_print:        eval time =     233.83 ms /     6 runs   (   38.97 ms per token,    25.66 tokens per second)
llama_perf_context_print:       total time =     459.49 ms /    38 tokens
Llama.generate: 20 prefix-match hit, remaining 33 prompt tokens to eval


total time : 0.4617478847503662
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     202.14 ms /    33 tokens (    6.13 ms per token,   163.25 tokens per second)
llama_perf_context_print:        eval time =     253.75 ms /     7 runs   (   36.25 ms per token,    27.59 tokens per second)
llama_perf_context_print:       total time =     459.86 ms /    40 tokens
Llama.generate: 20 prefix-match hit, remaining 34 prompt tokens to eval


total time : 0.46192407608032227

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     223.74 ms /    34 tokens (    6.58 ms per token,   151.96 tokens per second)
llama_perf_context_print:        eval time =     239.62 ms /     7 runs   (   34.23 ms per token,    29.21 tokens per second)
llama_perf_context_print:       total time =     467.36 ms /    41 tokens
Llama.generate: 20 prefix-match hit, remaining 27 prompt tokens to eval


total time : 0.4694688320159912

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     219.80 ms /    27 tokens (    8.14 ms per token,   122.84 tokens per second)
llama_perf_context_print:        eval time =     228.41 ms /     6 runs   (   38.07 ms per token,    26.27 tokens per second)
llama_perf_context_print:       total time =     451.65 ms /    33 tokens
Llama.generate: 20 prefix-match hit, remaining 33 prompt tokens to eval


total time : 0.45375490188598633
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     236.44 ms /    33 tokens (    7.16 ms per token,   139.57 tokens per second)
llama_perf_context_print:        eval time =     201.06 ms /     6 runs   (   33.51 ms per token,    29.84 tokens per second)
llama_perf_context_print:       total time =     440.89 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 31 prompt tokens to eval


total time : 0.44286298751831055
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     224.46 ms /    31 tokens (    7.24 ms per token,   138.11 tokens per second)
llama_perf_context_print:        eval time =     243.58 ms /     7 runs   (   34.80 ms per token,    28.74 tokens per second)
llama_perf_context_print:       total time =     471.98 ms /    38 tokens
Llama.generate: 20 prefix-match hit, remaining 32 prompt tokens to eval


total time : 0.47393798828125

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     221.60 ms /    32 tokens (    6.92 ms per token,   144.41 tokens per second)
llama_perf_context_print:        eval time =     225.72 ms /     6 runs   (   37.62 ms per token,    26.58 tokens per second)
llama_perf_context_print:       total time =     450.79 ms /    38 tokens
Llama.generate: 20 prefix-match hit, remaining 27 prompt tokens to eval


total time : 0.4536261558532715
<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     191.86 ms /    27 tokens (    7.11 ms per token,   140.72 tokens per second)
llama_perf_context_print:        eval time =     239.85 ms /     7 runs   (   34.26 ms per token,    29.18 tokens per second)
llama_perf_context_print:       total time =     435.95 ms /    34 tokens
Llama.generate: 20 prefix-match hit, remaining 35 prompt tokens to eval


total time : 0.4382948875427246

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     238.54 ms /    35 tokens (    6.82 ms per token,   146.73 tokens per second)
llama_perf_context_print:        eval time =     269.72 ms /     7 runs   (   38.53 ms per token,    25.95 tokens per second)
llama_perf_context_print:       total time =     512.72 ms /    42 tokens
Llama.generate: 20 prefix-match hit, remaining 29 prompt tokens to eval


total time : 0.514937162399292

<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     228.37 ms /    29 tokens (    7.87 ms per token,   126.99 tokens per second)
llama_perf_context_print:        eval time =     234.65 ms /     7 runs   (   33.52 ms per token,    29.83 tokens per second)
llama_perf_context_print:       total time =     466.98 ms /    36 tokens
Llama.generate: 20 prefix-match hit, remaining 33 prompt tokens to eval


total time : 0.46894311904907227

<|assistant|>
Negative


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     213.35 ms /    33 tokens (    6.47 ms per token,   154.67 tokens per second)
llama_perf_context_print:        eval time =     212.25 ms /     6 runs   (   35.38 ms per token,    28.27 tokens per second)
llama_perf_context_print:       total time =     429.04 ms /    39 tokens
Llama.generate: 20 prefix-match hit, remaining 30 prompt tokens to eval


total time : 0.4313938617706299
<|assistant|>
Positive


llama_perf_context_print:        load time =    1604.91 ms
llama_perf_context_print: prompt eval time =     226.84 ms /    30 tokens (    7.56 ms per token,   132.25 tokens per second)
llama_perf_context_print:        eval time =     208.53 ms /     6 runs   (   34.76 ms per token,    28.77 tokens per second)
llama_perf_context_print:       total time =     438.77 ms /    36 tokens


total time : 0.44115400314331055
<|assistant|>
Negative


In [38]:
"\n<|assistant|>\nNegative".split("\n")

['', '<|assistant|>', 'Negative']

In [44]:
predictions[0].lower()

'negative'

In [46]:
# Lower-case every predicted label before it is scored against df['Sentiment']
predictions = [label.lower() for label in predictions]

In [48]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Reference labels are taken from df['Sentiment'] once and reused for every metric
y_true = list(df['Sentiment'])

acc_score = accuracy_score(y_true, predictions)
report = classification_report(y_true, predictions)
matrix = confusion_matrix(y_true, predictions)

# One metric per line: accuracy, per-class report, confusion matrix
print(acc_score, report, matrix, sep="\n")

0.9795918367346939
              precision    recall  f1-score   support

    negative       1.00      0.96      0.98        26
    positive       0.96      1.00      0.98        23

    accuracy                           0.98        49
   macro avg       0.98      0.98      0.98        49
weighted avg       0.98      0.98      0.98        49

[[25  1]
 [ 0 23]]


# Using Falcon

In [4]:
# Load Falcon3-1B-Instruct with only the checkpoint name (no quantization config, no device_map).
# Disabled experiment, kept for reference:
# quantization_config = QuantoConfig(weights='int')
model = AutoModelForCausalLM.from_pretrained("tiiuae/Falcon3-1B-Instruct")
# Keyword arguments that belonged to the disabled experiment above:
#   quantization_config=quantization_config,
#   low_cpu_mem_usage=True,
#   device_map='mps',

In [5]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(131072, 2048)
    (layers): ModuleList(
      (0-17): 18 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (v_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
      )
    )
    (norm

In [12]:
quantize

<function optimum.quanto.quantize.quantize(model, modules=None, **kwargs)>

In [13]:
# Tokenizer from the same "tiiuae/Falcon3-1B-Instruct" checkpoint as the causal LM; rebinds `tokenizer`
tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon3-1B-Instruct")

In [9]:
import os
# Explicitly disable parallelism inside huggingface/tokenizers (the library's fork warning
# recommends setting this variable explicitly).
# NOTE(review): presumably this has to run before a tokenizer is first used — consider moving it to the top of the notebook.
os.environ["TOKENIZERS_PARALLELISM"]="false"

In [8]:
type(model)

transformers.models.llama.modeling_llama.LlamaForCausalLM

In [14]:
import time

# Fix the RNG seed so the sampled generations below are repeatable
set_seed(42)
# NOTE(review): the prompt ends with "just like this example" but only a context block follows — confirm whether an example was meant to be included.
prompt = """Generate a social media post in a descriptive format for a business in 1000 words just like this example
            Context - {Platform : Facebook
                      Theme : Product launch of a fitness software
                      Target Audience : Health-conscious people
                      Tone : Friendly
                      Language : English}
            """
# perf_counter() is a monotonic clock intended for measuring elapsed time
tok_tim = time.perf_counter()
tokens = tokenizer(prompt, return_tensors='pt')

# tokens = {k: v.to('mps') for k, v in tokens.items()}
tok_tim_end = time.perf_counter()
print(f"tokenization time : {tok_tim_end-tok_tim}")

# Sample three continuations of up to 500 new tokens each (top-k / nucleus sampling).
# pad_token_id is passed explicitly so generate() does not have to fall back to the
# EOS id on its own and emit the "Setting `pad_token_id` to `eos_token_id`" warning.
output = model.generate(**tokens,
                        max_new_tokens=500,
                        do_sample=True,
                        top_k=50,
                        top_p=0.95,
                        num_return_sequences=3,
                        pad_token_id=tokenizer.eos_token_id)
gen_time = time.perf_counter()
print(f"generation time : {gen_time-tok_tim_end}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


tokenization time : 0.0014140605926513672
generation time : 166.245374917984


In [15]:
# Decode the first returned sequence (output[0]) with special tokens stripped, then show it
generated_text = tokenizer.decode(
    output[0],
    skip_special_tokens=True,
)
print(generated_text)

Generate a social media post in a descriptive format for a business in 1000 words just like this example
            Context - {Platform : Facebook
                      Theme : Product launch of a fitness software
                      Target Audience : Health-conscious people
                      Tone : Friendly
                      Language : English}
            <|assistant|>
# Introducing Our New Fitness Software, "FitFrenzy": Revolutionizing Your Health Journey!

🌟 Dive into a future where fitness feels like fun, not a chore! 🏊✨

Introducing FitFrenzy, our innovative fitness software that transforms your workout experience from mundane to exhilarating. Tailored for those who love staying fit but also cherish the joy of living a healthy lifestyle, FitFrenzy is your go-to companion for a balanced and joyful journey.

🌟 FEATURES OF FITFrenzy - 🚀
1. **Personalized Workouts**: Tailor your workout routines based on your goals, fitness level, and preferences.
2. **Interactive Challeng

In [None]:
# Inspect the dimensions of the generated ids
# (presumably (num_return_sequences, prompt + generated length) — confirm)
output.shape

In [16]:
# generate() was asked for num_return_sequences=3, but only output[0] has been shown so far:
# decode and print every returned sequence instead of repeating the first one.
generated_texts = tokenizer.batch_decode(output, skip_special_tokens=True)
generated_text = generated_texts[0]  # keep the existing name bound to the first sequence
for seq_idx, text in enumerate(generated_texts, start=1):
    print(f"=== sequence {seq_idx} ===")
    print(text)

Generate a social media post in a descriptive format for a business in 1000 words just like this example
            Context - {Platform : Facebook
                      Theme : Product launch of a fitness software
                      Target Audience : Health-conscious people
                      Tone : Friendly
                      Language : English}
            <|assistant|>
# Introducing Our New Fitness Software, "FitFrenzy": Revolutionizing Your Health Journey!

🌟 Dive into a future where fitness feels like fun, not a chore! 🏊✨

Introducing FitFrenzy, our innovative fitness software that transforms your workout experience from mundane to exhilarating. Tailored for those who love staying fit but also cherish the joy of living a healthy lifestyle, FitFrenzy is your go-to companion for a balanced and joyful journey.

🌟 FEATURES OF FITFrenzy - 🚀
1. **Personalized Workouts**: Tailor your workout routines based on your goals, fitness level, and preferences.
2. **Interactive Challeng

# Common setup: text preprocessing and dataset preparation

In [2]:
torch.backends.mps.is_available()

True

In [3]:
class TextPreprocessor:
    """
    A class for preprocessing text data through cleaning, tokenization, and normalization
    
    Attributes:
    -----------
        lemmatizer : WordNetLemmatizer instance for word lemmatization
        
        stop_words : Set of stopwords to be removed from text
    """
    # Anything that looks like a markup tag, e.g. "<br />"
    _HTML_TAG_PATTERN = re.compile(r'<[^>]*>')

    # Every character that is neither an ASCII letter nor whitespace (raw string: "\s" is a regex escape)
    _NON_LETTER_PATTERN = re.compile(r'[^a-zA-Z\s]')

    def __init__(self):
        """
        Initialize the TextPreprocessor with required NLTK resources
        
        Raises:
        -------
            LookupError : Propagated unchanged from NLTK if a required resource cannot be
                          loaded (presumably when the downloads below did not succeed)
        """
        # Download required NLTK data
        for resource in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
            nltk.download(resource, quiet=True)

        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))
    
    def clean_text(self, text:str) -> str:
        """
        Clean and normalize input text by removing HTML tags, special characters,
        and applying text normalization techniques
        
        Arguments:
        ----------
            text { str } : Input text to be cleaned
            
        Raises:
        -------
            ValueError   : If input text is empty or not a string
            
            Any exception raised by tokenization / lemmatization is propagated unchanged
            
        Returns:
        --------
                { str }  : Lower-cased, stopword-free, lemmatized tokens joined by single spaces
        """
        # Type check first so that non-string inputs never get truth-tested
        if ((not isinstance(text, str)) or (not text)):
            raise ValueError("Input text must be a non-empty string")

        # Replace HTML tags with a space: deleting them outright would glue the words
        # on either side of e.g. "<br />" into a single token
        text   = self._HTML_TAG_PATTERN.sub(' ', text)

        # Remove special characters and digits
        text   = self._NON_LETTER_PATTERN.sub('', text)

        # Convert to lowercase
        text   = text.lower()

        # Tokenization
        tokens = word_tokenize(text)

        # Remove stopwords and lemmatize
        tokens = [self.lemmatizer.lemmatize(token) for token in tokens if token not in self.stop_words]

        return ' '.join(tokens)

In [4]:
# Read the training reviews and the separate test file into DataFrames
train_df, test_df = (
    pd.read_csv("../Sentiment_Analysis/data/IMDB_Dataset.csv"),
    pd.read_csv("../Sentiment_Analysis/data/test_data.csv"),
)

In [5]:
# Instantiating runs the NLTK downloads in TextPreprocessor.__init__ and builds the stopword set
preprocessor = TextPreprocessor()

In [6]:
# Register pandas' progress_apply, then add a 'clean_text' column to each frame
tqdm.pandas()
for frame, raw_column in ((train_df, 'review'), (test_df, 'Text')):
    frame['clean_text'] = frame[raw_column].progress_apply(preprocessor.clean_text)

100%|████████████████████████████████████| 50000/50000 [00:53<00:00, 942.13it/s]
100%|████████████████████████████████████████| 49/49 [00:00<00:00, 10492.72it/s]


In [7]:
# Use Apple's Metal backend (MPS) when PyTorch reports it as available, otherwise the CPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

print(device)

mps


In [8]:
# Give both frames the same column names: 'review' (raw text), 'label', 'text' (cleaned text)
train_df = train_df.rename(
    columns={
        'review': 'review',
        'sentiment': 'label',
        'clean_text': 'text',
    }
)
test_df = test_df.rename(
    columns={
        'Text': 'review',
        'Sentiment': 'label',
        'clean_text': 'text',
    }
)

In [9]:
# String sentiment -> integer class id
label_map = {"positive": 1, "negative": 0}

# Series.map() turns every value that is not a key of label_map into NaN. That happens
# silently, e.g. when this cell is run a second time (labels are already 0/1) or when a
# label is spelled differently, so validate before overwriting the column.
for frame_name, frame in (("train_df", train_df), ("test_df", test_df)):
    encoded = frame['label'].map(label_map)
    unmapped = encoded.isna()
    if unmapped.any():
        unexpected = sorted(frame.loc[unmapped, 'label'].astype(str).unique())
        raise ValueError(f"{frame_name}['label'] has values outside label_map: {unexpected}")
    frame['label'] = encoded

In [10]:
# Convert only the 'text' and 'label' columns of each frame into a Hugging Face Dataset
train_dataset, unseen_dataset = (
    Dataset.from_pandas(frame[['text','label']])
    for frame in (train_df, test_df)
)

# Fine-tuning DistilBERT using QuantoConfig

In [12]:
# Tokenizer for the 'distilbert-base-uncased' checkpoint; rebinds the global `tokenizer`
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
# Disabled pad-token override (presumably left over from a model without a pad token — confirm before deleting):
# tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples, max_length=120):
    """
    Tokenize the 'text' entry of a batch with the module-level `tokenizer`

    Arguments:
    ----------
        examples   { dict } : Batch of examples; only examples['text'] is read

        max_length { int }  : Length every sequence is padded / truncated to
                              (defaults to 120, the previously hard-coded value)

    Returns:
    --------
        Whatever `tokenizer(...)` returns for the batch (PyTorch tensors are requested
        through return_tensors="pt")
    """
    return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=max_length, return_tensors="pt")

In [13]:
# Apply tokenize_function to the training split in batches (batched=True passes lists of examples)
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [15]:
# DistilBERT with a 2-label classification head, loaded without a quantization config.
# NOTE(review): `model` is also assigned by the QuantoConfig cell further down — confirm which of the two is meant to be trained.
model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
import os
os.path.exists('model/')

True

In [22]:
# NOTE(review): going by the execution counts, `model` appears to be the DistilBERT classifier here,
# yet the target directory is named 'llama2-7b/' (and the preceding cell checks 'model/') — confirm the intended path.
model.save_pretrained('llama2-7b/')

In [13]:
# Load DistilBERT (2-label head) with int8 weight quantization requested through QuantoConfig
quantization_config = QuantoConfig(weights="int8")

model = AutoModelForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=2,
    quantization_config=quantization_config,
)
# model.config.pad_token_id = tokenizer.pad_token_id

# Move the model to the selected device; as the cell's last expression this also echoes the module tree
model.to(device)

`low_cpu_mem_usage` was None, now default to True since model is quantized.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): QLinear(in_features=768, out_features=768, bias=True)
            (k_lin): QLinear(in_features=768, out_features=768, bias=True)
            (v_lin): QLinear(in_features=768, out_features=768, bias=True)
            (out_lin): QLinear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=Fal

In [14]:
# LoRA adapter configuration for the sequence-classification model
config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=["q_lin", "k_lin", "v_lin", "out_lin", "lin1", "lin2"],  # DistilBERT layer names (q/k/v/out_lin are its attention projections; lin1/lin2 presumably the FFN linears), not GPT-2 modules
    bias="none",
    lora_dropout=0.05,
    task_type="SEQ_CLS",
)

# Wrap the current `model` with the adapters described by `config`
model = get_peft_model(model, config)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [15]:
# Hyper-parameters for the fine-tuning run. `eval_strategy` and `use_cpu` are the current
# names of the deprecated `evaluation_strategy` and `no_cuda` arguments.
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    warmup_steps=500,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    eval_strategy="epoch",   # evaluate at the end of every epoch
    logging_dir="./logs",
    use_cpu=True             # train on the CPU even if an accelerator is available
)

In [16]:
# The evaluation split has to be tokenized the same way as the training split:
# `unseen_dataset` only holds 'text' and 'label', so evaluating on it directly would
# hand the model no `input_ids` / `attention_mask`.
tokenized_unseen_dataset = unseen_dataset.map(tokenize_function, batched=True)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_unseen_dataset,
)

trainer.train()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 