# **INSTALL AND IMPORT**

In [3]:
%%capture
# Environment setup cell; %%capture hides the (long) pip output.
!pip install torch
# Unsloth is installed straight from its GitHub repository with the "colab-new" extra.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install these packages without pulling in their own dependencies.
!pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
!pip install triton
# rouge_score is used for evaluation and gradio for the chat demo further down.
!pip install rouge_score
!pip install gradio

In [4]:
from unsloth import FastLanguageModel
import torch
import pandas as pd
import string
from rouge_score import rouge_scorer
from transformers import TextStreamer
import gradio as gr

# CUDA compute capability of the current GPU as a (major, minor) pair.
major_version, minor_version = torch.cuda.get_device_capability()

# Loading options for the fine-tuned checkpoint.
max_seq_length = 2048  # can go up to 8k
dtype = None
load_in_4bit = True  # 4bit quantization

# Collect the keyword arguments in one place, then load model and tokenizer together.
load_options = {
    "model_name": "paramasivan27/llama-3-8b-bnb-4bit",  # Llama-3
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.9: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/198 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Unsloth 2024.9 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


# **PROMPT DEFINITION & INITIAL VALIDATION**

In [5]:
# Alpaca-style prompt template with three positional slots, in order:
# instruction, input, response. The cells in this notebook leave the response
# slot empty so that the model generates it.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # end-of-sequence token string from the tokenizer; not referenced by the other cells visible in this notebook

In [6]:
# Put the model into inference mode via Unsloth before any generation.
FastLanguageModel.for_inference(model)

# Quick smoke test: fill the template with a single question and stream a short answer.
prompt = alpaca_prompt.format(
    "What are some limitations of bag-of-words models in representing text?",  # instruction
    "",  # input
    "",  # response slot stays empty so the model writes it
)
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# The streamer prints tokens as they are produced; the returned ids are discarded.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=32)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
What are some limitations of bag-of-words models in representing text?

### Input:


### Response:
Bag-of-words models have several limitations including they do not capture the order of words in the sequence they do not consider the semantic relationship between words and they have


In [25]:
# Same check with a different question and a larger generation budget.
question = "What is a Vanishing gradient problem ?"
prompt = alpaca_prompt.format(question, "", "")  # instruction, input, empty response slot
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=1024)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
What is a Vanishing gradient problem?

### Input:


### Response:
The vanishing gradient problem is a challenge in training deep neural networks. As the network gets deeper, the gradients used to update the weights become smaller, which can slow down or prevent learning in earlier layers.<|end_of_text|>


In [26]:
# Re-ask the bag-of-words question, this time allowing up to 1024 new tokens
# so the answer can run to its end-of-text token.
question = "What are some limitations of bag-of-words models in representing text?"
batch = [alpaca_prompt.format(question, "", "")]  # instruction, input, empty response slot
inputs = tokenizer(batch, return_tensors="pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=1024)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
What are some limitations of bag-of-words models in representing text?

### Input:


### Response:
Bag-of-words models have several limitations including they may not capture the semantic relationship between words they are sensitive to noise such as typos and they may have high dimensionality.<|end_of_text|>


# **LOAD TEST DATASET**

In [8]:
# Test set with two reference answers per question (answer1, answer2).
df = pd.read_csv("./aiml-qa-test_csv.csv", header=0)
print(df.head())  # preview the first rows

# Second test set, loaded from test.csv.
df_2 = pd.read_csv("./test.csv", header=0)
print(df_2.head())  # preview the first rows

                                            question  \
0    How we can effectively convert 2D images to 1D?   
1  Can we utilize an autoencoder to perform dimen...   
2  What is NLP's current biggest challenge that i...   
3  Which problems cannot be solved by Neural netw...   
4                      Is scaling necessary for SVM?   

                                             answer1  \
0  Converting images to 1D data may not be effect...   
1  Yes, autoencoders can be applied to numerical ...   
2  The main challenges of NLP is finding and coll...   
3  While neural networks have shown great success...   
4  Yes, scaling the input data is generally recom...   

                                             answer2  
0  To effectively convert 2D images to 1D, use te...  
1  Yes, autoencoders can be used for dimensionali...  
2  NLP models struggle with tasks that require re...  
3  Neural networks are powerful, but they may str...  
4  Scaling the input data is advisable when utili..

# **RESPONSE FUNCTION DEFINITION & GENERATION**

In [9]:
# Add an empty 'generated_answer' column to both test frames; it is filled in later.
for frame in (df, df_2):
    frame['generated_answer'] = None

In [10]:
def generate_response(row, model):
    """Generate an answer for one question using the Alpaca prompt template.

    Args:
        row: Either the question text itself, or a row-like object
            (``pd.Series`` / ``dict``) that has a ``'question'`` entry.
        model: The language model whose ``generate`` method is called.

    Returns:
        list[str]: The decoded output split on ``'### Response:'``. Element 0
        is the prompt part and element 1 is the generated answer text.
    """
    # Use the argument that was passed in. The previous version read the
    # notebook-level loop variable `i`, so it only worked when called from a
    # loop that happened to use that exact name.
    if isinstance(row, (pd.Series, dict)):
        question = row['question']
    else:
        question = row

    prompt = alpaca_prompt.format(
        str(question),  # instruction
        "",  # input
        "",  # output - left blank so the model generates it
    )
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    # Generate without streaming, then decode the full sequence (prompt + answer).
    output = model.generate(**inputs, streamer=None, max_new_tokens=1024)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response.split('### Response:')

In [11]:

# Fill 'generated_answer' for every question, first in df_2 and then in df.
# NOTE: keep the loop variable named `i` unless generate_response is confirmed
# to rely only on its argument rather than on a notebook-level `i`.
for frame in (df_2, df):
    for i in frame.iterrows():  # i is an (index, row) pair
        parts = generate_response(i[1]['question'], model)
        # parts[1] is the text after the '### Response:' marker; newlines are dropped.
        frame.at[i[0], 'generated_answer'] = parts[1].replace("\n", "")

In [12]:
# Inspect df_2 now that the generated answers are filled in next to the reference answers.
df_2

Unnamed: 0,question,answer,generated_answer
0,What is feature selection?,The process of selecting a subset of relevant ...,The process of selecting a subset of relevant ...
1,"What are Markov decision processes (MDP), and ...",MDPs are mathematical frameworks for modeling ...,MDPs are mathematical models that describe env...
2,What is batch normalization and how does it work?,Normalizes the layer's input over a mini-batch,Batch normalization normalizes the inputs of e...
3,How does Word2Vec create word embeddings?,Word2Vec creates word embeddings by training o...,Word2Vec creates word embeddings by representi...
4,What is the role of hidden layers in MLP?,Hidden layers in MLP help capture complex patt...,Hidden layers transform inputs into higher-lev...
...,...,...,...
244,What is the Zipf Distribution in NLP?,The Zipf Distribution describes how a few elem...,The Zipf distribution is a power law distribut...
245,What is transfer learning in deep learning?,Transfer learning adapts a pre-trained model t...,Transfer learning involves taking a pre-traine...
246,Explain the use of tokenization in NLP.,Tokenization is the process of breaking text i...,Tokenization is the process of breaking text i...
247,What is the Perceptron Learning Rule?,The Perceptron Learning Rule is an algorithm u...,The Perceptron Learning Rule is a method for u...


# **ROUGE SCORE CALCULATION**

In [14]:

# ROUGE-1 / ROUGE-2 / ROUGE-L (with stemming) for df_2, which has one reference per question.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
all_rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}

all_answers = df_2['answer'].astype(str).tolist()
all_generated_answers = df_2['generated_answer'].astype(str).tolist()

# Score each generated answer against its reference and keep the F-measure per metric.
for reference, prediction in zip(all_answers, all_generated_answers):
    scores = scorer.score(reference, prediction)
    for key, score in scores.items():
        all_rouge_scores[key].append(score.fmeasure)

# Mean F-measure per metric over all questions.
avg_rouge_scores = {}
for key, values in all_rouge_scores.items():
    avg_rouge_scores[key] = sum(values) / len(values)
print("Average ROUGE Scores:", avg_rouge_scores)

Average ROUGE Scores: {'rouge1': 0.48342827227660445, 'rouge2': 0.2631626190945965, 'rougeL': 0.41491957225471765}


In [15]:
# Inspect df: each question with its two reference answers and the generated answer.
df

Unnamed: 0,question,answer1,answer2,generated_answer
0,How we can effectively convert 2D images to 1D?,Converting images to 1D data may not be effect...,"To effectively convert 2D images to 1D, use te...","To effectively convert 2D images to 1D, you ca..."
1,Can we utilize an autoencoder to perform dimen...,"Yes, autoencoders can be applied to numerical ...","Yes, autoencoders can be used for dimensionali...","Yes, autoencoders can be used for dimensionali..."
2,What is NLP's current biggest challenge that i...,The main challenges of NLP is finding and coll...,NLP models struggle with tasks that require re...,"NLP's current biggest challenge is the ""cold s..."
3,Which problems cannot be solved by Neural netw...,While neural networks have shown great success...,"Neural networks are powerful, but they may str...","Yes, Neural Networks can solve almost all the ..."
4,Is scaling necessary for SVM?,"Yes, scaling the input data is generally recom...",Scaling the input data is advisable when utili...,"Yes, scaling is often beneficial for SVM as it..."
...,...,...,...,...
115,Can you repeat difference between data mining ...,Data mining refers to the process of discoveri...,Machine learning is a subset of data mining th...,Data mining involves the extraction of valuabl...
116,Is there any software available for clinical l...,"CLAMP (Clinical Language Annotation, Modeling,...",CLAMP is a comprehensive clinical Natural Lang...,There are some commercial and open-source soft...
117,When do we slice?,Slicing is a useful technique in Python for ex...,Slicing can be useful for working with large d...,Slicing is used when we need to access or extr...
118,"In terms of obtaining better context, is lemma...","Yes, lemmatization is generally considered bet...","Yes,Unlike stemming, which simply trims words ...",Lemmatization is considered superior to stemmi...


In [16]:
# ROUGE-1 / ROUGE-2 / ROUGE-L (with stemming) for df, which has two reference
# answers per question (answer1, answer2). Every available reference contributes
# one score, and the final numbers are the mean over all of them.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
all_rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}

all_generated_answers = df['generated_answer'].astype(str).tolist()
all_answers = df[['answer1', 'answer2']].values.tolist()

for references, prediction in zip(all_answers, all_generated_answers):
    for answer in references:
        # Missing cells read from CSV are NaN, not None, so `is not None`
        # would let them through; pd.isna covers both cases.
        if pd.isna(answer):
            continue
        scores = scorer.score(str(answer), prediction)
        for key in scores:
            all_rouge_scores[key].append(scores[key].fmeasure)

# Mean F-measure per metric; NaN when nothing was scored, instead of dividing by zero.
avg_rouge_scores = {
    key: (sum(values) / len(values) if values else float('nan'))
    for key, values in all_rouge_scores.items()
}
print("Average ROUGE Scores:", avg_rouge_scores)

Average ROUGE Scores: {'rouge1': 0.4032413346062863, 'rouge2': 0.16856584350609868, 'rougeL': 0.31638937843778997}


In [None]:
# Preview the first rows of df, including the generated_answer column.
df.head()

Unnamed: 0,question,answer1,answer2,generated_answer
0,How we can effectively convert 2D images to 1D?,Converting images to 1D data may not be effect...,"To effectively convert 2D images to 1D, use te...","To effectively convert 2D images to 1D, you ca..."
1,Can we utilize an autoencoder to perform dimen...,"Yes, autoencoders can be applied to numerical ...","Yes, autoencoders can be used for dimensionali...","Yes, autoencoders can be used for dimensionali..."
2,What is NLP's current biggest challenge that i...,The main challenges of NLP is finding and coll...,NLP models struggle with tasks that require re...,"NLP's current biggest challenge is the ""cold s..."
3,Which problems cannot be solved by Neural netw...,While neural networks have shown great success...,"Neural networks are powerful, but they may str...",Neural networks cannot solve problems that req...
4,Is scaling necessary for SVM?,"Yes, scaling the input data is generally recom...",Scaling the input data is advisable when utili...,"Yes, scaling is recommended for SVM to ensure ..."


# **GRADIO APP**

In [16]:
def generate_gradio_response(question, model,mn_tokens):
    """Answer one question for the chat UI with a caller-chosen token budget.

    Args:
        question: The user's question; converted with ``str()`` before use.
        model: The language model whose ``generate`` method is called.
        mn_tokens: Value passed to ``generate`` as ``max_new_tokens``.

    Returns:
        list[str]: The decoded output split on ``'### Response:'``.
    """
    # Fill the template: instruction = question, input and response left empty.
    prompt = alpaca_prompt.format(str(question), "", "")
    encoded = tokenizer([prompt], return_tensors="pt").to("cuda")

    # Non-streaming generation, then decode the whole sequence without special tokens.
    generated = model.generate(**encoded, streamer=None, max_new_tokens=mn_tokens)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.split('### Response:')

In [17]:

# Gradio interface function
def chatbot_interface(user_input, other_input, max_new_tokens):
    """Callback for the Gradio chat UI: return the model's answer to one message.

    Args:
        user_input: The message the user just typed.
        other_input: Second positional argument supplied by the chat interface
            (the conversation history for ``gr.ChatInterface``). It is not
            used, because every question is answered independently.
        max_new_tokens: Generation budget forwarded to the model.

    Returns:
        str: The text after the ``'### Response:'`` marker with newlines removed.
    """
    # The earlier debug print of `other_input` was dropped: it wrote the whole
    # chat history to the notebook output on every request.
    response = generate_gradio_response(user_input, model, max_new_tokens)
    answer = response[1].replace("\n", "")
    return answer

# Create the Gradio chat interface. Build it first and launch it in a separate
# step so that `demo` refers to the ChatInterface object itself (usable later,
# e.g. demo.close()) rather than to whatever launch() returns.
demo = gr.ChatInterface(
    chatbot_interface,
    title="Cohort 22 - Group 16: AIML Q and A Llama 3",
    # Extra controls are passed to chatbot_interface after (message, history).
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
)
demo.launch();

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://cf91a5814921f57cfc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
