# **INSTALL AND IMPORT**

In [1]:
%%capture
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
!pip install rouge_score
!pip install gradio

In [2]:
import os
import torch
import pandas as pd
import gradio as gr
from datasets import load_dataset, Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)
import warnings

# Suppress all warnings
warnings.filterwarnings('ignore')
import string
from rouge_score import rouge_scorer

In [3]:
# Identifier passed to `from_pretrained` in the next cell for both the model and the tokenizer.
model_name = "paramasivan27/Llama-2-7b-for_q_and_a" # Model

In [4]:

# Map the entire model (the "" key is the root module) onto device 0.
device_map = {"": 0}

# dtype used for computation on top of the 4-bit quantized weights.
compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4", #(fp4 or nf4)
    # Pass the dtype computed above instead of repeating it as a string literal,
    # so there is a single place to change it.
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

# Load the checkpoint named by `model_name` with 4-bit quantization.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
# This notebook only runs inference, so keep the key/value cache enabled:
# disabling it (a fine-tuning setting) makes every generated token recompute
# attention over the whole sequence.
model.config.use_cache = True
model.config.pretraining_tp = 1

# Load the tokenizer that belongs to the same checkpoint.
# NOTE(review): trust_remote_code=True allows code shipped in the repo to run
# locally — confirm it is actually required for this tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/434 [00:00<?, ?B/s]

# **PROMPT DEFINITION & INITIAL VALIDATION**

In [5]:

# Smoke-test the loaded model on a single question before the full evaluation.
prompt = "What is the curse of dimensionality?"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=72,
)
result = pipe(f"<s>[INST] {prompt} [/INST]")
# Print only the piece that follows the first "[/INST]" tag.
generated_text = result[0]['generated_text']
print(generated_text.split("[/INST]")[1])

 The phenomenon where the complexity of a model increases exponentially with the number of features, making learning difficult. It can lead to overfitting and reduce the performance of the model. Techniques like feature selection and dimensionality reduction can help mitigate this curse


# **LOAD TEST DATASET**

In [7]:

# First test set, read from a CSV whose first line holds the column names.
df = pd.read_csv('./aiml-qa-test_csv.csv')

# Preview the top rows
print(df.head())

# Second test set, read the same way.
df_2 = pd.read_csv('./test.csv')

# Preview the top rows
print(df_2.head())

                                            question  \
0    How we can effectively convert 2D images to 1D?   
1  Can we utilize an autoencoder to perform dimen...   
2  What is NLP's current biggest challenge that i...   
3  Which problems cannot be solved by Neural netw...   
4                      Is scaling necessary for SVM?   

                                             answer1  \
0  Converting images to 1D data may not be effect...   
1  Yes, autoencoders can be applied to numerical ...   
2  The main challenges of NLP is finding and coll...   
3  While neural networks have shown great success...   
4  Yes, scaling the input data is generally recom...   

                                             answer2  
0  To effectively convert 2D images to 1D, use te...  
1  Yes, autoencoders can be used for dimensionali...  
2  NLP models struggle with tasks that require re...  
3  Neural networks are powerful, but they may str...  
4  Scaling the input data is advisable when utili..

# **RESPONSE FUNCTION DEFINITION & GENERATION**

In [7]:
# Add an empty 'generated_answer' column to each test frame.
for frame in (df, df_2):
  frame['generated_answer'] = None

In [8]:
# Text-generation pipeline used by generate_response; max_length=72 is passed
# through as the generation length limit.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=72,
)

def generate_response(row):
    """Generate an answer for one question with the module-level `pipe`.

    Args:
        row: The question text (callers pass the value of the 'question'
            column, not a whole DataFrame row).

    Returns:
        The part of the generated text that sits between the first and the
        second "[/INST]" tag (or up to the end when there is only one tag).
    """
    wrapped_prompt = f"<s>[INST] {row} [/INST]"
    generations = pipe(wrapped_prompt)
    full_text = generations[0]['generated_text']
    # Index 0 is everything before the first tag; index 1 is what follows it.
    return full_text.split("[/INST]")[1]

In [9]:

# Generate an answer for every question in df_2 and store it on the same row.
for row_label, record in df_2.iterrows():
  df_2.at[row_label, 'generated_answer'] = generate_response(record['question'])

In [10]:
# Generate an answer for every question in df and store it on the same row.
for row_label, record in df.iterrows():
  df.at[row_label, 'generated_answer'] = generate_response(record['question'])

In [11]:
# Display the frame, which now carries the 'generated_answer' column filled in above.
df

Unnamed: 0,question,answer1,answer2,generated_answer
0,How we can effectively convert 2D images to 1D?,Converting images to 1D data may not be effect...,"To effectively convert 2D images to 1D, use te...","To convert 2D images to 1D, we can use convol..."
1,Can we utilize an autoencoder to perform dimen...,"Yes, autoencoders can be applied to numerical ...","Yes, autoencoders can be used for dimensionali...","Yes, Autoencoder can be applied numerical dat..."
2,What is NLP's current biggest challenge that i...,The main challenges of NLP is finding and coll...,NLP models struggle with tasks that require re...,NLPs current biggest challenge is being tried...
3,Which problems cannot be solved by Neural netw...,While neural networks have shown great success...,"Neural networks are powerful, but they may str...",Neural networks cannot solve problems that re...
4,Is scaling necessary for SVM?,"Yes, scaling the input data is generally recom...",Scaling the input data is advisable when utili...,"Yes, scaling is recommended for SVM to ensure..."
...,...,...,...,...
115,Can you repeat difference between data mining ...,Data mining refers to the process of discoveri...,Machine learning is a subset of data mining th...,Data mining involves the extraction of useful...
116,Is there any software available for clinical l...,"CLAMP (Clinical Language Annotation, Modeling,...",CLAMP is a comprehensive clinical Natural Lang...,"Yes, there are several software options avail..."
117,When do we slice?,Slicing is a useful technique in Python for ex...,Slicing can be useful for working with large d...,Slicing is used when there is a specific stru...
118,"In terms of obtaining better context, is lemma...","Yes, lemmatization is generally considered bet...","Yes,Unlike stemming, which simply trims words ...","Yes, lemmatization is considered better than ..."


# **ROUGE SCORE CALCULATION**

In [12]:
# Score every generated answer in df_2 against its reference answer with
# ROUGE-1, ROUGE-2 and ROUGE-L, then report the mean F-measure per metric.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
all_rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}

all_answers = df_2['answer'].astype(str).tolist()
all_generated_answers = df_2['generated_answer'].astype(str).tolist()

# Both lists come from the same frame, so they pair up row by row.
for reference, generated in zip(all_answers, all_generated_answers):
  for metric, outcome in scorer.score(reference, generated).items():
    all_rouge_scores[metric].append(outcome.fmeasure)

avg_rouge_scores = {}
for metric, values in all_rouge_scores.items():
  avg_rouge_scores[metric] = sum(values)/len(values)
print("Average ROUGE Scores:", avg_rouge_scores)

Average ROUGE Scores: {'rouge1': 0.3912341324778206, 'rouge2': 0.18069427233839613, 'rougeL': 0.3157717606974474}


In [13]:
# Score every generated answer in df against each of its two reference answers
# (answer1, answer2) with ROUGE-1, ROUGE-2 and ROUGE-L, then report the mean
# F-measure per metric over all (reference, generated) pairs.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
all_rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}


all_generated_answers = df['generated_answer'].astype(str).tolist()

temp = df[['answer1', 'answer2']]
all_answers = temp.values.tolist()

for references, generated in zip(all_answers, all_generated_answers):
  for answer in references:
    # A missing CSV cell is loaded by pandas as NaN (a float), which an
    # `is not None` check lets through; pd.notna skips both NaN and None.
    if pd.notna(answer):
      scores = scorer.score(str(answer), generated)
      for key in scores:
        all_rouge_scores[key].append(scores[key].fmeasure)

# Report NaN rather than raising ZeroDivisionError when nothing was scored.
avg_rouge_scores = {
    key: (sum(values)/len(values) if values else float("nan"))
    for key, values in all_rouge_scores.items()
}
print("Average ROUGE Scores:", avg_rouge_scores)

Average ROUGE Scores: {'rouge1': 0.3727366428930053, 'rouge2': 0.14961483164598563, 'rougeL': 0.2863850574696099}


# **GRADIO APP**

In [None]:
#df_2.to_csv('Llama2_df_2_with_gen_answers.csv', index=False)
#df.to_csv('Llama2_df_with_gen_answers.csv', index=False)

In [8]:

def generate_gradio_response(question, other_input, max_new_token):
  """Generate an answer for the Gradio app, honouring the requested token budget.

  Args:
    question: The user's question text.
    other_input: Unused; kept so existing callers keep working.
    max_new_token: Maximum number of tokens to generate after the prompt.
      Cast to int because UI sliders may deliver a float.

  Returns:
    The part of the generated text that follows the first "[/INST]" tag.
  """
  # Use max_new_tokens (not max_length): max_length also counts the prompt
  # tokens, so small slider values would leave no room for an answer.
  pipe_gradio = pipeline(
      task="text-generation",
      model=model,
      tokenizer=tokenizer,
      max_new_tokens=int(max_new_token),
  )
  # Call the pipeline built above; calling the notebook-level `pipe` here
  # would ignore the requested limit and always use that pipeline's own.
  result = pipe_gradio(f"<s>[INST] {question} [/INST]")
  response = result[0]['generated_text'].split("[/INST]")[1]
  return response

In [10]:

# Gradio interface function
def chatbot_interface(user_input, other_input, max_new_tokens):
  """Callback wired into gr.ChatInterface: answer one user message.

  Args:
    user_input: The message typed by the user.
    other_input: Second positional argument supplied by the interface
      (the chat history for gr.ChatInterface); forwarded unchanged.
    max_new_tokens: Value of the "Max new tokens" slider.

  Returns:
    The generated answer text.
  """
  # Forward the caller's own argument instead of the global `model` object,
  # and do not print the conversation on every message.
  return generate_gradio_response(user_input, other_input, max_new_tokens)

# Build the interface first and keep a reference to it. Chaining `.launch()`
# into the assignment would bind `demo` to launch()'s return value instead of
# the interface, leaving nothing to call e.g. `demo.close()` on.
demo = gr.ChatInterface(
    chatbot_interface,
    title="Cohort 22 - Group 16: AIML Q and A Llama 2",
    additional_inputs=[
        gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
    ],
)
# Trailing semicolon keeps the notebook from echoing launch()'s return value.
demo.launch();

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0d4dab5df43e462395.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
