In [1]:
!pip install transformers datasets
!pip install -U bitsandbytes
!pip install mplcyberpunk

Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.3
Collecting mplcyberpunk
  Downloading mplcyberpunk-0.7.1-py3-none-any.whl.metadata (546 bytes)
Downloading mplcyberpunk-0.7.1-py3-none-any.whl (6.4 kB)
Installing collected packages: mplcyberpunk
Successfully installed mplcyberpunk-0.7.1


In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# Never hardcode the access token in the notebook: anyone who can read the
# file (or its history) can use it. Read it from the HF_TOKEN environment
# variable instead, and fall back to an interactive, non-echoing prompt.
# NOTE(review): the token that was previously inlined in this cell has been
# exposed and should be revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import time

import matplotlib.pyplot as mtp
import mplcyberpunk as mcy
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Single source of truth for the checkpoint so the tokenizer and the model
# cannot accidentally be loaded from different repositories.
MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

# MMLU "college_mathematics" subset. The evaluation cells below iterate over
# its 'test' split and read the 'question', 'choices' and 'answer' fields.
dataset = load_dataset("cais/mmlu", "college_mathematics")

# `token=True` reuses the credentials stored by `huggingface_hub.login`; it is
# the replacement for the deprecated `use_auth_token=True` argument.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)

# Request 4-bit loading through an explicit BitsAndBytesConfig. Passing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated (the library
# emits a warning asking for `quantization_config` instead).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

Downloading readme:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/138k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/16.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.00k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.16k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/100 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/11 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/5 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/3.98k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/3.45k [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

In [5]:
def zero_shot_prompt(question, options):
    """Build the zero-shot prompt for one multiple-choice question.

    Args:
        question: Question text, inserted verbatim on its own line.
        options: Iterable of answer choices; each is rendered on its own
            line as ``"<1-based position>: <choice>"``.

    Returns:
        The prompt string: an instruction line, the question, an
        ``Options:`` section and a closing line asking for the answer in
        the ``'Answer : x'`` format without explanation.
    """
    numbered_choices = (
        f"{position}: {choice}" for position, choice in enumerate(options, start=1)
    )
    options_text = "\n".join(numbered_choices)

    instruction = "Choose the answer to the given question from below options, numbered from 1 to 4."
    answer_format = "Choose the correct option number, 'x' , by displaying in this format: 'Answer : x' strictly!! No explanation required."

    prompt_lines = [instruction, f"{question}", "Options:", options_text, answer_format]
    return "\n".join(prompt_lines)

def chain_of_thought_prompt(question, options):
    """Build the chain-of-thought prompt for one multiple-choice question.

    Args:
        question: Question text, inserted verbatim on its own line.
        options: Iterable of answer choices; each is rendered on its own
            line as ``"<1-based position>: <choice>"``.

    Returns:
        The prompt string: an instruction line that asks the model to
        think step by step, the question, an ``Options:`` section and a
        closing line asking for the answer in the ``'Answer : x'`` format.
    """
    numbered_choices = (
        f"{position}: {choice}" for position, choice in enumerate(options, start=1)
    )
    options_text = "\n".join(numbered_choices)

    instruction = "Choose the answer to the given question from below options. Think step by step."
    answer_format = "Choose the correct option number, 'x' , by displaying in this format: 'Answer : x' strictly!!"

    prompt_lines = [instruction, f"{question}", "Options:", options_text, answer_format]
    return "\n".join(prompt_lines)

In [6]:
def run_inference_phi(model, tokenizer, prompt, max_length=1000):
    """Generate text for ``prompt`` and measure how long generation takes.

    Args:
        model: Object exposing a ``device`` attribute and a
            ``generate(**inputs, max_length=...)`` method.
        tokenizer: Callable as ``tokenizer(prompt, return_tensors="pt")``;
            the result must support ``.to(device)`` and ``**`` unpacking.
            Its ``decode`` method turns the generated ids back into text.
        prompt: Text fed to the model.
        max_length: Forwarded unchanged to ``model.generate`` as
            ``max_length``.

    Returns:
        A ``(gen_text, inf_time)`` tuple: the decoded first sequence
        returned by ``generate`` (special tokens skipped) and the number of
        seconds spent inside the ``generate`` call. In the recorded runs of
        this notebook the decoded text starts with the prompt itself.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring intervals. time.time() follows the wall clock, which can
        # be adjusted while generation runs and yield a wrong (even negative)
        # duration.
        start = time.perf_counter()
        out = model.generate(**inputs, max_length=max_length)
        inf_time = time.perf_counter() - start
    gen_text = tokenizer.decode(out[0], skip_special_tokens=True)
    return gen_text, inf_time

In [7]:
import re

# Sentinel returned when no answer can be parsed. It is deliberately not a
# valid 0-based option index, so an unparseable generation can never be
# scored as a correct prediction (returning 0 would silently count as
# "option 1" and inflate accuracy whenever the reference answer is index 0).
NO_ANSWER = -1

# Matches "Answer : <digits>" with optional whitespace around the colon.
_ANSWER_PATTERN = re.compile(r"Answer\s*:\s*(\d+)")


def extract_with_regex(gen_text):
    """Parse the predicted option from generated text.

    Looks for the first occurrence of ``Answer : <number>`` in ``gen_text``
    and converts the 1-based option number to a 0-based index.

    Args:
        gen_text: Text produced by the model.

    Returns:
        The 0-based option index (``number - 1``) of the first match, or
        ``NO_ANSWER`` (-1) when the text contains no such pattern.
    """
    match = _ANSWER_PATTERN.search(gen_text)

    if match is None:
        print(f"Failed to extract answer from: {gen_text}")
        return NO_ANSWER

    option_num = match.group(1)
    print(f"Extracted answer number: {option_num}")
    return int(option_num) - 1

## Zero-shot prompting

In [9]:
# Fresh, independent accumulators for the reference answers and the model's
# predictions of the zero-shot run.
actual_answers, predicted_answers = [], []

In [10]:
# Reset every accumulator inside this cell so that it is idempotent:
# re-running the cell must not append a second copy of the answers to lists
# left over from a previous execution, which would corrupt the accuracy.
actual_answers = []
predicted_answers = []
tot_time_zero_shot = 0

for idx, sample in enumerate(dataset['test']):
    question = sample['question']
    options = sample['choices']
    correct_answer = sample['answer']

    # Reference label; compared against the 0-based predicted index below, so
    # it is assumed to be a 0-based option index as well.
    actual_answers.append(correct_answer)

    zero_shot_prompt_text = zero_shot_prompt(question, options)

    print(f"**** Running Zero-Shot Inference for Sample {idx + 1} ****")
    generated_zero_shot, time_taken_zero_shot = run_inference_phi(model, tokenizer, zero_shot_prompt_text)

    # Turn the "Answer : x" text emitted by the model into an option index.
    predicted_answer = extract_with_regex(generated_zero_shot)
    predicted_answers.append(predicted_answer)

    print(f"Example {idx + 1} - Zero-Shot Result:\nGenerated Text: {generated_zero_shot}\nPredicted Answer: {predicted_answer}\nTime Taken: {time_taken_zero_shot:.2f} seconds\n")
    tot_time_zero_shot += time_taken_zero_shot

**** Running Zero-Shot Inference for Sample 1 ****


You are not running the flash-attention implementation, expect numerical differences.


Extracted answer number: 2
Example 1 - Zero-Shot Result:
Generated Text: Choose the answer to the given question from below options, numbered from 1 to 4.
Let k be the number of real solutions of the equation e^x + x - 2 = 0 in the interval [0, 1], and let n be the number of real solutions that are not in [0, 1]. Which of the following is true?
Options:
1: k = 0 and n = 1
2: k = 1 and n = 0
3: k = n = 1
4: k > 1
Choose the correct option number, 'x' , by displaying in this format: 'Answer : x' strictly!! No explanation required.


## Response:
To solve this problem, we need to analyze the behavior of the function f(x) = e^x + x - 2 within the interval [0, 1].

First, let's find the critical points by taking the derivative of f(x) and setting it equal to zero:

f'(x) = e^x + 1

Since e^x is always positive, f'(x) = e^x + 1 will also always be positive. Therefore, there are no critical points within the interval [0, 1].

Now, let's evaluate the function at the endpoints of the interval:


In [11]:
# Accuracy = share of samples whose predicted index equals the reference
# index, expressed as a percentage.
n_samples = len(actual_answers)
n_correct = sum(
    1 for i in range(n_samples) if actual_answers[i] == predicted_answers[i]
)
accuracy = n_correct / n_samples * 100

print("**** Zero-Shot Inference Completed ****")
print(f"Total Time: {tot_time_zero_shot:.2f} sec")
print(f"Overall Accuracy: {accuracy:.2f}%")

**** Zero-Shot Inference Completed ****
Total Time: 8560.13 sec
Overall Accuracy: 29.00%


## Chain-of-thought (zero-shot) prompting

In [12]:
# Fresh, independent accumulators for the reference answers and the model's
# predictions of the chain-of-thought run.
actual_answers, predicted_answers = [], []

In [13]:
# Reset every accumulator inside this cell so that it is idempotent:
# re-running the cell must not append a second copy of the answers to lists
# left over from a previous execution, which would corrupt the accuracy.
actual_answers = []
predicted_answers = []
tot_time_chain_of_thought = 0

for idx, sample in enumerate(dataset['test']):
    question = sample['question']
    options = sample['choices']
    correct_answer = sample['answer']

    # Reference label; compared against the 0-based predicted index below, so
    # it is assumed to be a 0-based option index as well.
    actual_answers.append(correct_answer)

    chain_of_thought_prompt_text = chain_of_thought_prompt(question, options)

    print(f"**** Running Chain-of-Thought (Zero Shot) Inference for Sample {idx + 1} ****")
    generated_chain_of_thought, time_taken_chain_of_thought = run_inference_phi(model, tokenizer, chain_of_thought_prompt_text)

    # Turn the "Answer : x" text emitted by the model into an option index.
    predicted_answer = extract_with_regex(generated_chain_of_thought)
    predicted_answers.append(predicted_answer)

    print(f"Example {idx + 1} - Chain-of-Thought Result:\nGenerated Text: {generated_chain_of_thought}\nPredicted Answer: {predicted_answer}\nTime Taken: {time_taken_chain_of_thought:.2f} seconds\n")
    tot_time_chain_of_thought += time_taken_chain_of_thought

**** Running Chain-of-Thought (Zero Shot) Inference for Sample 1 ****
Extracted answer number: 2
Example 1 - Chain-of-Thought Result:
Generated Text: Choose the answer to the given question from below options. Think step by step.
Let k be the number of real solutions of the equation e^x + x - 2 = 0 in the interval [0, 1], and let n be the number of real solutions that are not in [0, 1]. Which of the following is true?
Options:
1: k = 0 and n = 1
2: k = 1 and n = 0
3: k = n = 1
4: k > 1
Choose the correct option number, 'x' , by displaying in this format: 'Answer : x' strictly!!

To solve this problem, we need to analyze the behavior of the function f(x) = e^x + x - 2 on the interval [0, 1].

Step 1: Find the critical points of f(x) within the interval [0, 1].
To find the critical points, we need to take the derivative of f(x) and set it equal to zero:

f'(x) = d/dx (e^x + x - 2)

f'(x) = e^x + 1

Now, we set f'(x) = 0 and solve for x:

e^x + 1 = 0
e^x = -1

Since the exponential functi

In [14]:
# Accuracy = share of samples whose predicted index equals the reference
# index, expressed as a percentage.
n_samples = len(actual_answers)
n_correct = sum(
    1 for i in range(n_samples) if actual_answers[i] == predicted_answers[i]
)
accuracy = n_correct / n_samples * 100

print("**** Chain-of-Thought Inference Completed ****")
print(f"Total Time: {tot_time_chain_of_thought:.2f} sec")
print(f"Overall Accuracy: {accuracy:.2f}%")

**** Chain-of-Thought Inference Completed ****
Total Time: 6809.03 sec
Overall Accuracy: 35.00%
