Install Libraries

In [None]:
!pip install transformers bitsandbytes datasets


Collecting bitsandbytes
  Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl (60.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.1


Import Necessary Modules

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch


Define Model Loading Function

In [None]:
def load_model(model_id, hf_token, max_new_tokens=256):
    """Load a causal LM in 8-bit and wrap it in a text-generation pipeline.

    Args:
        model_id: Hugging Face Hub repository id of the model to load.
        hf_token: Hugging Face access token, forwarded to both
            ``from_pretrained`` calls.
        max_new_tokens: Default generation length configured on the pipeline.

    Returns:
        The object returned by ``pipeline("text-generation", ...)``.
    """
    print(f"Loading {model_id} ...")
    quant_config = BitsAndBytesConfig(load_in_8bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quant_config,
        # `token` replaces the deprecated `use_auth_token` keyword and matches
        # the other loader cells in this notebook.
        token=hf_token,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    # No `device` argument here: the model was already placed by
    # device_map="auto", and the pipeline must not try to move it again.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        truncation=True,
    )


Load Models

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch

import os

# Read the Hugging Face token from the environment instead of committing it to
# the notebook. Set HF_TOKEN before starting the kernel. When it is unset this
# is None, and transformers falls back to credentials cached by
# `huggingface-cli login` (public models need no token at all).
# NOTE(review): the token that used to be hardcoded on this line is exposed in
# the file history and should be revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

def load_model(model_id, token):
    """Build a text-generation pipeline around ``model_id`` loaded in 8-bit.

    Args:
        model_id: Hugging Face Hub repository id of a causal language model.
        token: Hugging Face access token passed to both ``from_pretrained`` calls.

    Returns:
        The object returned by ``pipeline("text-generation", ...)``.
    """
    print(f"Loading {model_id} ...")

    # 8-bit weights via bitsandbytes to reduce memory use.
    eight_bit = BitsAndBytesConfig(load_in_8bit=True)
    model_options = {
        "device_map": "auto",  # device placement is decided automatically
        "quantization_config": eight_bit,
        "token": token,
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(model_id, **model_options)
    text_tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)

    # Deliberately no `device` argument: the model is already mapped.
    generator = pipeline(
        "text-generation",
        model=causal_lm,
        tokenizer=text_tokenizer,
    )
    return generator

# Load the models sequentially; each call returns a text-generation pipeline
# that is kept under its own name for the cells below.
deepseek_pipe = load_model("deepseek-ai/deepseek-coder-1.3b-base", hf_token)
phi2_pipe = load_model("microsoft/phi-2", hf_token)

# All three pipelines are held at the same time. If memory is tight, comment
# this line out and load Gemma on its own instead.
gemma_pipe = load_model("google/gemma-2b", hf_token)



Loading deepseek-ai/deepseek-coder-1.3b-base ...


config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/793 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

Device set to use cpu


Loading microsoft/phi-2 ...


config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Device set to use cpu


Loading google/gemma-2b ...


config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Device set to use cpu


Store the pipelines in a dictionary for easy access:

In [None]:
# Registry mapping the model name used in the UI to its loaded pipeline.
pipes = dict(
    [
        ("DeepSeek", deepseek_pipe),
        ("Phi-2", phi2_pipe),
        ("Gemma", gemma_pipe),
    ]
)


Create a function to generate code based on the selected model and prompt:

In [None]:
def generate_code(model_name, prompt_text):
    """Run ``prompt_text`` through the pipeline registered as ``model_name``.

    Args:
        model_name: Key into the module-level ``pipes`` dictionary.
        prompt_text: Prompt passed to the selected pipeline.

    Returns:
        The ``"generated_text"`` of the first result, or ``None`` (after
        printing a notice) when ``model_name`` is not a key of ``pipes``.
    """
    try:
        selected = pipes[model_name]
    except KeyError:
        print(f" Model '{model_name}' not loaded.")
        return None

    first_result = selected(prompt_text)[0]
    return first_result["generated_text"]


Use widgets to create a user interface for model selection and prompt input:

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Selector for which model handles the request; the option labels are the
# names that generate_code() looks up.
model_dropdown = widgets.Dropdown(
    options=["DeepSeek", "Phi-2", "Gemma"],
    description="Model:",
    disabled=False
)

# Free-text prompt, pre-filled with an example request.
prompt_textarea = widgets.Textarea(
    value='Write a Python function to check if a number is prime.',
    description='Prompt:',
    disabled=False
)

# Output widget used as a context manager by the click handler.
output_area = widgets.Output()

def on_button_click(b):
    """Generate code for the current widget selections and show it.

    Args:
        b: The button instance passed by ``on_click``; not used.

    Reads ``model_dropdown.value`` and ``prompt_textarea.value``, calls
    ``generate_code`` with them, and prints the result inside ``output_area``.
    Nothing is printed after the progress line when ``generate_code`` returns
    a falsy value.
    """
    with output_area:
        # Clear the previous run first so repeated clicks do not stack
        # results on top of each other.
        output_area.clear_output()
        print("Generating code...")
        code = generate_code(model_dropdown.value, prompt_textarea.value)
        if code:
            print("Generated Code:\n", code)

# Create the trigger button and register the click handler on it.
generate_button = widgets.Button(description="Generate Code")
generate_button.on_click(on_button_click)

# Render the controls in order: model selector, prompt, button, output.
display(model_dropdown, prompt_textarea, generate_button, output_area)


Dropdown(description='Model:', options=('DeepSeek', 'Phi-2', 'Gemma'), value='DeepSeek')

Textarea(value='Write a Python function to check if a number is prime.', description='Prompt:')

Button(description='Generate Code', style=ButtonStyle())

Output()

Combining two models (DeepSeek and Phi-2)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig

import os

# Read the Hugging Face token from the environment instead of committing it to
# the notebook. Set HF_TOKEN before starting the kernel. When it is unset this
# is None, and transformers falls back to credentials cached by
# `huggingface-cli login` (public models need no token at all).
# NOTE(review): the token that used to be hardcoded on this line is exposed in
# the file history and should be revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

def load_model(model_id, token):
    """Build a text-generation pipeline for ``model_id`` with 8-bit weights.

    The quantization config also enables fp32 CPU offload for modules that do
    not fit on the GPU.

    Args:
        model_id: Hugging Face Hub repository id of a causal language model.
        token: Hugging Face access token passed to both ``from_pretrained`` calls.

    Returns:
        The object returned by ``pipeline("text-generation", ...)``.
    """
    print(f"\n Loading {model_id} ...")

    # 8-bit quantization, with CPU offload switched on.
    offload_quant = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True,
    )
    model_options = {
        "device_map": "auto",  # layers are assigned to GPU/CPU automatically
        "quantization_config": offload_quant,
        "token": token,
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(model_id, **model_options)
    text_tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)

    generator = pipeline(
        "text-generation",
        model=causal_lm,
        tokenizer=text_tokenizer,
    )
    return generator

# Load the two models sequentially. These assignments rebind the
# `deepseek_pipe` and `phi2_pipe` names created by the earlier loading cell.
deepseek_pipe = load_model("deepseek-ai/deepseek-coder-1.3b-base", hf_token)
phi2_pipe = load_model("microsoft/phi-2", hf_token)
# Gemma is intentionally left out of this cell; uncomment to load it as well:
# gemma_pipe = load_model("google/gemma-2b", hf_token)  # Load separately if memory is low



 Loading deepseek-ai/deepseek-coder-1.3b-base ...


Device set to use cpu



 Loading microsoft/phi-2 ...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


Output for the above combined models

In [None]:
# Define a function to generate code from a model pipeline
def generate_code(model_name, prompt_text):
    """Generate up to 200 new tokens for ``prompt_text`` and print the result.

    Args:
        model_name: ``"DeepSeek"`` or ``"Phi-2"``; selects ``deepseek_pipe``
            or ``phi2_pipe`` respectively.
        prompt_text: Prompt passed to the selected pipeline.

    Returns:
        The ``"generated_text"`` of the first result, or ``None`` (after
        printing a notice) when ``model_name`` is not one of the supported
        names.
    """
    print(f"\n--- {model_name} ---")
    # Pipelines are looked up lazily, so only the selected one has to exist.
    if model_name == "DeepSeek":
        selected_pipe = deepseek_pipe
    elif model_name == "Phi-2":
        selected_pipe = phi2_pipe
    else:
        # Previously an unknown name fell through and crashed on the unbound
        # `output` variable. Add an `elif` branch above to support more
        # models (e.g. Gemma once `gemma_pipe` is loaded).
        print(f" Model '{model_name}' not loaded.")
        return None

    output = selected_pipe(prompt_text, max_new_tokens=200)[0]['generated_text']
    print(output)
    return output

# Example prompt
prompt = "Write a Python function to check if a number is prime."

# Run the same prompt through DeepSeek; the text is printed and also kept.
deepseek_output = generate_code("DeepSeek", prompt)

# Run the same prompt through Phi-2 so the two outputs can be compared.
phi2_output = generate_code("Phi-2", prompt)



--- DeepSeek ---


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Write a Python function to check if a number is prime.
  The function should take a single argument, n and return True if n is a prime 
      number and False otherwise.
"""
def is_prime(n):
    if n <= 1:
        return False
    for i in range(2, n):
        if n % i == 0:
            return False
    return True

def main():
    num = int(input("Enter a number:"))
    print(is_prime(num))

if __name__ == "__main__":
    main()


"""
Write a Python function to print the even numbers from a given list.
"""
def even_nums(lst):
    for i in lst:
        if i % 2 == 0:
            print(i)

def main():
    lst = [1, 2, 3, 4, 5,

--- Phi-2 ---
Write a Python function to check if a number is prime.

Hint: You might want to use a for loop to check divisibility from 2 to the square root of the number.

```python
def is_prime(n):
    if n <= 1:
        return False
    elif n <= 3:
        return True
    elif n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
    

In [None]:
#  UI + Visualization for code generation
from IPython.display import display, Markdown
import ipywidgets as widgets

# Define a function to generate code from a selected model
def _markdown_fence(text):
    """Return a backtick fence longer than any backtick run inside ``text``."""
    longest = run = 0
    for ch in text:
        run = run + 1 if ch == "`" else 0
        longest = max(longest, run)
    # A fenced block only ends at a fence at least as long as the opener, so
    # one more backtick than the longest inner run keeps the content intact.
    return "`" * max(3, longest + 1)


def generate_code_ui(model_name, prompt_text):
    """Generate text with the selected model and render it as Markdown.

    Args:
        model_name: ``"DeepSeek"``, ``"Phi-2"`` or ``"Gemma"``; any other
            value yields the placeholder text ``"Model not loaded!"``.
        prompt_text: Prompt passed to the selected pipeline (up to 300 new
            tokens are requested).

    Returns:
        The generated text, or the placeholder string for an unknown model.
    """
    # Pipelines are looked up lazily, so only the selected one has to exist.
    if model_name == "DeepSeek":
        output = deepseek_pipe(prompt_text, max_new_tokens=300)[0]['generated_text']
    elif model_name == "Phi-2":
        output = phi2_pipe(prompt_text, max_new_tokens=300)[0]['generated_text']
    elif model_name == "Gemma":
        output = gemma_pipe(prompt_text, max_new_tokens=300)[0]['generated_text']
    else:
        output = "Model not loaded!"

    # Generated text can contain its own ``` fences, which would end a fixed
    # three-backtick block early; size the fence to the content instead.
    fence = _markdown_fence(output)
    display(Markdown(f"### Model: {model_name}"))
    display(Markdown(f"{fence}python\n{output}\n{fence}"))
    return output

# 🎛 Widgets
# Selector for the model; the labels are the names generate_code_ui() checks.
model_dropdown = widgets.Dropdown(
    options=["DeepSeek", "Phi-2", "Gemma"],
    value="DeepSeek",
    description="Model:"
)

# Prompt editor, pre-filled with an example request and sized for a few lines.
prompt_textbox = widgets.Textarea(
    value="Write a Python function to check if a number is prime.",
    description="Prompt:",
    layout=widgets.Layout(width="80%", height="100px")
)

# Button that triggers generation once a click handler is registered on it.
generate_button = widgets.Button(
    description="Generate Code",
    button_style="success"
)

# Output widget the click handler renders into.
output_area = widgets.Output()

# Button click handler
def on_generate_clicked(b):
    """Regenerate and render code for the current widget selections.

    Args:
        b: The button instance passed by ``on_click``; not used.
    """
    selected_model = model_dropdown.value
    prompt_text = prompt_textbox.value
    with output_area:
        # Start from an empty output area on every click.
        output_area.clear_output()
        generate_code_ui(selected_model, prompt_text)

# Register the click handler on the button.
generate_button.on_click(on_generate_clicked)

# Render each control separately, in top-to-bottom order.
display(model_dropdown)
display(prompt_textbox)
display(generate_button)
display(output_area)


Dropdown(description='Model:', options=('DeepSeek', 'Phi-2', 'Gemma'), value='DeepSeek')

Textarea(value='Write a Python function to check if a number is prime.', description='Prompt:', layout=Layout(…

Button(button_style='success', description='Generate Code', style=ButtonStyle())

Output()

In [None]:
# Define function to display outputs like mentor did
def display_generated_code(model_name, pipe, prompt, max_new_tokens=200):
    """Print a labelled report of what ``pipe`` generates for ``prompt``.

    Args:
        model_name: Label printed under the ``Model:`` heading.
        pipe: Callable invoked as ``pipe(prompt, max_new_tokens=...)``; the
            first item of its result must have a ``"generated_text"`` entry.
        prompt: Prompt text, echoed and passed to ``pipe``.
        max_new_tokens: Generation length forwarded to ``pipe``. Defaults to
            200, the value that was previously hardcoded.

    Returns:
        None. The report is written to stdout only, so calling this as the
        last expression of a cell does not echo the text a second time.
    """
    print(f"Model:\n\n{model_name}")
    print(f"Prompt:\n{prompt}")
    print("Generated code:")
    output = pipe(prompt, max_new_tokens=max_new_tokens)[0]['generated_text']
    print(output)
    # Visual separator between consecutive reports.
    print("\n" + "="*50 + "\n")

# Prompt shared by every report below.
prompt = "Write a Python function to check if a number is prime."

# Report for DeepSeek.
display_generated_code("DeepSeek", deepseek_pipe, prompt)

# Report for Phi-2.
display_generated_code("Phi-2", phi2_pipe, prompt)

# Report for Gemma; enable only when `gemma_pipe` has been loaded.
# display_generated_code("Gemma", gemma_pipe, prompt)
