In [1]:
!pip install transformers trl peft bitsandbytes accelerate datasets safetensors -U

Collecting trl
  Downloading trl-0.23.0-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading trl-0.23.0-py3-none-any.whl (564 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.7/564.7 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, trl
Successfully installed bitsandbytes-0.47.0 trl-0.23.0


In [2]:
from huggingface_hub import notebook_login
from google.colab import userdata
import torch
import json
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, LoraConfig, PeftConfig, set_peft_model_state_dict
from safetensors.torch import load_file

In [3]:
print("Unzipping model files...")
!unzip -o /content/tiny-conversational-assistant-final.zip -d /content/

# --- Step 4: Automatically Find the Correct Adapter Path ---
import os
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')
print("\nSearching for the adapter_config.json file...")
find_command_output = !find /content -name "adapter_config.json"
if not find_command_output:
    raise FileNotFoundError("ERROR: Could not find 'adapter_config.json' after unzipping.")
correct_config_path = find_command_output[0]
local_adapter_path = os.path.dirname(correct_config_path)
print(f"✅ Found adapter folder at: {local_adapter_path}")

Unzipping model files...
Archive:  /content/tiny-conversational-assistant-final.zip
   creating: /content/content/tiny-conversational-assistant-final/
  inflating: /content/content/tiny-conversational-assistant-final/training_args.bin  
  inflating: /content/content/tiny-conversational-assistant-final/README.md  
  inflating: /content/content/tiny-conversational-assistant-final/adapter_config.json  
  inflating: /content/content/tiny-conversational-assistant-final/adapter_model.safetensors  
  inflating: /content/content/tiny-conversational-assistant-final/special_tokens_map.json  
  inflating: /content/content/tiny-conversational-assistant-final/tokenizer.model  
  inflating: /content/content/tiny-conversational-assistant-final/tokenizer.json  
  inflating: /content/content/tiny-conversational-assistant-final/tokenizer_config.json  
  inflating: /content/content/tiny-conversational-assistant-final/chat_template.jinja  

Searching for the adapter_config.json file...
✅ Found adapter fol

In [4]:
# --- Step 5: Load the FINE-TUNED Model (base weights + merged LoRA adapter) ---
base_model_hub_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Use the adapter folder discovered by the unzip cell instead of re-hard-coding it,
# so this cell keeps working if the archive layout changes.
adapter_config_file = os.path.join(local_adapter_path, "adapter_config.json")
if not os.path.isfile(adapter_config_file):
    raise FileNotFoundError(
        f"No adapter_config.json found in {local_adapter_path!r}. Re-run the unzip cell first."
    )

print("Loading FINE-TUNED model...")
# Load the base model first
ft_base_model = AutoModelForCausalLM.from_pretrained(
    base_model_hub_id,
    dtype=torch.bfloat16,
    trust_remote_code=True,
)
# Load the tokenizer (shared by both models during evaluation)
tokenizer = AutoTokenizer.from_pretrained(base_model_hub_id)
tokenizer.pad_token = tokenizer.eos_token
# Let PEFT read the adapter config and weights from the folder. This replaces the
# manual LoraConfig(**json) + set_peft_model_state_dict path, whose load result was
# never checked (a key mismatch would silently leave the adapter untrained).
ft_model = PeftModel.from_pretrained(ft_base_model, local_adapter_path)
# Fold the LoRA weights into the base weights so inference runs on a plain model.
ft_model = ft_model.merge_and_unload()
ft_model.to("cuda")
print("✅ Fine-tuned model loaded successfully!")


# --- Step 6: Load the ORIGINAL Base Model for Comparison ---
# A second, untouched copy is required because merge_and_unload() above modified
# the weights of the first one.
print("\nLoading ORIGINAL BASE model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_hub_id,
    dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
print("✅ Base model loaded successfully!")

Loading FINE-TUNED model...


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

✅ Fine-tuned model loaded successfully!

Loading ORIGINAL BASE model...
✅ Base model loaded successfully!


In [5]:
# Evaluation prompts. Each case carries:
#   id       - sequential case number
#   task     - short human-readable label used in the printed report
#   incoming - the message being replied to, or "N/A" when drafting from scratch
#   request  - description of the message the model is asked to draft
test_set = [
    dict(
        id=1,
        task="Ask professor for feedback on a draft",
        incoming="N/A",
        request="an email to Professor Smith asking if he would be available to provide brief feedback on my thesis draft next week.",
    ),
    dict(
        id=2,
        task="Politely decline a social invitation",
        incoming="Hey, we're all going out for drinks on Friday night, you should come!",
        request="a polite text declining the invitation because I have to study for an exam.",
    ),
    dict(
        id=3,
        task="Follow up on a job application",
        incoming="N/A",
        request="a professional follow-up email about the Data Science Intern position I applied for last week.",
    ),
    dict(
        id=4,
        task="Apologize for missing a group meeting",
        incoming="Hey, where were you for the group meeting today? We needed your input.",
        request="an apologetic but concise reply, explaining I had a conflicting appointment and asking when I can sync up.",
    ),
    dict(
        id=5,
        task="Request a letter of recommendation",
        incoming="N/A",
        request="a formal email to Professor Davis, whom I took Advanced Algorithms with, asking for a letter of recommendation for a Master's program.",
    ),
    dict(
        id=6,
        task="Confirm meeting details",
        incoming="Just confirming our project discussion tomorrow.",
        request="a reply asking to confirm the time and the meeting link.",
    ),
    dict(
        id=7,
        task="Handle a group project conflict",
        incoming="I've done my part of the project, but I don't think a final section is good. We should rewrite it.",
        request="a diplomatic response that acknowledges their work but suggests discussing the section as a group before making major changes.",
    ),
    dict(
        id=8,
        task="Ask a clarifying question about an assignment",
        incoming="N/A",
        request="an email to my TA asking for clarification on the required formatting for the final paper in CS101.",
    ),
    dict(
        id=9,
        task="Send a thank-you note after an interview",
        incoming="N/A",
        request="a brief thank-you email to Ms. Chen after an interview for the summer internship role.",
    ),
    dict(
        id=10,
        task="Respond to a scheduling conflict",
        incoming="Can you meet at 4 PM today?",
        request="a reply stating that I can't do 4 PM but I am free anytime before noon.",
    ),
]


# --- Step 8: Define the Agent and Run Evaluation ---
def run_inference(model_to_use, user_request, incoming_message, max_new_tokens=256):
    """Draft a reply with ``model_to_use`` and return only the assistant's first turn.

    Args:
        model_to_use: Causal LM exposing ``.generate`` and ``.device``.
        user_request: Description of the message to draft (inserted into the prompt).
        incoming_message: The message being answered, or ``"N/A"`` when there is none.
        max_new_tokens: Upper bound on the number of generated tokens (default 256).

    Returns:
        The generated reply as a stripped string, cut off before any further
        ``### Human:`` / ``### Assistant:`` turn the model may have invented.

    Relies on the notebook-level ``tokenizer``.
    """
    if incoming_message == "N/A":  # Handle cases with no incoming message
        prompt = f"### Human:\nPlease help me draft a message that is {user_request}.\n\n### Assistant:\n"
    else:
        prompt = (
            f"### Human:\nI received the following message: \"{incoming_message}\". "
            f"Please help me draft a response that is {user_request}.\n\n"
            f"### Assistant:\n"
        )

    # Keep the attention mask: the pad token is set to the EOS token in this notebook,
    # so generate() cannot reliably infer the mask on its own. Send the inputs to the
    # device of the model being evaluated rather than assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model_to_use.device)
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model_to_use.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full text on the last
    # "### Assistant:" would return a hallucinated later turn instead of the reply.
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    for turn_marker in ("### Human:", "### Assistant:"):
        completion = completion.split(turn_marker)[0]
    return completion.strip()

# Collect one record per test case so both models can be compared in the report.
evaluation_results = []
for case in test_set:
    print(f"\n--- Running Test Case #{case['id']}: {case['task']} ---")

    record = {"ID": case['id'], "Task": case['task']}

    # Fine-tuned model first, then the untouched base model, on the same prompt.
    for heading, column, model in (
        ("\n  >> Fine-Tuned Model Response:", "Fine-Tuned Response", ft_model),
        ("\n  >> Base Model Response:", "Base Model Response", base_model),
    ):
        reply = run_inference(model, case['request'], case['incoming'])
        print(heading)
        print(reply)
        record[column] = reply

    evaluation_results.append(record)


--- Running Test Case #1: Ask professor for feedback on a draft ---

  >> Fine-Tuned Model Response:
Dear Professor Smith,

I hope this email finds you well. I am writing to request your feedback on my thesis draft. I am currently working on my thesis, and I would greatly appreciate your input on the following points:

1. Introduction:
- Is the introduction clear and concise?
- Does it provide a strong overview of the thesis topic?
- Does it include any relevant background information?

2. Literature Review:
- Is the literature review thorough and well-organized?
- Does it provide a comprehensive overview of the thesis topic?
- Does it include any relevant recent research?

3. Thesis Proposal:
- Is the thesis proposal clear and concise?
- Does it provide a strong argument for the thesis topic?
- Does it include any relevant background information?

4. Thesis Structure:
- Is the thesis structure clear and logical?
- Does it include a clear thesis statement?
- Does it include a clear th

In [6]:
# --- Step 1: Import Libraries ---
from huggingface_hub import HfApi
from google.colab import userdata
import os

# --- Step 2: Authenticate with Your WRITE Token ---
# This reads the HF_TOKEN with write access that you just saved in Colab Secrets.
hf_token = userdata.get('HF_TOKEN')
if not hf_token:
    raise ValueError("HF_TOKEN not found in Colab Secrets. Please add your WRITE token.")

# --- Step 3: Define Your Paths ---
# This script automatically finds your unzipped folder to avoid path errors.
find_output = !find /content -name "adapter_config.json"
if not find_output:
    raise FileNotFoundError("Could not find the unzipped adapter folder. Please make sure your model files are unzipped.")
local_model_path = os.path.dirname(find_output[0])
print(f"Found local model folder at: {local_model_path}")

hf_repo_id = "Yash12930/tinellama_student_assistant"

# --- Step 4: Upload the Folder ---
print(f"\nUploading files to {hf_repo_id}...")
api = HfApi(token=hf_token)
api.upload_folder(
    folder_path=local_model_path,
    repo_id=hf_repo_id,
    repo_type="model",
)

print(f"\n✅ Model uploaded successfully to https://huggingface.co/{hf_repo_id}")

Found local model folder at: /content/content/tiny-conversational-assistant-final

Uploading files to Yash12930/tinellama_student_assistant...


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...nal-assistant-final/tokenizer.model: 100%|##########|  500kB /  500kB            

  ...ant-final/adapter_model.safetensors:   3%|3         |  566kB / 18.0MB            

  ...l-assistant-final/training_args.bin:   3%|3         |   181B / 5.78kB            


✅ Model uploaded successfully to https://huggingface.co/Yash12930/tinellama_student_assistant
