In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch

In [None]:
import json

# Load the raw user profiles. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open("fitness-users.json", encoding="utf-8") as users_file:
    users_ds = json.load(users_file)
users_df = pd.DataFrame(users_ds)
users_df.head()  # NOTE: not rendered; a cell only displays its last expression

# Load the nutrition records and drop the "date" column, which is not used
# by anything later in this notebook.
with open("average_fiteness_nutrition.json", encoding="utf-8") as nutrition_file:
    nutrition_ds = json.load(nutrition_file)
nutrition_df = pd.DataFrame(nutrition_ds).drop(columns=["date"])

nutrition_df.head()

Unnamed: 0,user_id,calories_consumed,protein_g,carbs_g,fat_g,fiber_g,sugar_g,sodium_mg
0,user_00001,2155,137,244,83,33,57,2054
1,user_00002,2207,127,245,88,30,64,2083
2,user_00003,1967,122,263,84,35,56,2097
3,user_00004,1945,115,232,88,31,57,1762
4,user_00005,2123,120,256,87,31,60,1991


In [None]:
import json
import random

# Positional 80/20 split: the leading 80% of user records become the training
# set and the trailing 20% the evaluation set (no shuffling is applied).
split_idx = int(len(users_ds) * 0.8)
train_users, eval_users = users_ds[:split_idx], users_ds[split_idx:]

def generate_meals(goal):
    """Return the fixed five-item daily menu associated with a training goal.

    Args:
        goal: Goal label. ``"endurance"`` and ``"strength"`` have dedicated
            menus; any other value gets the balanced default menu.

    Returns:
        A new list of five meal description strings (breakfast, snack, lunch,
        snack, dinner).
    """
    endurance_menu = (
        "Breakfast: Oatmeal + whey protein + berries",
        "Snack: Greek yogurt + banana",
        "Lunch: Grilled chicken + quinoa + salad",
        "Snack: Trail mix + apple",
        "Dinner: Salmon + sweet potato + broccoli",
    )
    strength_menu = (
        "Breakfast: Eggs + avocado toast",
        "Snack: Protein bar",
        "Lunch: Beef steak + brown rice + vegetables",
        "Snack: Cottage cheese + almonds",
        "Dinner: Chicken thigh + lentils + spinach",
    )
    balanced_menu = (
        "Breakfast: Whole grain toast + peanut butter + fruit",
        "Snack: Boiled egg + apple",
        "Lunch: Tuna wrap + salad",
        "Snack: Yogurt + oats",
        "Dinner: Grilled turkey + quinoa + beans",
    )

    # Compare with == (rather than a dict lookup) so that unhashable goal
    # values still fall through to the default menu.
    for label, menu in (("endurance", endurance_menu), ("strength", strength_menu)):
        if goal == label:
            return list(menu)
    return list(balanced_menu)

def generate_plan(user, rng=None):
    """Build a randomized 7-day plan for one user.

    Args:
        user: Mapping describing the user. ``weight`` is required and must be
            numeric (or a numeric string); ``goals`` is optional and is passed
            to ``generate_meals`` (defaulting to ``"balanced"``).
        rng: Optional randomness source exposing ``randint``, ``choice`` and
            ``uniform`` (for example ``random.Random(seed)``). When omitted,
            the global ``random`` module is used, as before.

    Returns:
        dict with keys ``"day1"`` .. ``"day7"``. Each value is a dict with
        ``calories``, ``protein_g``, ``carbs_g``, ``fat_g``, ``meals``,
        ``workout``, ``sleep_hours``, ``hydration_liters`` and ``notes``.

    Raises:
        TypeError: if ``weight`` is missing (``None``).
        ValueError: if ``weight`` is a string that is not a number.
    """
    if rng is None:
        rng = random

    # Use float() rather than int(): int() silently dropped the fractional
    # part of the weight (81.9 -> 81) before the protein target was computed.
    weight = float(user.get("weight"))

    workouts = [
        "45 min steady run", "Interval training 8x2min", "Strength training 40 min",
        "Yoga + mobility 30 min", "Long cycle 60 min"
    ]
    sleep_options = [7, 7.5, 8, 8.5]
    note_options = [
        "Keep steady pace", "Focus on recovery", "Include stretching",
        "Increase intensity slightly", "Maintain hydration"
    ]

    plan = {}
    for day in range(1, 8):
        # The order of the random draws below is kept identical to the
        # original so a seeded run consumes the random stream the same way.
        base_cal = 1800 + rng.randint(-150, 150)
        protein = round(weight * 1.6 + rng.randint(-10, 10))
        # 45% of the day's calories divided by 4, and 25% divided by 9
        # (presumably kcal per gram of carbohydrate / fat; confirm).
        carbs = round(base_cal * 0.45 / 4)
        fats = round(base_cal * 0.25 / 9)

        plan[f"day{day}"] = {
            "calories": base_cal,
            "protein_g": protein,
            "carbs_g": carbs,
            "fat_g": fats,
            # Called per day so every day owns an independent list object.
            "meals": generate_meals(user.get("goals", "balanced")),
            "workout": rng.choice(workouts),
            "sleep_hours": rng.choice(sleep_options),
            "hydration_liters": round(rng.uniform(2.0, 3.0), 1),
            "notes": rng.choice(note_options),
        }
    return plan

def write_jsonl(users, filename):
    """Write one JSON line per user with an ``input`` prompt and a ``target`` plan.

    Args:
        users: Iterable of user dicts. The ``user_id`` key is left out of the
            prompt; the full dict is handed to ``generate_plan``.
        filename: Path of the JSONL file to create or overwrite.
    """
    with open(filename, "w") as sink:
        for record in users:
            # Everything except the identifier goes into the prompt.
            profile = {key: value for key, value in record.items() if key != 'user_id'}
            prompt = f"user: {json.dumps(profile)} Produce a 7-day plan JSON with day1..day7 including: calories, protein_g, carbs_g, fat_g, meals, workout, sleep_hours, hydration_liters, notes."
            # The target is the plan serialized as a JSON string.
            plan_json = json.dumps(generate_plan(record))
            print(json.dumps({"input": prompt, "target": plan_json}), file=sink)

# Seed the global RNG so that regenerating the files after a kernel restart
# produces byte-identical train/eval data (generate_plan draws from `random`).
random.seed(42)

write_jsonl(train_users, "train.jsonl")
write_jsonl(eval_users, "eval.jsonl")

print("✅ train.jsonl and eval.jsonl generated successfully.")

✅ train.jsonl and eval.jsonl generated successfully.


In [None]:
from datasets import load_dataset

# Read both generated JSONL files back as datasets. A lone data file is always
# exposed under the "train" split, hence split="train" for the eval file too.
# NOTE(review): `eval` shadows the Python builtin; it is kept because a later
# cell refers to this name.
train, eval = (
    load_dataset("json", data_files=jsonl_path, split="train")
    for jsonl_path in ("train.jsonl", "eval.jsonl")
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
from transformers import T5TokenizerFast, T5ForConditionalGeneration
from peft import LoraConfig, get_peft_model

# Base checkpoint used for both the tokenizer and the seq2seq model.
model_name = "t5-small"  # or t5-base/t5-large
tokenizer = T5TokenizerFast.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# LoRA adapter settings: rank-16 adapters (alpha 32, dropout 0.05) on the
# attention query ("q") and value ("v") projections, with no bias training.
lora_config = LoraConfig(**{
    "r": 16,
    "lora_alpha": 32,
    "target_modules": ["q", "v"],
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "SEQ_2_SEQ_LM",
})

# Wrap the base model with the adapters and report the trainable fraction.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Token budgets used by preprocess() for the prompt and for the target plan.
# NOTE(review): a full 7-day plan JSON may exceed 512 tokens, in which case
# targets are truncated. Confirm against the tokenized target lengths.
max_input, max_target = 256, 512

def preprocess(example):
    """Tokenize one ``{"input", "target"}`` record into fixed-length features.

    The prompt is padded/truncated to ``max_input`` tokens and the target to
    ``max_target`` tokens. Padding positions in the labels are replaced with
    ``-100`` so the loss ignores them; previously the pad token ids were kept,
    which made the model train on (and get credit for) predicting padding.

    Args:
        example: Mapping with string fields ``input`` and ``target``. A batch
            (lists of strings) is handled as well.

    Returns:
        The tokenizer output for the prompt with an added ``labels`` entry.
    """
    # Encode input
    model_inputs = tokenizer(
        example["input"],
        max_length=max_input,
        truncation=True,
        padding="max_length"
    )

    # Encode target
    labels = tokenizer(
        text_target=example["target"],
        max_length=max_target,
        truncation=True,
        padding="max_length"
    )

    pad_id = tokenizer.pad_token_id

    def _mask_padding(ids):
        # -100 is the index ignored by the cross-entropy loss.
        return [-100 if token_id == pad_id else token_id for token_id in ids]

    label_ids = labels["input_ids"]
    if label_ids and isinstance(label_ids[0], list):
        # Batched call: one id list per example.
        model_inputs["labels"] = [_mask_padding(ids) for ids in label_ids]
    else:
        model_inputs["labels"] = _mask_padding(label_ids)
    return model_inputs

# Tokenize both splits one example at a time (batched=False) with preprocess().
train_ds, eval_ds = (
    split.map(preprocess, batched=False)
    for split in (train, eval)
)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

trainable params: 589,824 || all params: 61,096,448 || trainable%: 0.9654


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [None]:
from transformers import Trainer, TrainingArguments

# Training configuration. The effective batch size is 8 * 2 = 16 per device
# through gradient accumulation; evaluation and checkpointing run every epoch.
training_args = TrainingArguments(
  output_dir="./t5_fitness",
  per_device_train_batch_size=8,   # larger batch if GPU allows
  per_device_eval_batch_size=8,
  gradient_accumulation_steps=2,   # simulates batch=16
  num_train_epochs=5,
  learning_rate=5e-4,
  lr_scheduler_type="linear",
  warmup_ratio=0.05,               # gradual LR warmup
  save_strategy="epoch",
  eval_strategy="epoch",
  logging_steps=50,
  fp16=torch.cuda.is_available(),  # mixed precision needs a GPU; fall back to fp32 on CPU
  report_to="none"                 # disable wandb unless needed
)

trainer = Trainer(
  model=model,
  args=training_args,
  train_dataset=train_ds,
  eval_dataset=eval_ds,
  # `tokenizer=` is deprecated for Trainer.__init__ (it triggers a
  # FutureWarning); `processing_class` is its replacement.
  processing_class=tokenizer
)

trainer.train()
# Trainer has no `save_pretrained`; `save_model` writes the (LoRA-wrapped)
# model to the given directory.
trainer.save_model("./t5_fitness")
tokenizer.save_pretrained("./t5_fitness")



  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.2485,0.184945
2,0.1899,0.161391
3,0.1772,0.153839
4,0.1694,0.148066
5,0.1674,0.146002


('./t5_fitness/tokenizer_config.json',
 './t5_fitness/special_tokens_map.json',
 './t5_fitness/spiece.model',
 './t5_fitness/added_tokens.json',
 './t5_fitness/tokenizer.json')

In [None]:
import torch
from transformers import T5ForConditionalGeneration, AutoTokenizer
from peft import PeftModel

# Run on the GPU when one is available, otherwise on the CPU.
device = "cpu" if not torch.cuda.is_available() else "cuda"

# Tokenizer files are read from the saved training checkpoint.
tokenizer = AutoTokenizer.from_pretrained("./t5_fitness/checkpoint-2500")

# Rebuild the model: plain t5-small weights plus the LoRA adapter stored in
# the same checkpoint, then move the combined model to the chosen device.
base_model = T5ForConditionalGeneration.from_pretrained("t5-small")
model = PeftModel.from_pretrained(base_model, "./t5_fitness/checkpoint-2500")
model = model.to(device)

In [None]:
import json

# Profile to generate a plan for (same fields as the training user records,
# without user_id).
sample_user = {
    "age": 24,
    "weight": 81.9,
    "height": 155.9,
    "bmi": 33.7,
    "fitness_level": "intermediate",
    "goals": "endurance",
    "join_date": "2024-10-19",
}

# Build the prompt in exactly the format write_jsonl used for the training
# inputs. The previous free-form prompt ("user = {...}", "3 days" alongside
# "do not stop before day 7") did not match anything seen during fine-tuning.
inp = (
    f"user: {json.dumps(sample_user)} "
    "Produce a 7-day plan JSON with day1..day7 including: calories, protein_g, "
    "carbs_g, fat_g, meals, workout, sleep_hours, hydration_liters, notes."
)

# Same truncation budget as the training inputs (max_input = 256).
inputs = tokenizer(
    inp,
    return_tensors="pt",
    truncation=True,
    max_length=256,
).to(model.device)

out = model.generate(
    **inputs,
    # Training targets were capped at 512 tokens (max_target), so the model
    # has never seen longer outputs.
    max_new_tokens=512,
    # Greedy decoding: actually deterministic, unlike do_sample=True.
    do_sample=False,
)

# NOTE(review): T5's SentencePiece vocabulary reportedly lacks "{" and "}",
# which would prevent valid JSON output. Verify with tokenizer.tokenize("{}").
plan_text = tokenizer.decode(out[0], skip_special_tokens=True)
plan_text


', fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit orphan, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fruit, fru

In [None]:
import shutil
from google.colab import files

# Zip the contents of the training output directory into t5_fitness.zip in the
# working directory, then hand the archive to the Colab download helper.
output_dir = "t5_fitness"
shutil.make_archive(base_name="t5_fitness", format="zip", root_dir=output_dir)
files.download("t5_fitness.zip")