In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from datetime import datetime
import pandas as pd
import time
import re
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# === Prompts ===
SYSTEM_PROMPT = (
    "You are an air pollution assistant. "
    "Strictly respond to queries with a single real number only. "
    "Do not include any units, explanation, or punctuation. Just a single number."
)

USER_TEMPLATE = (
    "What is the average PM2.5 concentration (in μg/m³) in India during {month}, {year}? Give a single number only."
)

hf_token = "Your Token"
device = "cuda"
dtype = torch.bfloat16

### Gemma 9b

In [None]:
# === Settings ===
model_id = "google/gemma-2-9b-it"

# === Load Model ===
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": 0},
    torch_dtype=dtype,
    token=hf_token,
)
model.eval()

# === Query Function ===
def query_llm(month, year):
    user_prompt = USER_TEMPLATE.format(month=month, year=year)
    full_prompt = f"{SYSTEM_PROMPT}\n\n{user_prompt}"

    messages = [{"role": "user", "content": full_prompt}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    
    match = re.search(r"\d+(\.\d+)?", decoded)
    return float(match.group()) if match else float("nan")

# === Save Results ===
save_path = "gemma2_9b_it_nation_level_2023.csv"
rows = []
if os.path.exists(save_path):
    existing_df = pd.read_csv(save_path)
    print(f"Resuming from saved file with {len(existing_df)} rows.")
    processed_months = set(existing_df["month"])
    rows = existing_df.to_dict("records")
else:
    print("No previous file found. Starting fresh.")
    processed_months = set()

months = list(range(1, 13))
year = 2023

for month_num in months:
    month_name = datetime(1900, month_num, 1).strftime("%B")

    if month_name in processed_months:
        continue

    try:
        prediction = query_llm(month_name, year)
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            print("CUDA OOM — stopping execution.")
            raise
        else:
            prediction = float("nan")

    row = {
        "year": year,
        "month": month_name,
        "model": model_id,
        "pm2.5": prediction,
    }
    rows.append(row)

    # Save after every month
    try:
        pd.DataFrame(rows).to_csv(save_path, index=False)
        print(f"[{datetime.now()}] Saved prediction for {month_name}")
    except Exception as e:
        print(f"[{datetime.now()}] Save error after {month_name}: {e}")

print(f"[{datetime.now()}] Final save complete. Total months: {len(rows)}")

### Gemma 27b

In [4]:
# === Settings ===
model_id = "google/gemma-2-27b-it"

# === Load Model ===
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": 0},
    torch_dtype=dtype,
    token=hf_token,
)
model.eval()

# === Query Function ===
def query_llm(month, year):
    user_prompt = USER_TEMPLATE.format(month=month, year=year)
    full_prompt = f"{SYSTEM_PROMPT}\n\n{user_prompt}"

    messages = [{"role": "user", "content": full_prompt}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    
    match = re.search(r"\d+(\.\d+)?", decoded)
    return float(match.group()) if match else float("nan")

# === Save Results ===
save_path = "gemma2_27b_it_nation_level_2023.csv"
rows = []
if os.path.exists(save_path):
    existing_df = pd.read_csv(save_path)
    print(f"Resuming from saved file with {len(existing_df)} rows.")
    processed_months = set(existing_df["month"])
    rows = existing_df.to_dict("records")
else:
    print("No previous file found. Starting fresh.")
    processed_months = set()

months = list(range(1, 13))
year = 2023

for month_num in months:
    month_name = datetime(1900, month_num, 1).strftime("%B")

    if month_name in processed_months:
        continue

    try:
        prediction = query_llm(month_name, year)
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            print("CUDA OOM — stopping execution.")
            raise
        else:
            prediction = float("nan")

    row = {
        "year": year,
        "month": month_name,
        "model": model_id,
        "pm2.5": prediction,
    }
    rows.append(row)

    # Save after every month
    try:
        pd.DataFrame(rows).to_csv(save_path, index=False)
        print(f"[{datetime.now()}] Saved prediction for {month_name}")
    except Exception as e:
        print(f"[{datetime.now()}] Save error after {month_name}: {e}")

print(f"[{datetime.now()}] Final save complete. Total months: {len(rows)}")

Loading checkpoint shards: 100%|██████████| 12/12 [00:11<00:00,  1.04it/s]


No previous file found. Starting fresh.


skipping cudagraphs due to skipping cudagraphs due to cpu device (arg102_1)


[2025-07-17 00:00:42.668609] Saved prediction for January
[2025-07-17 00:00:42.930735] Saved prediction for February
[2025-07-17 00:00:43.190959] Saved prediction for March
[2025-07-17 00:00:43.452111] Saved prediction for April
[2025-07-17 00:00:43.712038] Saved prediction for May
[2025-07-17 00:00:43.973564] Saved prediction for June
[2025-07-17 00:00:44.233492] Saved prediction for July
[2025-07-17 00:00:44.494085] Saved prediction for August
[2025-07-17 00:00:44.755003] Saved prediction for September
[2025-07-17 00:00:45.014928] Saved prediction for October
[2025-07-17 00:00:45.320765] Saved prediction for November
[2025-07-17 00:00:45.627398] Saved prediction for December
[2025-07-17 00:00:45.627643] Final save complete. Total months: 12


### Llama 8b

In [5]:
# === Settings ===
model_id = "meta-llama/Llama-3.1-8B-Instruct"
save_path = "llama3_8b_it_nation_level_2023.csv"

# === Load Model ===
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
tokenizer.pad_token = tokenizer.eos_token  # Important for padding
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    device_map={"": 0},
    torch_dtype=dtype,
)
model.eval()

# Optional: compile model if supported
if hasattr(torch, "compile"):
    model = torch.compile(model)

# === Inference Function ===
def query_llm(month, year):
    user_prompt = USER_TEMPLATE.format(month=month, year=year)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt}
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            temperature=0,
            pad_token_id=tokenizer.pad_token_id,
        )

    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    match = re.search(r"\d+(\.\d+)?", decoded)
    return float(match.group()) if match else float("nan")

# === Resume or Initialize ===
rows = []
if os.path.exists(save_path):
    existing_df = pd.read_csv(save_path)
    print(f"Resuming from saved file with {len(existing_df)} rows.")
    processed_months = set(existing_df["month"])
    rows = existing_df.to_dict("records")
else:
    print("No previous file found. Starting fresh.")
    processed_months = set()

months = list(range(1, 13))
year = 2023

for month_num in months:
    month_name = datetime(1900, month_num, 1).strftime("%B")

    if month_name in processed_months:
        continue

    try:
        prediction = query_llm(month_name, year)
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            print("CUDA OOM — stopping execution.")
            raise
        else:
            prediction = float("nan")

    row = {
        "year": year,
        "month": month_name,
        "model": model_id,
        "pm2.5": prediction,
    }
    rows.append(row)

    # Save after every month
    try:
        pd.DataFrame(rows).to_csv(save_path, index=False)
        print(f"[{datetime.now()}] Saved prediction for {month_name}")
    except Exception as e:
        print(f"[{datetime.now()}] Save error after {month_name}: {e}")

print(f"[{datetime.now()}] Final save complete. Total months: {len(rows)}")

Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.60s/it]
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


No previous file found. Starting fresh.
[2025-07-17 00:06:07.539736] Saved prediction for January
[2025-07-17 00:06:07.601367] Saved prediction for February


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-17 00:06:07.663103] Saved prediction for March
[2025-07-17 00:06:07.724092] Saved prediction for April
[2025-07-17 00:06:07.785185] Saved prediction for May
[2025-07-17 00:06:07.845934] Saved prediction for June


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-17 00:06:07.907152] Saved prediction for July


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-17 00:06:08.163718] Saved prediction for August


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-17 00:06:08.420686] Saved prediction for September


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-17 00:06:08.677775] Saved prediction for October


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-17 00:06:08.934424] Saved prediction for November
[2025-07-17 00:06:09.219120] Saved prediction for December
[2025-07-17 00:06:09.219330] Final save complete. Total months: 12


### Llama 70b

In [4]:
# === Settings ===
model_id = "meta-llama/Llama-3.3-70B-Instruct"
save_path = "AirQuality/RQ1/Dataset/llama3_70b_it_nation_level_2023.csv"

# === Load Model ===
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
tokenizer.pad_token = tokenizer.eos_token 
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    max_memory={0: "78GiB", "cpu": "30GiB"}
)
model.eval()

# Optional: compile model if supported
if hasattr(torch, "compile"):
    model = torch.compile(model)

# === Inference Function ===
def query_llm(month, year):
    user_prompt = USER_TEMPLATE.format(month=month, year=year)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt}
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            temperature=0,
            pad_token_id=tokenizer.pad_token_id,
        )

    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    match = re.search(r"\d+(\.\d+)?", decoded)
    return float(match.group()) if match else float("nan")

# === Resume or Initialize ===
rows = []
if os.path.exists(save_path):
    existing_df = pd.read_csv(save_path)
    print(f"Resuming from saved file with {len(existing_df)} rows.")
    processed_months = set(existing_df["month"])
    rows = existing_df.to_dict("records")
else:
    print("No previous file found. Starting fresh.")
    processed_months = set()

months = list(range(1, 13))
year = 2023

for month_num in months:
    month_name = datetime(1900, month_num, 1).strftime("%B")

    if month_name in processed_months:
        continue

    try:
        prediction = query_llm(month_name, year)
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            print("CUDA OOM — stopping execution.")
            raise
        else:
            prediction = float("nan")

    row = {
        "year": year,
        "month": month_name,
        "model": model_id,
        "pm2.5": prediction,
    }
    rows.append(row)

    # Save after every month
    try:
        pd.DataFrame(rows).to_csv(save_path, index=False)
        print(f"[{datetime.now()}] Saved prediction for {month_name}")
    except Exception as e:
        print(f"[{datetime.now()}] Save error after {month_name}: {e}")

print(f"[{datetime.now()}] Final save complete. Total months: {len(rows)}")

Loading checkpoint shards: 100%|██████████| 30/30 [00:13<00:00,  2.30it/s]
Some parameters are on the meta device because they were offloaded to the disk and cpu.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


No previous file found. Starting fresh.


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:29:03.881611] Saved prediction for January


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:29:20.150769] Saved prediction for February


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:29:36.455737] Saved prediction for March


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:29:52.786148] Saved prediction for April


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:30:09.147213] Saved prediction for May


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:30:25.462609] Saved prediction for June


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:30:41.796033] Saved prediction for July


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:30:58.095826] Saved prediction for August


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:31:14.396077] Saved prediction for September


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:31:30.727133] Saved prediction for October


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-07-24 21:31:47.081550] Saved prediction for November
[2025-07-24 21:32:03.416624] Saved prediction for December
[2025-07-24 21:32:03.417091] Final save complete. Total months: 12


### Qwen 32B

In [None]:
# === Settings ===
model_id = "Qwen/Qwen3-32B"
save_path = "AirQuality/RQ1/Dataset/National/qwen3_32b_nation_level_2023.csv"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    device_map={"": 0},
    torch_dtype=dtype,
)
model.eval()

# Optional: compile model if supported
if hasattr(torch, "compile"):
    model = torch.compile(model)

# === Inference Function ===
def query_llm(month, year):
    user_prompt = USER_TEMPLATE.format(month=month, year=year)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt}
    ]
    prompt_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )
    inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            temperature=0,
            pad_token_id=tokenizer.pad_token_id,
        )

    output_ids = outputs[0][inputs["input_ids"].shape[-1]:].tolist()
    decoded = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
    match = re.search(r"\d+(\.\d+)?", decoded)
    return float(match.group()) if match else float("nan")

# === Resume or Initialize ===
rows = []
if os.path.exists(save_path):
    existing_df = pd.read_csv(save_path)
    print(f"Resuming from saved file with {len(existing_df)} rows.")
    processed_months = set(existing_df["month"])
    rows = existing_df.to_dict("records")
else:
    print("No previous file found. Starting fresh.")
    processed_months = set()

months = list(range(1, 13))
year = 2023

for month_num in months:
    month_name = datetime(1900, month_num, 1).strftime("%B")

    if month_name in processed_months:
        continue

    try:
        prediction = query_llm(month_name, year)
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            print("CUDA OOM — stopping execution.")
            raise
        else:
            prediction = float("nan")

    row = {
        "year": year,
        "month": month_name,
        "model": model_id,
        "pm2.5": prediction,
    }
    rows.append(row)

    # Save after every month
    try:
        pd.DataFrame(rows).to_csv(save_path, index=False)
        print(f"[{datetime.now()}] Saved prediction for {month_name}")
    except Exception as e:
        print(f"[{datetime.now()}] Save error after {month_name}: {e}")

print(f"[{datetime.now()}] Final save complete. Total months: {len(rows)}")

Loading checkpoint shards: 100%|██████████| 17/17 [00:12<00:00,  1.36it/s]
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


No previous file found. Starting fresh.


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:54.807880] Saved prediction for January


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:55.156748] Saved prediction for February


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:55.504862] Saved prediction for March


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:55.853671] Saved prediction for April


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:56.203984] Saved prediction for May


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:56.552584] Saved prediction for June


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:56.901695] Saved prediction for July


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:57.251374] Saved prediction for August


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:57.599824] Saved prediction for September


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:57.949762] Saved prediction for October


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[2025-08-03 23:35:58.310243] Saved prediction for November
[2025-08-03 23:35:58.598720] Saved prediction for December
[2025-08-03 23:35:58.599008] Final save complete. Total months: 12


: 