In [None]:
# Notebook setup cell: install the SDK, then acquire and select a remote runtime.
# The semantics of the % magics below are defined by airavata_jupyter_magic and are
# not visible in this file; the notes on them are assumptions to confirm.

# Quietly (-q) install the Airavata Python SDK together with its "notebook" extra.
%pip install -q airavata-python-sdk[notebook]
# Presumably importing this module registers the line magics used below - TODO confirm.
import airavata_jupyter_magic

# Presumably starts an interactive login for the current user - TODO confirm.
%authenticate
# Request a runtime labelled "hpc_cpu" described by cybershuttle.yml.
# NOTE(review): --walltime=60 is presumably in minutes, and --use looks like a list of
# candidate <cluster>:<queue> targets - verify against the magic's documentation.
%request_runtime hpc_cpu --file=cybershuttle.yml --walltime=60 --use=NeuroData25VC2:cloud,expanse:shared,anvil:shared
# Presumably routes the execution of subsequent cells to the "hpc_cpu" runtime - TODO confirm.
%switch_runtime hpc_cpu

: 

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

def main():
    """Run an interactive prompt loop against a locally stored causal LM.

    Loads the tokenizer, model and generation config from ``MODEL_NAME``
    (``local_files_only=True``, so nothing is downloaded), then repeatedly
    reads a prompt from stdin, formats it with the tokenizer's chat
    template, samples up to 512 new tokens and prints the decoded reply.

    The loop ends when the user types ``exit`` (case-insensitive,
    surrounding whitespace ignored) or when stdin is closed or interrupted.
    Blank input lines are skipped without invoking the model.
    """
    # Step 1: Define model path
    MODEL_NAME = "/cybershuttle_data/airavata-courses-deepseek-chat/deepseek-ai/deepseek-math-7b-instruct"

    # Step 2: Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        local_files_only=True
    )

    # Fall back to EOS as the padding token when the tokenizer defines none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        local_files_only=True
    )

    generation_config = GenerationConfig.from_pretrained(
        MODEL_NAME,
        local_files_only=True
    )
    # eos_token_id may be a single id or a list of ids, while pad_token_id
    # must be a single id; use the first EOS id when several are configured.
    eos_token_id = generation_config.eos_token_id
    if isinstance(eos_token_id, (list, tuple)):
        eos_token_id = eos_token_id[0] if eos_token_id else None
    generation_config.pad_token_id = eos_token_id

    # Step 3: Interactive prompt loop
    while True:
        try:
            user_input = input("\nEnter your prompt (or type 'exit' to quit): ")
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user interrupted the prompt: end the
            # session cleanly instead of surfacing a traceback.
            break

        command = user_input.strip()
        if command.lower() == 'exit':
            break
        if not command:
            # Nothing to send to the model; ask again.
            continue

        # Step 4: Prepare chat messages
        messages = [{"role": "user", "content": user_input}]

        # Step 5: Tokenize using chat template
        input_tensor = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)

        # The prompt is a single, unpadded sequence, so every position is a
        # real token. Deriving the mask from `!= pad_token_id` would wrongly
        # hide genuine tokens whenever the pad id aliases another special
        # token (it is set to EOS above when the tokenizer has no pad token).
        attention_mask = torch.ones_like(input_tensor)

        # Step 6: Generate model output
        outputs = model.generate(
            input_ids=input_tensor,
            attention_mask=attention_mask,
            max_new_tokens=512,
            pad_token_id=generation_config.pad_token_id,
            do_sample=True,
            temperature=0.7
        )

        # Step 7: Decode and print model output
        # Slice off the prompt tokens so only the newly generated text is shown.
        result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
        print("\nModel Output:\n", result.strip())

# Start the interactive session only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()