# Sample inference 

In [1]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from peft import PeftModel, PeftConfig
import torch
import re
import numpy as np
# Element symbols recognised when parsing a predicted composition
# (kept in alphabetical order; parse_output reports results in this order).
ELEMENTS = [
    "Ag", "Al", "B", "Be", "Bi",
    "Cd", "Co", "Cr", "Cu", "Er",
    "Eu", "Fe", "Ga", "Li", "Mg",
    "Mn", "Ni", "Pb", "Sc", "Si",
    "Sn", "Ti", "V", "Zn", "Zr",
]

# Run on the GPU when one is present, otherwise fall back to the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"


In [2]:
def load_trained_model(model_name="ljbbbbbbb/aluminum-alloy-inverse-design", device=None):
    """Load a PEFT adapter on top of its GPT-2 base model for inference.

    Args:
        model_name: Identifier (hub id or path) of the trained PEFT adapter.
            Defaults to the adapter this notebook was written for.
        device: Device to place the model on. Defaults to the module-level
            ``DEVICE`` ("cuda" when available, otherwise "cpu").

    Returns:
        ``(model, tokenizer)`` on success. On any loading error the error is
        printed and ``(None, None)`` is returned, so callers must check the
        result before using it.
    """
    if device is None:
        device = DEVICE

    try:
        # 1. The adapter config records which base checkpoint it was trained on.
        peft_config = PeftConfig.from_pretrained(model_name)
        base_model_name = peft_config.base_model_name_or_path
        print(f"Base model: {base_model_name}")

        # 2. Load the base tokenizer and model. GPT-2 ships without a pad
        #    token, so the end-of-sequence token is reused for padding.
        tokenizer = GPT2Tokenizer.from_pretrained(base_model_name)
        tokenizer.pad_token = tokenizer.eos_token

        base_model = GPT2LMHeadModel.from_pretrained(base_model_name)

        # 3. Attach the trained PEFT adapter and move everything to the device.
        model = PeftModel.from_pretrained(base_model, model_name)
        model = model.to(device)

        # Half precision is only used on CUDA devices; keep float32 on CPU.
        if str(device).startswith("cuda"):
            model = model.half()

        # The model is only used for generation here: disable dropout etc.
        model.eval()

        print("✅ Trained model loaded successfully!")
        return model, tokenizer

    except Exception as e:
        # Best-effort loader: report the problem and signal failure with Nones.
        print(f"Error loading model: {e}")
        return None, None

In [8]:

def parse_output(pred_text, elements=None):
    """Convert a model prediction into a ``{element: amount}`` dictionary.

    Args:
        pred_text: Either a numeric ``list`` / ``numpy.ndarray`` whose entries
            are in the same order as ``elements``, or a string containing
            ``Symbol: number`` / ``Symbol = number`` pairs such as
            ``"Al: 85, Mg: 5, Zn: 10"``.
        elements: Element symbols to recognise, in output order. Defaults to
            the module-level ``ELEMENTS`` list.

    Returns:
        A dict, ordered like ``elements``, containing only the elements whose
        parsed amount is strictly greater than zero. Symbols that are not in
        ``elements`` are ignored; if a symbol appears several times in a
        string, the last occurrence wins.

    Raises:
        ValueError: If ``pred_text`` is neither a string nor a list/ndarray.
    """
    if elements is None:
        elements = ELEMENTS

    values = {el: 0.0 for el in elements}

    if isinstance(pred_text, (list, np.ndarray)):
        # Assume an already-numeric sequence in the same order as `elements`;
        # zip() silently ignores any surplus entries on either side.
        for el, val in zip(elements, pred_text):
            values[el] = float(val)
    elif isinstance(pred_text, str):
        # Match a well-formed decimal number (optional exponent) so that
        # generated text like "Mg: 0.05." or "Zn: ." can never reach float()
        # with an unparsable token, and "7.5e-4" is not cut down to "7.5".
        number = r"(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"
        pair_pattern = r"([A-Za-z]+)\s*[:=]\s*(" + number + r")"
        for el, val in re.findall(pair_pattern, pred_text):
            if el in values:
                values[el] = float(val)
    else:
        raise ValueError("Unknown prediction format")

    # Report only the elements that are actually present (amount > 0).
    return {el: values[el] for el in elements if values[el] > 0}

# Inference 
def predict(model, tokenizer, processing, tensile_strength, yield_strength, elongation,
            max_new_tokens=160, temperature=0.7):
    """Generate a suggested composition for the requested target properties.

    Args:
        model: Causal language model returned by ``load_trained_model``.
        tokenizer: Tokenizer matching ``model``.
        processing: Free-text description of the processing route; inserted
            verbatim into the prompt.
        tensile_strength: Target tensile strength, rendered as ``<value>MPa``.
        yield_strength: Target yield strength, rendered as ``<value>MPa``.
        elongation: Target elongation, rendered as ``<value>%``.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.

    Returns:
        The dictionary produced by ``parse_output`` for the generated text.
        Sampling is enabled, so repeated calls may give different results
        unless the torch random seed is fixed by the caller.

    Raises:
        ValueError: If ``model`` or ``tokenizer`` is ``None`` (e.g. because
            loading failed).
    """
    if model is None or tokenizer is None:
        raise ValueError("predict() needs a loaded model and tokenizer, got None")

    model.eval()
    prop_text = (
        f"Target -> Tensile: {tensile_strength}MPa, "
        f"Yield: {yield_strength}MPa, "
        f"Elongation: {elongation}%"
    )
    input_text = f"{prop_text} under Processing: {processing} | Suggested Composition:"
    print("Prompt:", input_text)

    # Put the inputs on whatever device the model's weights live on, so the
    # call also works when the model is not on the global default device.
    model_device = next(model.parameters()).device
    inputs = tokenizer(input_text, return_tensors="pt").to(model_device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation for decoder-only models; decode
    # only the continuation so text from the prompt is never parsed as part
    # of the suggested composition.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return parse_output(response)


In [4]:

# Usage: load the adapter once and reuse it for every prediction below.
model, tokenizer = load_trained_model()

# load_trained_model() reports failure by returning (None, None); stop here
# with a clear message instead of failing later inside predict().
if model is None or tokenizer is None:
    raise RuntimeError("Model failed to load; see the error printed above.")


Base model: gpt2
✅ Trained model loaded successfully!


In [9]:
# Smoke test: ask for a composition matching one set of target properties.
print("\n🧪 Testing prediction...")
target_request = dict(
    processing="Solutionised  + Artificially peak aged",
    tensile_strength=300,
    yield_strength=250,
    elongation=12,
)
prediction = predict(model, tokenizer, **target_request)
print(f"Prediction: {prediction}")


🧪 Testing prediction...
Prompt: Target -> Tensile: 300MPa, Yield: 250MPa, Elongation: 12% under Processing: Solutionised  + Artificially peak aged | Suggested Composition:
Prediction: {'Al': 0.9675987, 'Cu': 0.0007508, 'Mg': 0.0031206, 'Mn': 0.025, 'Si': 0.0007509, 'Ti': 0.0007509}
