In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from generate_LLM_sets import ConcatenatedFileDataset
import numpy as np

In [2]:
# Placement target handed to `from_pretrained` as `device_map` when the model is loaded.
device = 'cuda'
# Path passed to `AutoModelForCausalLM.from_pretrained` as the checkpoint to load.
model_path = 'merged_model/'

In [3]:
# Load the causal LM stored at `model_path` and place it on `device`.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


In [4]:
# The tokenizer is fetched by model id rather than from `model_path`.
# NOTE(review): confirm its vocabulary and chat template match the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")

In [5]:
# SECURITY: torch.load unpickles arbitrary Python objects when weights_only=False,
# so 'data/test.pt' must come from a trusted source.
# The flag is spelled out because the loaded object is used as a dataset below
# (`.map`, `len`, integer indexing), not as a bare tensor/state dict, and PyTorch
# releases that default to weights_only=True would refuse to load it otherwise.
test = torch.load('data/test.pt', weights_only=False)

  test = torch.load('data/test.pt')


In [6]:
def create_message_column(row):
    """Wrap one example's prompt into a two-turn chat transcript.

    Args:
        row: Mapping that provides the prompt text under the key 'prompt'.

    Returns:
        A dict with a single key "messages" holding a list of two chat turns:
        the user turn carrying the prompt, followed by an assistant turn whose
        content is the single character '('.
    """
    user_turn = {"content": row['prompt'], "role": "user"}
    # The assistant turn is pre-filled with an opening parenthesis
    # (presumably to steer the reply toward an edge-list tuple; confirm with the training format).
    assistant_turn = {"content": '(', "role": "assistant"}
    return {"messages": [user_turn, assistant_turn]}

def format_dataset_chatml(row):
    """Render a row's chat messages with the chat template and tokenize the result.

    Relies on the module-level `tokenizer`.

    Args:
        row: Mapping that provides the chat turns under the key "messages".

    Returns:
        A dict with "input_ids" and "attention_mask", each being the first
        entry of the corresponding tokenizer output.
    """
    # Render the conversation to plain text (tokenize=False) without appending
    # a trailing generation prompt.
    chat_text = tokenizer.apply_chat_template(
        row["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )

    # Encode the rendered text, truncating at 4096 tokens and adding no padding.
    encoding = tokenizer(
        chat_text,
        return_tensors="pt",
        padding=False,
        truncation=True,
        max_length=4096,
    )

    # A single string was encoded, so index 0 selects that one sequence
    # (assumes the tokenizer output carries a leading batch axis).
    return {
        field: encoding[field][0]
        for field in ("input_ids", "attention_mask")
    }

In [7]:
# Two-stage preprocessing: attach the chat messages, then template + tokenize them.
test_dataset = (
    test
    .map(create_message_column)
    .map(format_dataset_chatml)
)

In [8]:
# Collect the raw prompt string of every test example, in dataset order.
prompts = [test[idx]['prompt'] for idx in range(len(test))]

In [9]:
# Preview only the first few prompts; displaying the whole list floods the notebook output.
prompts[:5]

['Give the graph edgelist associated to the following features.-Number of nodes: 0.0-Number of edges: 50.0-Average degree: 589.0-Number of triangles: 23.56-Clustering coefficient: 3702.0-Max k cores: 0.6226034308779012-Number of communities: 18.0',
 'Give the graph edgelist associated to the following features.-Number of nodes: 1.0-Number of edges: 20.0-Average degree: 19.0-Number of triangles: 1.9-Clustering coefficient: 0.0-Max k cores: 0.0-Number of communities: 1.0',
 'Give the graph edgelist associated to the following features.-Number of nodes: 2.0-Number of edges: 28.0-Average degree: 165.0-Number of triangles: 11.785714285714286-Clustering coefficient: 387.0-Max k cores: 0.4742647058823529-Number of communities: 8.0',
 'Give the graph edgelist associated to the following features.-Number of nodes: 3.0-Number of edges: 47.0-Average degree: 1050.0-Number of triangles: 44.680851063829785-Clustering coefficient: 14914.0-Max k cores: 0.9736257997127562-Number of communities: 41.0',


In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from typing import List
from tqdm import tqdm
import gc
def batch_generate(
    prompts: List[str],
    model,
    tokenizer,
    batch_size: int = 4,
    max_length: int = 100,
    temperature: float = 0.7,
    top_p: float = 0.1,
    show_progress: bool = True
) -> List[str]:
    """
    Generate text for multiple prompts, processing them in batches.

    Prompts inside a batch are left-padded to a common length so that batches
    with prompts of different lengths can be stacked into one tensor and so
    that generation continues directly after each prompt's last real token.

    Args:
        prompts: List of input prompts. An empty list yields an empty result.
        model: The loaded model; must expose `.device` and `.generate(...)`.
        tokenizer: The loaded tokenizer. Its `padding_side` is temporarily set
            to "left" and restored before returning. If it has no pad token,
            the EOS token is installed as pad token (this is not reverted).
        batch_size: Number of prompts to process simultaneously (>= 1).
        max_length: Forwarded to `model.generate` as `max_length`, i.e. the cap
            on the total sequence length, prompt tokens included.
        temperature: Sampling temperature.
        top_p: Nucleus sampling parameter.
        show_progress: Whether to show a progress bar.

    Returns:
        One decoded string per prompt, in input order. Each string is the
        decoding of the full generated sequence, so it starts with the prompt
        text followed by the continuation.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    generated_responses: List[str] = []
    if not prompts:
        return generated_responses

    # Padding needs a pad token; fall back to EOS when the tokenizer has none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Decoder-only models must be padded on the left for batched generation,
    # otherwise pad tokens sit between the prompt and the generated text.
    original_padding_side = tokenizer.padding_side
    tokenizer.padding_side = "left"

    try:
        # Process prompts in batches
        for start in tqdm(range(0, len(prompts), batch_size), disable=not show_progress):
            batch_prompts = prompts[start:start + batch_size]

            # padding=True pads to the longest prompt in the batch; without it,
            # return_tensors="pt" fails as soon as two prompts differ in length.
            inputs = tokenizer(
                batch_prompts,
                padding=True,
                truncation=True,
                return_tensors="pt"
            ).to(model.device)

            # Generate without tracking gradients (inference only).
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_length=max_length,
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=True,
                    pad_token_id=tokenizer.pad_token_id,
                    num_return_sequences=1
                )

            # Decode outputs
            generated_responses.extend(
                tokenizer.batch_decode(outputs, skip_special_tokens=True)
            )

            # Drop the references to this batch's tensors first, then collect
            # garbage, and only then release cached blocks: empty_cache() can
            # only return memory that is no longer referenced.
            del inputs, outputs
            if torch.cuda.is_available():
                gc.collect()
                torch.cuda.empty_cache()
    finally:
        # Leave the caller's tokenizer configured as it was.
        tokenizer.padding_side = original_padding_side

    return generated_responses


# Smoke run: generate for the first prompt only.
responses = batch_generate(
    prompts=prompts[:1],
    model=model,
    tokenizer=tokenizer,
    batch_size=1,  # one prompt per batch
    max_length=4096,  # forwarded to model.generate as max_length
    temperature=0.01,  # very low temperature; sampling is still enabled inside batch_generate
    show_progress=True
)

# Persist the decoded strings as a NumPy string array.
# NOTE(review): ndarray.tofile writes the raw array bytes without dtype/shape
# metadata, so the reader must already know both; confirm this is the intended format.
generated_responses = np.array(responses)
generated_responses.tofile('output')

100%|██████████| 1/1 [02:00<00:00, 120.17s/it]


In [11]:
# Inspect the decoded generations (each string begins with the echoed prompt).
responses

['Give the graph edgelist associated to the following features.-Number of nodes: 0.0-Number of edges: 50.0-Average degree: 589.0-Number of triangles: 23.56-Clustering coefficient: 3702.0-Max k cores: 0.6226034308779012-Number of communities: 18.0-Modularity: 0.00016666666666666666-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of the graph: 589.0-Average degree of 