In [2]:
import sys

# Make the project-local package directory importable. It has to run before
# the `import fairscale` that follows (presumably fairscale is installed
# there -- confirm). NOTE(review): absolute, cluster-specific path.
sys.path.append('/projectnb/textconv/llama/packages')

import fairscale

In [3]:
# Show the kernel's working directory: the checkpoint and tokenizer paths
# passed to Llama.build further down are relative to it.
!pwd

/projectnb/textconv/llama


In [4]:
from fairscale.nn.model_parallel.initialize import (
    get_model_parallel_rank,
    initialize_model_parallel,
    model_parallel_is_initialized,
)
import torch
import random

In [5]:
from llama.generation import Llama, Dialog
from llama.model import ModelArgs, Transformer
from llama.tokenizer import Tokenizer

In [4]:


# Report the visible CUDA devices and bind `device`, which the dataset
# generation loop later uses to move its inputs next to the model.
if torch.cuda.is_available():
    # Get the number of CUDA devices
    num_cuda_devices = torch.cuda.device_count()
    print(f"Number of CUDA devices available: {num_cuda_devices}")

    # List the name of each CUDA device
    for i in range(num_cuda_devices):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")

    # Bind the device explicitly instead of relying on whatever the listing
    # loop happened to leave behind (previously: the *last* GPU).
    # With the single GPU of the recorded run this is the same device.
    # NOTE(review): on multi-GPU nodes confirm this matches the GPU the model
    # is loaded on.
    device = torch.device("cuda:0")
else:
    print("CUDA is not available on this system.")
    # Previously `device` stayed undefined on this branch, so later cells
    # failed with a NameError instead of a meaningful message.
    device = torch.device("cpu")

Number of CUDA devices available: 1
Device 0: Tesla V100-SXM2-16GB


In [1]:
import os
# Environment for a single-process "distributed" run (rank 0 of world size 1,
# rendezvous on localhost:8888); presumably read by the distributed
# initialisation that Llama.build performs -- confirm.
os.environ.update(
    {
        'RANK': '0',
        'WORLD_SIZE': '1',
        'MASTER_ADDR': 'localhost',
        'MASTER_PORT': '8888',
        # Debugging aid: asks CUDA to run kernel launches synchronously so
        # errors surface at the failing call.
        # NOTE(review): likely needs to be set before CUDA is initialised.
        'CUDA_LAUNCH_BLOCKING': '1',
    }
)

In [6]:
# Build the teacher model from the local checkpoint and tokenizer.
# Both paths are relative to the kernel's working directory.
generator = Llama.build(
    ckpt_dir="llama-2-7b-chat/",
    tokenizer_path="tokenizer.model",
    # Same value as the max_len=512 used when generating random inputs below.
    max_seq_len=512,
    max_batch_size=6,
)

> initializing model parallel with size 1
> initializing ddp with size 1
> initializing pipeline with size 1


RuntimeError: CUDA error: CUDA-capable device(s) is/are busy or unavailable
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [9]:
#from NoiseKD
import torch.nn as nn
def Linearize_Embedding(embedding_layer):
    """Build a bias-free ``nn.Linear`` that reproduces an embedding lookup.

    Feeding a one-hot row for token ``t`` to the returned layer yields the
    same vector as looking up ``t`` in ``embedding_layer``; any other float
    vector over the vocabulary yields the corresponding weighted sum of
    embedding rows.

    Args:
        embedding_layer: module whose ``weight`` has shape
            ``(vocab_size, embedding_dim)``.

    Returns:
        ``nn.Linear(vocab_size, embedding_dim, bias=False)`` whose weight is
        the transposed embedding matrix. The weight is wrapped without a
        copy, so it keeps the dtype and device of the embedding weight.
    """
    table = embedding_layer.weight.detach()
    num_tokens = table.shape[0]
    hidden_dim = table.shape[1]

    linear_layer = nn.Linear(num_tokens, hidden_dim, bias=False)
    # nn.Linear stores its weight as (out_features, in_features), i.e.
    # (embedding_dim, vocab_size) here, so the (vocab_size, embedding_dim)
    # table has to be transposed: one_hot @ weight.T == table[token].
    linear_layer.weight = nn.Parameter(table.T)
    return linear_layer

def batch_one_hot(input_sequences, vocab_size):
    """One-hot encode a batch of token-id sequences.

    Args:
        input_sequences: integer (int64) tensor of shape
            ``(batch_size, seq_len)`` with ids in ``[0, vocab_size)``.
        vocab_size: size of the one-hot (last) dimension.

    Returns:
        Tensor of shape ``(batch_size, seq_len, vocab_size)`` in the default
        floating dtype, on the same device as ``input_sequences``, with a 1
        at each token id and 0 elsewhere.
    """
    batch_size = input_sequences.size(0)
    max_seq_length = input_sequences.size(1)

    # Allocate next to the indices: scatter_ needs the destination and the
    # index tensor on the same device, and the default device is not
    # guaranteed to be where `input_sequences` lives.
    one_hot_input = torch.zeros(
        batch_size, max_seq_length, vocab_size, device=input_sequences.device
    )

    # Write a 1 at position `token id` along the vocabulary axis
    one_hot_input.scatter_(2, input_sequences.unsqueeze(2), 1)
    return one_hot_input

In [10]:
# Linear stand-in for the token embedding. The isinstance guard keeps this
# cell idempotent: once the model's tok_embeddings has been replaced by the
# nn.Linear, re-running the cell reuses it instead of "linearizing" a layer
# whose weight is already transposed.
float_embeddings = (
    generator.model.tok_embeddings
    if isinstance(generator.model.tok_embeddings, nn.Linear)
    else Linearize_Embedding(generator.model.tok_embeddings)
)

In [11]:
# Swap the embedding for its linear equivalent, in place on the loaded model.
# From here on the model is fed float tensors over the vocabulary (one-hots or
# arbitrary vectors) instead of integer token ids.
generator.model.tok_embeddings = float_embeddings #set, now it takes one-hots

In [14]:


def random_float_tensor(a=0.0, b=1.0, max_len=512, vocab_size=32_000):
    """Draw one random "soft token" sequence plus its zero-padded copy.

    The sequence length is picked uniformly from ``1..max_len`` (inclusive).
    Values are sampled uniformly from ``[a, b)`` in float32 and then cast to
    float16, so rounding can produce ``b`` itself.

    Args:
        a: lower bound of the uniform distribution.
        b: upper bound of the uniform distribution.
        max_len: maximum sequence length, also the padded length.
        vocab_size: size of the last dimension.

    Returns:
        Tuple ``(random_tensor, cpu_tensor)`` of float16 CPU tensors:
        ``random_tensor`` has shape ``(1, length, vocab_size)`` and
        ``cpu_tensor`` is the same data zero-padded along dim 1 to
        ``(1, max_len, vocab_size)``.

    Raises:
        ValueError: if ``max_len < 1`` (raised by ``random.randint``).
    """
    seq_len = random.randint(1, max_len)  # random input length

    # Both buffers are created directly on the CPU with an explicit dtype, so
    # nothing is allocated on the default device first and then copied back.
    random_tensor = (
        torch.empty(1, seq_len, vocab_size, dtype=torch.float32, device="cpu")
        .uniform_(a, b)
        .to(torch.float16)
    )
    zero_tensor = torch.zeros(
        1, max_len - seq_len, vocab_size, dtype=torch.float16, device="cpu"
    )

    # Padded copy kept on the CPU for saving
    cpu_tensor = torch.cat((random_tensor, zero_tensor), dim=1)

    return random_tensor, cpu_tensor

In [18]:
# Generate the distillation dataset: random float inputs over the vocabulary
# and, for each, the model's output at the last position of the sequence.
# Each file i holds `batch_per_file` samples as input{i}.pt / target{i}.pt.
data_dir =  "/projectnb/textconv/llama/tensor_dataset_2/"

# Create the output directory if needed (no-op when it already exists)
os.makedirs(data_dir, exist_ok=True)

files_n = 5 #how many files to generate
batch_per_file = 5 #how many batches in each file.
vocab_size = 32_000
max_len = 512
a = -1.0  # lower bound of the random input values
b = 2.0   # upper bound of the random input values
for i in range(files_n):
    input_tensor = torch.zeros(batch_per_file, max_len, vocab_size, dtype=torch.float16)
    target_tensor = torch.zeros(batch_per_file, vocab_size, dtype=torch.float32)
    for j in range(batch_per_file):
        # Use the settings configured above. The call used to repeat the
        # literals 512 / 32_000 and never passed a/b, so the configured
        # range was silently replaced by the function defaults [0, 1).
        pred_tensor, save_tensor = random_float_tensor(
            a=a, b=b, max_len=max_len, vocab_size=vocab_size
        )
        # Zero-padded copy is what gets stored as the input sample
        input_tensor[j] = save_tensor
        # The unpadded sequence is what the model actually sees
        pred_tensor = pred_tensor.to(device)
        # Targets are data, not something to backpropagate through
        with torch.no_grad():
            # second argument 0: presumably the start position -- confirm
            model_output = generator.model.forward(pred_tensor, 0)
        # Keep only the output at the last sequence position
        target_tensor[j] = model_output[:, -1, :]
    # NOTE(review): numbering always starts at 0, so re-running this cell
    # overwrites files already present in data_dir.
    input_path = os.path.join(data_dir, f"input{i}.pt")
    target_path = os.path.join(data_dir, f"target{i}.pt")
    torch.save(input_tensor, input_path)
    torch.save(target_tensor, target_path)

In [19]:
# Drop the notebook's reference to the loaded model.
# NOTE(review): `float_embeddings` still references the swapped-in embedding
# layer, and this alone may not return cached GPU memory -- confirm if the
# goal is to free the device.
del generator


In [None]:
# TODO: turn this into a standalone Python script and run it from the command line.

In [10]:
# Find the highest N among existing "input<N>.pt" files so that new files can
# continue the numbering; max_number stays -1 when there are none.
data_dir =  "/projectnb/textconv/llama/kd_data/"

# A directory that has not been created yet simply contains no files.
# Without this check os.listdir raises FileNotFoundError on a fresh dataset.
filenames = os.listdir(data_dir) if os.path.isdir(data_dir) else []

max_number = -1
for filename in filenames:
    if filename.startswith("input") and filename.endswith(".pt"):
        try:
            number = int(filename[len("input"):-len(".pt")])
        except ValueError:
            # Not a numbered dataset file (e.g. "input_old.pt"): skip it
            continue
        max_number = max(max_number, number)

if max_number != -1:
    print(f"The maximum file number found is: {max_number}")
else:
    print("No matching files found in the directory.")
    

No matching files found in the directory.


In [11]:
# Preview the ten file indices that follow the highest existing one
# (starts at 0 when no files were found, since max_number is then -1).
next_index = max_number + 1
for i in range(next_index, next_index + 10):
    print(i)

0
1
2
3
4
5
6
7
8
9
