In [None]:
%pip install transformers torch

In [9]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name_or_path = "fblgit/juanako-7b-UNA"
# Load the model
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    # low_cpu_mem_usage=True,
    # device_map="cuda:0"
)
# Create the tokenizer from the model object
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# print(llm("AI is going to"))
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

nltk.download('punkt')

Loading checkpoint shards: 100%|██████████| 3/3 [01:03<00:00, 21.20s/it]
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\wesle\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import numpy as np

torch.set_default_tensor_type('torch.FloatTensor')

class DQNNetwork(nn.Module):
    def __init__(self, state_size, action_size, hidden_size):
        super(DQNNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class DQNAgent:
    def __init__(self, state_size, action_size, hidden_size, learning_rate, gamma):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = DQNNetwork(state_size, action_size, hidden_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def select_action(self, state):
        state = state.float()
        state_tensor = pad_or_truncate(state, max_length=512)
        state_tensor = state_tensor.unsqueeze(0)  # Ensure it has a batch dimension

        if random.random() <= self.epsilon:
            return torch.tensor([random.randrange(self.action_size)], dtype=torch.long)
        else:
            with torch.no_grad():
                q_values = self.model(state_tensor)
            return torch.tensor([np.argmax(q_values.cpu().detach().numpy())], dtype=torch.long)


    def update(self, state, action, reward, next_state, done):
        state = pad_or_truncate(state, max_length=512).float()
        next_state = pad_or_truncate(next_state, max_length=512).float()

        reward = torch.tensor(reward, dtype=torch.float)
        done = torch.tensor(done, dtype=torch.float)

        # Compute Q values for current and next state
        q_values = self.model(state)
        q_next = self.model(next_state).detach()

        # Compute the expected Q values
        q_update = reward + self.gamma * q_next.max(1)[0] * (1 - done)

        # Compute loss
        loss = F.mse_loss(q_values.gather(1, action.unsqueeze(1)), q_update.unsqueeze(1))

        # Backpropagation
        self.optimizer.zero_grad()
        loss.backward()
        for param in self.model.parameters():
            param.grad.data.clamp_(-1, 1)
        self.optimizer.step()

        # Epsilon decay
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

def environment_step(model, input_ids, action):
    input_ids = torch.cat((input_ids, torch.tensor([[action]])), dim=1)
    with torch.no_grad():
        outputs = model(input_ids=input_ids)
    logits = outputs.logits
    next_token_id = torch.argmax(logits[:, -1, :], dim=-1).unsqueeze(0)
    reward = compute_reward(input_ids, next_token_id)
    done = next_token_id.item() == tokenizer.eos_token_id
    return input_ids, reward, done

def compute_perplexity(sequence):
    inputs = tokenizer(sequence, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    # Perplexity is e^loss
    perplexity = torch.exp(outputs.loss)
    return 1 / (1 + np.log(perplexity.item() + 1))

def compute_reward(input_ids, next_token_id, target_context_embedding=None):
    """ Compute the reward for the generated sequence. """
    input_ids = input_ids.flatten()
    next_token_id = next_token_id.item() if isinstance(next_token_id, torch.Tensor) else next_token_id
    sequence_tokens = input_ids.tolist() + [next_token_id]
    
    # Decode the sequence tokens into a sentence and print it
    sequence_sentence = tokenizer.decode(sequence_tokens)
    print(f"Sequence: {sequence_sentence}")
    
    fluency_reward = compute_fluency(sequence_tokens)
    relevance_reward = 0
    if target_context_embedding is not None:
        sequence_embedding = compute_embeddings(tokenizer.decode(sequence_tokens))
        relevance_reward = compute_relevance(sequence_embedding, target_context_embedding)
    diversity_reward = lexical_diversity(sequence_tokens)
    perplexity_reward = -compute_perplexity(tokenizer.decode(sequence_tokens))
    fluency_reward = normalize_reward(fluency_reward)
    relevance_reward = normalize_reward(relevance_reward)
    diversity_reward = normalize_reward(diversity_reward)
    perplexity_reward = normalize_reward(perplexity_reward)
    weights = {'fluency': 0.5, 'relevance': 0.1, 'diversity': 0.1, 'perplexity': 0.7}
    total_reward = (weights['fluency'] * fluency_reward + 
                    weights['relevance'] * relevance_reward +
                    weights['diversity'] * diversity_reward + 
                    weights['perplexity'] * perplexity_reward)
    print(f"Fluency: {fluency_reward}, Relevance: {relevance_reward}, Diversity: {diversity_reward}, Perplexity: {perplexity_reward}, Total: {total_reward}")

    return total_reward

def normalize_reward(reward, min_reward=-1, max_reward=1):
    # Normalize reward to be within [min_reward, max_reward]
    return (reward - min_reward) / (max_reward - min_reward) * 2 - 1

def compute_fluency(tokens):
    sentences = [tokenizer.decode(sentence_tokens) for sentence_tokens in tokens]
    total_words = sum(len(word_tokenize(sentence)) for sentence in sentences)
    num_sentences = len(sentences)
    avg_sentence_length = total_words / num_sentences if num_sentences > 0 else 0

    # Define the ideal sentence length
    ideal_sentence_length = 20

    # Calculate the fluency score based on how close the average sentence length is to the ideal length
    fluency_score = 1 - abs(avg_sentence_length - ideal_sentence_length) / ideal_sentence_length

    # Ensure the fluency score is between 0 and 1
    fluency_score = max(0, min(fluency_score, 1))

    return fluency_score

import math

def lexical_diversity(tokens):
    words = [tokenizer.decode(token) for token in tokens]
    unique_words = set(words)
    num_types = len(unique_words)
    num_tokens = len(words)
    
    if num_tokens > 0 and num_types > 0:
        herdan_c = math.log(num_types) / math.log(num_tokens)
    else:
        herdan_c = 0

    return herdan_c

def compute_relevance(sequence_embedding, target_context_embedding):
    cosine_similarity = torch.nn.functional.cosine_similarity(sequence_embedding, target_context_embedding)
    return cosine_similarity.item()

def compute_embeddings(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    outputs = model(inputs)
    return outputs.last_hidden_state.mean(dim=1)

class LLMDQN:
    def __init__(self, model, dqn_agent, tokenizer):
        self.model = model
        self.dqn_agent = dqn_agent
        self.tokenizer = tokenizer

    def generate_sequence(self, prompt):
        input_ids = self.tokenizer.encode(prompt, return_tensors='pt')
        generated_sequence = []
        while not self.end_condition_met(input_ids):
            action = self.dqn_agent.select_action(input_ids)
            input_ids, reward = environment_step(self.model, input_ids, action)
            self.dqn_agent.update(input_ids, action, reward, input_ids)
            generated_sequence.append(action)
        return self.tokenizer.decode(generated_sequence)

    def end_condition_met(self, input_ids, max_length=50):
        return (input_ids[-1] == tokenizer.eos_token_id) or (input_ids.size(1) > max_length)

from tqdm.auto import tqdm

def train(llm_dqn, dqn_agent, num_episodes, target_context):
    for episode in tqdm(range(num_episodes), desc="Training Episodes"):
        input_ids = tokenizer.encode(target_context, return_tensors='pt')
        total_reward = 0
        done = False
        while not done:
            action = dqn_agent.select_action(input_ids)
            next_input_ids, reward, done = environment_step(llm_dqn.model, input_ids, action)
            dqn_agent.update(input_ids, action, reward, next_input_ids, done)
            input_ids = next_input_ids
            total_reward += reward
            print(f"Action: {action}, Reward: {reward}, Total Reward: {total_reward}")
            if done:
                print(f"Episode {episode + 1} Complete. Total Reward: {total_reward}")
                print(f"Generated Sequence: {llm_dqn.generate_sequence(target_context)}\n")


def pad_or_truncate(sequence, max_length=512, pad_token_id=0):
    sequence = sequence.view(-1)
    sequence_length = sequence.size(0)
    if sequence_length > max_length:
        return sequence[:max_length].unsqueeze(0)
    elif sequence_length < max_length:
        padding = torch.full((max_length - sequence_length,), pad_token_id, dtype=sequence.dtype)
        return torch.cat((sequence, padding), dim=0).unsqueeze(0)
    else:
        return sequence.unsqueeze(0)

state_size = 512
action_size = tokenizer.vocab_size
hidden_size = 128
learning_rate = 0.001
gamma = 0.99
target_context = "the quick brown fox jumps over "

dqn_agent = DQNAgent(state_size, action_size, hidden_size, learning_rate, gamma)
llm_dqn = LLMDQN(model, dqn_agent, tokenizer)

train(llm_dqn, dqn_agent, num_episodes=100, target_context=target_context)


Training Episodes:   0%|          | 0/100 [00:00<?, ?it/s]

Sequence: <s> the quick brown fox jumps over buchan
Fluency: 0.054166666666666696, Relevance: 0.0, Diversity: 1.0, Perplexity: -0.13848591339324035, Total: 0.05784037663671318
Action: tensor([22699]), Reward: 0.05784037663671318, Total Reward: 0.05784037663671318
Sequence: <s> the quick brown fox jumps over buch거n
Fluency: 0.05384615384615388, Relevance: 0.0, Diversity: 1.0, Perplexity: -0.11847582534427681, Total: 0.06768516425093854
Action: tensor([30013]), Reward: 0.06768516425093854, Total Reward: 0.12552554088765172
Sequence: <s> the quick brown fox jumps over buch거ils

Fluency: 0.050000000000000044, Relevance: 0.0, Diversity: 1.0, Perplexity: -0.11905108012302612, Total: 0.06547445993848697
Action: tensor([4544]), Reward: 0.06547445993848697, Total Reward: 0.1910000008261387
Sequence: <s> the quick brown fox jumps over buch거ils underarter

Fluency: 0.050000000000000044, Relevance: 0.0, Diversity: 1.0, Perplexity: -0.1089907549327751, Total: 0.07050462253361248
Action: tensor([128

In [3]:
import torch

# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    print("CUDA (GPU support) is available in PyTorch!")
    print("Number of GPU devices available:", torch.cuda.device_count())
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
else:
    print("CUDA (GPU support) is not available in PyTorch.")


CUDA (GPU support) is not available in PyTorch.
