# Pretrained Inference Promting/ Fine Tuning LoRA/ PEFT/ Reward Models

In [None]:
!pip install transformers
!pip3 install datasets

In [None]:
from datasets import load_dataset
import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig

In [None]:
ds = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset")

# Import Models

In [None]:
model_name='google/flan-t5-base'
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Train/ Test Split

In [None]:
from datasets import DatasetDict

# Split the dataset
train_testvalid = ds['train'].train_test_split(test_size=0.2, seed=42)

# Further split the test set into test and validation
test_valid_split = train_testvalid['test'].train_test_split(test_size=0.5, seed=42)

# Combine into a single DatasetDict
dataset = DatasetDict({
    'train': train_testvalid['train'],
    'validation': test_valid_split['train'],
    'test': test_valid_split['test']
})

# Now you have your final_dataset_dict with train, validation, and test sets
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 21497
    })
    validation: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 2687
    })
    test: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 2688
    })
})


In [None]:
# Initialize lists to store the output data
prompts = []
baseline_summaries = []
model_generations = []

# 1- Zero Shot No Promt Template

In [None]:
example_indice = [308]

In [None]:
for i, index in enumerate(example_indice):
    dialogue = ds['train'][index]['instruction']
    response = ds['train'][index]['response']


    inputs = tokenizer(dialogue, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0],
        skip_special_tokens=True
    )

    # Append the results to the lists
    prompts.append("No Promt")
    baseline_summaries.append(response)
    model_generations.append(output)

    print('Example ', i + 1)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(f'BASELINE HUMAN SUMMARY:\n{response}')
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

Example  1
INPUT PROMPT:
can you help me to cancel order {{Order Number}} ?
BASELINE HUMAN SUMMARY:
Of course, I'm here to help you with canceling your order associated with the purchase number {{Order Number}}. I understand that circumstances may have changed, and you no longer wish to proceed with this purchase. Let's resolve this for you as quickly as possible. 

To cancel your order, please follow these steps:
1. Sign in to your {{Online Company Portal Info}}.
2. Locate the '{{Online Order Interaction}}' or '{{Online Order Interaction}}' section.
3. Find the order with the purchase number {{Order Number}} and click on it.
4. Look for the option labeled '{{Online Order Interaction}}' and click on it to initiate the cancellation process.
5. Follow any additional prompts or questions to complete the cancellation.

If you encounter any difficulties along the way or have any questions, our dedicated team is available during {{Customer Support Hours}}. You can reach us at {{Customer Supp

# 2- Zero shot with a promt template

In [None]:
def make_prompt(example_indices_full, example_index_to_response):
    prompt = ''
    for index in example_indices_full:
      dialogue = ds['train'][index]['instruction']
      response = ds['train'][index]['response']

        # The stop sequence '{summary}\n\n\n' is important for FLAN-T5. Other models may have their own preferred stop sequence.
      prompt += f"""

      'Answer the question based on the context below. Keep the answer short. Respond "Unsure about answer" if not sure about the answer.\n\n'

      Dialogue:
      {dialogue}
      Response:
      {response}
      """
    dialogue = ds['train'][example_index_to_response]['instruction']

    prompt += f"""
    Dialogue:
    {dialogue}
    Response:
    """
    return prompt

In [None]:
example_indices_single = [365]
example_index_to_response = 308

one_shot_prompt = make_prompt(example_indices_single, example_index_to_response)

In [None]:
response = ds['train'][example_index_to_response]['instruction']

inputs = tokenizer(one_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )[0],
    skip_special_tokens=True
)

# Append the results to the lists
baseline_summaries.append(response)
model_generations.append(output)

print(f'BASELINE HUMAN SUMMARY:\n{response}\n')
print(f'MODEL GENERATION - ONE SHOT:\n{output}')

BASELINE HUMAN SUMMARY:
can you help me to cancel order {{Order Number}} ?

MODEL GENERATION - ONE SHOT:
Sure about answer


# 3-Few Shot Inference with a Promt Template

In [None]:
example_indices_full = [365, 2941, 4500]
example_index_to_response = 308

few_shot_prompt = make_prompt(example_indices_full, example_index_to_response)

In [None]:
response = ds['train'][example_index_to_response]['instruction']

inputs = tokenizer(few_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )[0],
    skip_special_tokens=True
)

# Append the results to the lists
baseline_summaries.append(response)
model_generations.append(output)

print(f'BASELINE HUMAN SUMMARY:\n{response}\n')
print(f'MODEL GENERATION - Few SHOT:\n{output}')

BASELINE HUMAN SUMMARY:
can you help me to cancel order {{Order Number}} ?

MODEL GENERATION - Few SHOT:
I'm here to help you with your concern about misplacing your invoices from Salutation Client Last Name. Please provide me with the approximate date range for the invoices, any unique


#Fine Tuning

In [None]:
%pip install \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    loralib==0.1.1 \
    peft==0.3.0 --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [None]:
from transformers import TrainingArguments, Trainer
from datasets import load_dataset
import time
import evaluate
import pandas as pd
import numpy as np

In [None]:
from datasets import DatasetDict

# Split the dataset
train_testvalid = ds['train'].train_test_split(test_size=0.2, seed=42)

# Further split the test set into test and validation
test_valid_split = train_testvalid['test'].train_test_split(test_size=0.5, seed=42)

# Combine into a single DatasetDict
dataset = DatasetDict({
    'train': train_testvalid['train'],
    'validation': test_valid_split['train'],
    'test': test_valid_split['test']
})

# Now you have your final_dataset_dict with train, validation, and test sets
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 21497
    })
    validation: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 2687
    })
    test: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 2688
    })
})


In [None]:
original_model = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-base', torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-base')



#Finding trainable parameters

In [None]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(original_model))

trainable model parameters: 247577856
all model parameters: 247577856
percentage of trainable model parameters: 100.00%


In [None]:
ds

DatasetDict({
    train: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 26872
    })
})

In [None]:
def tokenize_function(example):
    start_prompt = 'Answer the question based on the context below. Keep the answer short. Respond "Unsure about answer" if not sure about the answer.\n\n'
    end_prompt = '\n\nAnswer: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["instruction"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example["response"], padding="max_length", truncation=True, return_tensors="pt").input_ids

    return example

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['flags', 'intent', 'instruction', 'response','category'])
#we created 'input ids' and 'labels' instead

Map:   0%|          | 0/21497 [00:00<?, ? examples/s]

Map:   0%|          | 0/2687 [00:00<?, ? examples/s]

Map:   0%|          | 0/2688 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 21497
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 2687
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 2688
    })
})

# Full Fine Tuning  Hugging Face Train

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
model_path = '/content/drive/MyDrive/GenAI_Project'

training_args = TrainingArguments(
    output_dir=model_path,
    learning_rate=1e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_steps=1,
    max_steps=1
)

trainer = Trainer(
    model=original_model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation']
)

max_steps is given, it will override any value given in num_train_epochs


In [None]:
trainer.train()
# It will take 42 hours on my gpu so whatever.

# Save the model using the model instance
original_model.save_pretrained(model_path)
# Save the model weights and configuration
tokenizer.save_pretrained(model_path)
# Save the tokenizer if needed

print(f'Model saved to {model_path}')

In [None]:
finetuned_model_outputs = instruct_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
finetuned_model_text_output = tokenizer.decode(instruct_model_outputs[0], skip_special_tokens=True)
finetuned_model_summaries.append(instruct_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, instruct_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'instruct_model_summaries'])
df

# Quantiative Evaluation

In [None]:
rouge = evaluate.load('rouge')

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Create a list for each generation, save the ouptus in each of them then create a dataframe.

In [None]:
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []

for _, dialogue in enumerate(dialogues):
    prompt = f"""
    'Answer the question based on the context below. Keep the answer short. Respond "Unsure about answer" if not sure about the answer.\n\n'.
    {dialogue}
    Summary: """
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)
    original_model_summaries.append(original_model_text_output)

    instruct_model_outputs = instruct_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    instruct_model_text_output = tokenizer.decode(instruct_model_outputs[0], skip_special_tokens=True)
    instruct_model_summaries.append(instruct_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, instruct_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'instruct_model_summaries'])
df

Rouge Score creation

In [None]:
original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

instruct_model_results = rouge.compute(
    predictions=instruct_model_summaries,
    references=human_baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)

# PEFT Fine Tuning

Add LoRA adapter layers/parameters to the original LLM to be trained and freez other layers.

In [None]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)

#Add LoRA adapter layers/parameters to the original LLM to be trained.
peft_model = get_peft_model(original_model,
                            lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 3538944
all model parameters: 251116800
percentage of trainable model parameters: 1.41%


In [None]:
model_path = '/content/drive/MyDrive/GenAI_Project'

peft_training_args = TrainingArguments(
    output_dir=model_path,
    auto_find_batch_size=True,
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    num_train_epochs=1,
    logging_steps=1,
    max_steps=1,
    logging_dir=f'{model_path}/logs',  # Directory for logs
    evaluation_strategy='steps',  # Evaluate every logging step
    save_steps=500,  # Save model every 500 steps
    save_total_limit=2,  # Keep only the last 2 checkpoints
)

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
)

max_steps is given, it will override any value given in num_train_epochs


In [None]:
peft_trainer.train()

peft_model_path= '/content/drive/MyDrive/GenAI_Project'

peft_trainer.model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

print(f'Model saved to {model_path}')

# Reward Model

In [None]:
# Installing the Reinforcement Learning library directly from github.
!pip install git+https://github.com/huggingface/trl.git

Collecting git+https://github.com/huggingface/trl.git
  Cloning https://github.com/huggingface/trl.git to /tmp/pip-req-build-h4hhdj1o
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/trl.git /tmp/pip-req-build-h4hhdj1o
  Resolved https://github.com/huggingface/trl.git to commit 10c2f63b2ac8564cca28aa1598a1f3ac6a5fc63c
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tyro>=0.5.11 (from trl==0.12.0.dev0)
  Downloading tyro-0.8.11-py3-none-any.whl.metadata (8.4 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl==0.12.0.dev0)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
Downloading tyro-0.8.11-py3-none-any.whl (105 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.9/105.9 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)
Building wheels

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, GenerationConfig
from datasets import load_dataset
from peft import PeftModel, PeftConfig, LoraConfig, TaskType

# trl: Transformer Reinforcement Learning library
from trl import PPOTrainer, PPOConfig, AutoModelForSeq2SeqLMWithValueHead
from trl import create_reference_model
from trl.core import LengthSampler

import torch
import evaluate

import numpy as np
import pandas as pd

# tqdm library makes the loops show a smart progress meter.
from tqdm import tqdm
tqdm.pandas()

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 21497
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 2687
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 2688
    })
})

# Preping Proximal Policy Optimization (PPO)

I could not train PEFT yet, so I will use PRETRAINED straight up. But normally we could use PEFT model and Lora trainable then bum!

In [None]:
ppo_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(original_model,
                                                               torch_dtype=torch.bfloat16,
                                                               is_trainable=True)

print(f'PPO model parameters to be updated (ValueHead + 769 params):\n{print_number_of_trainable_model_parameters(ppo_model)}\n')
print(ppo_model.v_head)

PPO model parameters to be updated (ValueHead + 769 params):
trainable model parameters: 3539713
all model parameters: 251117569
percentage of trainable model parameters: 1.41%

ValueHead(
  (dropout): Dropout(p=0.1, inplace=False)
  (summary): Linear(in_features=768, out_features=1, bias=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)


In [None]:
ref_model = create_reference_model(ppo_model)

print(f'Reference model parameters to be updated:\n{print_number_of_trainable_model_parameters(ref_model)}\n')

Reference model parameters to be updated:
trainable model parameters: 0
all model parameters: 251117569
percentage of trainable model parameters: 0.00%



# Reward model

In [None]:
toxicity_model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
toxicity_tokenizer = AutoTokenizer.from_pretrained(toxicity_model_name, device_map="auto")
toxicity_model = AutoModelForSequenceClassification.from_pretrained(toxicity_model_name, device_map="auto")
print(toxicity_model.config.id2label)

{0: 'nothate', 1: 'hate'}


In [None]:
non_toxic_text = "Hello how are you? Can I return my pants"
toxic_text = "I hate you, I hate Turks and they should not be allowed to live in this country."

toxicity_input_ids = toxicity_tokenizer(non_toxic_text, return_tensors="pt").input_ids
toxicity_input_ids_toxic = toxicity_tokenizer(toxic_text, return_tensors="pt").input_ids

logits = toxicity_model(input_ids=toxicity_input_ids).logits
logits_hate = toxicity_model(toxicity_input_ids_toxic).logits
print(f'logits [not hate, hate]: {logits.tolist()[0]}')
print(f'logits for hate [not hate, hate]: {logits_hate.tolist()[0]}')

# Print the probabilities for [not hate, hate]
probabilities = logits.softmax(dim=-1).tolist()[0]
probabilities_hate = logits_hate.softmax(dim=-1).tolist()[0]
print(f'probabilities [not hate, hate]: {probabilities}')
print(f'probabilities For hate[not hate, hate]: {probabilities_hate}')

# get the logits for "not hate" - this is the reward!
not_hate_index = 0
nothate_reward = (logits[:, not_hate_index]).tolist()
hate_reward = (logits_hate[:, not_hate_index]).tolist()

print(f'reward for nonhate (high): {nothate_reward}')
print(f'reward For Hate (low): {hate_reward}')

logits [not hate, hate]: [4.2944135665893555, -3.6936700344085693]
logits for hate [not hate, hate]: [-4.414653301239014, 3.913118362426758]
probabilities [not hate, hate]: [0.9996606111526489, 0.0003393687366042286]
probabilities For hate[not hate, hate]: [0.00024165151990018785, 0.9997583031654358]
reward for nonhate (high): [4.2944135665893555]
reward For Hate (low): [-4.414653301239014]


# Toxicity Reward sentiment pipeline Prep

We will use Not Hate class in PPO for this problem.

In [None]:
device = 0 if torch.cuda.is_available() else "cpu"

sentiment_pipe = pipeline("sentiment-analysis",
                          model=toxicity_model_name,
                          device=device)
reward_logits_kwargs = {
    "top_k": None, # Return all scores.
    "function_to_apply": "none", # Set to "none" to retrieve raw logits.
    "batch_size": 16
}

reward_probabilities_kwargs = {
    "top_k": None, # Return all scores.
    "function_to_apply": "softmax", # Set to "softmax" to apply softmax and retrieve probabilities.
    "batch_size": 16
}

print(sentiment_pipe(non_toxic_text, **reward_logits_kwargs))

[{'label': 'nothate', 'score': 4.2944135665893555}, {'label': 'hate', 'score': -3.6936700344085693}]


There is a Toxicity evaluation metric

In [None]:
toxicity_evaluator = evaluate.load("toxicity",
                                    toxicity_model_name,
                                    module_type="measurement",
                                    toxic_label="hate")

Downloading builder script:   0%|          | 0.00/6.08k [00:00<?, ?B/s]

In [None]:
toxicity_score = toxicity_evaluator.compute(predictions=[
    non_toxic_text
])

print("Toxicity score for non-toxic text:")
print(toxicity_score["toxicity"])

toxicity_score = toxicity_evaluator.compute(predictions=[
    toxic_text
])

print("\nToxicity score for toxic text:")
print(toxicity_score["toxicity"])

Toxicity score for non-toxic text:
[0.0003393687366042286]

Toxicity score for toxic text:
[0.9997583031654358]


In [None]:
def evaluate_toxicity(model,
                      toxicity_evaluator,
                      tokenizer,
                      dataset,
                      num_samples):

    max_new_tokens=100

    toxicities = []
    input_texts = []
    for i, sample in tqdm(enumerate(dataset)):
        input_text = sample["instruction"]

        if i > num_samples:
            break

        input_ids = tokenizer(input_text, return_tensors="pt", padding=True).input_ids

        generation_config = GenerationConfig(max_new_tokens=max_new_tokens,
                                             top_k=0.0,
                                             top_p=1.0,
                                             do_sample=True)

        response_token_ids = model.generate(input_ids=input_ids,
                                            generation_config=generation_config)

        generated_text = tokenizer.decode(response_token_ids[0], skip_special_tokens=True)

        toxicity_score = toxicity_evaluator.compute(predictions=[(input_text + " " + generated_text)])

        toxicities.extend(toxicity_score["toxicity"])

    # Compute mean & std using np.
    mean = np.mean(toxicities)
    std = np.std(toxicities)

    return mean, std

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")

mean_before_detoxification, std_before_detoxification = evaluate_toxicity(model=ref_model,
                                                                          toxicity_evaluator=toxicity_evaluator,
                                                                          tokenizer=tokenizer,
                                                                          dataset=dataset["test"],
                                                                          num_samples=10)

print(f'toxicity [mean, std] before detox: [{mean_before_detoxification}, {std_before_detoxification}]')

11it [01:02,  5.65s/it]

toxicity [mean, std] before detox: [0.0005412627202035351, 0.0003899247884911085]





# RAG Implementation

In [None]:
!pip install langchain==0.1.5
!pip install --quiet langchain_experimental
!pip install torch
!pip install transformers
!pip install faiss-cpu
!pip install pypdf
!pip install sentence-transformers
!pip install unstructured==0.12.3
!pip install unstructured[pdf]==0.12.3
!pip install tiktoken
!pip install huggingface_hub
from huggingface_hub import login

hf_token = "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"
login(token=hf_token, add_to_git_credential=True)

In [None]:
!git clone https://github.com/djmjs/RAG.git

Cloning into 'RAG'...
remote: Enumerating objects: 12, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 12 (delta 3), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (12/12), 16.81 KiB | 5.60 MiB/s, done.
Resolving deltas: 100% (3/3), done.


In [None]:
from langchain.document_loaders import DirectoryLoader
import pandas as pd
# Load the CSV file
loader = DirectoryLoader('RAG', glob='*.csv')
documents = loader.load()

# Check the loaded documents
for doc in documents:
    print(doc)

page_content='


Id
Full Name
Address
age
gender
Latitude
Longitude
Email Adress
Order Datetime
Order Status
Order Total
Items
Total sales
Order Count
Rating
Average Rating


10
Adam Martinez
China, Beijing Shi
30
Female
40
116
adam.martinez@internalmail
2021-09-14 00:00:00
CANCELLED
209
Boy's Coat (Blue)
80
1
8
6


20
Adam Miller
193, Bannerghatta Main Rd
68
Female
13
78
adam.miller@internalmail
2021-09-18 00:00:00
COMPLETE
54
Boy's Coat (Blue)
10816
90
4
6


30
Adam Walker
Behrenstraße 42
70
Female
53
13
adam.walker@internalmail
2021-09-22 00:00:00
COMPLETE
43
Boy's Coat (Blue)
319
3
8
6


40
Adan Lamica
Behrenstraße 42
72
Female
53
13
adan.lamica@internalmail
2021-09-26 00:00:00
COMPLETE
305
Boy's Coat (Brown)
137
3
5
6


50
Adeline Iannotti
Floreasca Park 43 Soseaua
16
Female
44
26
adeline.iannotti@internalmail
2021-09-30 00:00:00
COMPLETE
153
Boy's Coat (Brown)
3936
64
6
6


60
Albert Altmeer
Juana Manso 1069,
31
Female
-35
-58
albert.altmeer@internalmail
2021-10-04 00:00:00
COMPL

In [None]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.document_loaders.csv_loader import CSVLoader

loaders = {
        '.csv' : CSVLoader,
    }

def create_directory_loader(file_type, directory_path):
            return DirectoryLoader(
                path=directory_path,
                glob=f"**/*{file_type}",
                loader_cls=loaders[file_type],
                )
path = 'RAG'
csv_loader = create_directory_loader('.csv', path)

In [None]:
documents = loader.load()
documents

[Document(metadata={'source': 'RAG/customer_orders.csv'}, page_content="\n\n\nId\nFull Name\nAddress\nage\ngender\nLatitude\nLongitude\nEmail Adress\nOrder Datetime\nOrder Status\nOrder Total\nItems\nTotal sales\nOrder Count\nRating\nAverage Rating\n\n\n10\nAdam Martinez\nChina, Beijing Shi\n30\nFemale\n40\n116\nadam.martinez@internalmail\n2021-09-14 00:00:00\nCANCELLED\n209\nBoy's Coat (Blue)\n80\n1\n8\n6\n\n\n20\nAdam Miller\n193, Bannerghatta Main Rd\n68\nFemale\n13\n78\nadam.miller@internalmail\n2021-09-18 00:00:00\nCOMPLETE\n54\nBoy's Coat (Blue)\n10816\n90\n4\n6\n\n\n30\nAdam Walker\nBehrenstraße 42\n70\nFemale\n53\n13\nadam.walker@internalmail\n2021-09-22 00:00:00\nCOMPLETE\n43\nBoy's Coat (Blue)\n319\n3\n8\n6\n\n\n40\nAdan Lamica\nBehrenstraße 42\n72\nFemale\n53\n13\nadan.lamica@internalmail\n2021-09-26 00:00:00\nCOMPLETE\n305\nBoy's Coat (Brown)\n137\n3\n5\n6\n\n\n50\nAdeline Iannotti\nFloreasca Park 43 Soseaua\n16\nFemale\n44\n26\nadeline.iannotti@internalmail\n2021-09-30 00:

- Fine tuning, preprocess tests (id's ect)
- PEFT
- Something wrong with the reward model
- Quantization

TRAIN   "PEFT"