# Evaluation

In [1]:
import os
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import AuthenticationException, ConnectionError
from transformers import T5ForConditionalGeneration, T5Tokenizer
import json
import time

  from tqdm.autonotebook import tqdm, trange


## Load the .env file

In [2]:
# Make the variables from the .env file available through the process environment.
load_dotenv()

# Service credentials; each value is None when the variable is not set.
openai_api_key = os.environ.get('OPENAI_API_KEY')
elasticsearch_cloud_id = os.environ.get('ES_CLOUD_ID')
elasticsearch_api_key = os.environ.get('ES_API_KEY')
replicate_api_key = os.environ.get('REPLICATE_API_KEY')

## Index functions

In [3]:
def create_es_index(client, index_name, dims=768):
    """Drop and recreate ``index_name`` with a text + dense-vector mapping.

    Any existing index with the same name is deleted first, so calling this
    function discards previously indexed documents.

    Args:
        client: Elasticsearch client exposing ``indices.delete``,
            ``indices.create`` and ``indices.get_alias``.
        index_name: Name of the index to (re)create.
        dims: Dimensionality of the ``text_vector`` field. Defaults to 768,
            the value this notebook originally hard-coded (presumably the
            embedding size of the sentence encoder used below - confirm when
            switching encoders).

    Returns:
        The response of ``client.indices.get_alias`` for the new index.
    """
    index_settings = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "mappings": {
            "properties": {
                "text": {"type": "text"},
                "text_vector": {"type": "dense_vector", "dims": dims},
            }
        }
    }

    # ignore_unavailable keeps the delete from failing when the index does not exist yet.
    client.indices.delete(index=index_name, ignore_unavailable=True)
    client.indices.create(index=index_name, body=index_settings)

    return client.indices.get_alias(index=index_name)

In [4]:
def create_t5_embedding(encoder, chunks):
    """Pair every chunk's text with its embedding.

    Args:
        encoder: Object whose ``encode(text)`` result offers ``tolist()``.
        chunks: Iterable of mappings that each carry a ``'text'`` entry.

    Returns:
        ``{'documents': [...]}`` where each document holds the chunk's
        ``'text'`` and its embedding as a list under ``'text_vector'``.
    """
    return {
        'documents': [
            {
                'text': chunk['text'],
                'text_vector': encoder.encode(chunk["text"]).tolist(),
            }
            for chunk in chunks
        ]
    }

In [5]:
def populate_es_index(documents, index_name, client):
    """Index each document into ``index_name``, one request per document.

    Indexing is best effort: an exception raised for a document is printed
    and the remaining documents are still processed.

    Args:
        documents: Iterable of document bodies to index.
        index_name: Target index.
        client: Client exposing ``index(index=..., body=...)``.

    Returns:
        ``index_name``, unchanged.
    """
    def _store(document):
        # Report the failure and carry on with the next document.
        try:
            client.index(index=index_name, body=document)
        except Exception as error:
            print(error)

    for document in documents:
        _store(document)

    return index_name

## RAG functions

In [6]:
# search
def search(query, **kwargs):
    """Return the hits whose ``text_vector`` is most similar to ``query``.

    Required keyword arguments:
        encoder: Object whose ``encode(query)`` result offers ``tolist()``.
        num_results: Maximum number of hits requested.
        vector_db_client: Client exposing ``search(index=..., body=...)``.
        index_name: Index to search.

    Returns:
        The ``['hits']['hits']`` list of the client's search response.
    """
    embedding = kwargs['encoder'].encode(query).tolist()

    # Score every document by cosine similarity to the query embedding;
    # the + 1.0 offset is part of the script sent to the cluster.
    similarity_script = {
        "source": "cosineSimilarity(params.query_vector, 'text_vector') + 1.0",
        "params": {"query_vector": embedding},
    }
    request_body = {
        "size": kwargs['num_results'],
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": similarity_script,
            }
        },
    }

    response = kwargs['vector_db_client'].search(index=kwargs['index_name'], body=request_body)
    return response['hits']['hits']

In [7]:
# prompt
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the question and the retrieved texts.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of mappings; the text is read from
            ``['_source']['text']`` when a ``'_source'`` key is present,
            otherwise from ``['text']``.

    Returns:
        The filled-in template with surrounding whitespace stripped; each
        result's text is followed by a blank line inside the context.
    """
    prompt_template = """
    You're a podcast chat bot. Answer the QUESTION based on the CONTEXT from the RESULTS database.
    Use only the facts from the CONTEXT when answering the QUESTION.

    QUESTION: {question}
    
    CONTEXT: 
    {context}
    """.strip()

    def _text_of(hit):
        # Accept both nested ('_source') and flat result shapes.
        if '_source' in hit.keys():
            return hit['_source']['text']
        return hit['text']

    context = "".join(f"{_text_of(hit)}\n\n" for hit in search_results)

    return prompt_template.format(question=query, context=context).strip()

In [8]:
# generate
def llm(prompt, **kwargs):
    """Generate a text response for ``prompt``.

    Required keyword arguments:
        llm_tokenizer: Callable tokenizer; called as
            ``llm_tokenizer(prompt, return_tensors="pt")`` and expected to
            return a mapping with ``"input_ids"``; also used to ``decode``.
        llm_client: Model exposing ``generate(input_ids, **generation_args)``.

    Optional keyword arguments (defaults are the values this function
    previously hard-coded):
        max_length (100), num_beams (5), do_sample (True),
        temperature (1.0), top_k (50), top_p (0.95).

    Returns:
        The first generated sequence decoded with special tokens skipped.

    Note:
        With the default ``do_sample=True`` the output is sampled; pass
        ``do_sample=False`` if repeatable answers are needed.
    """
    generation_defaults = {
        "max_length": 100,
        "num_beams": 5,
        "do_sample": True,
        "temperature": 1.0,
        "top_k": 50,
        "top_p": 0.95,
    }
    # Only the known generation options are forwarded; other kwargs
    # (e.g. llm_client, llm_tokenizer) never reach generate().
    generation_args = {
        name: kwargs.get(name, default)
        for name, default in generation_defaults.items()
    }

    inputs = kwargs['llm_tokenizer'](prompt, return_tensors="pt")
    outputs = kwargs['llm_client'].generate(inputs["input_ids"], **generation_args)
    response = kwargs['llm_tokenizer'].decode(outputs[0], skip_special_tokens=True)

    return response

In [9]:
# rag 
def rag(query, **kwargs):
    """Answer ``query`` via retrieve -> prompt -> generate, streaming the words.

    This is a generator: nothing runs until it is iterated.

    Keyword arguments:
        index_name (required): Index passed to ``search``.
        llm_client (required): Model passed to ``llm``.
        encoder: Sentence encoder passed to ``search`` (``None`` if omitted).
        vector_db_client: Client passed to ``search`` (``None`` if omitted).
        llm_tokenizer: Tokenizer passed to ``llm`` (``None`` if omitted).
        num_results: Number of hits to retrieve; defaults to 5.

    Yields:
        Each whitespace-separated word of the answer followed by a single
        space, pausing 0.05 s after each word.

    Raises:
        KeyError: If ``index_name`` or ``llm_client`` is not supplied.
    """
    search_results = search(
        query,
        encoder=kwargs.get('encoder'),
        index_name=kwargs['index_name'],
        vector_db_client=kwargs.get('vector_db_client'),
        # Honour the caller's value; it used to be silently replaced by 5.
        num_results=kwargs.get('num_results', 5),
    )

    prompt = build_prompt(query, search_results)

    answer = llm(
        prompt,
        llm_client=kwargs['llm_client'],
        llm_tokenizer=kwargs.get('llm_tokenizer'),
    )

    for word in answer.split():
        yield word + " "
        time.sleep(0.05)

## Main

In [10]:
# podcast option (this run uses the "sample" choice)
episode_option = "1. Try a sample"
episode_option_selected = True

In [11]:
# sentence encoder: the model that embeds both the chunks and the query
sentence_encoder = "1. T5"
encoder = SentenceTransformer("sentence-transformers/sentence-t5-base")
sentence_encoder_selected = True



In [12]:
# transcription method
# skip

In [13]:
# vector database
index_name = "podcast-transcriber"
vector_db = "2. Elasticsearch"

# Start from a known state so the flags exist (and are accurate) even when
# the connection below is skipped or fails.
vector_db_selected = False
index_created = False

# The credentials are None when the environment variables are unset, so test
# truthiness: comparing against '' would let None through as "provided".
if elasticsearch_api_key and elasticsearch_cloud_id:
    try:
        vector_db_client = Elasticsearch(cloud_id=elasticsearch_cloud_id, api_key=elasticsearch_api_key)
        # Round-trip to the cluster so bad credentials or an unreachable host surface here.
        response = vector_db_client.cluster.health()
        index = create_es_index(vector_db_client, index_name)
        vector_db_selected = True
        index_created = True
        print(f"Index {[k for k, v in index.items()][0]} was created successfully.")
    except AuthenticationException:
        print("Invalid API key or Cloud ID. Please provide a valid tokens.")
    except ConnectionError:
        print("Connection error. Could not connect to the cluster.")
    except Exception as e:
        print(f"An error occurred: {e}")
else:
    print("Elasticsearch credentials are missing. Set ES_CLOUD_ID and ES_API_KEY in the environment or .env file.")

Index podcast-transcriber was created successfully.


In [14]:
# llm: load the generation model and its matching tokenizer
llm_option = "2. FLAN-5"
llm_client = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
llm_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
llm_option_selected = True

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [15]:
# download
# Read the stored sample episode. The encoding is pinned to UTF-8 so the file
# is decoded the same way on every platform instead of using the locale default.
with open('sample/episode_details.json', 'r', encoding='utf-8') as f:
    episode_details = json.load(f)

In [16]:
# transcribe
# skip

In [17]:
# encode
# Embed every entry of episode_details['chunks'] and keep the result under 'documents'.
episode_details['documents'] = create_t5_embedding(encoder, episode_details['chunks'])['documents']

In [18]:
# populate index
# The call is the cell's last expression, so the returned index name is displayed.
populate_es_index(
    documents=episode_details['documents'],
    index_name=index_name,
    client=vector_db_client,
)

'podcast-transcriber'

In [19]:
# interact
query = "Who were the Mensheviks?"
response = rag(
    query,
    encoder=encoder,
    index_name=index_name,
    vector_db_client=vector_db_client,
    num_results=5,
    llm_client=llm_client,
    llm_tokenizer=llm_tokenizer,
)
# rag() yields every word with its own trailing space, so the pieces are
# concatenated directly; joining on " " would double each gap between words.
print("".join(response))

Marxists 
