# Introduction

In [None]:
pip install openai langchain langchain-community pinecone pinecone-client python-dotenv langchain-pinecone tiktoken sentence_transformers chromadb grpc-gateway-protoc-gen-openapiv2

Collecting openai
  Downloading openai-1.51.0-py3-none-any.whl.metadata (24 kB)
Collecting langchain
  Downloading langchain-0.3.2-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Collecting pinecone
  Downloading pinecone-5.3.1-py3-none-any.whl.metadata (19 kB)
Collecting pinecone-client
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting langchain-pinecone
  Downloading langchain_pinecone-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting sentence_transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting chromadb
  Downloading chromadb-0.5.11-py3-none-any.whl.metadata (6.8 kB)
Collecting grpc-gateway-protoc-

In [None]:
import openai
import langchain
import os
from langchain.document_loaders import PyPDFDirectoryLoader, DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
import logging

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv('.env')

# The root logger is kept at INFO: at DEBUG every imported library (HTTP connections,
# file locks, model downloads, ...) writes its own records into the cell outputs.
logging.basicConfig(level=logging.INFO,
                    format='[%(levelname)s] - %(message)s ',
                    handlers=[
                        logging.FileHandler('/content/langchaindemo.log', mode='w'),
                        logging.StreamHandler(),
                    ],
                    force=True)  # force=True replaces any handlers installed by a previous run
# The notebook's own logger still emits DEBUG records through the root handlers.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.info("Langchain Demo Initialized")

[INFO] - Langchain Demo Initialized 


# Data load

In [None]:
import requests
from bs4 import BeautifulSoup
import re

# Función para limpiar el contenido
def clean_text(text):
    """Normalise raw text extracted from a Wikipedia page.

    Removes citation markers, invisible formatting characters and Wikidata
    edit links, separates words that were glued together by the extraction,
    and collapses every run of whitespace into a single space.

    :param str text: raw page text.
    :return: the cleaned text, stripped of leading/trailing whitespace.
    """
    # Citation markers such as [1], [2], ...
    text = re.sub(r'\[\d+\]', '', text)
    # Escaped code points that arrive as literal text (backslash, 'u', four hex digits)
    text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', text)
    # Real invisible characters (zero-width space/joiners, direction marks, word joiner, BOM).
    # The pattern above never matches these, and \s does not cover them either.
    text = re.sub('[\u200b-\u200f\u2060\ufeff]', '', text)

    # Page-specific noise, e.g. "Firma[editar datos en Wikidata]"
    text = re.sub(r'Firma\[editar datos en Wikidata\]', '', text)
    text = re.sub(r'\[editar datos.*?\]', '', text)

    # Re-insert the space missing after punctuation that sits between two letters
    text = re.sub(r'([a-zA-Z])([,.;])([a-zA-Z])', r'\1\2 \3', text)
    # Split words glued together: a letter immediately followed by an uppercase letter
    text = re.sub(r'([a-zA-Z])([A-Z])', r'\1 \2', text)

    # Collapse runs of spaces / newlines into one space
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

def get_wiki_content(page, timeout=10):
  """Download a Spanish Wikipedia article and return its cleaned text.

  :param str page: article name as it appears in the URL (e.g. "Elon_Musk").
  :param timeout: seconds to wait for the HTTP response before giving up.
  :return: the cleaned article text, or None when the page cannot be fetched
           or its content container is not found (a message is printed).
  """
  url = "https://es.wikipedia.org/wiki/" + page
  try:
    # Without a timeout a stalled connection would block the notebook indefinitely
    response = requests.get(url, timeout=timeout)
  except requests.RequestException as exc:
    print(f"Error fetching the page: {exc}")
    return None

  if response.status_code != 200:
    print(f"Error fetching the page: {response.status_code}")
    return None

  soup = BeautifulSoup(response.text, 'html.parser')
  body_content = soup.find('div', class_='mw-parser-output')
  if body_content is None:
    # find() returns None when the container is absent
    print(f"Error parsing the page: no 'mw-parser-output' container in {url}")
    return None

  return clean_text(body_content.get_text(strip=True))

# docs = get_wiki_content("Elon_Musk")


# Función para cargar los mejores 5000 tweets de Trump
def load_trump_tweets(file_path, top_n=5000):
    """Load the most-favorited Donald Trump tweets from a CSV file as documents.

    The CSV is read for the columns 'content', 'favorites', 'mentions' and 'hashtags'.

    :param file_path: path of the CSV file.
    :param int top_n: number of tweets to keep, ranked by 'favorites' (default 5000).
    :return: list of Document objects, one per tweet, tagged with
             source 'twitter' and person 'Donald Trump'.
    """
    df = pd.read_csv(file_path)

    # A row without text would break the string concatenation below
    df = df.dropna(subset=['content'])

    # Keep the top_n tweets with the most favorites
    df = df.sort_values(by='favorites', ascending=False).head(top_n)

    tweets = []
    for _, row in df.iterrows():
        content = str(row['content'])

        # Append mentions and hashtags when the tweet has them
        if pd.notna(row['mentions']):
            content += f"\nMentions: {row['mentions']}"
        if pd.notna(row['hashtags']):
            content += f"\nHashtags: {row['hashtags']}"

        tweets.append(
            Document(
                page_content=content,
                metadata={
                    'source': 'twitter',
                    'person': 'Donald Trump'
                }
            )
        )

    return tweets




# Función para cargar los mejores 5000 tweets de Elon Musk
def load_elon_tweets(file_path, top_n=5000):
    """Load the most-favorited Elon Musk tweets from a CSV file as documents.

    The CSV is read for the columns 'text', 'favorites' and 'hashtags'.

    :param file_path: path of the CSV file.
    :param int top_n: number of tweets to keep, ranked by 'favorites' (default 5000).
    :return: list of Document objects, one per tweet, tagged with
             source 'twitter' and person 'Elon Musk'.
    """
    df = pd.read_csv(file_path)

    # A row without text would break the string concatenation below
    df = df.dropna(subset=['text'])

    # Keep the top_n tweets with the most favorites
    df = df.sort_values(by='favorites', ascending=False).head(top_n)

    tweets = []
    for _, row in df.iterrows():
        content = str(row['text'])

        # Append hashtags when the tweet has them
        if pd.notna(row['hashtags']):
            content += f"\nHashtags: {row['hashtags']}"

        tweets.append(
            Document(
                page_content=content,
                metadata={
                    'source': 'twitter',
                    'person': 'Elon Musk'
                }
            )
        )

    return tweets


In [None]:
import hashlib

def calcular_hash_sha256(cadena):
    """Return the SHA-256 digest of ``cadena`` (encoded as UTF-8) as a hex string."""
    return hashlib.sha256(cadena.encode('utf-8')).hexdigest()

# Pre-processing

In [None]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain_community.chat_models import ChatOpenAI
from langchain_community.llms import OpenAI, HuggingFaceHub
from langchain_community.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
import pandas as pd
from langchain_core.documents import Document

def split_text(text, chunk_size=2000, chunk_overlap=200):
    """
    Split a long text into overlapping chunks. A loaded document may be too long for
    most models, and even when it fits the model can struggle to find the relevant
    context, so the text is cut into smaller pieces.
    :param text: the text to be split
    :param chunk_size: maximum chunk size passed to the splitter (default 2000)
    :param chunk_overlap: overlap between consecutive chunks (default 200)
    :return: list of text chunks
    """

    text_splitter = RecursiveCharacterTextSplitter( # recommended splitter for generic text
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True  # NOTE(review): presumably only matters when the splitter builds Documents; split_text returns plain strings
    )

    return text_splitter.split_text(text)

# chunks = split_text(docs)
# print(chunks)

def get_docs(persons):
  """Build the Wikipedia documents for a list of persons.

  :param persons: iterable of dicts with at least the keys 'name' and 'wiki_page'.
  :return: list of Document objects, one per text chunk, with metadata
           {'source': 'wikipedia', 'person': <name>}. Persons whose page
           yields no content are skipped.
  """
  docs = []

  for person in persons:
    wiki_content = get_wiki_content(person['wiki_page'])
    if not wiki_content:
      print(f"No Wikipedia content for {person['name']}; skipping")
      continue

    wiki_chunks = split_text(wiki_content)
    # One summary line per person instead of dumping the whole article
    print(f"{person['name']}: {len(wiki_chunks)} Wikipedia chunks")
    docs.extend(
      Document(page_content=chunk, metadata={'source': 'wikipedia', 'person': person['name']})
      for chunk in wiki_chunks
    )

  return docs


In [None]:
## ELON - TRUMP
# Build the Wikipedia documents
persons = [
    {'name': 'Elon Musk', 'wiki_page': 'Elon_Musk', 'twitter_handle': 'elonmusk'},
    {'name': 'Donald Trump', 'wiki_page': 'Donald_Trump'}
]
docs = get_docs(persons)

# Show a summary and one sample instead of dumping every chunk
print(f"{len(docs)} documents loaded")
if docs:
    print(docs[0])

# Cargar los documentos de los tweets
# elon_tweets_docs = load_elon_tweets('elon_musk_tweets.csv')
# trump_tweets_docs = load_trump_tweets('realdonaldtrump.csv')


# Agregar los documentos de los tweets a los docs existentes
# docs.extend(elon_tweets_docs)
# docs.extend(trump_tweets_docs)

# print(docs[1])

Elon Musk Elon Musk en 2023Director ejecutivode X Corp. Interino​12 de abril de 2023​-12 de junio de 2023Sucesor Linda Yaccarino Director ejecutivode Twitter, Inc. Interino28 de octubre de 2022-12 de abril de 2023Predecesor Parag Agrawal SucesorÉl mismo(como Director ejecutivo de X Corp.)Director general de Space XActualmente en el cargo Desde el 17 de diciembre de 2016Director general de Tesla, Inc. Actualmente en el cargo Desde el julio de 2016Presidente Jared Birchall Director general de The Boring Company Actualmente en el cargo Desde el 4 de julio de 2006Copresidente de Neuralink Actualmente en el cargo Desde el 1 de julio de 2003Presidente de Solar City Corporation Actualmente en el cargo Desde el 6 de mayo de 2002Información personal Nombre de nacimiento Elon Reeve Musk Nacimiento28 de junio de 1971(53 años)Pretoria(Sudáfrica)Residencia Bel-Air, Saskatchewan, Kingstony Boca Chica Nacionalidad Sudafricana(desde 1971)canadiense(desde 1989) yestadounidense(desde 2002)Religión Agnos

## Malena Pichot & Feinmann


Carga de la data de Wikipedia

In [None]:
## MALE PICHOT - FEINMANN

# Each Wikipedia page name is the person's name with underscores instead of spaces
persons = [
    {'name': full_name, 'wiki_page': full_name.replace(' ', '_')}
    for full_name in ('Malena Pichot', 'Eduardo Feinmann')
]
docs2 = get_docs(persons)

# print(docs2)

[DEBUG] - Starting new HTTPS connection (1): es.wikipedia.org:443 
[DEBUG] - https://es.wikipedia.org:443 "GET /wiki/Malena_Pichot HTTP/11" 200 28556 
[DEBUG] - Starting new HTTPS connection (1): es.wikipedia.org:443 
[DEBUG] - https://es.wikipedia.org:443 "GET /wiki/Eduardo_Feinmann HTTP/11" 200 33038 


Malena Pichot Malena Pichot en 2012Información personal Nombre de nacimiento Malena Pichot​Nacimiento6 de julio de 1982 (42 años)Buenos Aires(Argentina)​​Nacionalidad Argentina Religión Atea Familia Pareja Leandro Lopatín Hijos1Información profesional Ocupación Actriz, comediante, escritora, guionista, directorayactivistafeminista Años activa2009-presente Malena Pichot(Buenos Aires, 6 de julio de 1982)​ es unaactriz, comediante, escritora, guionista, directorayactivistafeministaargentina.​En 2008 alcanzó la fama con sus videos de «La loca de mierda», publicados en You Tube.​​ Después de tener participaciones en unitarios, protagonizó y guionizó la serie Cualca(2012-2014), su spin-off Por ahora(2014), las miniseries Jorge(2013),Mundillo(2015),Tarde Baby(2018), el show Estupidez compleja(2018), el cortometraje Leonor(2020) y la película Finde(2021).​​Además, Pichot realiza presentaciones de stand up y conduce, desde 2016, el programa radial «Furia Bebé».​Biografía[editar]1982-2009: Prime

Cargamos la transcripción (en formato .txt) y la dividimos en chunks

In [None]:
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_and_split_transcript(file_path, author='Male Feinmann', chunk_size=2000, chunk_overlap=200):
    """Read a UTF-8 transcript file and split it into chunked documents.

    :param file_path: path of the text file to read.
    :param author: value stored under the 'author' metadata key (default 'Male Feinmann').
    :param chunk_size: chunk size in characters (default 2000).
    :param chunk_overlap: overlap between consecutive chunks (default 200).
    :return: list of Document chunks produced from a document whose metadata holds
             'source' ('transcript'), 'author' and 'file_name'.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        transcript_text = file.read()

    # Wrap the whole transcript in one document that carries the metadata
    document = Document(
        page_content=transcript_text,
        metadata={
            'source': 'transcript',
            'author': author,
            'file_name': file_path
        }
    )

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )

    return text_splitter.split_documents([document])

# Transcript file to load
file_path = 'male_feinmann.txt'

# Read the file and cut it into chunked documents
split_documents = load_and_split_transcript(file_path)

# Sanity check: first chunk followed by the number of chunks
print(split_documents[0], len(split_documents), sep='\n')

page_content='Malena Pichot, buenas tardes, ¿qué tal, cómo va?  Hola Edu, ¿cómo estás?  Bien, bien, ¿y vos?  Muy bien.  ¿Por qué decís que soy un facho no inofensivo?  No, un facho inofensivo creo que sos.  Cambiaste, porque al Babi le dijiste que no, que era un animal.  Ah, sí, eso también.  Pero bueno, sos un poco animal, le dijiste pelotudos a unos chicos de 17 años.  Definime animal.  Eh, animal, ser vivo o carente de lenguaje.  Pero...  ¿Vos también sos animal?  No, yo no.  Vos no, muy bien.  Contame una cosa, ¿y entonces soy un facho porque qué le dije a un chico de 17?  Claro, le dijiste a unos chicos de 17 pelotudos de mierda cuando estaban haciendo una manifestación por algo.  Nunca.  ¿No? ¿No te acordás?  Eso es falso.  ¿Cómo que no?  Eso es falso.  ¿No le dijiste, no los puteaste?  Eso es falso. Esa frase es falsa.  ¿No le dijiste, qué le dijiste?  Yo dije...  ¿Son unos algo?  ¿Son unos?  ¿Son unos?  Pelotudos dijiste.  No, eso es falso.  ¿Son unos?  ¿Qué?  ¿Son unos?  ¿Te d

In [None]:
# Rebuild the list instead of extending it in place: re-running this cell would otherwise
# append the transcript chunks again. Chunks already tagged 'transcript' are dropped first.
docs2 = [doc for doc in docs2 if doc.metadata.get('source') != 'transcript'] + list(split_documents)
print(len(docs2))

31


In [None]:
# Install Whisper from its GitHub repository (quiet mode); it is imported as `whisper` in the Whisper + Youtube section.
!pip -qqq install git+https://github.com/openai/whisper.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


## Whisper + Youtube

No funciona en Colab; lo ejecutamos localmente.

In [None]:
from pytube import YouTube
import whisper
import torch
import os

# Run Whisper on the GPU when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

whisper_model = whisper.load_model("large", device=device)

def video_to_audio(video_URL, destination, final_filename):
  """Download the audio track of a YouTube video and store it as ``<final_filename>.mp3``.

  :param video_URL: URL of the YouTube video.
  :param destination: directory where the audio is downloaded and kept.
  :param final_filename: name of the resulting file, without extension.
  :return: path of the renamed audio file.
  :raise ValueError: if the video exposes no audio-only stream.
  """
  video = YouTube(video_URL)

  # first() yields None when the filter matches nothing
  audio = video.streams.filter(only_audio=True).first()
  if audio is None:
    raise ValueError(f"No audio-only stream available for {video_URL}")

  output = audio.download(output_path=destination)

  # The file is only renamed, not transcoded: '.mp3' is just the name downstream code expects.
  new_file = os.path.join(destination, final_filename + '.mp3')

  # os.replace overwrites a file left by a previous run on every platform
  os.replace(output, new_file)
  return new_file

def convert(url):
  """Fetch the audio of the video at ``url`` into the current directory under the base name ``audio_file_to_convert``."""
  video_to_audio(url, ".", "audio_file_to_convert")

def transcribe(audio_file="audio_file_to_convert.mp3"):
  """Transcribe an audio file with the module-level Whisper model.

  :param audio_file: path of the audio file (defaults to the file written by ``convert``).
  :return: the transcription as a single string, built by ``format_segments``.
  """
  result = whisper_model.transcribe(audio_file)
  return format_segments(result['segments'])

def format_segments(result_segments):
    """Join the text of the transcription segments into one string.

    Each segment's text is stripped before joining, so segments that carry their
    own leading/trailing whitespace do not produce double spaces; segments whose
    text is empty after stripping are left out.

    :param result_segments: iterable of dicts that contain a 'text' key.
    :return: the segment texts separated by single spaces ('' for no segments).
    """
    pieces = (segment['text'].strip() for segment in result_segments)
    return " ".join(piece for piece in pieces if piece)

# def format_time_milliseconds(seconds):
#     minutes, seconds = divmod(seconds, 60)
#     hours, minutes = divmod(minutes, 60)
#     milliseconds = int((seconds - int(seconds)) * 1000)
#     return f"{int(hours):01}:{int(minutes):01}:{int(seconds):02}.{milliseconds:03}"

# Save the formatted result to a text file
def dump_into_txt(formatted_result, output_file_path='transcribed_text.txt'):
  """Write the transcription to a UTF-8 text file and report where it was saved.

  :param str formatted_result: text to write.
  :param output_file_path: destination file (default 'transcribed_text.txt'); overwritten if present.
  """
  # Explicit UTF-8 so accented characters are written identically on every platform
  with open(output_file_path, 'w', encoding='utf-8') as output_file:
    output_file.write(formatted_result)
  print(f"Formatted result saved to {output_file_path}")

# url = "https://www.youtube.com/watch?v=pCDHwlT7mPU"
# convert(url)
# formatted = transcribe()
# dump_into_txt(formatted)

# DB vectorial

## Embeddings (HF)

In [None]:
from langchain_community.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings

# Embedding model used for the vector store: a Hugging Face sentence-transformers model.
embeddings = HuggingFaceEmbeddings( # alternative: embedding=OpenAIEmbeddings() (noted as hitting a rate limit)
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'} # TODO: change to 'cuda' when a GPU is available
)

# vector = embeddings.embed_query("Hola como estas?")
# embedding_size = len(vector)  # HF 384 ; OPENAI 1536

  embeddings = HuggingFaceEmbeddings( #  embedding=OpenAIEmbeddings() rate limit
[DEBUG] - pydot initializing 
[DEBUG] - pydot 3.0.2 
[DEBUG] - pydot dot_parser module initializing 
[DEBUG] - pydot core module initializing 
[DEBUG] - Creating converter from 7 to 5 
[DEBUG] - Creating converter from 5 to 7 
[DEBUG] - Creating converter from 7 to 5 
[DEBUG] - Creating converter from 5 to 7 
[DEBUG] - Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client. 
[DEBUG] - etils.epath found. Using etils.epath for file I/O. 
[INFO] - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2 
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note t

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886020128480 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock 
[DEBUG] - Lock 132886020128480 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/config_sentence_transformers.json HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132886020124016 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock 
[DEBUG] - Lock 132886020124016 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock 
[DEBUG] - https://huggingface.co:443 "GET /sentence-transformers/all-MiniLM-L6-v2/resolve/main/config_sentence_transformers.json HTT

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886020124016 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock 
[DEBUG] - Lock 132886020124016 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/README.md HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132886020124352 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock 
[DEBUG] - Lock 132886020124352 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock 
[DEBUG] - https://huggingface.co:443 "GET /sentence-transformers/all-MiniLM-L6-v2/resolve/main/README.md HTTP/11" 200 10659 


README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886020124352 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock 
[DEBUG] - Lock 132886020124352 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/modules.json HTTP/11" 200 0 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/sentence_bert_config.json HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132886020124352 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock 
[DEBUG] - Lock 132886020124352 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock 
[DEBUG] - https

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886020124352 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock 
[DEBUG] - Lock 132886020124352 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/config.json HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132886020124544 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock 
[DEBUG] - Lock 132886020124544 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock 
[DEBUG] - https://huggingface.co:443 "GET /sentence-transformers/all-MiniLM-L6-v2/resolve/main/config.json HTTP/11" 200 612 


config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886020124544 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock 
[DEBUG] - Lock 132886020124544 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/model.safetensors HTTP/11" 302 0 
[DEBUG] - Attempting to acquire lock 132886020124112 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock 
[DEBUG] - Lock 132886020124112 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock 
[DEBUG] - Starting new HTTPS connection (1): cdn-lfs.hf.co:443 
[DEBUG] - https://cdn-lfs.hf.co:443 

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886020124112 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock 
[DEBUG] - Lock 132886020124112 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer_config.json HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132886008377024 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock 
[DEBUG] - Lock 132886008377024 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock 
[DEBUG] - https://huggingface.co:443 "GET /sentence-transformers/all-MiniLM-L6-v2/resolve/main/t

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886008377024 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock 
[DEBUG] - Lock 132886008377024 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/vocab.txt HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132885996528176 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock 
[DEBUG] - Lock 132885996528176 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock 
[DEBUG] - https://huggingface.co:443 "GET /sentence-transformers/all-MiniLM-L6-v2/resolve/main/vocab.txt HTTP/11" 200 231508 


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132885996528176 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock 
[DEBUG] - Lock 132885996528176 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132885996527552 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock 
[DEBUG] - Lock 132885996527552 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock 
[DEBUG] - https://huggingface.co:443 "GET /sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json HTTP/11" 200 466247 


tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132885996527552 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock 
[DEBUG] - Lock 132885996527552 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/added_tokens.json HTTP/11" 404 0 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/special_tokens_map.json HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132886007990976 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock 
[DEBUG] - Lock 132886007990976 acquired on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock 
[DEBUG] - ht

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132886007990976 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock 
[DEBUG] - Lock 132886007990976 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock 
[DEBUG] - https://huggingface.co:443 "GET /api/models/sentence-transformers/all-MiniLM-L6-v2/revision/main HTTP/11" 200 6149 
[DEBUG] - Starting new HTTPS connection (1): huggingface.co:443 
[DEBUG] - https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/8b3219a92973c328a8e22fadcfa821b5dc75636a/1_Pooling/config.json HTTP/11" 200 0 
[DEBUG] - Attempting to acquire lock 132885996528080 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock 
[DEBUG] - Lock 132885996528080 acquired on /root/.cache/huggingface/hub/.locks/models--sent

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[DEBUG] - Attempting to release lock 132885996528080 on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock 
[DEBUG] - Lock 132885996528080 released on /root/.cache/huggingface/hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock 
[DEBUG] - https://huggingface.co:443 "GET /api/models/sentence-transformers/all-MiniLM-L6-v2 HTTP/11" 200 6149 


In [None]:
 pip install grpc-gateway-protoc-gen-openapiv2



## Pinecone. VectorStore

In [None]:
import pinecone
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC as Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_pinecone import Pinecone
from google.colab import userdata


def get_vector_store(index_name, embeddings, embedding_size=384):
  """ Return a Pinecone-backed vector store, creating the index first when it does not exist.

    :param str index_name: The name of the index to create or retrieve from Pinecone.
    :param embeddings: The embedding model/function handed to PineconeVectorStore to embed texts.
    :param int embedding_size: Dimension used when the index has to be created. Defaults to 384 (e.g., for sentence-transformers/all-MiniLM-L6-v2).

    :return: PineconeVectorStore: The vector store bound to ``index_name``.

    :raise: KeyError: If the PINECONE_API_KEY environment variable is not set.
  """
  # Imported locally under its own name: the import cell rebinds the bare name `Pinecone`
  # to `langchain_pinecone.Pinecone`, so it cannot be relied on to be the API client here.
  from pinecone.grpc import PineconeGRPC

  api_key = os.environ['PINECONE_API_KEY']  # The API key is read from the environment, never hard-coded
  pc = PineconeGRPC(api_key=api_key)

  if index_name not in pc.list_indexes().names():  # Only create the index when it is missing
    pc.create_index(
      name=index_name,          # Name of the index
      dimension=embedding_size, # Size of the vectors (embeddings)
      metric="cosine",          # Distance metric used to compare vectors
      spec=ServerlessSpec(      # Determines the infrastructure used
        cloud='aws',            # Specifies that the Pinecone index is hosted on AWS
        region='us-east-1'      # Specifies the region of the cloud provider
      )
    )

  # Build the store explicitly instead of returning an undefined/global `vectorstore`.
  return PineconeVectorStore(
    pinecone_api_key=api_key,
    index_name=index_name,
    embedding=embeddings,
  )

In [None]:
# Index holding the Donald & Elon vectors
INDEX_NAME = "rag-bio-3"

# Index holding the Pichot & Feinmann vectors
# INDEX_NAME = "rag-bio-2"

# Bind a PineconeVectorStore to the selected index, using `embeddings` to embed texts
vectorstore = PineconeVectorStore(
    index_name=INDEX_NAME,
    embedding=embeddings,
    pinecone_api_key=os.environ['PINECONE_API_KEY'],
)

[INFO] - Discovering subpackages in _NamespacePath(['/usr/local/lib/python3.10/dist-packages/pinecone_plugins']) 
[INFO] - Looking for plugins in pinecone_plugins.inference 
[INFO] - Installing plugin inference into Pinecone 


In [None]:
# Upsert the chunks in `docs` into the index; the cell output is the list of stored document IDs
vectorstore.add_documents(docs)

[DEBUG] - response body: b'{"upsertedCount":32}' 
[DEBUG] - response body: b'{"upsertedCount":32}' 
[DEBUG] - response body: b'{"upsertedCount":32}' 
[DEBUG] - response body: b'{"upsertedCount":32}' 
[DEBUG] - response body: b'{"upsertedCount":32}' 
[DEBUG] - response body: b'{"upsertedCount":32}' 
[DEBUG] - response body: b'{"upsertedCount":32}' 
[DEBUG] - response body: b'{"upsertedCount":22}' 


['e1b83cbc-a167-43f6-82de-29f25ccc6062',
 'de414884-7486-4966-bd1b-76bcce16922b',
 '47b2294b-79f3-4bcf-a29d-7ac899b401ac',
 'dad85015-8f76-43ae-a9f7-2885aab434aa',
 '4fb9c611-3235-4cd3-9ef6-ded95871f510',
 'cd0585df-576b-4dc1-b76b-e0cf62e0bf92',
 '9268f37c-a653-4c54-a6cd-11708f0a34c0',
 '7d533cf0-6c04-4cfb-be2b-ea0418cb645e',
 'dd22931a-a64f-422e-8335-0e784eadbc28',
 '915d9219-211f-4eb8-b2d4-c80840b8e818',
 '6e97ebfd-a5d9-4f1b-b57e-f01723c6b8e7',
 '204fbe04-3139-4f2b-83ab-fc8b2378f3c3',
 '51304bac-0694-4fe8-99d8-c5872b8c5678',
 '3e2c4f24-0166-4aa6-b7c4-b23099a23e28',
 'd74eeb5f-9516-4832-9eed-a10cf6c41388',
 'dda0054b-b2c2-4855-aa21-889c7e23a34a',
 'cf1a34fa-a36e-40ad-b999-a56f2d4a12cc',
 'acfceb63-06bf-46fa-b4b3-aa64d8dbb557',
 '7d9f5de5-70e4-4aad-b036-ee9324c64df6',
 '7f7b2ed0-e40e-4205-88c7-9d3fa55f2071',
 'cfa1291e-e99a-4e3e-b2aa-b6371cddcb47',
 'f318419f-f41f-455c-978f-01efb5b70de4',
 '79672938-34d6-4bf8-94f4-507ac0a9169d',
 '83615504-3071-4ef4-9804-e7ed9ff3d9f0',
 'e7768f49-a230-

In [None]:
# Add the second document set (`docs2`) to the same index.
# NOTE(review): presumably the Male / Feinmann material — confirm where `docs2` is built.
vectorstore.add_documents(docs2)

[DEBUG] - response body: b'{"upsertedCount":31}' 


['89c0d87f-b1c6-45d6-aae1-694ecd4266c1',
 '75d8befa-620e-4f61-991f-48fd31c51a72',
 'a9855bf7-ea80-44c3-9e84-537dfb1b54bc',
 '96d36ef7-925f-4695-a233-5b1350c5d177',
 '933f37f4-43ad-443e-9849-b1d23ab1e8b3',
 '020419cf-8a5f-475d-b08a-59be6f8d8fb6',
 '0189637b-7616-4c00-aba1-cbec6b2d8076',
 '6f4e879c-a9ca-489b-8c93-a73f5290532e',
 'eceb5818-bcd9-4cee-8837-23714681a6b5',
 '7998184a-1f8d-472c-b9f0-653525235507',
 'b16b0a8e-2d1a-48a2-ad89-109bb2f54585',
 '40bcfce8-c323-4310-92be-545e5f5ab431',
 'cf519806-9479-4311-8c78-ab3fa5fc936c',
 '2174aa61-fca7-4f6d-beba-d827e1829aa6',
 '5d9e18fb-4b70-4867-aa55-77bbf605c1c1',
 '5d445f60-c526-4908-ad3e-152f81d5f5bc',
 '70594b49-1deb-4e20-a47a-83db15a72754',
 '5822cc30-a780-4bb4-9074-84106bb04e2b',
 'd4f0b23c-2194-4353-997f-7f422e9f9d66',
 '5efbac15-4c66-4a00-b99e-9e3447dfe481',
 'b6d83370-5153-4f2c-a355-830208d7108c',
 'ad0ed23c-afdd-45c7-9070-62dceb2fbb6d',
 '1507388a-43a0-4e3a-919e-e951f189d376',
 'a5a613ce-e293-4252-89d7-8f85831ebf58',
 '6a8a1087-bbe4-

In [None]:
# Smoke-test retrieval: ask for the five chunks closest to the query "libertad"
results = vectorstore.search(
    query="libertad",
    search_type="similarity",
    k=5,
)
results

[DEBUG] - response body: b'{"results":[],"matches":[{"id":"7f48348b-17e5-4d65-ab11-a5cc1f88dc04","score":0.443119317,"values":[],"metadata":{"person":"Elon Musk","source":"twitter","text":"@JayCartere @ScottAdamsSays I don\xe2\x80\x99t agree with everything Scott says, but Dilbert is legit funny &amp; insightful.\\n\\nWe\xe2\x80\xa6 https://t.co/GBIQPo61up"}},{"id":"3c2a6483-1a35-4b6c-98a8-941238c05eba","score":0.443119198,"values":[],"metadata":{"person":"elon_musk_tweets.csv","source":"twitter","text":"@JayCartere @ScottAdamsSays I don\xe2\x80\x99t agree with everything Scott says, but Dilbert is legit funny &amp; insightful.\\n\\nWe\xe2\x80\xa6 https://t.co/GBIQPo61up"}},{"id":"32c1dd47-b895-464e-a9cf-d7fe2c928834","score":0.425534636,"values":[],"metadata":{"person":"Donald Trump","source":"twitter","text":"LIBERATE MINNESOTA!"}},{"id":"65a53721-1980-4e16-913b-92ae3b11d517","score":0.409245193,"values":[],"metadata":{"person":"Elon Musk","source":"twitter","text":"@libsoftiktok Not

[Document(id='7f48348b-17e5-4d65-ab11-a5cc1f88dc04', metadata={'person': 'Elon Musk', 'source': 'twitter'}, page_content='@JayCartere @ScottAdamsSays I don’t agree with everything Scott says, but Dilbert is legit funny &amp; insightful.\n\nWe… https://t.co/GBIQPo61up'),
 Document(id='3c2a6483-1a35-4b6c-98a8-941238c05eba', metadata={'person': 'elon_musk_tweets.csv', 'source': 'twitter'}, page_content='@JayCartere @ScottAdamsSays I don’t agree with everything Scott says, but Dilbert is legit funny &amp; insightful.\n\nWe… https://t.co/GBIQPo61up'),
 Document(id='32c1dd47-b895-464e-a9cf-d7fe2c928834', metadata={'person': 'Donald Trump', 'source': 'twitter'}, page_content='LIBERATE MINNESOTA!'),
 Document(id='65a53721-1980-4e16-913b-92ae3b11d517', metadata={'person': 'Elon Musk', 'source': 'twitter'}, page_content='@libsoftiktok Not good'),
 Document(id='a28342d1-87cb-45d0-94cb-53ade70f3eaa', metadata={'person': 'elon_musk_tweets.csv', 'source': 'twitter'}, page_content='@libsoftiktok Not 

In [None]:
query = "Donald trump and Elon Musk"
query_male = "escuela"

# Fetch the 5 documents most similar to `query_male`
results = vectorstore.search(
    query=query_male,
    search_type="similarity",
    k=5,
)

# One record per hit: content, metadata, then a 40-dash rule
for result in results:
    print(
        f"Documento: {result.page_content}",
        f"Metadatos: {result.metadata}",
        "-" * 40,
        sep="\n",
    )

[DEBUG] - response body: b'{"results":[],"matches":[{"id":"8fe14f24-982a-4de3-ada5-c4bb5a9525ae","score":0.381457716,"values":[],"metadata":{"author":"Male Feinmann","file_name":"male_feinmann.txt","source":"transcript","text":"es de facho.  No. Ten\xc3\xa9s raz\xc3\xb3n. No. \xc2\xbfSab\xc3\xa9s qu\xc3\xa9 me acabo de dar cuenta? Que sos una persona amplia, que respetas a los otros, que escuch\xc3\xa1s...  \xc2\xbfSab\xc3\xa9s qu\xc3\xa9 te diste cuenta?  \xc2\xbfY qu\xc3\xa9?  \xc2\xbfSab\xc3\xa9s qu\xc3\xa9 te diste cuenta seguramente? Que no me conoc\xc3\xadas. Y que jam\xc3\xa1s hablaste conmigo. Si hubieses hablado conmigo antes, te hubieses dado cuenta que todo lo que dijiste fueron pavadas.  Bueno, tampoco tanta pavada, Edu. No seas as\xc3\xad. Porque te doy la mano y ya te agarr\xc3\xa1s el codo y me empez\xc3\xa1s a bardear de nuevo. \xc2\xbfTe das cuenta? Escuchame.  Claro, porque a la primera de cambio ya me di un azote. Me tengo que ir a la radio.  Bueno, bye. Besos.  Nos 

Documento: es de facho.  No. Tenés razón. No. ¿Sabés qué me acabo de dar cuenta? Que sos una persona amplia, que respetas a los otros, que escuchás...  ¿Sabés qué te diste cuenta?  ¿Y qué?  ¿Sabés qué te diste cuenta seguramente? Que no me conocías. Y que jamás hablaste conmigo. Si hubieses hablado conmigo antes, te hubieses dado cuenta que todo lo que dijiste fueron pavadas.  Bueno, tampoco tanta pavada, Edu. No seas así. Porque te doy la mano y ya te agarrás el codo y me empezás a bardear de nuevo. ¿Te das cuenta? Escuchame.  Claro, porque a la primera de cambio ya me di un azote. Me tengo que ir a la radio.  Bueno, bye. Besos.  Nos vemos por los pasillos del canal, Edu.  Besos.  Besos.  ¡Chau!  Gracias. Chau. Vamos a los títulos.
Metadatos: {'author': 'Male Feinmann', 'file_name': 'male_feinmann.txt', 'source': 'transcript'}
----------------------------------------
Documento: se llamóEllosy más tarde Campa-Pichot.​ Durante el 2011, tuvo su propio programa de radio llamado Frankenste

# RAG

In [None]:
query = "Adorni"

# search_type can be "similarity", "mmr", or "similarity_score_threshold"; k caps the number of hits.
# NOTE(review): no score_threshold is passed here, so presumably nothing is filtered out — confirm.
vectorstore.search(query=query, search_type="similarity_score_threshold", k=5)

[DEBUG] - response body: b'{"results":[],"matches":[{"id":"5822cc30-a780-4bb4-9074-84106bb04e2b","score":0.224015698,"values":[],"metadata":{"person":"Eduardo Feinmann","source":"wikipedia","text":"lanacion. com. ar. 18 de febrero de 2021. Consultado el 19 de febrero de 2021.\xe2\x86\x91abcde Pepe, Gabriela (4 de octubre de 2013).\xc2\xabLa vida oculta del facho cool\xc2\xbb.Revista Noticias. Editorial Perfil S. A. Archivado desdeel originalel 18 de febrero de 2014. Consultado el 7 de abril de 2014. \xc2\xabEs el anti-\xc3\xaddolo de los j\xc3\xb3venes en las redes sociales. Es un reconocido hincha de Boca. Se puso de moda sobreactuando temas duros que horrorizan a la demagogia progre. Lujos, placeres y trauma.\xc2\xbb\xe2\x86\x91ab Iglesias, Fernanda(18 de diciembre de 2013).\xc2\xabEduardo Feinmann: \\"Conmigo no hay t\xc3\xa9rmino medio o me aman o me odian\\"\xc2\xbb.Personajes. tv, (lanacion. com). Consultado el 7 de abril de 2014.\xe2\x86\x91\xc2\xabTras protagonizar una escandal

[Document(id='5822cc30-a780-4bb4-9074-84106bb04e2b', metadata={'person': 'Eduardo Feinmann', 'source': 'wikipedia'}, page_content='lanacion. com. ar. 18 de febrero de 2021. Consultado el 19 de febrero de 2021.↑abcde Pepe, Gabriela (4 de octubre de 2013).«La vida oculta del facho cool».Revista Noticias. Editorial Perfil S. A. Archivado desdeel originalel 18 de febrero de 2014. Consultado el 7 de abril de 2014. «Es el anti-ídolo de los jóvenes en las redes sociales. Es un reconocido hincha de Boca. Se puso de moda sobreactuando temas duros que horrorizan a la demagogia progre. Lujos, placeres y trauma.»↑ab Iglesias, Fernanda(18 de diciembre de 2013).«Eduardo Feinmann: "Conmigo no hay término medio o me aman o me odian"».Personajes. tv, (lanacion. com). Consultado el 7 de abril de 2014.↑«Tras protagonizar una escandalosa pelea radial, Eduardo Feinmann y Leonardo Greco fueron suspendidos». Consultado el 19 de abril de 2018.↑«Un escándalo en el feudo Hadad».www. pagina12.com. ar. Consultado

In [None]:
def generate_response(db, prompt):
    """
    Generate a response with a LLM based on previous custom context

    :param db: Vector store used for retrieval; only its ``as_retriever`` method is called.
    :param str prompt: The full (already decorated) question sent to the QA chain.
    :return: chatbot response as returned by the chain. In this notebook's outputs the text
             begins with the echoed QA prompt and retrieved context.
    """

    hf_llm = HuggingFaceHub(
        repo_id="HuggingFaceH4/zephyr-7b-beta",  # Model id
        task="text-generation",                  # Specific task the model is intended to perform
        model_kwargs={
            "max_new_tokens": 512,               # Maximum number of tokens to generate in the response
            "top_k": 6,                          # Limits the sampling pool to the top k tokens
            "temperature": 0.55,                 # Lower values make the output more deterministic
            "repetition_penalty": 1.2,           # Penalizes repeated tokens to avoid repetitive output
        },
    )

    # "stuff" chain: the retrieved documents are placed into a single prompt for the LLM
    chain = RetrievalQA.from_chain_type(
        llm=hf_llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 6}),
        verbose=False
    )

    # `Chain.run` is deprecated; `invoke` takes the input under the chain's "query" key
    # and returns a dict whose "result" entry is the text `run` used to return.
    response = chain.invoke({"query": prompt})["result"]

    return response

## Inputs para Donald & Elon

In [None]:
def decorate_user_input(input):
  """Prefix the user's message with the Donald Trump persona instruction.

  :param str input: Raw user message (the name shadows the builtin; kept so callers are unaffected).
  :return: The instruction and the message joined by a single space.
  """
  decoration = "Respond in first person as if you were Donald Trump in a short answer."
  # Alternative instruction:
  # decoration = "Answer the tweet in tweet format as if you were Donald Trump."

  # Join with a space so the end of the instruction does not run straight into the message.
  return f"{decoration} {input}"


In [None]:
# Alternative input:
# user_input = "What is priority for your next Tesla?"
user_input = "@DonaldTrump Its clear that if you dont win as president we wont get to Mars"
# Decorate the message with the persona instruction, then run retrieval + generation
response = generate_response(vectorstore, decorate_user_input(user_input))
response

[DEBUG] - response body: b'{"results":[],"matches":[{"id":"58384a38-c2bb-43d5-aace-7bcd6b88b7e5","score":0.378278375,"values":[],"metadata":{"person":"Elon Musk","source":"wikipedia","text":"Consultado el 20 de marzo de 2020.\xe2\x86\x91Timothy P. Carney,\\"Carney: Green stimulus profiteer comes under I RS scrutiny\\", Washington Examiner. com, 14 de octubre de 2012.\xe2\x86\x91Sunlight Foundation (ed.).\xc2\xabSpace X blasts off literally and politically\xc2\xbb. Consultado el 20 de marzo de 2020.\xe2\x86\x91Salant, Jonathan D. (27 de septiembre de 2013).\xc2\xabBillionaires Battle as Bezos-Musk Companies Vie for Launch Pad\xc2\xbb. En Bloomberg Business, ed. Bloomberg. com. Consultado el 20 de marzo de 2020.\xe2\x86\x91\xc2\xabGoing to Mars with Elon Musk\xc2\xbb.On Innovation. com. 1 de junio de 2008. Archivado desdeel originalel 8 de enero de 2016. Consultado el 20 de marzo de 2020.\xe2\x86\x91Going To Mars \xc2\xabElon Musk\xc2\xbb. Consultado el 20 de marzo de 2020.\xe2\x86\x91An

'Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\nConsultado el 20 de marzo de 2020.↑Timothy P. Carney,"Carney: Green stimulus profiteer comes under I RS scrutiny", Washington Examiner. com, 14 de octubre de 2012.↑Sunlight Foundation (ed.).«Space X blasts off literally and politically». Consultado el 20 de marzo de 2020.↑Salant, Jonathan D. (27 de septiembre de 2013).«Billionaires Battle as Bezos-Musk Companies Vie for Launch Pad». En Bloomberg Business, ed. Bloomberg. com. Consultado el 20 de marzo de 2020.↑«Going to Mars with Elon Musk».On Innovation. com. 1 de junio de 2008. Archivado desdeel originalel 8 de enero de 2016. Consultado el 20 de marzo de 2020.↑Going To Mars «Elon Musk». Consultado el 20 de marzo de 2020.↑Anderson, Ross (30 de septiembre de 2014).«The Elon Musk Interview on Mars Colonization».Aeon. Archivado desdeel originalel 10 de noviembre de 2015. 

## Inputs para Male & Feinmann

In [None]:
def decorate_user_input(input):
  """Prefix the user's message with the Eduardo Feinmann persona instruction.

  :param str input: Raw user message (the name shadows the builtin; kept so callers are unaffected).
  :return: The instruction and the message joined by a single space.
  """
  decoration = "Responder en primera persona como si fueras Eduardo Feinmann."
  # Join with a space so the end of the instruction does not run straight into the message.
  return f"{decoration} {input}"

# Alternative inputs (kept in Spanish because they are prompts for the Spanish-language persona):
# user_input = "Malena, qué opinas de la marihuana?"
# user_input = "Hola Eduardo, soy Malena. Qué opinas de ?"
user_input = "Me podría contar acerca de usted?"
# Decorate the message with the persona instruction, then run retrieval + generation
response = generate_response(vectorstore, decorate_user_input(user_input))
response

[DEBUG] - response body: b'{"results":[],"matches":[{"id":"cf519806-9479-4311-8c78-ab3fa5fc936c","score":0.66722542,"values":[],"metadata":{"person":"Eduardo Feinmann","source":"wikipedia","text":"una entrevista ocurri\xc3\xb3 un cruce con Romina Manguel sobre el recuento de votos en las P AS O del mismo a\xc3\xb1o. Luego de esa discusi\xc3\xb3n, Feinmann decidi\xc3\xb3 renunciar al programa de Am\xc3\xa9rica T V.\\u200bEn diciembre de 2020 finaliz\xc3\xb3 su contrato con A24para pasar a formar parte de las filas de LN+a partir de febrero de 2021.\\u200b\\u200bVida personal[editar]Eduardo Feinmann mantiene desde 2017 una relaci\xc3\xb3n estable con Luc\xc3\xada Auat, abogada, oriunda de Santiago del Estero, con quien tiene una hija de nombre Esmeralda.\\u200b\\u200b\\u200b El fil\xc3\xb3sofo Jos\xc3\xa9 Pablo Feinmannes primo hermano de su padre Enrique. Eduardo ha manifestado que no exist\xc3\xada una buena relaci\xc3\xb3n entre ambos.\\u200b\\u200b Durante un altercado en la v\xc3\xa

'Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\nuna entrevista ocurrió un cruce con Romina Manguel sobre el recuento de votos en las P AS O del mismo año. Luego de esa discusión, Feinmann decidió renunciar al programa de América T V.\u200bEn diciembre de 2020 finalizó su contrato con A24para pasar a formar parte de las filas de LN+a partir de febrero de 2021.\u200b\u200bVida personal[editar]Eduardo Feinmann mantiene desde 2017 una relación estable con Lucía Auat, abogada, oriunda de Santiago del Estero, con quien tiene una hija de nombre Esmeralda.\u200b\u200b\u200b El filósofo José Pablo Feinmannes primo hermano de su padre Enrique. Eduardo ha manifestado que no existía una buena relación entre ambos.\u200b\u200b Durante un altercado en la vía pública en noviembre de 2015, su medio hermano, Diego Feinmann, fue asesinado de un disparo por el novio de su expareja.\u2

# Post-processing

In [None]:
def postprocess_response(response):
    """
    Extract the model's answer from the raw chain output.

    The raw output echoes the QA prompt and context before the answer, so only the text
    after the first "Helpful Answer:" marker is kept.

    :param str response: Raw text returned by the chain.
    :return: The text after the first marker, stripped of surrounding whitespace; the whole
             stripped response when the marker is absent.
    """
    # Match the marker without its trailing space so "Helpful Answer:\n..." is handled too;
    # the final strip() removes whatever whitespace follows the colon.
    marker = "Helpful Answer:"
    _, found, answer = response.partition(marker)
    if not found:
        return response.strip()

    return answer.strip()


In [None]:
# Keep only the text after the "Helpful Answer: " marker in the latest `response`
postprocess_response(response)

'As President Trump has been working tirelessly since taking office to make America great again by creating jobs, securing our borders, and making sure American values are upheld. My policies have led to an increase in economic growth and prosperity for all citizens, while also strengthening our national security. I am committed to continuing this progress and ensuring that America remains the greatest country in the world.'

## Twitter (X) Conversation Simulation


In [None]:
def simulate_twitter_conversation(db, num_turns=5):
    """
    Print a simulated Twitter-style exchange that alternates between Donald Trump and Elon Musk.

    :param db: The vectorstore handed to generate_response for retrieval.
    :param num_turns: How many replies to generate and print.
    """
    speakers = ['Donald Trump', 'Elon Musk']

    # Seed tweet that opens the thread
    prompt = "@realDonaldTrump What's your take on the future of electric cars? #ElectricRevolution"

    for turn in range(num_turns):
        # Even turns belong to the first speaker, odd turns to the second
        speaker = speakers[turn % 2]
        instruction = f"Respond as if you were {speaker} on Twitter. Keep it under 300 characters. Reply directly to the conversation."

        # Generate the reply, drop the echoed prompt, then cap it at 300 characters
        raw_reply = generate_response(db, f"{instruction} {prompt}")
        reply = postprocess_response(raw_reply)[:300].strip()

        print(f"{speaker}: {reply}\n")

        # The reply becomes the next prompt, addressed to whoever speaks next
        next_speaker = speakers[(turn + 1) % 2]
        prompt = f"@{next_speaker} {reply}"

# Run a 5-turn conversation simulation; each turn is printed as it is generated
simulate_twitter_conversation(vectorstore, num_turns=5)


[DEBUG] - response body: b'{"results":[],"matches":[{"id":"7b9adfdf-8ea5-4563-a565-68e537b7e51e","score":0.601753652,"values":[],"metadata":{"person":"Elon Musk","source":"twitter","text":"@TeslaOwnersEBay @Twitter What more could you ask for in a CEO!?"}},{"id":"ea71efab-7755-43a1-8777-fa96c22e5742","score":0.601753592,"values":[],"metadata":{"person":"elon_musk_tweets.csv","source":"twitter","text":"@TeslaOwnersEBay @Twitter What more could you ask for in a CEO!?"}}],"namespace":"","usage":{"readUnits":6}}' 
[DEBUG] - response body: b'{"results":[],"matches":[{"id":"8fd0c78a-1651-44da-bb4b-d34cbaec454b","score":0.687919319,"values":[],"metadata":{"person":"Elon Musk","source":"twitter","text":"@MuskUniversity And Tesla is getting it done"}},{"id":"96b79abc-7124-41d9-83c9-3238ad0d6fda","score":0.68791914,"values":[],"metadata":{"person":"elon_musk_tweets.csv","source":"twitter","text":"@MuskUniversity And Tesla is getting it done"}}],"namespace":"","usage":{"readUnits":6}}' 


Donald Trump: @TeslaOwnersEBay @Twitter @realDonaldTrump The future is bright for electric cars, and Elon Musk is leading the charge. With innovative technology & a commitment to sustainability, Tesla is at the forefront of this revolution. Let's support clean energy and embrace progress! #ElectricRevolution



[DEBUG] - response body: b'{"results":[],"matches":[{"id":"0d48d5fc-f72e-43d8-9749-3a8c53ce5907","score":0.630997956,"values":[],"metadata":{"person":"Elon Musk","source":"twitter","text":"@austinhinson_ @Tesla Note, I am putting a lot of time personally into advancing Tesla service to make it awesome.\xe2\x80\xa6 https://t.co/azYUlRxJqp"}},{"id":"2b8e0387-fb67-4a15-b380-1cbfb178dd8e","score":0.630997658,"values":[],"metadata":{"person":"elon_musk_tweets.csv","source":"twitter","text":"@austinhinson_ @Tesla Note, I am putting a lot of time personally into advancing Tesla service to make it awesome.\xe2\x80\xa6 https://t.co/azYUlRxJqp"}}],"namespace":"","usage":{"readUnits":6}}' 


Elon Musk: Thanks for your kind words, but I prefer to stay out of politics. At Tesla, we focus on advancing sustainable transportation through cutting-edge tech & relentless innovation. Join us in driving towards a better tomorrow! 🚀🌍🔋 #CleanEnergyNow



[DEBUG] - response body: b'{"results":[],"matches":[{"id":"0ccf7ce7-79a4-4023-ac21-d294c127744a","score":0.724882305,"values":[],"metadata":{"person":"elon_musk_tweets.csv","source":"twitter","text":"@Teslarati @JohnnaCrider1 Accelerating sustainable energy is our primary mission"}},{"id":"15e4b497-ff1e-4434-a62d-0cd20f3c67e8","score":0.724882126,"values":[],"metadata":{"person":"Elon Musk","source":"twitter","text":"@Teslarati @JohnnaCrider1 Accelerating sustainable energy is our primary mission"}}],"namespace":"","usage":{"readUnits":6}}' 


Donald Trump: .@ElonMusk is doing an incredible job leading @Tesla and pushing the boundaries of clean energy technology. His vision and determination are inspiring, and his company's impact will be felt for generations to come. Let's all do our part to support this important work! #SustainableFuture



[DEBUG] - response body: b'{"results":[],"matches":[{"id":"0ccf7ce7-79a4-4023-ac21-d294c127744a","score":0.658294141,"values":[],"metadata":{"person":"elon_musk_tweets.csv","source":"twitter","text":"@Teslarati @JohnnaCrider1 Accelerating sustainable energy is our primary mission"}},{"id":"15e4b497-ff1e-4434-a62d-0cd20f3c67e8","score":0.658293962,"values":[],"metadata":{"person":"Elon Musk","source":"twitter","text":"@Teslarati @JohnnaCrider1 Accelerating sustainable energy is our primary mission"}}],"namespace":"","usage":{"readUnits":6}}' 


Elon Musk: Thank you for your kind words, @[username]. Our team at Tesla shares your passion for accelerating the world's transition to sustainable energy. We won't stop until we've made a significant difference in mitigating climate change and creating a more resilient future for us all. Together, let's keep

Donald Trump: "@Teslarati @JohnnaCrider1 Accelerating sustainable energy is our primary mission" - that's the spirit! As President of this great country, I fully support companies like Tesla who prioritize sustainability over profits. Let's work together to make America the leader in clean tech innovation and com

