# Langchain Demo with telegram data

In [None]:
import os
import pinecone
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv()) # read local .env file

from langchain.llms import OpenAIChat
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

## Using existing Pinecone index

In [None]:
# Index initialization
from semantic_search_generator import SemanticSearchGenerator

channel_id = "@runonflux"
model_name = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
INDEX_NAME = "telegram-embeddings"

generator = SemanticSearchGenerator(model_name)

# initialize connection to pinecone (get API key at app.pinecone.io)
pinecone.init(
    api_key=os.environ["PINECONE_APIKEY"],
    environment="us-west1-gcp"
)

# connect to index
index = pinecone.Index(INDEX_NAME)

In [None]:
from langchain.chains import LLMChain

prompt_template = """Use the chat messages (not sorted in any particular order) below to answer the given user query:
    messages_list: {messages}
    query: {query}
"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["messages", "query"])
llm = OpenAIChat(temperature=0)
chain = LLMChain(llm=llm, prompt=PROMPT)


In [None]:
def search_results(query, limit=50):
    query_emb = generator.encode_messages(query)

    results = index.query(
      vector=query_emb.tolist(),
      top_k=limit,
      include_values=False,
      include_metadata=True
    )

    messages = []
    for item in results["matches"]:
        # print(f"\nscore {item['score']}")
        # print(item["metadata"]["clean_message"])
        messages.append(item["metadata"]["clean_message"])

    return messages
    

In [None]:
query = "what is good about this project?"
messages = search_results(query)
inputs = [{"message": msg} for _, msg in zip(range(len(messages)), messages)]
result = chain.run({"messages":inputs, "query":query})
print(result)

In [None]:
query = "what do users complain about this project?"
messages = search_results(query)
inputs = [{"index": i, "message": msg} for i, msg in zip(range(len(messages)), messages)]
result = chain.run({"messages":inputs, "query":query})
print(result)

## Using Chroma DB

In [2]:
# import
import pandas as pd
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.document_loaders import DataFrameLoader
from langchain.vectorstores import Chroma

# load telegram messages
channel_id = "@runonflux"
df = pd.read_csv(f"notebooks/data/{channel_id}.csv")

df_loader = DataFrameLoader(df, page_content_column="clean_message")
docs = df_loader.load()

In [None]:
# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# comput embeddings and load to chroma
db = Chroma.from_documents(docs, embedding_function)

In [5]:
# time to query
def search_db(db_client, query: str, top_k = 100):
    query = "use cases"
    docs = db_client.similarity_search(query, k=top_k)

    # print results
    for item in docs[:10]:
        print(f"- {item.page_content}")

### Peristent Chroma client

In [22]:
import chromadb

persistent_client = chromadb.PersistentClient()

CLEAR_COLLECTION= True

if CLEAR_COLLECTION:
    try:
        persistent_client.delete_collection(f"embeddings_collection_{channel_id[1:]}")
    except Exception as e:
        print("unable to delete ", e)

unable to delete  Collection embeddings_collection_runonflux does not exist.


In [23]:
# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

client = Chroma.from_documents(
    docs, 
    embedding_function, 
    client=persistent_client, 
    collection_name = f"embeddings_collection_{channel_id[1:]}"
)

In [24]:
query = "use cases"
search_db(client, query)

- Okay, so where do you run into problems?
- Can you please write in simple words the usecases of flux coin ? Services which requires flux token ?
- Harness the power of decentralisation Explore RunOnFlux use cases Let's read onhttpstwitter.comHouseofChimerastatus1709901477689364513
- in addition to securing the network you also provide a real world case for solving problems  eg genome sequencing, graphics rendering, etc...
- Dont use discord. Whats the drama? Fill us in
- plz let me know if you are in need of my skills.
- What are the $FLUX usecases?Let's find out httpstwitter.comHouseofChimerastatus1671902756808818690
- Do you have any learning materials in this area? I don't quite understand how to operate it specifically.
- What we need is one major user to take excess capacity
- proof of concept is like the first iteration  will it work the way its planned  are there any changes needed  testing the concept is good


## Using OpenAI embeddings

In [27]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DataFrameLoader
from IPython.display import display, Markdown
import pandas as pd

In [28]:
channel_id = "@runonflux"
df = pd.read_csv(f"notebooks/data/{channel_id}.csv")
df_loader = DataFrameLoader(df, page_content_column="clean_message")
docs = df_loader.load()

In [29]:
import chromadb

persistent_client = chromadb.PersistentClient()

CLEAR_COLLECTION= True

if CLEAR_COLLECTION:
    try:
        persistent_client.delete_collection(f"openai_embeddings_{channel_id[1:]}")
    except Exception as e:
        print("unable to delete ", e)

unable to delete  Collection openai_embeddings_runonflux does not exist.


In [30]:
embeddings_function = OpenAIEmbeddings()

client = Chroma.from_documents(
    docs, 
    embedding_function, 
    client=persistent_client, 
    collection_name = f"openai_embeddings_{channel_id[1:]}"
)

In [31]:
query = "use cases"
search_db(client, query)

- Okay, so where do you run into problems?
- Can you please write in simple words the usecases of flux coin ? Services which requires flux token ?
- Harness the power of decentralisation Explore RunOnFlux use cases Let's read onhttpstwitter.comHouseofChimerastatus1709901477689364513
- in addition to securing the network you also provide a real world case for solving problems  eg genome sequencing, graphics rendering, etc...
- should i just use this instead then ?
- Dont use discord. Whats the drama? Fill us in
- plz let me know if you are in need of my skills.
- What are the $FLUX usecases?Let's find out httpstwitter.comHouseofChimerastatus1671902756808818690
- Do you have any learning materials in this area? I don't quite understand how to operate it specifically.
- What we need is one major user to take excess capacity


In [32]:
qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(), chain_type="stuff", retriever=client.as_retriever(search_type="mmr", search_kwargs={'fetch_k': 30}), return_source_documents=True)

In [36]:
query = "What are some use cases of flux?"
response = qa({"query":query})

In [38]:
response["result"]

'Some potential use cases of Flux include:\n\n1. Decentralized Finance (DeFi): Flux can be used for various DeFi applications such as lending, borrowing, and decentralized exchanges.\n\n2. Gaming: Flux can enable in-game economies, item ownership, and trading of virtual assets.\n\n3. Supply Chain Management: Flux can be utilized to track and verify the provenance of goods, ensuring transparency and efficiency in supply chain operations.\n\n4. Content Monetization: Flux can provide a platform for creators to monetize their digital content, such as music, videos, and art, through decentralized marketplaces.\n\n5. Decentralized Governance: Flux can facilitate decentralized decision-making processes, allowing token holders to participate in governance and voting.\n\nThese are just some potential use cases, and the actual applications of Flux may expand as the ecosystem evolves.'