<a href="https://colab.research.google.com/github/anilkumarKanasani/LangchainSideProjects/blob/main/3_Find_similar_things.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installation

In [117]:
!pip install langchain -q
!pip install Openai -q
!pip install tiktoken -q
!pip install faiss-cpu -q
!pip install environs -q
!pip install streamlit -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m67.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [118]:
from environs import Env

# Load the variables defined in the local "./env" file into os.environ.
# NOTE(review): the file is named "env" (no leading dot) — confirm this is intended.
env = Env()
env.read_env(path="./env")

## Simple Embedding Trials

In [100]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding client shared by the experiments in this section.
# Presumably picks up its API credentials from the environment — verify.
emb_model_instance = OpenAIEmbeddings()

In [97]:
# Embed a single sentence to see what an embedding looks like.
our_text = "Hello buddy, How are you ?"
text_embedding = emb_model_instance.embed_query(our_text)

# Preview: the first five components and the total vector length.
(text_embedding[:5], len(text_embedding))

([-0.0007375669341444657,
  -0.0011846385375510193,
  0.004039422897024591,
  -0.04515867854349896,
  -0.021880208484231264],
 1536)

In [109]:
# Reference vocabulary that the query word is compared against.
cluster_of_words = [
    "School", "College", "KinderGarden", "University",
    "circket", "Football", "Tennis", "Basketball",
    "Apple", "Orange", "Banana",
]

# One embedding per word, in the same order as cluster_of_words.
cluster_of_embed = [
    emb_model_instance.embed_query(word) for word in cluster_of_words
]

In [110]:
# Sanity check on the first word's vector: leading components and length.
first_vector = cluster_of_embed[0]
(first_vector[:5], len(first_vector))

([0.005531371094329331,
  0.009264198113213265,
  -0.004652460096353183,
  -0.02226122277964918,
  -0.02296706656959415],
 1536)

In [115]:
# Query term that will be scored against every word in the cluster.
our_new_word = "Black Board"
our_new_embed = emb_model_instance.embed_query(text=our_new_word)

In [116]:
import numpy as np


def cosine_similarity(a, b):
    """Return the cosine similarity of two equal-length numeric vectors.

    Computed locally with NumPy because `openai.embeddings_utils` is not
    shipped with openai>=1.0, and the install cell does not pin a version.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))


# Score every reference word against the query word.
# The loop variable is named `word_embed` so it does not shadow any imported name.
for word, word_embed in zip(cluster_of_words, cluster_of_embed):
    score = round(cosine_similarity(word_embed, our_new_embed), 2)
    print(word, " has a scrore of ", str(score), " with ", our_new_word)

School  has a scrore of  0.82  with  Black Board
College  has a scrore of  0.81  with  Black Board
KinderGarden  has a scrore of  0.79  with  Black Board
University  has a scrore of  0.8  with  Black Board
circket  has a scrore of  0.78  with  Black Board
Football  has a scrore of  0.79  with  Black Board
Tennis  has a scrore of  0.79  with  Black Board
Basketball  has a scrore of  0.81  with  Black Board
Apple  has a scrore of  0.78  with  Black Board
Orange  has a scrore of  0.78  with  Black Board
Banana  has a scrore of  0.8  with  Black Board


## Real Time Embeddings App

### Preparing Vector DB

In [121]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Embedding client used to vectorise every loaded document.
emb_model_instance = OpenAIEmbeddings()

# Read the CSV with its column named "Words".
# Presumably the file holds one word per row without a header line — verify.
loader = CSVLoader(
    file_path="data/myData.csv",
    csv_args={
        "delimiter": ",",
        "quotechar": '"',
        "fieldnames": ["Words"],
    },
)
data = loader.load()

# Build the FAISS index from the documents and persist it to "faiss_index".
db = FAISS.from_documents(data, emb_model_instance)
db.save_local("faiss_index")

### Actual APP

In [128]:
%%writefile app.py
import streamlit as st
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Preparing the model instance
emb_model_instance = OpenAIEmbeddings()


st.set_page_config(page_title="Educate Kids", page_icon=":robot:")
st.header("Hey, Ask me some thing, I will give you similar words ")

new_db = FAISS.load_local("faiss_index", emb_model_instance)


user_input = st.text_input("You : ", key=input)
submit = st.button("Generate similar Words")

if submit:
  similar_words = new_db.similarity_search(user_input)
  for wrd in similar_words[:2]:
    st.text(wrd.page_content)

Overwriting app.py


In [None]:
# Start the Streamlit app in the background (`&`), then open a localtunnel
# on port 8501 — presumably the port Streamlit serves on; confirm.
!streamlit run app.py & npx localtunnel --port 8501