### Importing Packages

In [None]:
from langchain_openai import OpenAI, ChatOpenAI, OpenAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline, HuggingFaceEmbeddings
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
import os

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the provider API keys used by the
# model clients in this notebook come from — confirm.
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

### LLMs

In [2]:
# Completion-style LLM client (as opposed to the chat-model clients).
llm = OpenAI(model="gpt-3.5-turbo-instruct")

In [None]:
# Send one prompt to the LLM; the bare `result` on the last line makes the
# returned value the cell's displayed output.
result = llm.invoke("What is the capital of France?")
result

### ChatModels

In [3]:
# OpenAI chat model: moderate randomness, completion length capped at 1000 tokens.
model = ChatOpenAI(max_completion_tokens=1000, temperature=0.7, model="gpt-4")

In [None]:
# Invoke the chat model with a single prompt and print the `content`
# attribute of the returned message.
result = model.invoke("Write a five lines of poetry about the coding")
print(result.content)

In [4]:
# Gemini chat model: low temperature, output capped at 100 tokens.
# Rebinds `model`, so later cells talk to Gemini instead of the previous client.
model = ChatGoogleGenerativeAI(max_output_tokens=100, temperature=0.2, model="gemini-1.5-flash")

In [5]:
# Invoke whichever chat model `model` is currently bound to and print the
# text content of the reply.
result = model.invoke("Write a five lines of poetry about the machine learning")
print(result.content)

From data's depths, a pattern's gleam,
A learning mind, a coded dream.
Algorithms weave, a subtle art,
Predicting futures, playing a part.
Machines evolve, a future bright.


In [6]:
# Hugging Face endpoint client for the Llama 3.1 instruct model.
llm = HuggingFaceEndpoint(task="text-generation", repo_id="meta-llama/Llama-3.1-8B-Instruct")

# Wrap the endpoint so it is driven through the chat-model interface.
model = ChatHuggingFace(llm=llm)

In [7]:
# Invoke the chat model currently bound to `model` and print the text
# content of the reply.
result = model.invoke("Write a five lines of poetry about the data science")
print(result.content)

In realms of code, where data reigns,
We seek the truth, through endless domains.
With algorithms keen, we mine the gold,
Of insights hidden, yet to be told.
In data's depths, our stories unfold.


In [8]:
# Directory the Hugging Face libraries use for cached downloads.
# setdefault keeps an HF_HOME that is already defined (shell, .env file) instead
# of overwriting it with this machine-specific default path.
# NOTE(review): Hugging Face libraries read HF_HOME when they are first
# imported; langchain_huggingface is imported earlier in this notebook, so
# confirm this assignment still takes effect (it may need to precede the imports).
os.environ.setdefault('HF_HOME', "D:/huggingface_cache")

In [None]:
# Build a transformers text-generation pipeline for TinyLlama and expose it as
# a LangChain LLM.
llm = HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    pipeline_kwargs={
        # Sampling has to be switched on explicitly; with the default greedy
        # decoding the `temperature` setting is ignored.
        "do_sample": True,
        "temperature": 0.7,
        "max_new_tokens": 1000,
    },
)

# Wrap the pipeline so it is driven through the chat-model interface.
model = ChatHuggingFace(llm=llm)

In [None]:
# Invoke the chat model currently bound to `model` and print the text
# content of the reply.
result = model.invoke("Write a five lines of poetry about the data structures")
print(result.content)

### Embedding Models

In [9]:
# OpenAI embedding client; vectors are requested with 32 dimensions.
embedding = OpenAIEmbeddings(dimensions=32, model="text-embedding-3-large")

In [None]:
# Embed a single piece of text and print the resulting vector.
result = embedding.embed_query("Abdullah is good developer")
# print() already applies str() to its argument.
print(result)

In [None]:
# Texts to embed in one batch call.
documents = [
    "Abdullah is a good developer",
    "He is working on a new project",
    "The project is about AI and machine learning",
    "He loves to code and solve problems",
    "His favorite programming language is Python"
]

# embed_documents returns one vector per input text.
results = embedding.embed_documents(documents)
# Print the batch result; the original printed `result`, the stale
# single-query embedding left over from the previous cell.
print(str(results))

In [None]:
# Switch the embedding client to a sentence-transformers model.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Texts to embed with the client currently bound to `embedding`.
documents = [
    "Abdullah is a good developer",
    "He is working on a new project",
    "The project is about AI and machine learning",
    "He loves to code and solve problems",
    "His favorite programming language is Python",
]

# One vector per document; print() stringifies the list on its own.
vectors = embedding.embed_documents(documents)
print(vectors)

### Similarity Search

In [None]:
# Embedding client for the similarity search: 300-dimensional OpenAI vectors.
embedding = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=300,
)

In [None]:
# Small corpus to search over: one sentence per cricketer.
documents = [
    "Virat Kohli is an Indian cricketer known for his aggressive batting and leadership.",
    "MS Dhoni is a former Indian captain famous for his calm demeanor and finishing skills.",
    "Sachin Tendulkar, also known as the 'God of Cricket', holds many batting records.",
    "Rohit Sharma is known for his elegant batting and record-breaking double centuries.",
    "Jasprit Bumrah is an Indian fast bowler known for his unorthodox action and yorkers."
]

# Embed every document once; entries line up positionally with `documents`.
doc_embeddings = embedding.embed_documents(documents)

In [None]:
# Free-text question to match against the corpus.
query = 'tell me about bumrah'

# Embed the query with the same client used for the documents so the vectors
# are comparable.
query_embedding = embedding.embed_query(query)

In [None]:
# cosine_similarity yields one row per query vector; a single query is passed,
# so unpack that one row of per-document scores.
(scores,) = cosine_similarity([query_embedding], doc_embeddings)

In [None]:
# Best match is the (position, similarity) pair with the largest similarity.
# max() finds it in one pass; sorting the whole list just to read its last
# element is unnecessary. On exact ties max() returns the earliest document.
index, score = max(enumerate(scores), key=lambda pair: pair[1])

In [None]:
# Report the query, the best-matching document, and its similarity score.
print(query, documents[index], sep="\n")
print("Similarity Score is:", score)