### Creating First Embeddings


In [1]:
### Huggingface And OpenAI Models

from  langchain_huggingface import HuggingFaceEmbeddings

## Inialize a simple Embedding model(no API key needed!)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [5]:
##  create first embeddings
text = "Hello, I am learning about embedding"

embedding = embeddings.embed_query(text)
print(f"Text : {text}")
print(f"Embedding length: {len(embedding)}")
print(embedding)


Text : Hello, I am learning about embedding
Embedding length: 384
[-0.012541021220386028, -0.08428976684808731, -0.003736011451110244, -0.0036944840103387833, 0.029054399579763412, 0.061269763857126236, 0.012031006626784801, 0.017726929858326912, 0.04343545809388161, -0.028131818398833275, 0.02011863887310028, 0.07650147378444672, 0.045305728912353516, 0.0022559911012649536, -0.05790175497531891, 0.014729894697666168, 0.052186448127031326, 0.05656284838914871, -0.09237099438905716, -0.02778216265141964, -0.028742702677845955, -0.05491914600133896, 0.011186933144927025, -0.10679484158754349, 0.013341332785785198, -0.01886415109038353, -0.0051790750585496426, 0.07339488714933395, 0.10817254334688187, -0.07271407544612885, 0.05805009976029396, -0.03648953139781952, -0.0010906701209023595, 0.0577571839094162, -0.07553373277187347, 0.11141461133956909, 0.02548898197710514, -0.013313240371644497, -0.07460542023181915, -0.010939469560980797, 0.01912962831556797, 0.01431373693048954, -0.023572

In [7]:
sentences = [
    "The cat sat on the mat",
    "A feline rested on the rug",
    "The dog played in the yard",
    "I love programming in python",
    "Python is my favourite programming language"
]

embedding_sentence = embeddings.embed_documents(sentences)

print(embedding_sentence[0])
print(embedding_sentence[1])


[0.1304018646478653, -0.011870092712342739, -0.028117021545767784, 0.05123866721987724, -0.05597447603940964, 0.030191542580723763, 0.0301612988114357, 0.024698395282030106, -0.018370550125837326, 0.05876676365733147, -0.02495318278670311, 0.0601542592048645, 0.039831746369600296, 0.033230509608983994, -0.061311349272727966, -0.049373116344213486, -0.05486350879073143, -0.04007606953382492, 0.056429121643304825, 0.039156582206487656, -0.03473709151148796, -0.013247676193714142, 0.031966209411621094, -0.06349921226501465, -0.06017857789993286, 0.07823451608419418, -0.02830391190946102, -0.04744282737374306, 0.04035929590463638, -0.006630908697843552, -0.0667409598827362, -0.004191359970718622, -0.025311656296253204, 0.053341712802648544, 0.01742810197174549, -0.09792360663414001, 0.006061285734176636, -0.06524165719747543, 0.045572616159915924, 0.023641815409064293, 0.0765848457813263, -0.010264340788125992, -0.004076807294040918, -0.06232285499572754, 0.033705271780490875, 0.0186611302

In [8]:


# Popular models comparison
models = {
    "all-MiniLM-L6-v2": {
        "size": 384,
        "description": "Fast and efficient, good quality",
        "use_case": "General purpose, real-time applications"
    },
    "all-mpnet-base-v2": {
        "size": 768,
        "description": "Best quality, slower than MiniLM",
        "use_case": "When quality matters more than speed"
    },
    "all-MiniLM-L12-v2": {
        "size": 384,
        "description": "Slightly better than L6, bit slower",
        "use_case": "Good balance of speed and quality"
    },
    "multi-qa-MiniLM-L6-cos-v1": {
        "size": 384,
        "description": "Optimized for question-answering",
        "use_case": "Q&A systems, semantic search"
    },
    "paraphrase-multilingual-MiniLM-L12-v2": {
        "size": 384,
        "description": "Supports 50+ languages",
        "use_case": "Multilingual applications"
    }
}

print("üìä Popular Open Source Embedding Models:\n")
for model_name, info in models.items():
    print(f"Model: sentence-transformers/{model_name}")
    print(f"  üìè Embedding size: {info['size']} dimensions")
    print(f"  üìù Description: {info['description']}")
    print(f"  üéØ Use case: {info['use_case']}\n")


üìä Popular Open Source Embedding Models:

Model: sentence-transformers/all-MiniLM-L6-v2
  üìè Embedding size: 384 dimensions
  üìù Description: Fast and efficient, good quality
  üéØ Use case: General purpose, real-time applications

Model: sentence-transformers/all-mpnet-base-v2
  üìè Embedding size: 768 dimensions
  üìù Description: Best quality, slower than MiniLM
  üéØ Use case: When quality matters more than speed

Model: sentence-transformers/all-MiniLM-L12-v2
  üìè Embedding size: 384 dimensions
  üìù Description: Slightly better than L6, bit slower
  üéØ Use case: Good balance of speed and quality

Model: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
  üìè Embedding size: 384 dimensions
  üìù Description: Optimized for question-answering
  üéØ Use case: Q&A systems, semantic search

Model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
  üìè Embedding size: 384 dimensions
  üìù Description: Supports 50+ languages
  üéØ Use case: Multilingual appl