In [None]:
from huggingface_hub import snapshot_download
# Gemma is a gated model: visit https://huggingface.co/google/gemma-2b-it to accept the terms.
# NOTE(review): the repo id used to be "google/gemma2b-it" while the messages mixed
# "Gemma 2 9B IT" and "Gemma 2b IT". Gemma repos on the Hub are hyphenated, so the id
# is now "google/gemma-2b-it" to agree with the local folder name — confirm this is
# the intended checkpoint.
print("Downloading Gemma 2b IT...")

# `local_dir_use_symlinks` is deprecated and ignored by current huggingface_hub
# releases (it only triggers a warning); passing `local_dir` alone is sufficient.
snapshot_download(
    repo_id="google/gemma-2b-it",
    local_dir="LLMs/Gemma2b-it",
)

print("Gemma 2b IT download complete.")

  from .autonotebook import tqdm as notebook_tqdm


Downloading Gemma 2 9B IT...


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
Fetching 12 files: 100%|██████████| 12/12 [03:49<00:00, 19.09s/it]

Gemma 2b IT download complete.





In [None]:
# Download the Gemma 2 9B snapshot into LLMs/Gemma2-9b.
# NOTE(review): the repo id used to be "google/gemma2-9b" and the messages said
# "Gemma 2b". The id is now hyphenated the way Gemma repos are named on the Hub and
# the messages follow the repo id / local folder — confirm this is the intended model.
print("Downloading Gemma 2 9B...")

# `local_dir_use_symlinks` is deprecated and ignored by current huggingface_hub
# releases; passing `local_dir` alone is sufficient.
snapshot_download(
    repo_id="google/gemma-2-9b",
    local_dir="LLMs/Gemma2-9b",
)

print("Gemma 2 9B download complete.")

Downloading Gemma 2b...


Fetching 12 files: 100%|██████████| 12/12 [02:39<00:00, 13.27s/it]

Gemma 2b download complete.





In [None]:
# Download the microsoft/Phi-3-mini-128k-instruct snapshot into LLMs/Phi3-mini.
print("Downloading Phi-3-mini...")

# `local_dir_use_symlinks` is deprecated and ignored by current huggingface_hub
# releases; passing `local_dir` alone is sufficient.
snapshot_download(
    repo_id="microsoft/Phi-3-mini-128k-instruct",
    local_dir="LLMs/Phi3-mini",
)

print("Phi-3-mini download complete.")

Downloading Gemma 2b...


Fetching 19 files: 100%|██████████| 19/19 [01:01<00:00,  3.23s/it]

Gemma 2b download complete.





## Links

- Hugging Face page: https://huggingface.co/sentence-transformers
- Website: https://sbert.net/
- Models: https://sbert.net/docs/sentence_transformer/pretrained_models.html
- Image-Text: https://huggingface.co/sentence-transformers/clip-ViT-L-14
- CLIP is an image and text model that maps text and images to a shared vector space. For applications of the model, see the Image Search section of the SBERT.net documentation.

## Sentence Transformers

Characteristics of Sentence Transformer (a.k.a. bi-encoder) models:

- Calculates a fixed-size vector representation (embedding) given texts or images.
- Embedding calculation is often efficient, embedding similarity calculation is very fast.
- Applicable for a wide range of tasks, such as semantic textual similarity, semantic search, clustering, classification, paraphrase mining, and more.
- Often used as a first step in a two-step retrieval process, where a Cross-Encoder (a.k.a. reranker) model is used to re-rank the top-k results from the bi-encoder.


In [None]:
from sentence_transformers import SentenceTransformer
# Load the pretrained bi-encoder by its model name.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Inputs to embed.
sentences = ["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."]

# Embed every sentence in one call and show the shape of the result.
embeddings = model.encode(sentences)
print(embeddings.shape)

# Score each embedding against every other embedding (and itself).
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# Expected output:
# tensor([[1.0000, 0.6660, 0.1046],
#         [0.6660, 1.0000, 0.1411],
#         [0.1046, 0.1411, 1.0000]])

| Weight Class | Model Name                              | Dimensions | Description                                                                 |
|--------------|------------------------------------------|------------|-----------------------------------------------------------------------------|
| Lightweight  | all-MiniLM-L6-v2                         | 384        | Speed. The most downloaded model. Best for high-volume, real-time apps. |
| Lightweight  | all-MiniLM-L12-v2                        | 384        | Best Value. Adds 6 more layers than L6 for better accuracy with minimal latency penalty. |
| Midweight    | all-distilroberta-v1                    | 768        | Fast Semantic. A distilled version of RoBERTa; faster than MPNet but better than MiniLM. |
| Midweight    | all-mpnet-base-v2                       | 768        | Accuracy. The highest-performing general-purpose model in this repo. |
| Heavyweight  | clip-ViT-L-14                           | 768        | Multimodal Heavy. Handles both images and text. High memory usage compared to text-only. |
| Heavyweight  | paraphrase-multilingual-mpnet-base-v2   | 768        | Global Reach. Supports 50+ languages. Computationally heavy due to the large vocabulary. |


In [None]:
# Local folder that receives one sub-folder per embedding model. Kept in a single
# constant so the download path and the final message cannot drift apart.
EMBEDDINGS_DIR = "./Embeddings"

# Short local folder name -> Hugging Face repo id.
embedding_models = {
    # Lightweight (fast)
    "MiniLM-L6": "sentence-transformers/all-MiniLM-L6-v2",

    # Midweight (balanced)
    "MPNet-Base": "sentence-transformers/all-mpnet-base-v2",

    # Heavyweight (multilingual)
    "Multilingual-MPNet": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",

    # Multimodal (image + text)
    "CLIP-L14": "sentence-transformers/clip-ViT-L-14",
}

for name, repo_id in embedding_models.items():
    print(f"Downloading {name} ({repo_id}):")
    # `local_dir_use_symlinks` is deprecated and ignored by current huggingface_hub
    # releases; passing `local_dir` alone is sufficient.
    snapshot_download(
        repo_id=repo_id,
        local_dir=f"{EMBEDDINGS_DIR}/{name}",
    )

print(f"Models are stored in the '{EMBEDDINGS_DIR}/' folder.")

  from .autonotebook import tqdm as notebook_tqdm
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


--- Downloading MiniLM-L6 (sentence-transformers/all-MiniLM-L6-v2) ---


Fetching 30 files: 100%|██████████| 30/30 [00:42<00:00,  1.40s/it]


--- Downloading MPNet-Base (sentence-transformers/all-mpnet-base-v2) ---


Fetching 28 files: 100%|██████████| 28/28 [02:29<00:00,  5.33s/it]


--- Downloading Multilingual-MPNet (sentence-transformers/paraphrase-multilingual-mpnet-base-v2) ---


Fetching 27 files: 100%|██████████| 27/27 [06:48<00:00, 15.12s/it]


--- Downloading CLIP-L14 (sentence-transformers/clip-ViT-L-14) ---


Fetching 13 files: 100%|██████████| 13/13 [01:39<00:00,  7.63s/it]


Download complete. Models are stored in the './embeddings/' folder.





In [None]:
import torch

# Report whether PyTorch can see a CUDA device.
has_cuda = torch.cuda.is_available()
print(has_cuda)

In [None]:
import os

import torch
from PIL import Image
from sentence_transformers import util

# Folder holding the locally downloaded embedding models.
base_path = "./Embeddings"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so this
# cell does not crash on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- Lightweight model: two sentences scored against one query ---
model_light = SentenceTransformer(os.path.join(base_path, "MiniLM-L6"), device=device)
sentences = ["The cat sits outside", "A man is playing guitar"]
embeddings = model_light.encode(sentences)

score_light = util.cos_sim(embeddings, model_light.encode(["A person playing an instrument"]))
print(f"Lightweight Embedding Shape: {embeddings.shape}")
print(f"Lightweight Similarity Score: {score_light.tolist()}")

# --- Midweight model: a single query/sentence pair ---
model_mid = SentenceTransformer(os.path.join(base_path, "MPNet-Base"), device=device)
query = "A person playing an instrument"
query_emb = model_mid.encode(query)

score = util.cos_sim(query_emb, model_mid.encode(["A man is playing guitar"]))
print(f"Midweight Similarity Score: {score.item():.4f}")

# --- Multilingual model: Spanish sentence vs. English sentence ---
model_multi = SentenceTransformer(os.path.join(base_path, "Multilingual-MPNet"), device=device)
es_text = "El gato está afuera"
en_text = "The cat sits outside"

sim = util.cos_sim(model_multi.encode(es_text), model_multi.encode(en_text))
print(f"Cross-lingual Similarity: {sim.item():.4f}")

# --- CLIP model: one image scored against two captions ---
model_clip = SentenceTransformer(os.path.join(base_path, "CLIP-L14"), device=device)
# Open the image in a context manager so the file handle is closed once the
# image has been encoded.
with Image.open('two_dogs_in_snow.jpg') as img:
    img_emb = model_clip.encode(img)
text_emb = model_clip.encode(["dogs playing in winter", "a sunny beach"])

clip_scores = util.cos_sim(img_emb, text_emb)
print(f"CLIP Text-Image Scores: {clip_scores}")

Lightweight Embedding Shape: (2, 384)
Lightweight Similarity Score: [[0.05952562019228935], [0.6251088380813599]]
Midweight Similarity Score: 0.6149
Cross-lingual Similarity: 0.8960
CLIP Text-Image Scores: tensor([[0.2773, 0.1695]])


## INSTRUCTOR models

Some INSTRUCTOR models, such as hkunlp/instructor-large, are natively supported in Sentence Transformers. These models are special, as they are trained with instructions in mind. Notably, the primary difference between normal Sentence Transformer models and Instructor models is that the latter do not include the instructions themselves in the pooling step.

In [None]:
from sentence_transformers import SentenceTransformer

# Load the INSTRUCTOR checkpoint; the task instruction is supplied to encode() via `prompt`.
model = SentenceTransformer("hkunlp/instructor-large")

# Texts to embed.
paper_titles = [
    "Dynamical Scalar Degree of Freedom in Horava-Lifshitz Gravity",
    "Comparison of Atmospheric Neutrino Flux Calculations at Low Energies",
    "Fermion Bags in the Massive Gross-Neveu Model",
    "QCD corrections to Associated t-tbar-H production at the Tevatron",
]

# NOTE(review): the instruction names the "Medicine" domain while the inputs look
# like physics paper titles — confirm this is the intended instruction.
clustering_prompt = "Represent the Medicine sentence for clustering: "

embeddings = model.encode(paper_titles, prompt=clustering_prompt)
print(embeddings.shape)
# => (4, 768)

In [None]:
# Imported here so the cell does not silently depend on an import made in an
# unrelated earlier cell.
from sentence_transformers import util

model = SentenceTransformer("hkunlp/instructor-large")

# The query and the documents each get their own instruction.
query = "where is the food stored in a yam plant"
query_instruction = (
    "Represent the Wikipedia question for retrieving supporting documents: "
)

corpus = [
    'Yams are perennial herbaceous vines native to Africa, Asia, and the Americas and cultivated for the consumption of their starchy tubers in many temperate and tropical regions. The tubers themselves, also called "yams", come in a variety of forms owing to numerous cultivars and related species.',
    # The em dash after "mortgage loans" was previously mis-encoded as "â€”".
    "The disparate impact theory is especially controversial under the Fair Housing Act because the Act regulates many activities relating to housing, insurance, and mortgage loans—and some scholars have argued that the theory's use under the Fair Housing Act, combined with extensions of the Community Reinvestment Act, contributed to rise of sub-prime lending and the crash of the U.S. housing market and ensuing global economic recession",
    "Disparate impact in United States labor law refers to practices in employment, housing, and other areas that adversely affect one group of people of a protected characteristic more than another, even though rules applied by employers or landlords are formally neutral. Although the protected classes vary by statute, most federal civil rights laws protect based on race, color, religion, national origin, and sex as protected traits, and some laws include disability status and other traits as well.",
]
corpus_instruction = "Represent the Wikipedia document for retrieval: "

# Embed query and documents with their respective instructions, then compare
# the query against every document using cosine similarity.
query_embedding = model.encode(query, prompt=query_instruction)
corpus_embeddings = model.encode(corpus, prompt=corpus_instruction)
similarities = util.cos_sim(query_embedding, corpus_embeddings)
print(similarities)

# => tensor([[0.8835, 0.7037, 0.6970]])

## Cross Encoders:

### Usage

### Characteristics of Cross Encoder (a.k.a. reranker) models:

- Calculates a similarity score given pairs of texts.

- Generally provides superior performance compared to a Sentence Transformer (a.k.a. bi-encoder) model.

- Often slower than a Sentence Transformer model, as it requires computation for each pair rather than each text.

- Due to the previous 2 characteristics, Cross Encoders are often used to re-rank the top-k results from a Sentence Transformer model.


In [None]:
from sentence_transformers import CrossEncoder

# 1. Load a pre-trained CrossEncoder model
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

# 2. Predict scores for a pair of sentences
scores = model.predict([
    ("How many people live in Berlin?", "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
    ("How many people live in Berlin?", "Berlin is well known for its museums."),
])
# Show the pair scores; previously they were computed but never displayed.
# scores => array([ 8.607138 , -4.3200774], dtype=float32)
print(scores)

# 3. Rank a list of passages for a query
query = "How many people live in Berlin?"
passages = [
    "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.",
    "Berlin is well known for its museums.",
    "In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.",
    "The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.",
    "The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019",
    "An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.",
    "Berlin is subdivided into 12 boroughs or districts (Bezirke).",
    "In 2015, the total labour force in Berlin was 1.85 million.",
    "In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.",
    "Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.",
]
ranks = model.rank(query, passages)

# Print each score next to the passage it refers to, in the order returned by rank()
print("Query:", query)
for rank in ranks:
    print(f"{rank['score']:.2f}\t{passages[rank['corpus_id']]}")

# Expected output. Kept as comments rather than a bare triple-quoted string: a
# string literal as the last expression of a notebook cell is echoed as the
# cell's Out[] value.
#
# Query: How many people live in Berlin?
# 8.92    The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.
# 8.61    Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.
# 8.24    An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.
# 7.60    In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.
# 6.35    In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.
# 5.42    Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.
# 3.45    In 2015, the total labour force in Berlin was 1.85 million.
# 0.33    Berlin is subdivided into 12 boroughs or districts (Bezirke).
# -4.24   The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019
# -4.32   Berlin is well known for its museums.

#