In [12]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# Two differently worded sentences that are compared against each other below.
sentences = ['The new course is awesome', 'This recent course is so good']

# `model` is reused by the cells further down, so it stays a notebook-level name.
model = SentenceTransformer('Supabase/gte-small')

# One embedding per input sentence, in the same order as `sentences`.
embeddings = model.encode(sentences)

# Cosine similarity between the first and the second embedding.
similarity = cos_sim(embeddings[0], embeddings[1])
print(similarity)

No sentence-transformers model found with name Supabase/gte-small. Creating a new one with MEAN pooling.


tensor([[0.8980]])


In [23]:
# Two parallel lists: the entry at position k of the first list is compared
# with the entry at position k of the second list.
sentences1 = ["The cat sits outside", "A man is playing guitar", "The new movie is awesome"]
sentences2 = ["The dog plays in the garden", "A woman watches TV", "The recent movie is good"]

# Encode each list in a single call, keeping the results as tensors.
embeddings1 = model.encode(sentences1, convert_to_tensor=True)
embeddings2 = model.encode(sentences2, convert_to_tensor=True)

# Similarity of every sentence in list 1 against every sentence in list 2;
# only the [k][k] entries are read below.
cosine_scores = cos_sim(embeddings1, embeddings2)

# Report each aligned pair together with its score.
row_template = "{} \t\t {} \t\t Score: {:.4f}"
for idx, first in enumerate(sentences1):
    second = sentences2[idx]
    score = cosine_scores[idx][idx]
    print(row_template.format(first, second, score))

The cat sits outside 		 The dog plays in the garden 		 Score: 0.8200
A man is playing guitar 		 A woman watches TV 		 Score: 0.7016
The new movie is awesome 		 中文是这个 		 Score: 0.7315


In [4]:
# Sample sentences to embed.
sentences = [
    "This framework generates embeddings for each input sentence",
    "Sentences are passed as a list of string.",
    "The quick brown fox jumps over the lazy dog."
]

# model.encode() takes the whole list and yields one embedding per sentence.
embeddings = model.encode(sentences)

# Show every sentence next to its embedding, separated by a blank line.
for idx in range(len(sentences)):
    print("Sentence:", sentences[idx])
    print("Embedding:", embeddings[idx])
    print("")

Sentence: This framework generates embeddings for each input sentence
Embedding: [-4.56867397e-01 -6.03742227e-02  2.77018119e-02 -1.49324179e-01
 -2.58534886e-02  3.99035126e-01 -1.55862886e-02  2.39103317e-01
  9.95152146e-02  1.49657950e-01 -3.45252067e-01 -4.33488905e-01
  6.84537470e-01  2.49792576e-01  3.92542094e-01  3.05619299e-01
 -2.38010809e-01  3.97295952e-01 -4.60436225e-01 -1.37540206e-01
  5.90817750e-01 -2.84304321e-01  1.05978318e-01 -5.92266202e-01
 -1.59350246e-01  4.13091660e-01 -1.64931923e-01 -7.34146163e-02
 -3.01011503e-01 -1.89854681e+00  2.36650500e-02 -5.51726103e-01
  7.99842179e-01 -4.33843769e-02 -2.60188311e-01 -1.74996033e-01
 -4.91537154e-01  4.09644246e-01 -1.80871114e-01  2.30171144e-01
  2.36194909e-01  2.71462560e-01  2.17981953e-02 -6.09191358e-01
 -2.04823941e-01 -5.56082666e-01 -6.08014047e-01  7.78803442e-05
 -8.24697390e-02 -2.05188245e-01 -7.09771439e-02 -4.21118379e-01
 -9.76332128e-02  8.62646103e-02  2.12224007e-01  1.12527400e-01
  2.59943

In [5]:
from sentence_transformers.util import semantic_search

# Small corpus to search over.
docs = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
    "A woman is playing violin.",
    "Two men pushed carts through the woods.",
    "A man is riding a white horse on an enclosed ground.",
    "A monkey is playing drums.",
    "A cheetah is running behind its prey.",
]

# Embed the corpus once; the query is embedded separately with the same model.
docs_embeddings = model.encode(docs, convert_to_tensor=True)

query = "tell me about music"
query_embedding = model.encode(query, convert_to_tensor=True)

# Keep only the two best matches. The result is indexed per query below, and
# each hit is read through its 'corpus_id' (index into `docs`) and 'score' keys.
hits = semantic_search(query_embedding, docs_embeddings, top_k=2)

# Only one query was submitted, so its hits are at position 0.
for hit in hits[0]:
    print(docs[hit['corpus_id']], "(Score: %.4f)" % hit['score'])

A woman is playing violin. (Score: 0.7764)
A monkey is playing drums. (Score: 0.7570)


In [6]:
import tiktoken

def split_large_text(large_text, max_tokens, encoding=None):
    """Split ``large_text`` into chunks of at most ``max_tokens`` tokens each.

    The text is tokenized, the token sequence is cut into consecutive windows
    of ``max_tokens`` tokens (the last window may be shorter), and every
    window is decoded back to a string. Trailing spaces, periods, commas and
    semicolons are stripped from each decoded chunk.

    Chunk boundaries follow token positions only, so a boundary can fall in
    the middle of a sentence or word.
    NOTE(review): with a byte-level tokenizer a boundary may presumably also
    fall inside a multi-byte character -- confirm how ``decode`` handles that.

    Args:
        large_text: The string to split.
        max_tokens: Maximum number of tokens per chunk; a positive integer.
        encoding: Optional tokenizer object exposing ``encode(str)`` returning
            a sliceable token sequence and ``decode(tokens)`` returning a
            string. When omitted, the tiktoken ``cl100k_base`` encoding is
            used, exactly as before this parameter existed.

    Returns:
        A list of chunk strings in document order; an empty list when the
        text produces no tokens. A chunk consisting solely of stripped
        characters is returned as an empty string.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    # A non-positive limit cannot describe a chunk size; fail loudly instead
    # of silently emitting one-token chunks.
    if max_tokens < 1:
        raise ValueError(
            f"max_tokens must be a positive integer, got {max_tokens!r}"
        )

    enc = tiktoken.get_encoding("cl100k_base") if encoding is None else encoding
    tokenized_text = enc.encode(large_text)

    # Walk the token sequence in strides of max_tokens; slicing takes care of
    # the shorter final window and of the empty-input case.
    return [
        enc.decode(tokenized_text[start:start + max_tokens]).rstrip(' .,;')
        for start in range(0, len(tokenized_text), max_tokens)
    ]

## Why use tokens?

> By breaking words into smaller parts (tokens), LLMs can better handle new or unusual words by understanding their building blocks. It also helps the model grasp the nuances of language, such as different word forms and contextual meanings.

[source](https://kelvin.legal/understanding-large-language-models-words-versus-tokens/#:~:text=By%20breaking%20words%20into%20smaller,word%20forms%20and%20contextual%20meanings.)

In [7]:
import tiktoken

# Demo sentence: compares its whitespace-separated word count with its token count.
sent = "If we split a text by number of characters, it is not obvious how many tokens these chunks will be."

word_count = len(sent.split())
print(word_count)

enc = tiktoken.get_encoding("cl100k_base")
encoded = enc.encode(sent)
print(len(encoded))

# Decode every token id on its own to show the raw bytes behind each token.
tokens = list(map(enc.decode_single_token_bytes, encoded))
print(tokens)
print(len(tokens))

# Decode the full id sequence back to text and count its words again.
decoded = enc.decode(encoded)
decoded_word_count = len(decoded.split())
print(decoded_word_count)

# Last expression of the cell: displayed as the cell's output value.
decoded


20
22
[b'If', b' we', b' split', b' a', b' text', b' by', b' number', b' of', b' characters', b',', b' it', b' is', b' not', b' obvious', b' how', b' many', b' tokens', b' these', b' chunks', b' will', b' be', b'.']
22
20


'If we split a text by number of characters, it is not obvious how many tokens these chunks will be.'

In [24]:
import google.generativeai as genai
import time
import os

import getpass

# Use a key already present in the GOOGLE_API_KEY environment variable so the
# notebook can run end-to-end without stopping for input; otherwise ask for it
# with a hidden, clearly labelled prompt. The key is never written into the notebook.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Google API key: ")


In [25]:

# Register the API key collected in the previous cell with the client library.
genai.configure(api_key=GOOGLE_API_KEY)

# Generative model handle used by the generation cells below.
gemini_model_name = 'gemini-pro'
gen_model = genai.GenerativeModel(gemini_model_name)

doc = """The new iPad Pro

apple.com
https://www.apple.com
Powerful AI capabilities — Thinpossible. All-new pro design. The thinnest Apple product ever. Order now. Introducing M4, the next generation of Apple silicon. Next-level performance. iPadOS 17. M4 chip. Silver.
Tech specs
Extreme performance by design. Discover more.
Compare iPad models
Which iPad is right for you? Learn more.
Order the new iPad Pro
Thrill. Sleeker. Order the new iPad Pro now.
Why iPad
Yes, it does that. And then some. Shop iPad.
Apple Trade In
Trade in. Upgrade. Save. It's a win-win-win. Learn more.

iPad Pro

Apple
https://www.apple.com › ipad-pro
new ipad pro features from www.apple.com
iPad Pro features the M4 chip, 11-inch or 13-inch Ultra Retina XDR display, Wi-Fi 6E, 5G, and support for Apple Pencil Pro.
$83.25
‎Buy · ‎Tech Specs · ‎Why iPad · ‎Learn more
People also ask
What does new iPad Pro do?
What cool things does the iPad Pro do?
Is the new iPad Pro OLED?
Is there a new iPad coming out in 2024?
Feedback

Apple unveils stunning new iPad Pro with M4 chip and ...

Apple
https://www.apple.com › newsroom › 2024/05 › apple-u...
8 hours ago — M4 features an entirely new display engine to enable the precision, color, and brightness of the Ultra Retina XDR display. With a new CPU, a ...
Pros and cons: Stunningly thin and light design ⋅ Incredibly precise contrast ⋅ Brilliant colors ⋅ View full list
‎Pro Cameras · ‎Apple Pencil Pro · ‎Powerful Ipados Features
Discussions and forums
Apple unveils stunning new iPad Pro with the world's ... - Reddit

Reddit  ·  
r/apple · 870+ comments · 7h ago
Apple Unveils Stunning New iPad Pro With the World's Most ...

TechPowerUp  ·  
7h ago
New iPad Pro may actually be powered by the M4 chip ...

Reddit  ·  
r/apple · 440+ comments · 1w ago
See more
Videos

8:43
M4 iPad Pro Event: Everything Apple Announced in 8 Minutes!
YouTube · MacRumors
6 hours ago

3:28
Apple releases new iPad Pro with M4 chip, teasing AI features
Yahoo Finance · Josh Lipton
3 hours ago

5:37
Apple Reveals iPad Pros With M4 Chips - Video
CNET
7 hours ago
Feedback
View all

New iPad Pro, Air: Prices, release dates, features ...

USA Today
https://www.usatoday.com › story › tech › 2024/05/07
8 hours ago — The new iPad Pro will feature a display using two OLED panels, which Apple is calling Tandem OLED. It also includes the M4 chip, a jump from the ...
Pros and cons: Faster WiFi ⋅ Quarter-pound lighter than its predecessor ⋅ Weighs less than a pound ⋅ View full list
Top stories

Apple
Apple unveils stunning new iPad Pro with M4 chip and Apple Pencil Pro
8 hours ago

USA Today
New iPad Pro, Air: Prices, release dates, features announced by Apple
8 hours ago

CNN
We got our hands on the 2024 iPad Pro and iPad Air — here’s what we think
2 hours ago
More news

iPad Pro - Technical Specifications

Apple
https://www.apple.com › iPad › iPad Pro
new ipad pro features from www.apple.com
11-inch iPad Pro and 13-inch iPad Pro Technical Specifications · 12MP Wide camera, ƒ/1.8 aperture · Digital zoom up to 5x · Five‑element lens · Adaptive True Tone ...

A new iPad Pro is coming: Here are four things to expect

9to5Mac
https://9to5mac.com › new-ipad-pro-features-rumors
new ipad pro features from 9to5mac.com
Apr 29, 2024 — A new iPad Pro is coming: Here are four things to expect · OLED screens · New Magic Keyboard · M4 chip · A much thinner design · Wrap up · Featured.
Pros and cons: Significantly higher brightness ⋅ Better contrast ⋅ View full list

iPad Pro 2024 release date, price, specs and upgrades

Tom's Guide
https://www.tomsguide.com › news › ipad-pro-2024
In this new $129 stylus, you've got a wealth of new features, including rollover to intelligently switch pen styles, haptic feedback to really feel your inputs ...
‎New iPad Pro and new iPad... · ‎iPad Air · ‎Apple set to announce new...
Shop all Memorial Day deal categories

Grills

TVs

Home Appliances

Electronics

Women's Apparel

Men's Apparel
Shop all Memorial Day deal categories

The 4 most exciting iPad Pro 2024 features (and what they ...

ZDNet
https://www.zdnet.com › ... › Computing › Tablets › iPad
4 hours ago — 1. OLED Displays with Ultra Retina XDR · 2. Thinner and lighter than ever · 3. M4 processors · 4. New apps, with a new Apple Pencil.
Pros and cons: Exceedingly light ⋅ An impressively blinding 1,600 nits for HDR ⋅ Very thin ⋅ View full list
"""
# Cut the pasted text into chunks of at most 30 tokens and embed every chunk.
test_splited = split_large_text(doc, 30)
docs_embeddings = model.encode(test_splited, convert_to_tensor=True)

# Embed the question with the same model used for the chunks.
query = "tell me about new ipad features"
query_embedding = model.encode(query, convert_to_tensor=True)

# Retrieve the ten best-matching chunks; hits[0] holds the hits for the single query.
hits = semantic_search(query_embedding, docs_embeddings, top_k=10)
needed_docs = [test_splited[hit['corpus_id']] for hit in hits[0]]

# Prompt = the question on the first line, followed by the retrieved chunks,
# one per line, in the order the search returned them.
corpus = '\n'.join(needed_docs)
prompt = query + '\n' + corpus

response = gen_model.generate_content(prompt)
print(response.text)


**New iPad Features**

**Rollover to Intelligent Switch Pen Styles**

* Allows users to seamlessly switch between different pen styles, such as pencil, marker, or brush, simply by rolling the Apple Pencil over the display.

**Haptic Feedback**

* Provides tactile feedback to users when they interact with the device, enhancing immersion and providing a more natural user experience.

**Scribble and Quick Note**

* Enables users to jot down notes or create sketches directly on the screen and convert handwriting to text using Apple's Scribble feature.
* Quick Note allows users to quickly access and edit notes from the side of the screen.

**M2 Chip**

* Powers the iPad with faster performance, improved graphics, and enhanced machine learning capabilities.

**Wi-Fi 6E**

* Offers lightning-fast wireless connectivity, reducing latency and improving network performance for streaming, gaming, and other data-intensive tasks.

**USB-C with Thunderbolt 4**

* Provides versatile connectivity for h

In [18]:
# Send the bare question, without any retrieved chunks, for comparison with the
# context-augmented answer. Relies on `query` and `gen_model` from earlier cells.
response = gen_model.generate_content(query)
baseline_answer = response.text
print(baseline_answer)

**iPad Pro 2021**

* **M1 Chip:** Apple's most powerful chip provides exceptional performance for demanding tasks like video editing, 3D modeling, and high-resolution gaming.
* **Mini-LED Display (12.9-inch model only):** Delivers stunning brightness, contrast, and color accuracy for an immersive visual experience.
* **Thunderbolt/USB 4:** Supports high-speed data transfer, external displays, and fast charging.
* **5G Connectivity:** Enables ultra-fast cellular data speeds for seamless streaming, gaming, and online collaboration.
* **Center Stage Front Camera:** Automatically adjusts the camera to keep you centered in video calls, even when you move around.
* **LiDAR Sensor:** Enhances augmented reality experiences, improves depth detection for photography, and enables advanced scanning capabilities.

**iPad Air 2022**

* **M1 Chip:** Provides impressive performance for multitasking, gaming, and creative applications.
* **USB-C Connectivity:** Enables fast data transfer, external displ