In [149]:
import httpx
from bs4 import BeautifulSoup

r = httpx.get("https://www.apple.com/watch/compare/")
html_doc = r.text
soup = BeautifulSoup(html_doc, "html.parser")
devices_src = soup.find_all("div", class_="device-content with-list-bullet")

In [173]:
from typing import List

from attrs import define
from pyrsistent import PVector, pvector


@define(frozen=True)
class Watch:
    name: str
    features: PVector

    def __lt__(self, other: "Watch") -> bool:
        if len(self.features) < len(other.features):
            return True
        if (len(self.features) == len(other.features)) and (self.name < other.name):
            return True
        return False

In [172]:
from collections import defaultdict
from pprint import pprint

watches = []

for device in devices_src:
    watches.append(
        Watch(
            name=device.h3.text,
            features=PVector(li.text for li in device.find_all("li")),
        )
    )

watches.sort()
pprint([(_.name, len(_.features)) for _ in watches])

TypeError: PVector() takes no arguments

In [171]:
w = watches[0]
type(w.features)

list

In [168]:
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

embeddings = defaultdict()

for watch in watches:
    print(watch.name)
    embeddings[watch] = model.encode(watch.features, convert_to_tensor=True)

Apple Watch Series  1


TypeError: unhashable type: 'list'

Apple Watch Ultra
Apple Watch Series 8
Apple Watch SE
Apple Watch Series 7
Apple Watch Series 6
Apple Watch SE (1st generation)
Apple Watch Series 5
Apple Watch Series 4
Apple Watch Series 3
Apple Watch Series 2
Apple Watch Series  1


In [40]:
pprint(embeddings["Apple\xa0Watch SE"])

tensor([[ 0.0666,  0.0771,  0.0277,  ..., -0.0602, -0.0356,  0.0013],
        [-0.0110,  0.0457,  0.0328,  ..., -0.0742, -0.0464,  0.1485],
        [ 0.0238,  0.0566,  0.0154,  ..., -0.0261, -0.0221, -0.0075],
        ...,
        [-0.0247, -0.0805,  0.0373,  ..., -0.0267, -0.1534,  0.0227],
        [-0.0330,  0.0042, -0.0531,  ..., -0.0202,  0.0959,  0.0395],
        [ 0.0483, -0.0286, -0.0597,  ..., -0.0134, -0.0157, -0.0246]])


In [55]:
def dd():
    return defaultdict(dd)


scores = defaultdict(dd)
key_list = list(features.keys())

for i in range(len(key_list)):
    key_i = key_list[i]
    for j in range(i + 1, len(key_list)):
        key_j = key_list[j]
        score = util.cos_sim(embeddings[key_i], embeddings[key_j])
        scores[key_i][key_j] = score

In [67]:
first = key_list[0]
second = key_list[2]
print(
    (matrix := scores[first][second]).size(),
    len(features[first]),
    len(features[second]),
)

torch.Size([16, 14]) 16 14


In [153]:
@define(frozen=True)
class FeatureNode:
    watch: Watch
    index: int

    def name(self):
        return watch.features[index]


feature_nodes = [
    FeatureNode(watch, index)
    for watch in devices
    for index in range(len(watch.features))
]
print(len(feature_nodes))

155


In [84]:
import torch

argmax_vector = torch.argmax(matrix, dim=1)
for i in range(len(argmax_vector)):
    j = argmax_vector[i].item()
    if (s := matrix[i][j].item()) < 0.999:
        print(i, j, s, features[first][i], "|", features[second][j])

0 0 0.8283575773239136 49mm case | 40mm or 44mm case
1 1 0.8876988887786865 Always-On Retina LTPO OLED display, up to 2000 nits | Retina LTPO OLED display, up to 1000 nits
2 2 0.4469667673110962 Flat sapphire crystal display | Ion-X glass display
3 3 0.9448041319847107 GPS + Cellular model | GPS and GPS + Cellular models
4 4 0.905308723449707 S8 SiP with 64-bit dual-core processor; W3 wireless chip; U1 chip (Ultra Wideband)25 | S8 SiP with 64-bit dual-core processor; W3 wireless chip
5 5 0.921658456325531 Digital Crown with haptic feedback; Action button | Digital Crown with haptic feedback
6 6 0.597714900970459 Temperature sensor;1 blood oxygen sensor;12 electrical heart sensor and third-generation optical heart sensor | Second-generation optical heart sensor
7 7 0.8941822052001953 High and low heart rate notifications, irregular rhythm notification, and ECG app13 | High and low heart rate notifications and irregular rhythm notification14
9 9 0.930390477180481 Water resistant 100 mete

In [97]:
argmax_vector = torch.argmax(matrix, dim=0)
for j in range(len(argmax_vector)):
    i = argmax_vector[j].item()
    if (s := matrix[i][j].item()) < 0.999:
        print(i, j, s, features[first][i], "|", features[second][j])

0 0 0.8283575773239136 49mm case | 40mm or 44mm case
1 1 0.8876988887786865 Always-On Retina LTPO OLED display, up to 2000 nits | Retina LTPO OLED display, up to 1000 nits
2 2 0.4469667673110962 Flat sapphire crystal display | Ion-X glass display
3 3 0.9448041319847107 GPS + Cellular model | GPS and GPS + Cellular models
4 4 0.905308723449707 S8 SiP with 64-bit dual-core processor; W3 wireless chip; U1 chip (Ultra Wideband)25 | S8 SiP with 64-bit dual-core processor; W3 wireless chip
5 5 0.921658456325531 Digital Crown with haptic feedback; Action button | Digital Crown with haptic feedback
6 6 0.597714900970459 Temperature sensor;1 blood oxygen sensor;12 electrical heart sensor and third-generation optical heart sensor | Second-generation optical heart sensor
7 7 0.8941822052001953 High and low heart rate notifications, irregular rhythm notification, and ECG app13 | High and low heart rate notifications and irregular rhythm notification14
9 9 0.930390477180481 Water resistant 100 mete

In [82]:
print("|", (one := features[first][14]), "|", (two := features[second][13]), "|")
print(one == two)

| 32GB capacity | 32GB capacity |
True


In [91]:
import itertools

print(len(key_list))
pprint(combs := list(itertools.combinations(key_list, 2)))
print(len(combs))

11
[('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa08'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch SE'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa07'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa06'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch\xa0SE (1st\xa0generation)'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa05'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa04'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa03'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa02'),
 ('Apple\xa0Watch\xa0Ultra', 'Apple\xa0Watch Series\xa0 1'),
 ('Apple\xa0Watch Series\xa08', 'Apple\xa0Watch SE'),
 ('Apple\xa0Watch Series\xa08', 'Apple\xa0Watch Series\xa07'),
 ('Apple\xa0Watch Series\xa08', 'Apple\xa0Watch Series\xa06'),
 ('Apple\xa0Watch Series\xa08', 'Apple\xa0Watch\xa0SE (1st\xa0generation)'),
 ('Apple\xa0Watch Series\xa08', 'Apple\xa0Watch Series\xa05'),
 ('Apple\xa0Watch Series\xa08', 'Apple\xa0Watch Series\xa04'),
 ('Apple

In [120]:
t = torch.Tensor([[1, -7, 3], [8, -15, 6]])
print(t)
print(t.size())

tensor([[  1.,  -7.,   3.],
        [  8., -15.,   6.]])
torch.Size([2, 3])


In [121]:
torch.max(t, dim=0)

torch.return_types.max(
values=tensor([ 8., -7.,  6.]),
indices=tensor([1, 0, 1]))

In [122]:
torch.max(t, dim=1)

torch.return_types.max(
values=tensor([3., 8.]),
indices=tensor([2, 0]))

In [123]:
torch.argmax(t, dim=0)

tensor([1, 0, 1])

In [124]:
torch.argmax(t, dim=1)

tensor([2, 0])

In [125]:
torch.max(t)

tensor(8.)

In [126]:
torch.argmax(t)

tensor(3)

In [155]:
a = [2, 1, 3]
pprint(sorted(a))
pprint(a.sort())
pprint(a)

[1, 2, 3]
None
[1, 2, 3]
