# Finding Similar Shoes Using Vector Search in Db2

Some general words, introduction, and rough outline


In [1]:
import pandas as pd
import os, csv
import random
from dotenv import load_dotenv
import numpy as np
import ollama
%load_ext sql

%config SqlMagic.dsn_filename = '.db2conn'
%config SqlMagic.displaylimit = 20
%config SqlMagic.named_parameters="enabled"
# load more settings from .env
load_dotenv(os.getcwd()+"/.env", override=True)

True

## Setting up Db2 Connection

In [2]:
%sql --section db2
%sql --connections

current,url,alias
*,db2://db2inst1:***@localhost:50000/testdb,db2


# Setting up a Shoes Table in Db2

In [3]:
# Drop the table if it exists
%sql DROP TABLE IF EXISTS SHOES
# Create the table
sql="""
    CREATE TABLE IF NOT EXISTS SHOES (
        SKU VARCHAR(8),
        PRODUCT_NAME VARCHAR(40),
        BRAND VARCHAR(20),
        CLASS VARCHAR(5),
        S_TYPE VARCHAR(7),
        MATERIAL VARCHAR(20),
        COLOR VARCHAR(10),
        WEATHER_RESISTANCE VARCHAR(10),
        ARCH_SUPPORT VARCHAR(4),
        SIZE FLOAT,
        PRICE FLOAT,
        RATING FLOAT,
        STORE_ID BIGINT,
        CITY VARCHAR(40)
    );
    """

%sql {{sql}}

In [4]:
# Generate data
cities = [
    "Frankfurt", "Berlin", "Munich", "Hamburg"
]

# Definitions
brands = ['Zentrax', 'FootFlex', 'StrideOne', 'Loopic', 'RunXpress', 'ComfRun']
types = ['Running', 'Walking', 'Trail']
classes = ['Men', 'Women']
materials = ['Synthetic', 'Knit', 'Leather']
colors = ['Black', 'White']
arch_supports = ['High', 'Flat']
weather_resistances = ['Waterproof', 'Resistant']
sizes = [round(s, 1) for s in range(6, 13)] + [s + 0.5 for s in range(6, 13)]
store_ids = range(1, 21)

# Helper: create a fake product name
def create_product_name(brand, shoe_type):
    return f"{brand} {random.choice(['Ultra', 'Flex', 'Pro', 'X', 'Max'])} {shoe_type}"

# Helper: create fake keywords
def generate_keywords(shoe_type, material):
    keywords = [shoe_type.lower(), material.lower()]
    keywords += random.sample(['lightweight', 'durable', 'breathable', 'cushioned', 'supportive', 'flexible'], 3)
    return ', '.join(keywords)

def generate_shoe_data(n=500):
    data = []
    used_skus = set()

    for _ in range(n):
        brand = random.choice(brands)
        shoe_type = random.choice(types)
        shoe_class = random.choice(classes)
        material = random.choice(materials)
        size = random.choice(sizes)
        color = random.choice(colors)
        arch = random.choice(arch_supports)
        weather = random.choice(weather_resistances)
        store_id = random.choice(store_ids)
        city = random.choice(cities)
                
        price = round(random.uniform(29.99, 149.99), 2)
        rating = round(random.uniform(3.0, 5.0), 1)
        product_name = create_product_name(brand, shoe_type)

        # Ensure SKU uniqueness
        while True:
            sku = f"{brand[:3].upper()}-{random.randint(1000, 9999)}"
            if sku not in used_skus:
                used_skus.add(sku)
                break

        data.append({
            'SKU': sku,
            'PRODUCT_NAME': product_name,
            'BRAND': brand,
            'CLASS': shoe_class,
            'S_TYPE': shoe_type,
            'MATERIAL': material,
            'COLOR': color,
            'WEATHER_RESISTANCE': weather,
            'ARCH_SUPPORT': arch,
            'SIZE': size,
            'PRICE': price,
            'RATING': rating,
            'STORE_ID': store_id,
            'CITY': city
        })

    return pd.DataFrame(data)

# Generate and save
df_shoes = generate_shoe_data(500)


In [5]:
# A look at the generated data
df_shoes.head()

Unnamed: 0,SKU,PRODUCT_NAME,BRAND,CLASS,S_TYPE,MATERIAL,COLOR,WEATHER_RESISTANCE,ARCH_SUPPORT,SIZE,PRICE,RATING,STORE_ID,CITY
0,COM-4108,ComfRun Ultra Running,ComfRun,Women,Running,Synthetic,White,Resistant,Flat,6.0,124.28,3.3,2,Hamburg
1,RUN-3597,RunXpress Ultra Running,RunXpress,Women,Running,Leather,White,Resistant,Flat,10.0,103.47,4.1,17,Frankfurt
2,COM-3044,ComfRun Pro Running,ComfRun,Women,Running,Leather,Black,Waterproof,High,10.0,85.34,4.3,15,Hamburg
3,FOO-5477,FootFlex X Trail,FootFlex,Women,Trail,Synthetic,White,Waterproof,Flat,7.5,114.72,4.5,8,Munich
4,STR-3479,StrideOne Pro Walking,StrideOne,Women,Walking,Knit,Black,Waterproof,Flat,7.5,55.62,4.0,10,Berlin


In [6]:
# Define columns that define features for embedding
embedding_cols = ['S_TYPE', 'MATERIAL', 'COLOR', 'WEATHER_RESISTANCE', 'ARCH_SUPPORT']
# The output matches the columns and output shown in the previous cell (see above)
df_shoes[embedding_cols].head()

Unnamed: 0,S_TYPE,MATERIAL,COLOR,WEATHER_RESISTANCE,ARCH_SUPPORT
0,Running,Synthetic,White,Resistant,Flat
1,Running,Leather,White,Resistant,Flat
2,Running,Leather,Black,Waterproof,High
3,Trail,Synthetic,White,Waterproof,Flat
4,Walking,Knit,Black,Waterproof,Flat


# Generating embedding vectors for the shoes

In [7]:
# Combine all embedding columns into a single string for each row, including column names
# The key/value pairs are separated by ' [SEP] '
df_shoes['COMBINED'] = df_shoes.apply(
    lambda row: ' [SEP] '.join([f"{col_name}: {row[col_name]}" for col_name in embedding_cols]), 
    axis=1
)

In [8]:
# Show the same columns plus the new COMBINED column
cols_to_show = ['S_TYPE', 'MATERIAL', 'COLOR', 'WEATHER_RESISTANCE', 'ARCH_SUPPORT', 'COMBINED']
df_shoes[cols_to_show].head()

Unnamed: 0,S_TYPE,MATERIAL,COLOR,WEATHER_RESISTANCE,ARCH_SUPPORT,COMBINED
0,Running,Synthetic,White,Resistant,Flat,S_TYPE: Running [SEP] MATERIAL: Synthetic [SEP...
1,Running,Leather,White,Resistant,Flat,S_TYPE: Running [SEP] MATERIAL: Leather [SEP] ...
2,Running,Leather,Black,Waterproof,High,S_TYPE: Running [SEP] MATERIAL: Leather [SEP] ...
3,Trail,Synthetic,White,Waterproof,Flat,S_TYPE: Trail [SEP] MATERIAL: Synthetic [SEP] ...
4,Walking,Knit,Black,Waterproof,Flat,S_TYPE: Walking [SEP] MATERIAL: Knit [SEP] COL...


In [9]:
df_shoes.iloc[0]['COMBINED']

'S_TYPE: Running [SEP] MATERIAL: Synthetic [SEP] COLOR: White [SEP] WEATHER_RESISTANCE: Resistant [SEP] ARCH_SUPPORT: Flat'

Instead of generating embeddings with an AI model, you can also use the following to load already generated data. In that case, uncomment the two last lines in the following cell, then run the cell. Then, skip the cell that uses the local Ollama service.

In [10]:
# Instead of generating new data, load pregenerated data from a CSV file and use it instead.

# Uncomment to use
#df_shoes=pd.read_csv('shoes_data_with_vectors.csv')
#df_shoes.head()

Generate the embeddings using a local Ollama service.

In [11]:
# Make list from combined columns
row_combined = df_shoes['COMBINED'].tolist()
# Run batch processing for generation of embeddings
embedding_model=os.getenv('embedding_model')
response = ollama.embed(model=embedding_model, input=row_combined)
shoe_vectors = response["embeddings"]
df_shoes['EMBEDDING'] = shoe_vectors
# remove the column with the input values
df_shoes.drop(['COMBINED'], axis=1, inplace=True)


In [12]:
# show a sample vector value
df_shoes.iloc[0]['EMBEDDING']

[-0.0066919164,
 -0.0067627705,
 0.035581388,
 0.01915025,
 -0.041133996,
 -0.025800483,
 0.015560401,
 0.010293875,
 0.0017078872,
 0.009238132,
 0.023544708,
 0.010541141,
 -0.044832896,
 0.0069413967,
 -0.026893023,
 -0.0093074385,
 -0.03695127,
 0.01832269,
 -0.007099077,
 -0.036428977,
 0.03930852,
 0.007538843,
 0.026775623,
 -0.07010296,
 -0.03777198,
 -0.019128175,
 -0.057726946,
 -0.019424608,
 -0.038014285,
 -0.12285742,
 0.06201886,
 -0.023491858,
 -0.046087738,
 -0.079872265,
 -0.028934386,
 -0.05833537,
 0.058513075,
 -0.041578144,
 0.01645957,
 0.030895026,
 -0.044253983,
 0.06054578,
 -0.02674873,
 0.03568194,
 0.13163511,
 0.051323142,
 -0.06396264,
 0.036397923,
 0.02629684,
 -0.035780832,
 0.01467025,
 -0.101407394,
 -0.022469657,
 0.021284709,
 -0.01806098,
 -0.077553146,
 0.0008761278,
 -0.01296103,
 -0.042471897,
 -0.072351895,
 0.06524442,
 0.024972914,
 0.05510607,
 -0.045815207,
 -0.022857746,
 0.01900346,
 0.01712755,
 0.014009925,
 0.0018175672,
 -0.022687154,

# Add vector column to SHOES table and then insert the data

In [13]:
# Extract the dimensions, they vary by model
# The dimension is needed to set up the vector column in Db2 and to insert data
vector_dimension=len(shoe_vectors[0])
vector_dimension

384

### Adding a `VECTOR` column

Alter the SHOES table and add the vector column.
Note that the dimension needs to fit with the generated embeddings

In [14]:
%%sql
ALTER TABLE SHOES
ADD COLUMN EMBEDDING VECTOR({{vector_dimension}}, FLOAT32);

In [15]:
# DESCRIBE the table to show schema. Note the VECTOR-typed column EMBEDDING
%sql CALL SYSPROC.ADMIN_CMD('describe table shoes')


colname,typeschema,typename,length,scale,nullable
SKU,SYSIBM,VARCHAR,8,0,Y
PRODUCT_NAME,SYSIBM,VARCHAR,40,0,Y
BRAND,SYSIBM,VARCHAR,20,0,Y
CLASS,SYSIBM,VARCHAR,5,0,Y
S_TYPE,SYSIBM,VARCHAR,7,0,Y
MATERIAL,SYSIBM,VARCHAR,20,0,Y
COLOR,SYSIBM,VARCHAR,10,0,Y
WEATHER_RESISTANCE,SYSIBM,VARCHAR,10,0,Y
ARCH_SUPPORT,SYSIBM,VARCHAR,4,0,Y
SIZE,SYSIBM,DOUBLE,8,0,Y


Insert the data into SHOES table by looping over the data frame. Not efficient, but ok for this example.

In [16]:
# Turn regular output off to not have 500 outputs
%config SqlMagic.feedback=0
sql="""
insert into shoes values
(:sku, :product, :brand, :rclass, :rtype, :material, :color, :wr, :arch_s,
:rsize, :price, :rating, :storeid, :city, VECTOR(:vector_str ,{vector_dimension}, FLOAT32))
""".format(vector_dimension=vector_dimension)

for index, row in df_shoes.iterrows():
    sku, product, brand, rclass, rtype, material, color, wr, arch_s, rsize, price,\
     rating, storeid, city, embedding = row
    vector_str = "[" + ", ".join(map(str, embedding)) + "]"
    %sql {{sql}}
    
# Turn regular output back on
%config SqlMagic.feedback=1

## Work with the inserted data

In [17]:
# The row count should match the number of generated data records
%sql SELECT count(*) FROM SHOES

1
500


In [18]:
# Search for Men shoes of size 12
sql = """ 
    SELECT SKU, PRODUCT_NAME, BRAND, S_TYPE, MATERIAL, COLOR, WEATHER_RESISTANCE, ARCH_SUPPORT, PRICE, RATING, CITY
    FROM SHOES 
    WHERE CLASS = 'Men' AND Size = 12 
    """

shoe_search = %sql {{sql}}

shoe_search

sku,product_name,brand,s_type,material,color,weather_resistance,arch_support,price,rating,city
RUN-5475,RunXpress Pro Trail,RunXpress,Trail,Leather,Black,Waterproof,Flat,99.88,3.4,Hamburg
STR-6282,StrideOne Ultra Walking,StrideOne,Walking,Leather,Black,Resistant,High,40.66,3.6,Berlin
STR-9543,StrideOne X Running,StrideOne,Running,Synthetic,Black,Resistant,High,119.88,4.0,Frankfurt
FOO-5925,FootFlex Pro Walking,FootFlex,Walking,Knit,White,Waterproof,Flat,141.85,3.1,Frankfurt
LOO-2474,Loopic Flex Running,Loopic,Running,Knit,White,Resistant,Flat,148.97,4.2,Frankfurt
STR-6338,StrideOne Pro Trail,StrideOne,Trail,Leather,White,Waterproof,High,109.89,3.0,Berlin
LOO-2600,Loopic Flex Running,Loopic,Running,Leather,Black,Resistant,Flat,55.75,4.0,Hamburg
STR-3051,StrideOne Flex Trail,StrideOne,Trail,Synthetic,Black,Waterproof,Flat,104.35,4.4,Berlin
STR-6717,StrideOne X Trail,StrideOne,Trail,Synthetic,Black,Waterproof,High,121.33,4.6,Berlin
ZEN-5092,Zentrax X Running,Zentrax,Running,Synthetic,White,Waterproof,High,106.41,5.0,Munich


In [19]:
# Turn the result into a DataFrame
df_shoe_search = shoe_search.DataFrame()
# extract SKUs
sku_list = df_shoe_search['sku']
# Pick a random SKU as our "choice"
my_choice_sku = random.choice(sku_list)
#print the selected SKU
my_choice_sku

'STR-9543'

In [20]:
# What is the full record for "our" choice?
%sql select * from SHOES where SKU='{{my_choice_sku}}'

sku,product_name,brand,class,s_type,material,color,weather_resistance,arch_support,size,price,rating,store_id,city,embedding
STR-9543,StrideOne X Running,StrideOne,Men,Running,Synthetic,Black,Resistant,High,12.0,119.88,4.0,17,Frankfurt,"[-0.00804766454,0.0142711513,0.0479723066,0.0179347415,-0.0527827553,-0.0421824865,-0.00418013521,0.0146426354,0.0117360828,0.0239865948,0.0314052664,0.0122069856,-0.0366671458,-0.0109188668,-0.0328385718,0.00260491366,-0.0543492772,0.0229650307,0.00919163134,-0.0184465423,0.0340503007,0.0216670185,-0.0017491472,-0.043405626,-0.0433642007,-0.0308004115,-0.0613762885,-0.022381762,-0.0378128216,-0.12925075,0.058760535,-0.00580603909,-0.0407534465,-0.0696063042,-0.0276438892,-0.0664377511,0.0552443005,-0.0407722853,0.00181255967,0.0392529294,-0.0502978936,0.0513685271,-0.0253936853,0.0346058644,0.136314556,0.0200962927,-0.0705449432,0.0598856509,0.0340808667,-0.0340649746,0.0224885494,-0.0841481015,-0.0121248597,0.0201703925,-0.0100423628,-0.0910894275,0.00891262945,-0.0196279809,-0.0444235131,-0.0712868571,0.0488413796,0.0215413254,0.0233362969,-0.0227787606,-0.0243539773,0.0133445458,0.0104398057,0.0114574069,0.00711879553,-0.039794676,0.0296247844,-0.0900854468,0.0282851793,-0.0257395916,0.0717618838,-0.0963960215,0.0100832693,-0.0116324751,-0.0345158689,0.0356070884,-0.0106515121,0.027537303,-0.0904619545,-0.0486288518,0.0888134688,-0.0246652775,-0.0350048877,0.0371161103,0.019181991,0.0967474729,-0.00640220009,0.0105346078,0.0918611065,0.0675495863,0.0387377031,-0.0545584597,-0.186622426,0.0273360349,-0.0906318277,-0.0136825051,-0.00347459456,-0.0266002063,0.0458309799,-0.0475196317,0.0430248529,-0.0447918773,0.0567867756,0.0545393266,-0.00542048272,-0.0871337503,-0.0408986397,0.116828866,-0.00170327548,-0.0361008942,-0.0382751971,0.0175622385,0.0475871265,-0.0889507234,-0.00337231741,-0.0289432704,0.0145659195,0.0218483545,-0.0395771824,-0.0774102211,0.0859030485,0.00354709546,0.00200342131,-0.00482314546,-0.0592267402,-0.0246350225,0.0235742144,0.0539262556,-0.0715135559,0.049430009,-0.0030394108,0.0345760435,-0.018433556,0.00163251022,0.0553177223,-0.00653340342,-0.00314083532,-0.0646405965,0.0138665242,-0.0637988597,0.0178666841,-0.0123781441,-0.126097903,-0.0190616306,-0.0440658145,-0.00513061089,-0.0300212838,-0.0560119189,-0.019667957,0.000454245514,0.00963662006,0.00566465873,-0.0171966199,0.0395296291,0.00976374373,0.0103650698,-0.0312638246,0.0164715908,-0.037239477,0.0383094922,-0.0575924776,0.0189644434,0.0173293129,-0.00505831279,-0.0664074421,0.06172809,0.0520794466,-0.00971339736,-0.00637046155,-0.00725948252,-0.0710557401,0.047877755,-0.0090113841,-0.0336372368,0.0532540269,-0.00677920924,-0.0402949713,0.0428436212,0.0542672686,-0.0397797339,-0.0889953002,-0.0368013307,-0.0345294699,-0.0967675522,0.0138484323,-0.00728107849,-0.00229934533,0.00865177251,-0.0146356365,-0.0152537785,0.0431238376,-0.0414877236,-0.0133322813,0.014043496,-0.0169632249,-0.0326561108,0.0542029701,-0.032985419,0.112521566,0.0219604317,0.126522943,0.0488761403,0.0280354768,0.00967151206,-0.0290091336,0.0130046466,0.0239212234,-0.0225760583,0.0214354601,-0.0554510467,0.0243703779,-0.00908514671,-0.0797962844,-0.0388535038,0.0105743175,-0.0267390385,0.0948624834,-0.0566208102,-0.0486287773,0.0949055329,-0.0215130448,-0.0543842837,0.0239821449,0.00183735334,-0.0438286439,0.00390979461,-0.0238921344,0.069727309,-0.035428334,-0.00310149183,-0.0698863715,-0.0586121194,-0.118415341,-0.0334751904,-0.0747838542,-0.0374041758,0.0434081182,0.138565123,0.00158755877,0.0018171363,0.102584794,0.0975645408,0.0115123549,0.00777733279,-0.0265309159,0.019515533,0.0296696238,0.0853096247,0.051907517,-0.00307219476,0.00959833432,0.0410843566,-0.00417150185,-0.0155590642,0.00741342083,-0.0229721684,0.0936773866,-0.092596963,0.0043206499,0.00591878081,0.0229633357,0.0200030636,0.0131529057,0.0626782551,-0.0337965228,0.0133516267,-0.0144637013,0.00927688275,-0.0121492092,-0.0336858407,0.0769652948,-0.0201796666,-0.0394827761,-0.0173254758,-0.0146112135,0.0121137183,0.0344144255,0.0370463245,-0.019716043,-0.0341439433,-0.0122486018,-0.00400557742,-0.0229934417,-0.0462931022,-0.0151052885,-0.014397895,0.0263991151,-0.0647155344,-0.00567872496,-0.00363415666,0.0760924891,0.13356553,-0.0979527906,-0.0837548226,0.0819552913,-0.0266631618,0.024290489,0.0158439726,-0.0561658032,-0.0375206694,0.00552535895,0.0410950109,0.0135337282,-0.0253205132,-0.0256263539,-0.0516302139,-0.0492724963,-0.00358573534,-0.00516277552,-0.0214120466,-0.0422527604,0.06584429,0.0188401323,0.00182324636,-0.0512678996,-0.00694998587,0.0302728824,-0.100456074,-0.0907270387,0.0848627314,0.204877198,-0.0352221206,-0.0242214929,0.0209715106,0.0701638684,0.0912731662,-0.0666381866,-0.0456570685,0.0368963517,0.0480581634,0.166227266,-0.0333415866,-0.0834569186,0.147041172,0.0385342054,0.0164552424,-0.0291073844,-0.0246378928,0.0141929723,0.00835219398,0.0819542706,-0.0155107826,-0.0310709514,-0.031428121,-0.0123364488,0.0549391806,-0.00883831922,-0.06624192,0.0592446774,-0.0546305291,-0.0473743156,0.0450222529,-0.0633105859,-0.0169074237,-0.0219554864,-0.00690122787,-0.014038749,0.0627252236,-0.078346841,0.0317529216,0.0502818935,0.0225651097,-0.107657388,-0.0252449494,-0.0753404275,-0.0585095063,-0.0190260913,-0.0290415399,-0.00772321876,0.0315934084,0.0227858908,-0.00163619732,-0.00536571071,-0.0770511925,-0.0161447339,-0.0985101163,0.035552714,-0.0226381477,-0.156419024,0.00364128267]"


Searching for similar 'Men' shoes (type, material, color, weather resistance, arch support) at the Frankfurt location with size 12

In [21]:
# SQL query using VECTOR_DISTANCE and the EMBEDDING from the selected shoe (my_choice_sku)
sql = f"""
SELECT 
    SKU, 
    PRODUCT_NAME, 
    BRAND, 
    S_TYPE, 
    MATERIAL, 
    COLOR, 
    WEATHER_RESISTANCE, 
    ARCH_SUPPORT, 
    PRICE, 
    RATING,
    VECTOR_DISTANCE(
        (SELECT EMBEDDING FROM SHOES WHERE SKU = '{my_choice_sku}'), 
        EMBEDDING, 
        EUCLIDEAN
    ) AS DISTANCE
FROM 
    SHOES
WHERE 
    SKU <> '{my_choice_sku}'
    AND CITY = 'Frankfurt'
    AND SIZE = 12
    AND CLASS = 'Men'
ORDER BY 
    DISTANCE ASC
FETCH FIRST 10 ROWS ONLY
""".format(my_choice_sku=my_choice_sku)

top_shoes = %sql {{sql}}
top_shoes

sku,product_name,brand,s_type,material,color,weather_resistance,arch_support,price,rating,distance
LOO-2474,Loopic Flex Running,Loopic,Running,Knit,White,Resistant,Flat,148.97,4.2,0.3515224632027593
LOO-5783,Loopic Max Walking,Loopic,Walking,Knit,Black,Resistant,Flat,109.52,4.9,0.4553188320388047
FOO-5925,FootFlex Pro Walking,FootFlex,Walking,Knit,White,Waterproof,Flat,141.85,3.1,0.4979490287450195


The output above should show a mix of same values with - top to down - increasing variety.

Next, the same query again, but using UNION ALL to show "our" row as first one for better comparison of similarity. We limit the result set to only 5 similar records.

In [22]:
# SQL query using VECTOR_DISTANCE and the EMBEDDING from the selected shoe (my_choice_sku)
sql = f"""
(SELECT 
    SKU, 
    PRODUCT_NAME, 
    BRAND, 
    S_TYPE, 
    MATERIAL, 
    COLOR, 
    WEATHER_RESISTANCE, 
    ARCH_SUPPORT, 
    PRICE, 
    RATING,
    0 AS DISTANCE
FROM
    SHOES
WHERE
    SKU = '{my_choice_sku}')
UNION ALL
(SELECT 
    SKU, 
    PRODUCT_NAME, 
    BRAND, 
    S_TYPE, 
    MATERIAL, 
    COLOR, 
    WEATHER_RESISTANCE, 
    ARCH_SUPPORT, 
    PRICE, 
    RATING,
    VECTOR_DISTANCE(
        (SELECT EMBEDDING FROM SHOES WHERE SKU = '{my_choice_sku}'), 
        EMBEDDING, 
        EUCLIDEAN
    ) AS DISTANCE
FROM 
    SHOES
WHERE 
    SKU <> '{my_choice_sku}'
    AND CITY = 'Frankfurt'
    AND SIZE = 12
    AND CLASS = 'Men'
ORDER BY 
    DISTANCE ASC
FETCH FIRST 5 ROWS ONLY)
ORDER BY DISTANCE ASC
""".format(my_choice_sku=my_choice_sku)

%sql {{sql}}

sku,product_name,brand,s_type,material,color,weather_resistance,arch_support,price,rating,distance
STR-9543,StrideOne X Running,StrideOne,Running,Synthetic,Black,Resistant,High,119.88,4.0,0.0
LOO-2474,Loopic Flex Running,Loopic,Running,Knit,White,Resistant,Flat,148.97,4.2,0.3515224632027593
LOO-5783,Loopic Max Walking,Loopic,Walking,Knit,Black,Resistant,Flat,109.52,4.9,0.4553188320388047
FOO-5925,FootFlex Pro Walking,FootFlex,Walking,Knit,White,Waterproof,Flat,141.85,3.1,0.4979490287450195


Compare the first row (our shoe) to the other similar shoes.

# Cleanup and Tools

In [23]:
# We could DROP the created table SHOES if required. But we keep it, so additional queries could be run.
# Uncomment if needed
#%sql DROP TABLE SHOES

In [24]:
# Export the shoe data to keep it for history and more experiments

# Uncomment if needed
""" df_shoes.to_csv(
    'shoes_data_with_vectors.csv',
    index=False,
    quoting=csv.QUOTE_NONNUMERIC
)
 """

" df_shoes.to_csv(\n    'shoes_data_with_vectors.csv',\n    index=False,\n    quoting=csv.QUOTE_NONNUMERIC\n)\n "

In [25]:
# Close the database connection
%sql --close db2
%sql --connections

current,url,alias
