## setup

In [22]:
import redis
import json
from multiprocessing import Process, cpu_count
import json
import pandas as pd
import numpy as np
import time
from redis.commands.search.query import Query


from typing import List

In [25]:
played_files = ["a", "b", "c"]


def construct_keys(played_files):
    for filename in played_files:
        yield f"files:{filename}_t00s00.mid"


for f in construct_keys(played_files):
    print(f)

files:a_t00s00.mid
files:b_t00s00.mid
files:c_t00s00.mid


In [2]:
r = redis.Redis(host="localhost", port=6379, decode_responses=True)

In [17]:
qv = r.json().get("files:20240213-100-06_0263-0268_t00s00", "$.pitch_histogram")
qv = np.asanyarray(qv, dtype=np.float32)
qv

array([[0.        , 0.02009272, 0.28316873, 0.        , 0.08284624,
        0.17722532, 0.        , 0.06463682, 0.        , 0.37203017,
        0.        , 0.        ]], dtype=float32)

In [None]:
r.ft(f"idx:files_pitch_histogram_vss").search(
    Query(f"(*)=>[KNN 20 @pitch_histogram $query_vector AS vector_score]")
    .sort_by("vector_score")
    .return_fields("vector_score", "id")
    .dialect(4),
    {"query_vector": np.array(qv, dtype=np.float32).tobytes()},
)
.docs 

In [21]:
res = r.execute_command(
    "FT.SEARCH",
    "idx:files_pitch_histogram_vss",
    "*=>[KNN 30 @pitch_histogram $BLOB AS score]",
    "LIMIT",
    10,
    10,  # Get matches 11-20
    "PARAMS",
    2,
    "BLOB",
    qv.tobytes(),
    "DIALECT",
    4,
)
for n in res:
    print(n)

10
files:20240213-100-06_0283-0287_t00s02
['score', '0.0041396021843', '$', '[{"track":"20240213-100-06","segment":"0283-0287","transforms":"t00s02","pitch_histogram":[0.0,0.04786716029047966,0.2698582410812378,0.0,0.05729515850543976,0.1694573312997818,0.0,0.060355622321367264,0.0,0.3951664865016937,0.0,0.0],"clamp":[0.7596527934074402,0.0114656463265419,-0.34824737906455994,-0.4862318933010102,0.24372151494026184,0.7492575645446777,-0.3150612711906433,-0.411616176366806,-0.1630178689956665,0.19928401708602905,-0.09650658816099168,0.02925308048725128,-0.22404679656028748,0.7676444053649902,0.15005961060523987,0.2831387221813202,-0.477946013212204,-0.143755242228508,0.012199679389595984,-0.6936224102973938,-0.45638877153396606,0.4014859199523926,-0.03219752013683319,0.13961392641067505,-0.05867241322994232,-0.1358805000782013,0.18660882115364075,0.3951746225357056,-0.5462151169776917,-0.2028193175792694,0.6680154204368591,-0.07931586354970932,0.7911400198936462,1.0687260627746582,0.296

In [7]:
r.json().get("files:20231220-080-01_0000-0005_t00s00")

{'track': '20231220-080-01',
 'segment': '0000-0005',
 'transforms': 't00s00',
 'pitch_histogram': [0.0,
  0.0,
  0.18383146822452545,
  0.0,
  0.0,
  0.0,
  0.0,
  0.8161685466766357,
  0.0,
  0.0,
  0.0,
  0.0],
 'clamp': [0.7469334006309509,
  -0.6023911237716675,
  -0.5720416903495789,
  0.21939745545387268,
  0.6667446494102478,
  0.05316021293401718,
  -0.9208922386169434,
  -0.5827749371528625,
  0.3166855275630951,
  -0.7300933003425598,
  0.21709121763706207,
  -0.3671600818634033,
  0.40134063363075256,
  0.5924677848815918,
  0.2598113417625427,
  0.13732169568538666,
  -0.501660943031311,
  -0.17503714561462402,
  -0.28646379709243774,
  -0.3767327666282654,
  0.06578344851732254,
  -0.13951413333415985,
  0.045503586530685425,
  0.25990572571754456,
  0.3551941514015198,
  -0.534128725528717,
  0.1335119754076004,
  -0.14570826292037964,
  -0.6765349507331848,
  -0.6132699251174927,
  0.8570048809051514,
  -0.5646653771400452,
  0.38882240653038025,
  1.170788049697876,
  

In [17]:
dataset_path = "../data/datasets/careful"
properties_path = "../data/outputs/careful.json"
metric = "pitch_histogram"

In [18]:
# FT.CREATE idx:table ON JSON PREFIX 1 cmp: SCHEMA $.sim AS sim NUMERIC $.row_file AS row_file TEXT $.col_file AS col_file TEXT $.metric AS metric TEXT

In [19]:
# redis setup
redis_url = "redis://localhost:6379"
r = redis.Redis(host="localhost", port=6379, db=0)
# r = redis.Redis(redis_url)


# load from fs
properties = {}
with open(properties_path, "r") as f:
    properties = json.load(f)

names = list(properties.keys())
names.sort()

num_processes = cpu_count()
rows_per_process = len(names) // num_processes  # type: ignore
extra_rows = len(names) % num_processes  # type: ignore
print(f"{len(names)} & {num_processes} -> {rows_per_process} + {extra_rows}")

3868 & 12 -> 322 + 4


## general tests

In [20]:
name1 = "20240121-70-06_0096-0104.mid"  # names[0]
name2 = "20240227-76-05_0128-0136.mid"  # names[-1]
print(f"{name1} {name2} {metric}")
r.json().get(f"cmp:{name1}:{name2}:{metric}")

20240121-70-06_0096-0104.mid 20240227-76-05_0128-0136.mid pitch_histogram


{'sim': 0.9699342364326304,
 'mutations': {'shift': 0, 'trans': 10},
 'row_file': '20240121-70-06_0096-0104.mid',
 'col_file': '20240227-76-05_0128-0136.mid',
 'metric': 'pitch_histogram'}

In [21]:
def scan_keys(r, pattern):
    cursor = 0
    keys = []
    while True:
        cursor, new_keys = r.scan(cursor, match=pattern)
        keys.extend(new_keys)
        if cursor == 0:
            break
    return keys


# Pattern to match
pattern = "20231220-80-01_0000-0008.mid:*:pitch_histogram"

# Get all keys matching the pattern
# matching_keys = scan_keys(r, pattern)
# print(f"Keys matching pattern '{pattern}': {matching_keys}")

In [23]:
def process_json_keys(redis_conn):
    cursor = "0"
    while cursor != 0:
        cursor, keys = redis_conn.scan(cursor=cursor, count=1000)
        for key in keys:
            key_type = redis_conn.execute_command("TYPE", key)
            if key_type == b"ReJSON-RL":
                value = redis_conn.json().get(key)

                if value:
                    row_file, col_file, metric = str(key).split(":")

                    value["row_file"] = row_file[2:]
                    value["col_file"] = col_file
                    value["metric"] = metric

                    # print(f"Key: {key}, Data: {value}")
                    r.json().set(key, "$", value)

            else:
                # Ignore non-JSON objects
                continue
        print(f"finished section {cursor}")


# Call the function
process_json_keys(r)
print("DONE")

ValueError: too many values to unpack (expected 3)

## build df

In [None]:
r.json().set("test", "$", {"sim": 0.5}, nx=True)
print(r.json().get("test"))

{'sim': 0.5}


In [None]:
n = 4000
data = np.zeros((n, n), dtype=np.float16)

# Measure the time taken to populate the DataFrame
start_time = time.time()

for i in range(n):
    pipeline = r.pipeline()
    for j in range(n):
        pipeline.execute_command("JSON.GET", "test", "$.sim")

    results = pipeline.execute()

    data[i, :] = [float(result[0]) for result in results]

df = pd.DataFrame(data)
memory_usage = df.memory_usage(index=True).sum()
df = df.astype(np.float16)

end_time = time.time()
elapsed_time = end_time - start_time

# Verify the DataFrame
print(df.head())
print(f"Time taken to generate DataFrame: {elapsed_time:.2f} seconds")
print(f"Memory usage of DataFrame: {memory_usage / (1024 * 1024):.2f} MB")
del df

   0     1     2     3     4     5     6     7     8     9     ...  3990  \
0   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  ...   0.5   
1   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  ...   0.5   
2   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  ...   0.5   
3   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  ...   0.5   
4   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  ...   0.5   

   3991  3992  3993  3994  3995  3996  3997  3998  3999  
0   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  
1   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  
2   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  
3   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  
4   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5   0.5  

[5 rows x 4000 columns]
Time taken to generate DataFrame: 207.29 seconds
Memory usage of DataFrame: 30.52 MB


In [40]:
big_df = pd.read_feather("../outputs/records/chunks/sim.feather")
big_df

Unnamed: 0,20231220-80-01_0000-0008.mid,20231220-80-01_0008-0016.mid,20231220-80-01_0016-0024.mid,20231220-80-01_0024-0032.mid,20231220-80-01_0032-0040.mid,20231220-80-01_0040-0048.mid,20231220-80-01_0048-0056.mid,20231220-80-01_0056-0064.mid,20231220-80-01_0064-0072.mid,20231220-80-01_0072-0080.mid,...,20240227-76-05_0872-0880.mid,20240227-76-05_0880-0888.mid,20240227-76-05_0888-0896.mid,20240227-76-05_0896-0904.mid,20240227-76-05_0904-0912.mid,20240227-76-05_0912-0920.mid,20240227-76-05_0920-0928.mid,20240227-76-05_0928-0936.mid,20240227-76-05_0936-0944.mid,20240227-76-05_0944-0952.mid
20231220-80-01_0000-0008.mid,"{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 0.9965457582448798, 'transformations':...","{'sim': 0.9899494936611666, 'transformations':...","{'sim': 0.9778024140774096, 'transformations':...","{'sim': 0.943879807448539, 'transformations': ...","{'sim': 0.9463204468147676, 'transformations':...","{'sim': 0.8131156281817418, 'transformations':...",...,"{'sim': 0.8534631283962076, 'transformations':...","{'sim': 0.7576029726386662, 'transformations':...","{'sim': 0.7813903150950317, 'transformations':...","{'sim': 0.642857142857143, 'transformations': ...","{'sim': 0.7321293603831283, 'transformations':...","{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.799456337008225, 'transformations': ...","{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.7931747686306172, 'transformations':..."
20231220-80-01_0008-0016.mid,"{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 0.9965457582448798, 'transformations':...","{'sim': 0.9899494936611666, 'transformations':...","{'sim': 0.9778024140774096, 'transformations':...","{'sim': 0.943879807448539, 'transformations': ...","{'sim': 0.9463204468147676, 'transformations':...","{'sim': 0.8131156281817418, 'transformations':...",...,"{'sim': 0.8534631283962076, 'transformations':...","{'sim': 0.7576029726386662, 'transformations':...","{'sim': 0.7813903150950317, 'transformations':...","{'sim': 0.642857142857143, 'transformations': ...","{'sim': 0.7321293603831283, 'transformations':...","{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.799456337008225, 'transformations': ...","{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.7931747686306172, 'transformations':..."
20231220-80-01_0016-0024.mid,"{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 0.9965457582448798, 'transformations':...","{'sim': 0.9899494936611666, 'transformations':...","{'sim': 0.9778024140774096, 'transformations':...","{'sim': 0.943879807448539, 'transformations': ...","{'sim': 0.9463204468147676, 'transformations':...","{'sim': 0.8131156281817418, 'transformations':...",...,"{'sim': 0.8534631283962076, 'transformations':...","{'sim': 0.7576029726386662, 'transformations':...","{'sim': 0.7813903150950317, 'transformations':...","{'sim': 0.642857142857143, 'transformations': ...","{'sim': 0.7321293603831283, 'transformations':...","{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.799456337008225, 'transformations': ...","{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.7931747686306172, 'transformations':..."
20231220-80-01_0024-0032.mid,"{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0000000000000002, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 0.9965457582448798, 'transformations':...","{'sim': 0.9899494936611666, 'transformations':...","{'sim': 0.9778024140774096, 'transformations':...","{'sim': 0.943879807448539, 'transformations': ...","{'sim': 0.9463204468147676, 'transformations':...","{'sim': 0.8131156281817418, 'transformations':...",...,"{'sim': 0.8534631283962076, 'transformations':...","{'sim': 0.7576029726386662, 'transformations':...","{'sim': 0.7813903150950317, 'transformations':...","{'sim': 0.642857142857143, 'transformations': ...","{'sim': 0.7321293603831283, 'transformations':...","{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.799456337008225, 'transformations': ...","{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.7931747686306172, 'transformations':..."
20231220-80-01_0032-0040.mid,"{'sim': 0.9965457582448798, 'transformations':...","{'sim': 0.9965457582448798, 'transformations':...","{'sim': 0.9965457582448798, 'transformations':...","{'sim': 0.9965457582448798, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 0.998274373174996, 'transformations': ...","{'sim': 0.9860251439617608, 'transformations':...","{'sim': 0.9518172686249524, 'transformations':...","{'sim': 0.9561495664805124, 'transformations':...","{'sim': 0.7934255358474034, 'transformations':...",...,"{'sim': 0.8474334841157704, 'transformations':...","{'sim': 0.7582124648373588, 'transformations':...","{'sim': 0.7769835478923697, 'transformations':...","{'sim': 0.6228410989030498, 'transformations':...","{'sim': 0.7144004000643362, 'transformations':...","{'sim': 0.831251872614463, 'transformations': ...","{'sim': 0.758756972902415, 'transformations': ...","{'sim': 0.7640004302475342, 'transformations':...","{'sim': 0.761977612286838, 'transformations': ...","{'sim': 0.7501813194503223, 'transformations':..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20240227-76-05_0912-0920.mid,"{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.8626157662974291, 'transformations':...","{'sim': 0.831251872614463, 'transformations': ...","{'sim': 0.8056094727078104, 'transformations':...","{'sim': 0.7957243195347741, 'transformations':...","{'sim': 0.7681184937687652, 'transformations':...","{'sim': 0.7624036113378543, 'transformations':...","{'sim': 0.7708852926477885, 'transformations':...",...,"{'sim': 0.8349559988330127, 'transformations':...","{'sim': 0.8457507107007098, 'transformations':...","{'sim': 0.823305575653129, 'transformations': ...","{'sim': 0.6277796681948944, 'transformations':...","{'sim': 0.6985736519140601, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 0.9851766296709796, 'transformations':...","{'sim': 0.9844184570401529, 'transformations':...","{'sim': 0.962045364823023, 'transformations': ...","{'sim': 0.9753866695556777, 'transformations':..."
20240227-76-05_0920-0928.mid,"{'sim': 0.7994563370082252, 'transformations':...","{'sim': 0.7994563370082252, 'transformations':...","{'sim': 0.7994563370082252, 'transformations':...","{'sim': 0.7994563370082252, 'transformations':...","{'sim': 0.758756972902415, 'transformations': ...","{'sim': 0.7268155677785234, 'transformations':...","{'sim': 0.7311916410746073, 'transformations':...","{'sim': 0.7186578032750838, 'transformations':...","{'sim': 0.7265203749494369, 'transformations':...","{'sim': 0.7429147904803625, 'transformations':...",...,"{'sim': 0.7946116145953565, 'transformations':...","{'sim': 0.850453079394491, 'transformations': ...","{'sim': 0.8297853200806409, 'transformations':...","{'sim': 0.628144264792177, 'transformations': ...","{'sim': 0.6793724050611532, 'transformations':...","{'sim': 0.9851766296709796, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 0.9985274292887676, 'transformations':...","{'sim': 0.9927544126397548, 'transformations':...","{'sim': 0.9981338997523383, 'transformations':..."
20240227-76-05_0928-0936.mid,"{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.8025343000034281, 'transformations':...","{'sim': 0.7640004302475342, 'transformations':...","{'sim': 0.733568405374703, 'transformations': ...","{'sim': 0.724567225155638, 'transformations': ...","{'sim': 0.7126268242184063, 'transformations':...","{'sim': 0.7193208367710449, 'transformations':...","{'sim': 0.7400908033911633, 'transformations':...",...,"{'sim': 0.8116788047290703, 'transformations':...","{'sim': 0.8517356546420747, 'transformations':...","{'sim': 0.8251217494134858, 'transformations':...","{'sim': 0.6193775172500324, 'transformations':...","{'sim': 0.6824998474012772, 'transformations':...","{'sim': 0.9844184570401529, 'transformations':...","{'sim': 0.9985274292887676, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 0.9906349377629022, 'transformations':...","{'sim': 0.9962265598515896, 'transformations':..."
20240227-76-05_0936-0944.mid,"{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.7533507240540993, 'transformations':...","{'sim': 0.761977612286838, 'transformations': ...","{'sim': 0.7785606697091179, 'transformations':...","{'sim': 0.7892444535289956, 'transformations':...","{'sim': 0.7813983916059347, 'transformations':...","{'sim': 0.7890769196279143, 'transformations':...","{'sim': 0.7460681857293563, 'transformations':...",...,"{'sim': 0.8117601749610763, 'transformations':...","{'sim': 0.8630530705855471, 'transformations':...","{'sim': 0.8381070343133826, 'transformations':...","{'sim': 0.663666114047659, 'transformations': ...","{'sim': 0.6859021515814326, 'transformations':...","{'sim': 0.962045364823023, 'transformations': ...","{'sim': 0.9927544126397548, 'transformations':...","{'sim': 0.9906349377629022, 'transformations':...","{'sim': 1.0, 'transformations': {'shift': 0, '...","{'sim': 0.995897977082197, 'transformations': ..."


In [41]:
a, b = "".split("_")
a

ValueError: not enough values to unpack (expected 2, got 1)