# Chord DHT Analysis

## Load the data

In [1]:
import ast

import pandas as pd

df = pd.read_csv('../data/computer_scientists.pp.csv')

# Keep only the last names, the first education and the number of awards of each scientist.
df['name'] = df['name'].apply(lambda x: x.split(' ')[-1])
df['education'] = df['education'].apply(ast.literal_eval).apply(lambda x: x[0] if x else None)
df['awards'] = df['awards'].apply(ast.literal_eval).apply(lambda x: len(x))

# Drop all rows with missing values in the education column.
df = df.dropna(subset=["education"], ignore_index=True)

df

Unnamed: 0,name,education,awards
0,Khan,University of Malaya,0
1,Aaronson,Cornell University,4
2,Abebe,Cornell University,0
3,Abelson,Princeton University,1
4,Abiteboul,University of Southern California,6
...,...,...,...
523,Zedan,University of Bristol,0
524,Zilberstein,"University of California, Berkeley",0
525,Zimmerman,Purdue University,0
526,Zuckerberg,Harvard University,0


## Benchmark the Chord DHT

In [3]:
from src.chord_dht.chord import Chord
import logging

logging.disable()

M = [1, 2, 4, 8]

chords = {m: Chord(m) for m in M}

for m, chord in chords.items():
    for i in range(2 ** m):
        chord.join(i)

### Insert

In [4]:
for _, row in df.iterrows():
    for m, chord in chords.items():
        chord.insert(row['education'], {'name': row['name'], 'awards': row['awards']})

### Lookup

In [6]:
from decimal import Decimal, getcontext
import time

# Set the precision (number of significant digits)
getcontext().prec = 50

key = 'Yale University'

for m, chord in chords.items():
    t1 = Decimal(time.time())
    value = chord.lookup(key)
    t2 = Decimal(time.time())

    elapsed_time = t2 - t1
    print(f'{m} bits: {value} {elapsed_time} seconds')

1 bits: None 0E-21 seconds
2 bits: None 0E-21 seconds
4 bits: None 0E-21 seconds
8 bits: None 0E-21 seconds


### Deletion

### Deletion