In [1]:
from src.linear_hashing import LinearHashing
from random import sample, randint
from math import ceil
import pandas as pd
import plotly.express as px


def generate_keys(amount, start=0, end=10**5):
    """Return ``amount`` distinct random integers from ``[start, end]`` in random order.

    Args:
        amount: How many keys to draw (an int). Zero or a negative value
            yields an empty list.
        start: Smallest value that may be drawn (inclusive).
        end: Largest value that may be drawn (inclusive).

    Returns:
        A list of ``amount`` unique integers, uniformly sampled and randomly
        ordered, so any slice of the result is itself a random subset.

    Raises:
        ValueError: If ``amount`` is larger than the number of integers in
            ``[start, end]``.
    """
    if amount <= 0:
        return []

    # sample() draws without replacement, so uniqueness is guaranteed without
    # a retry loop, and an impossible request fails fast instead of spinning
    # forever. Unlike converting a set to a list (whose order follows the
    # hash-table layout, not chance), the result is also randomly ordered.
    return sample(range(start, end + 1), amount)


def average(numbers):
    """Return the arithmetic mean of ``numbers``.

    ``numbers`` must support ``sum()`` and ``len()``; an empty input raises
    ``ZeroDivisionError``.
    """
    total = sum(numbers)
    count = len(numbers)
    return total / count


def linePlot(data, x, y, y_range, color=None, markers=True, title=None):
    """Show a Plotly line chart of the mean of ``y`` for each value of ``x``.

    Args:
        data: DataFrame holding the columns named by ``x``, ``y`` and,
            when given, ``color``.
        x: Column used for the horizontal axis and as grouping key.
        y: Column whose per-group mean is plotted.
        y_range: Value forwarded to ``fig.update_yaxes(range=...)``.
        color: Optional column; when truthy, rows are grouped by
            ``(x, color)`` and one line is drawn per ``color`` value.
        markers: Forwarded to ``px.line``.
        title: Chart title; a falsy value falls back to a title built
            from ``y`` and ``x``.
    """
    # A falsy color means a single series grouped by x alone.
    group_keys = [x, color] if color else x
    plot_data = data.groupby(group_keys)[y].mean().reset_index()

    chart_title = title if title else f"{y} em funcao do {x}"

    fig = px.line(
        plot_data,
        x=x,
        y=y,
        color=color,
        markers=markers,
        title=chart_title,
    )
    fig.update_yaxes(range=y_range)
    fig.show()


# Values of p swept by the experiments; each one is passed as the first
# argument to LinearHashing and also scales the number of inserted keys
# (p * 1000). Presumably p is the page capacity — confirm in src.linear_hashing.
test_ps = [1, 5, 10, 20, 50]
# Values of alpha swept by the experiments; each one is passed as the second
# argument to LinearHashing and reported in the "alpha máximo" column.
test_alphas = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Desempenho quanto ao espaço

In [2]:
# Space experiment: for every (p, alpha) pair, build 10 tables with p * 1000
# random keys each and record the mean final alpha and p_star.
rows = []

for p_value in test_ps:
    for alpha_max in test_alphas:
        # One (alpha, p_star) pair per repetition.
        observed = []

        for _ in range(10):
            table = LinearHashing(p_value, alpha_max)
            for key in generate_keys(p_value * 1000):
                table.insert(key)

            observed.append((table.alpha, table.p_star))

        rows.append(
            [
                p_value,
                alpha_max,
                average([alpha for alpha, _ in observed]),
                average([p_star for _, p_star in observed]),
            ]
        )

data = pd.DataFrame(rows, columns=["p", "alpha máximo", "alpha médio", "p* médio"])
data

Unnamed: 0,p,alpha máximo,alpha médio,p* médio
0,1,0.2,0.199988,1.658329
1,1,0.3,0.299895,1.627163
2,1,0.4,0.399904,1.533194
3,1,0.5,0.499925,1.579246
4,1,0.6,0.59988,1.580353
5,1,0.7,0.69979,1.687078
6,1,0.8,0.799744,1.837145
7,1,0.9,0.898957,2.13585
8,5,0.2,0.199996,1.17451
9,5,0.3,0.29994,1.109912


In [3]:
# Mean observed alpha for each configured maximum alpha (averaged over every p).
linePlot(data, x="alpha máximo", y="alpha médio", y_range=[0, 1])

In [4]:
# Mean observed alpha for each p (averaged over every maximum alpha).
linePlot(data, x="p", y="alpha médio", y_range=[0, 1])

In [5]:
# Mean p* for each configured maximum alpha (averaged over every p).
linePlot(data, x="alpha máximo", y="p* médio", y_range=[0, 4])

In [6]:
# Mean p* for each p (averaged over every maximum alpha).
linePlot(data, x="p", y="p* médio", y_range=[0, 4])

# Desempenho quanto ao número médio de acessos


In [7]:
# Access-count experiment: for every (p, alpha) pair, build 10 tables and
# measure the mean number of accesses reported by search() for keys that were
# inserted ("Com sucesso") and for keys that were held out ("Sem sucesso").
data = []

for p in test_ps:
    # Both sizes depend only on p, so compute them once per p.
    size = p * 1000
    sample_size = ceil(size * 0.20)

    for alpha in test_alphas:
        S_avg_accesses = []
        C_avg_accesses = []

        for _ in range(10):
            linear_hashing = LinearHashing(p, alpha)

            random_keys = generate_keys(size + sample_size)
            # Shuffle before splitting so the held-out keys are a uniform
            # random subset regardless of the order generate_keys returns
            # them in; slicing an unshuffled list would bias the
            # unsuccessful-search sample towards whatever keys come first.
            random_keys = sample(random_keys, len(random_keys))

            # Held-out keys are never inserted, so searching them must fail.
            S_sample = random_keys[:sample_size]
            keys = random_keys[sample_size:]

            for key in keys:
                linear_hashing.insert(key)

            # Inserted keys, so searching them must succeed.
            C_sample = sample(keys, sample_size)

            S_sample_search_accesses = []
            C_sample_search_accesses = []
            # Element 1 of the search() result is used as the access count.
            for absent_key, present_key in zip(S_sample, C_sample):
                S_sample_search_accesses.append(linear_hashing.search(absent_key)[1])
                C_sample_search_accesses.append(linear_hashing.search(present_key)[1])

            S_avg_accesses.append(average(S_sample_search_accesses))
            C_avg_accesses.append(average(C_sample_search_accesses))

        data.append([p, alpha, average(S_avg_accesses), "Sem sucesso"])
        data.append([p, alpha, average(C_avg_accesses), "Com sucesso"])


data = pd.DataFrame(
    data, columns=["p", "alpha máximo", "número médio de acessos", "tipo"]
)
data

Unnamed: 0,p,alpha máximo,número médio de acessos,tipo
0,1,0.2,0.01100,Sem sucesso
1,1,0.2,1.25400,Com sucesso
2,1,0.3,0.01600,Sem sucesso
3,1,0.3,1.28550,Com sucesso
4,1,0.4,0.02300,Sem sucesso
...,...,...,...,...
75,50,0.7,22.45100,Com sucesso
76,50,0.8,47.58208,Sem sucesso
77,50,0.8,24.50705,Com sucesso
78,50,0.9,135.15125,Sem sucesso


In [8]:
# Mean number of accesses for each p, one line per search outcome ("tipo");
# the y axis is fixed to [0, 60].
linePlot(data, x="p", y="número médio de acessos", y_range=[0, 60], color="tipo")

In [9]:
# Mean number of accesses for each maximum alpha, one line per search outcome
# ("tipo"); the y axis is fixed to [0, 60].
linePlot(
    data, x="alpha máximo", y="número médio de acessos", y_range=[0, 60], color="tipo"
)

# Desempenho durante a inclusão dos n registros


In [10]:
# Rows of the long-format result table; turned into a DataFrame at the end of
# this cell.
data = []

# Fixed configuration for this experiment: both values are passed to
# LinearHashing(p, max_alpha) for every run.
p = 10
max_alpha = 0.85

# One inner list per run, each holding the value observed after every insert.
alphas = []
p_stars = []
l_maxes = []


def iteration_averages(arr):
    """Average a list of equally long sequences position by position.

    ``arr`` holds one sequence per run; the result has one entry per position
    of ``arr[0]``, each being the mean of that position across all runs.
    """
    run_count = len(arr)
    averages = []
    for position in range(len(arr[0])):
        column_total = sum(run[position] for run in arr)
        averages.append(column_total / run_count)
    return averages


# 10 independent runs: insert 10000 random keys and record alpha, p_star and
# l_max after every single insert.
for _ in range(10):
    table = LinearHashing(p, max_alpha)
    run_alphas, run_p_stars, run_l_maxes = [], [], []

    for key in generate_keys(10000):
        table.insert(key)
        run_alphas.append(table.alpha)
        run_p_stars.append(table.p_star)
        run_l_maxes.append(table.l_max)

    alphas.append(run_alphas)
    p_stars.append(run_p_stars)
    l_maxes.append(run_l_maxes)


# Mean of each metric after the i-th insert, taken across the runs.
average_ith_alphas = iteration_averages(alphas)
average_ith_p_stars = iteration_averages(p_stars)
average_ith_l_maxes = iteration_averages(l_maxes)


# Long format: three rows per insert step (1-based), one per metric.
per_step_means = zip(average_ith_alphas, average_ith_p_stars, average_ith_l_maxes)
for step, (mean_alpha, mean_p_star, mean_l_max) in enumerate(per_step_means, start=1):
    data.extend(
        [
            [step, mean_alpha, "alpha médio"],
            [step, mean_p_star, "p*"],
            [step, mean_l_max, "L max"],
        ]
    )

data = pd.DataFrame(data, columns=["i", "valores observados", "tipo"])
data

Unnamed: 0,i,valores observados,tipo
0,1,0.050000,alpha médio
1,1,1.000000,p*
2,1,1.000000,L max
3,2,0.100000,alpha médio
4,2,1.000000,p*
...,...,...,...
29995,9999,2.943371,p*
29996,9999,6.000000,L max
29997,10000,0.848034,alpha médio
29998,10000,2.943615,p*


In [12]:
# Evolution of the three recorded metrics over the insert steps, one line per
# metric ("tipo"). y_range=None is forwarded to fig.update_yaxes(range=None),
# which presumably leaves the axis range to Plotly — confirm. Markers are
# turned off for this plot.
linePlot(
    data,
    x="i",
    y="valores observados",
    y_range=None,
    color="tipo",
    markers=False,
    title="Alpha médio, Número de páginas e L max por iteração",
)