In [93]:
import kmapper as km
import numpy as np
import pandas as pd
from faker import Faker
from sklearn.cluster import DBSCAN

In [94]:
# Crea una instancia de Faker
faker = Faker()

# Genera una lista de 500 nombres aleatorios en inglés
nombres = [faker.name() for _ in range(500)]

In [95]:
# crear DataFrame con 3 columnas y 500 filas
df = pd.DataFrame(np.random.randint(1, 101, size=(500, 3)), columns=['Parcial 1', 'Parcial 2', 'Parcial 3'])

# agregar cuarta columna con promedio de toda la fila
df['Promedio'] = df.mean(axis=1)
df["Promedio"] = df["Promedio"].astype(int)
df["Nombre"] = nombres
df["Nombre"] = df["Nombre"] + ' ' + df["Promedio"].astype(str)

In [96]:
# mostrar primeras 3 filas del DataFrame
print(df.head(3))

   Parcial 1  Parcial 2  Parcial 3  Promedio           Nombre
0         37         47         58        47    Glenn Paul 47
1          6         38         57        33   Frank Baker 33
2         89         41         47        59  Kristen Bell 59


In [97]:
df

Unnamed: 0,Parcial 1,Parcial 2,Parcial 3,Promedio,Nombre
0,37,47,58,47,Glenn Paul 47
1,6,38,57,33,Frank Baker 33
2,89,41,47,59,Kristen Bell 59
3,13,77,23,37,Adam Ponce 37
4,44,81,45,56,Mary Lowery 56
...,...,...,...,...,...
495,69,95,49,71,Clinton Bartlett 71
496,48,77,79,68,Lauren Salazar 68
497,42,7,17,22,Michael Reid 22
498,44,61,32,45,Jose Guzman 45


In [98]:
def metrica(v, w):
    var = 0
    for i in range(0, 3):
        if var < abs(v[i] - w[i]):
            var = abs(v[i] - w[i])
    return var

In [99]:
df.iloc[1]

Parcial 1                 6
Parcial 2                38
Parcial 3                57
Promedio                 33
Nombre       Frank Baker 33
Name: 1, dtype: object

In [100]:
df.iloc[0]

Parcial 1               37
Parcial 2               47
Parcial 3               58
Promedio                47
Nombre       Glenn Paul 47
Name: 0, dtype: object

In [101]:
metrica(df.iloc[1], df.iloc[0])

31

In [102]:
df.Nombre

0            Glenn Paul 47
1           Frank Baker 33
2          Kristen Bell 59
3            Adam Ponce 37
4           Mary Lowery 56
              ...         
495    Clinton Bartlett 71
496      Lauren Salazar 68
497        Michael Reid 22
498         Jose Guzman 45
499        Lauren Duran 78
Name: Nombre, Length: 500, dtype: object

In [103]:
X_array = df[['Parcial 1', 'Parcial 2', 'Parcial 3']].to_numpy()

In [104]:
X_array

array([[37, 47, 58],
       [ 6, 38, 57],
       [89, 41, 47],
       ...,
       [42,  7, 17],
       [44, 61, 32],
       [99, 39, 98]])

In [105]:
dbscan = DBSCAN(eps=10, min_samples=5, metric=metrica).fit(X_array)

In [106]:
mapper = km.KeplerMapper(verbose=2)

lens2 = mapper.fit_transform(X_array, projection="x[1]")

lens1 = mapper.fit_transform(X_array, projection="l2norm")

lens = np.c_[lens1, lens2]

KeplerMapper(verbose=2)
..Composing projection pipeline of length 1:
	Projections: x[1]
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (500, 3)

..Projecting data using: x[1]

..Scaling with: MinMaxScaler()

..Composing projection pipeline of length 1:
	Projections: l2norm
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (500, 3)

..Projecting data using: l2norm

..Scaling with: MinMaxScaler()



In [107]:
graph = mapper.map(
    X_array,
    clusterer=dbscan,
    cover=km.Cover(n_cubes=3, perc_overlap=0.4),
)

informacion = mapper.data_from_cluster_id('cube2_cluster0', graph, X_array)

print(informacion)

Mapping on data shaped (500, 3) using lens shaped (500, 3)

Minimal points in hypercube before clustering: 5
Creating 27 hypercubes.
   > Found 3 clusters in hypercube 0.
   > Found 4 clusters in hypercube 1.
   > Found 1 clusters in hypercube 2.
   > Found 3 clusters in hypercube 3.
   > Found 3 clusters in hypercube 4.
   > Found 1 clusters in hypercube 5.
   > Found 1 clusters in hypercube 6.
   > Found 2 clusters in hypercube 7.
   > Found 1 clusters in hypercube 8.
   > Found 4 clusters in hypercube 9.
   > Found 6 clusters in hypercube 10.
   > Found 1 clusters in hypercube 11.
   > Found 2 clusters in hypercube 12.
   > Found 3 clusters in hypercube 13.
   > Found 4 clusters in hypercube 14.
   > Found 2 clusters in hypercube 15.
   > Found 3 clusters in hypercube 16.
   > Found 3 clusters in hypercube 17.
   > Found 3 clusters in hypercube 18.
   > Found 2 clusters in hypercube 19.
   > Found 1 clusters in hypercube 20.
   > Found 2 clusters in hypercube 21.
   > Found 2 cluste

In [108]:
mapper.visualize(
    graph,
    path_html="promedios.html",
    title="Promedio de 500 alumnos",
    color_values=lens,
    colorscale=None,
    nbins=8,
    color_function_name=['Proyeccion en el eje x', 'Norma L2', 'kjkj', 'dlkdl'],
    node_color_function=['mean', 'std', 'median', 'max'],
    custom_tooltips=df.Nombre,
    include_searchbar=True
)

Wrote visualization to: promedios.html


'<!DOCTYPE html>\n<html>\n\n<head>\n  <meta charset="utf-8">\n  <meta name="generator" content="KeplerMapper">\n  <title>Promedio de 500 alumnos | KeplerMapper</title>\n\n  <link rel="icon" type="image/png" href="http://i.imgur.com/axOG6GJ.jpg" />\n\n  <link href=\'https://fonts.googleapis.com/css?family=Roboto+Mono:700,300\' rel=\'stylesheet\' type=\'text/css\'>\n  <style>* {\n  margin: 0;\n  padding: 0;\n}\n\nhtml, body {\n  height: 100%;\n}\n\nbody {\n  font-family: "Roboto Mono", "Helvetica", sans-serif;\n  font-size: 14px;\n}\n\n#logo {\n  width:  85px;\n  height: 85px;\n}\n\n#display {\n  color: #95A5A6;\n  background: #212121;\n}\n\n#header {\n  background: #111111;\n}\n\n#print {\n  color: #000;\n  background: #FFF;\n}\n\nh1 {\n  font-size: 21px;\n  font-weight: 300;\n  font-weight: 300;\n}\n\nh2 {\n  font-size: 18px;\n  padding-bottom: 20px;\n  font-weight: 300;\n}\n\nh3 {\n  font-size: 14px;\n  font-weight: 700;\n  text-transform: uppercase;\n}\n\nh4 {\n  font-size: 13px;\n  