# Funciones hash

### Método de división

In [16]:
def division(x=0, M=97):
    """Division-method hash: reduce a key modulo the table size.

    The sign of ``x`` is discarded before the reduction, so ``x`` and ``-x``
    always land in the same bucket.

    Args:
        x: Key to hash.
        M: Modulus (number of buckets); for a positive ``M`` the result
            lies in ``[0, M)``.

    Returns:
        ``abs(x) % M``.
    """
    magnitude = abs(x)
    return magnitude % M

# Sample keys, one per line: two consecutive values and a negative one
print(*map(division, (30501, 30502, -30501)), sep='\n')
# Keys smaller than the modulus are mapped to themselves
for i in range(10):
    print(i, '=>', division(i))

43
44
43
0 => 0
1 => 1
2 => 2
3 => 3
4 => 4
5 => 5
6 => 6
7 => 7
8 => 8
9 => 9


### Método de la mitad del cuadrado

In [17]:
def mitad_cuadrado(x=0, k=10, w=32):
    """Mid-square hash: extract ``k`` bits from the square of the key.

    The key is squared, the ``w - k`` lowest bits are shifted out and the
    next ``k`` bits are kept, i.e. bits ``w-k`` .. ``w-1`` of ``x**2``.

    Args:
        x: Integer key to hash.
        k: Number of bits in the result.
        w: Word size, in bits, that the square is considered against.

    Returns:
        An integer in ``[0, 2**k)``.
    """
    low_bits = (1 << k) - 1
    squared = x * x
    shifted = squared >> (w - k)
    return shifted & low_bits

# Sample keys, one per line: two consecutive values and a negative one
print(*map(mitad_cuadrado, (30501, 30502, -30501)), sep='\n')
# With the default k=10, w=32 the square is shifted right by 22 bits,
# so every key whose square is below 2**22 hashes to 0
for i in range(10):
    print(i, '=>', mitad_cuadrado(i))

221
221
221
0 => 0
1 => 0
2 => 0
3 => 0
4 => 0
5 => 0
6 => 0
7 => 0
8 => 0
9 => 0


### Método de Fibonacci

In [18]:
from math import sqrt
# Square of the golden ratio phi = (1 + sqrt(5)) / 2; note that phi**2 == phi + 1
((1+sqrt(5))/2)**2

2.618033988749895

In [19]:
# Conjugate root of x**2 = x + 1; its magnitude (about 0.618) equals 1 / phi
(1-sqrt(5))/2

-0.6180339887498949

In [20]:
def fibonacci(x=0, k=10, w=32, a=2654435769):
    """Fibonacci (multiplicative) hash of an integer key.

    The key is multiplied by the constant ``a``, the ``w - k`` lowest bits of
    the product are shifted out and the next ``k`` bits are kept.

    Args:
        x: Integer key to hash.
        k: Number of bits in the result.
        w: Word size, in bits, that the product is considered against.
        a: Multiplier; the default 2654435769 is approximately
            ``2**32 / phi``, with ``phi`` the golden ratio.

    Returns:
        An integer in ``[0, 2**k)``.
    """
    low_bits = (1 << k) - 1
    product = x * a
    shifted = product >> (w - k)
    return shifted & low_bits

# Sample keys, one per line: two consecutive values and a negative one
print(*map(fibonacci, (30501, 30502, -30501)), sep='\n')
# Small consecutive keys
for i in range(10):
    print(i, '=>', fibonacci(i))

670
279
353
0 => 0
1 => 632
2 => 241
3 => 874
4 => 483
5 => 92
6 => 725
7 => 334
8 => 966
9 => 575


### Datos faltantes

In [21]:
import pandas as pd
from io import StringIO

In [22]:
# Toy CSV with gaps: the second data row has an empty C field and the
# third data row has only three fields, so its D value is absent.
datos = (
    "A,B,C,D\n"
    "1.2,2.5,3.9,4.5\n"
    "5.2,6.8,,8.4\n"
    "9.1,0.7,1.3"
)
# Parse the text through an in-memory buffer instead of a file on disk
df = pd.read_csv(StringIO(datos))
df

Unnamed: 0,A,B,C,D
0,1.2,2.5,3.9,4.5
1,5.2,6.8,,8.4
2,9.1,0.7,1.3,


In [23]:
# Number of missing values in each column
df.isna().sum()

A    0
B    0
C    1
D    1
dtype: int64

Deshacerse de valores nulos

In [24]:
# Column-wise: discard every column that contains a missing value
df.dropna(axis='columns')

Unnamed: 0,A,B
0,1.2,2.5
1,5.2,6.8
2,9.1,0.7


In [25]:
# Row-wise: discard every row that contains a missing value
df.dropna(axis='index')

Unnamed: 0,A,B,C,D
0,1.2,2.5,3.9,4.5


Sustitución de faltantes

In [27]:
from sklearn.impute import SimpleImputer
import numpy as np

Sustituye valores faltantes con la media

In [29]:
# Mean imputation: each NaN is replaced by the mean of its column.
# The imputer is fitted and applied to the same array in one step.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
datos_n = imp.fit_transform(df.values)
datos_n

array([[1.2 , 2.5 , 3.9 , 4.5 ],
       [5.2 , 6.8 , 2.6 , 8.4 ],
       [9.1 , 0.7 , 1.3 , 6.45]])

Sustituye valores faltantes con el dato más frecuente

In [30]:
# Toy categorical data with one missing entry (np.nan) in each column
# NOTE(review): this rebinds `df` (and `imp` below), replacing the objects
# from the numeric example — confirm no other cell still needs them
df = pd.DataFrame([['a', np.nan],
                  ['b', 'y'],
                  ['c', 'x'],
                  ['a', 'y'],
                  [np.nan, 'z']], dtype='category')

# Fill each gap with the most frequent value of its column
imp = SimpleImputer(strategy='most_frequent')
imp.fit_transform(df)

array([['a', 'y'],
       ['b', 'y'],
       ['c', 'x'],
       ['a', 'y'],
       ['a', 'z']], dtype=object)