# Clonamos el repositorio para obtener los datasets

In [None]:
# Clone the course repository (into ./tensorflow) to obtain its datasets.
!git clone https://github.com/joanby/tensorflow.git

Cloning into 'tensorflow'...
remote: Enumerating objects: 51, done.[K
remote: Counting objects: 100% (51/51), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 60311 (delta 32), reused 37 (delta 23), pack-reused 60260[K
Receiving objects: 100% (60311/60311), 442.46 MiB | 14.79 MiB/s, done.
Resolving deltas: 100% (82/82), done.
Checking out files: 100% (60225/60225), done.


# Damos acceso a nuestro Drive

In [None]:
# Mount the user's Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Test it

In [None]:
# Sanity check: list the Drive root to confirm the mount succeeded.
!ls '/content/drive/My Drive' 

# Google colab tools

In [1]:
from google.colab import files # Handle files and, for example, export them to the browser
import glob # Standard-library filename pattern matching
from google.colab import drive # Mount your Google Drive

## Especificando la versión de TensorFlow

Ejecutar `import tensorflow` importará la versión por defecto (actualmente 2.x). Puedes usar la 1.x ejecutando una celda con el comando mágico `%tensorflow_version 1.x` **antes** de ejecutar `import tensorflow`.

### Si no funciona hacer el pip install


In [2]:
# Fallback if the magic below does not work: install TensorFlow 1.x with pip.
#!pip install tensorflow==1.14
# Select TensorFlow 1.x; this must run before `import tensorflow`.
%tensorflow_version 1.x

TensorFlow 1.x selected.


# Importar Tensorflow

In [3]:
import tensorflow as tf
# Print the loaded version to confirm that a 1.x release was selected.
print(tf.__version__)
import matplotlib.pyplot as plt

1.15.2


In [142]:
# Open a TF1 session.
# NOTE(review): `session` is rebound to a new tf.Session() further down without
# this one being closed; consider creating the session only once.
session = tf.Session()

# Correspondencia en direcciones postales

In [161]:
import random
import string
import numpy as np

In [162]:
# Number of synthetic addresses to generate.
n = 10

# Vocabulary the random addresses are assembled from.
street_names = ["diagon", "elm", "abbey", "gran", "python"]
street_type = ["callejon", "calle", "carrera", "via", "avenida"]

# A pool of five random ZIP codes plus one random house number per address.
street_zips = [random.randint(20000, 29999) for _ in range(5)]
numbers = [random.randint(1, 999) for _ in range(n)]

In [163]:
# Draw a random street name, street type and ZIP code for each of the n addresses.
streets = [random.choice(street_names) for _ in range(n)]
street_prefs = [random.choice(street_type) for _ in range(n)]
zips = [random.choice(street_zips) for _ in range(n)]

# Assemble "<type> <name> <number>" strings and pair each with its ZIP code.
full_streets = [
    " ".join((pref, street, str(number)))
    for pref, street, number in zip(street_prefs, streets, numbers)
]
reference_data = [[address, zip_code] for address, zip_code in zip(full_streets, zips)]

In [164]:
# Display the generated [address, zip] reference pairs.
reference_data

[['callejon python 861', 25879],
 ['carrera elm 482', 21630],
 ['callejon diagon 940', 25879],
 ['carrera abbey 861', 25879],
 ['avenida elm 431', 21630],
 ['calle abbey 104', 29700],
 ['calle diagon 760', 29700],
 ['avenida diagon 460', 29700],
 ['calle diagon 297', 21630],
 ['via diagon 922', 29712]]

In [165]:
def create_typo(s, prob=0.75):
    """Return ``s`` with at most one character replaced by a random letter.

    Args:
        s: String to corrupt.
        prob: Probability, in [0, 1], that a typo is introduced.

    Returns:
        Either ``s`` unchanged, or a string of the same length in which one
        randomly chosen position holds a random ASCII lowercase letter (the
        letter may coincide with the original character). An empty string is
        returned as-is.
    """
    # An empty string has no position to corrupt; choosing an index would raise.
    if not s:
        return s
    # Use the caller-supplied probability rather than a hard-coded 0.75.
    if random.uniform(0, 1) < prob:
        rand_idx = random.choice(range(len(s)))
        s_list = list(s)
        s_list[rand_idx] = random.choice(string.ascii_lowercase)
        s = ''.join(s_list)
    return s

In [166]:
# Corrupt each street name independently (create_typo decides whether to add a typo).
typo_streets = list(map(create_typo, streets))

In [167]:
# Rebuild the full addresses using the (possibly misspelled) street names;
# street type, house number and ZIP code are the same as in the reference set.
typo_full_streets = [
    " ".join((pref, street, str(number)))
    for pref, street, number in zip(street_prefs, typo_streets, numbers)
]
test_data = [[address, zip_code] for address, zip_code in zip(typo_full_streets, zips)]

In [168]:
# Display the test pairs (addresses may contain a typo in the street name).
test_data

[['callejon python 861', 25879],
 ['carrera elm 482', 21630],
 ['callejon diagmn 940', 25879],
 ['carrera afbey 861', 25879],
 ['avenida elm 431', 21630],
 ['calle wbbey 104', 29700],
 ['calle diagoo 760', 29700],
 ['avenida diagon 460', 29700],
 ['calle diagon 297', 21630],
 ['via diagoc 922', 29712]]

In [170]:
# Session used to run the address-matching ops below.
# NOTE(review): this rebinds `session`, which was already created earlier in the
# notebook, without closing the previous instance.
session = tf.Session()

In [171]:
# Query side: the address as a sparse tensor of characters and its ZIP as a [batch, 1] column.
test_address = tf.sparse_placeholder(tf.string)
test_zip = tf.placeholder(tf.float32, shape=[None, 1])

# Reference side: all n reference addresses and their ZIPs as a [batch, n] row.
ref_address = tf.sparse_placeholder(tf.string)
ref_zip = tf.placeholder(tf.float32, shape=[None, n])

In [172]:
# Squared difference between every reference ZIP and the query ZIP (broadcast over columns).
zip_dist = tf.square(ref_zip - test_zip)
# Edit distance between query and reference addresses, normalized by the reference length.
address_dist = tf.edit_distance(hypothesis=test_address, truth=ref_address, normalize=True)

- $S(x,y) = 0$ si $x$ e $y$ son totalmente diferentes (no se parecen en nada)
- $S(x,x) = 1$, ya que todo objeto es similar (si no igual) a sí mismo.
- $S(x,y) = \frac{D - d(x,y)}{D-d}$ donde $D$ es la mayor distancia entre dos objetos posibles, y $d$ es la menor.

In [173]:
# Largest and smallest squared ZIP distance per query row, kept as [batch, 1]
# columns so they broadcast against zip_dist ([batch, n]). This replaces the
# squeeze/gather/argmax construct, which fails when there is a single reference.
zip_max = tf.reduce_max(zip_dist, axis=1, keepdims=True)
zip_min = tf.reduce_min(zip_dist, axis=1, keepdims=True)
# Min-max similarity S = (D - d(x, y)) / (D - d_min).
# divide_no_nan returns 0 instead of NaN when every reference ZIP is equally
# distant (D == d_min); the ranking is then decided by the address similarity.
zip_sim = tf.math.divide_no_nan(
    tf.subtract(zip_max, zip_dist),
    tf.subtract(zip_max, zip_min),
)

In [174]:
# Turn the normalized edit distance into a similarity (identical strings -> 1.0).
address_sim = 1.0 - address_dist

$$S(x,y) = \sum_{i=1}^k w_i S_i(x,y):\quad \sum_{i=1}^k w_i = 1$$

In [175]:
# Weights of the two similarity components; they are built to sum to 1.
address_wi = 0.5
zip_wi = 1.0 -address_wi

In [176]:
# Weighted sum of both similarities; the address term is transposed so that it
# lines up with the row layout of zip_sim.
weighted_sim = tf.transpose(address_wi * address_sim) + zip_wi * zip_sim

In [177]:
# Index of the reference entry with the highest combined similarity in each row.
top_match_idx = tf.argmax(weighted_sim, axis=1)

In [178]:
def sparse_from_word_vector(word_vector):
    """Encode a list of strings as a character-level ``tf.SparseTensorValue``.

    Character ``j`` of word ``i`` is stored at index ``[i, 0, j]``.

    Args:
        word_vector: Sequence of strings.

    Returns:
        A ``tf.SparseTensorValue`` holding one entry per character, with dense
        shape ``[num_words, 1, max_word_length]``.
    """
    num_words = len(word_vector)
    # The last dimension must span the longest word so that every index lies
    # inside the declared dense shape (it was previously fixed at 1). The lower
    # bound of 1 keeps the previous shape for empty input.
    max_len = max([len(word) for word in word_vector] + [1])
    idx = [[word_i, 0, char_i]
           for word_i, word in enumerate(word_vector)
           for char_i in range(len(word))]
    chars = list(''.join(word_vector))
    return tf.SparseTensorValue(idx, chars, [num_words, 1, max_len])

In [179]:
# Split the [address, zip] pairs into a list of addresses and a 1 x n array of ZIP codes.
reference_address = [address for address, _ in reference_data]
reference_zips = np.array([[zip_code for _, zip_code in reference_data]])

In [180]:
# Encode the reference addresses once; the same value is fed for every query below.
sparse_ref_set = sparse_from_word_vector(reference_address)

In [181]:
for i in range(n):
    # Unpack the i-th test address (possibly misspelled) and its ZIP code.
    test_address_entry, test_zip_aux = test_data[i]
    test_zip_entry = [[test_zip_aux]]

    # Repeat the query once per reference address so both sparse inputs have n entries.
    test_address_rep = [test_address_entry for _ in range(n)]
    sparse_test_set = sparse_from_word_vector(test_address_rep)

    feed_dict = {
        test_address: sparse_test_set,
        ref_address: sparse_ref_set,
        test_zip: test_zip_entry,
        ref_zip: reference_zips,
    }

    # Evaluate the graph; the result holds the index of the best-scoring reference entry.
    best_match = session.run(top_match_idx, feed_dict=feed_dict)
    best_address = reference_address[best_match[0]]
    best_zip = reference_zips[0][best_match[0]]

    print("Dirección original: " + str(test_address_entry) + ", " + str(test_zip_aux))
    print("Dirección corregida: " + str(best_address) + ", " + str(best_zip) + "\n")

Dirección original: callejon python 861, 25879
Dirección corregida: callejon python 861, 25879

Dirección original: carrera elm 482, 21630
Dirección corregida: carrera elm 482, 21630

Dirección original: callejon diagmn 940, 25879
Dirección corregida: callejon diagon 940, 25879

Dirección original: carrera afbey 861, 25879
Dirección corregida: carrera abbey 861, 25879

Dirección original: avenida elm 431, 21630
Dirección corregida: avenida elm 431, 21630

Dirección original: calle wbbey 104, 29700
Dirección corregida: calle abbey 104, 29700

Dirección original: calle diagoo 760, 29700
Dirección corregida: calle diagon 760, 29700

Dirección original: avenida diagon 460, 29700
Dirección corregida: avenida diagon 460, 29700

Dirección original: calle diagon 297, 21630
Dirección corregida: calle diagon 297, 21630

Dirección original: via diagoc 922, 29712
Dirección corregida: via diagon 922, 29712

