# Realizar Predicciones con Modelo Previamente Exportado

## Importar Librerías

In [276]:
import os
import tensorflow as tf
import pandas as pd

## Crear Variables Globales

In [277]:
# '__file__' is a plain string here, so realpath() resolves it against the
# current working directory and dirname() then yields that directory.
_marker_path = os.path.realpath('__file__')
dir = os.path.dirname(_marker_path)

# Human-readable income labels; the list position is the numeric class id
# used to decode the model output.
CLASS_OF_INCOME = [
    " <=50K",
    " >50K",
]

## Cargar Modelo

Carga el modelo desde el directorio de exportación y crea una función de predicción.

In [278]:
# Build the export path with os.path.join so separators are handled by the
# standard library (plain concatenation produces '//export/...' when `dir`
# is the filesystem root).
export_dir = os.path.join(dir, 'export', '1536099145')

# Load the exported SavedModel and wrap it in a callable prediction function.
predict_fn = tf.contrib.predictor.from_saved_model(export_dir)

INFO:tensorflow:Restoring parameters from /notebooks/export/1536099145/variables/variables


## Obtener Datos

Obtener los datos nuevos en base a los cuales se desean hacer predicciones

In [279]:
# Read the previously saved rows to predict on.
inputs = pd.read_csv('./testing_data/testing_data.csv')

# This data set already contains the known outcome; keep it aside.
true_results = inputs['income_bracket']

# Remove the outcome column so only the remaining data columns are left.
inputs = inputs.drop(labels=['income_bracket'], axis='columns')

## Conversión de Input Data

Convertir datos de entrada (input data) a strings serializados.

In [280]:
def _make_feature(value):
    """Wrap one cell value in a tf.train.Feature, or return None to omit it.

    Strings are UTF-8 encoded into a ``bytes_list``. Integers and non-missing
    floats are stored in a ``float_list``. Any other value (NaN/None,
    booleans, arbitrary objects) returns None so the caller leaves the
    feature out of the Example.
    """
    if isinstance(value, str):
        return tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[value.encode()])
        )

    # Real type checks instead of substring matching on the type name, which
    # treated any type whose name contains "int" as numeric and silently
    # dropped float values.
    is_number = pd.api.types.is_integer(value) or (
        pd.api.types.is_float(value) and not pd.isna(value)
    )
    if is_number:
        return tf.train.Feature(
            float_list=tf.train.FloatList(value=[float(value)])
        )

    return None


# Serialized tf.train.Example strings, one per row of `inputs`.
examples = []

for _, row in inputs.iterrows():
    feature = {}
    # Series.items() replaces the deprecated Series.iteritems().
    for col, value in row.items():
        converted = _make_feature(value)
        if converted is not None:
            feature[col] = converted

    example = tf.train.Example(features=tf.train.Features(feature=feature))
    examples.append(example.SerializeToString())

## Realizar predicciones

In [281]:
# Run the loaded model on all serialized examples in one call; the list is
# passed under the 'inputs' key.
predictions = predict_fn(dict(inputs=examples))

In [38]:
# Format of the `predictions` dict returned by predict_fn (one row per input example):

# {
#     'classes': 
#     array([
#         [b'0', b'1'],
#         [b'0', b'1'],
#         [b'0', b'1'],
#         ...,
#         [b'0', b'1'],
#         [b'0', b'1'],
#         [b'0', b'1']
#     ], dtype=object),
#     'scores': 
#     array([
#         [9.9974567e-01, 2.5436125e-04],
#         [5.6878764e-02, 9.4312125e-01],
#         [5.7067543e-01, 4.2932451e-01],
#         ...,
#         [9.8696136e-01, 1.3038697e-02],
#         [9.9976856e-01, 2.3141838e-04],
#         [3.3010733e-31, 1.0000000e+00]
#     ], dtype=float32)
# }

## Procesar Predicciones

Procesar las predicciones para obtener el resultado de predicción (CLASE DE INGRESO **<=50K** o **>50K**) y la probabilidad (%) para este resultado.

En este caso se busca la clase con mayor probabilidad (más cercana al 100%) y se agrega al DataFrame de **inputs** para identificar los resultados de cada fila de datos ingresados.

In [285]:
# Column-wise accumulator for the decoded predictions.
probabilities = {'CLASS_OF_INCOME': [], 'PROBABILITY %': []}

for classes, scores in zip(predictions['classes'], predictions['scores']):
    # Position of the highest score, found in a single pass.
    # Example: [5.6878764e-02, 9.4312125e-01] -> position 1
    index_of_max = scores.argmax()

    # Highest score as a percentage rounded to two decimals.
    # Example: 9.4312125e-01 -> 94.31
    # float() is applied before round() so the result is a plain Python float.
    probability = round(float(scores[index_of_max] * 100), 2)

    # Class id stored at the same position as the highest score.
    # Example: [b'0', b'1'] at position 1 -> b'1' (class 1)
    class_of_max = classes[index_of_max]

    # Translate the class id into its label and record both values.
    probabilities['CLASS_OF_INCOME'].append(CLASS_OF_INCOME[int(class_of_max)])
    probabilities['PROBABILITY %'].append(probability)

# Build the frame on the index of `inputs`: the column assignments below align
# on index, so a fresh RangeIndex would yield NaN whenever `inputs` is indexed
# differently. A length mismatch raises here instead of passing silently.
probabilities = pd.DataFrame(probabilities, index=inputs.index)

inputs['CLASS_OF_INCOME'] = probabilities['CLASS_OF_INCOME']
inputs['PROBABILITY %'] = probabilities['PROBABILITY %']

In [286]:
# Show only the first rows instead of rendering the whole DataFrame.
inputs.head(10)

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,CLASS_OF_INCOME,PROBABILITY %
0,25,Private,Some-college,10,Divorced,Other-service,Own-child,Black,Male,0,0,38,United-States,<=50K,99.98
1,51,Local-gov,Masters,14,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,80,United-States,>50K,95.61
2,32,Private,Assoc-acdm,12,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,62,United-States,<=50K,55.39
3,51,Self-emp-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,25,United-States,>50K,75.53
4,42,Private,Some-college,10,Divorced,Sales,Not-in-family,White,Male,0,0,50,United-States,<=50K,93.50
5,39,Private,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,1887,40,United-States,>50K,99.64
6,24,Private,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,45,United-States,<=50K,79.08
7,52,Private,HS-grad,9,Divorced,Adm-clerical,Unmarried,White,Female,0,1741,38,United-States,>50K,90.86
8,58,?,1st-4th,2,Married-spouse-absent,?,Unmarried,Amer-Indian-Eskimo,Male,0,0,40,United-States,<=50K,99.66
9,43,Private,Masters,14,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,55,United-States,>50K,88.81
