In [1]:
# Parameter loading and model selection
def load_params():
    """Read the model name and the training feature count from the environment.

    Calls ``load_dotenv()`` and then reads the ``MODEL`` and ``FEATURE_TRAIN``
    environment variables.

    Returns:
        tuple: ``(model_name, feature_train)`` where ``model_name`` is the
        ``MODEL`` string and ``feature_train`` is ``FEATURE_TRAIN`` as an int.

    Raises:
        ValueError: if ``MODEL`` or ``FEATURE_TRAIN`` is missing or empty, or
            if ``FEATURE_TRAIN`` cannot be parsed as an integer.
    """
    import os
    from dotenv import load_dotenv

    load_dotenv()

    model_name = os.getenv("MODEL")
    if not model_name:
        # Fail here with a clear message instead of later, when the missing
        # name would be concatenated into a file path.
        raise ValueError("Environment variable MODEL is not set")

    raw_feature_train = os.getenv("FEATURE_TRAIN")
    if raw_feature_train is None or not raw_feature_train.strip():
        # int(None) would raise an opaque TypeError; name the variable instead.
        raise ValueError("Environment variable FEATURE_TRAIN is not set")

    try:
        feature_train = int(raw_feature_train)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable FEATURE_TRAIN must be an integer, got {raw_feature_train!r}"
        ) from exc

    return model_name, feature_train
def load_model(model_name):
    """Load the object stored at ``models/<model_name>.pkl`` using joblib.

    Args:
        model_name: base name of the model file, without folder or extension.

    Returns:
        The object that ``joblib.load`` reads from that file.
    """
    import joblib

    # Path is built by plain concatenation: folder + name + extension.
    model_path = 'models/' + model_name + '.pkl'
    return joblib.load(model_path)

def load_preprocessor():
    """Load the preprocessor and PCA objects from the ``models`` folder.

    Returns:
        tuple: ``(preprocessor, pca)`` read with joblib from
        ``models/preprocessor.pkl`` and ``models/pca.pkl``, in that order.
    """
    import joblib

    artifact_paths = ('models/preprocessor.pkl', 'models/pca.pkl')
    # Load in the listed order so the returned pair is (preprocessor, pca).
    return tuple(joblib.load(path) for path in artifact_paths)

def predict(input_file, output_file):
    """Run the stored model over a parquet file and save the predictions.

    Reads ``input_file`` with pandas, applies the preprocessor (and the PCA
    transform when the preprocessed data has more columns than
    ``feature_train``), calls ``model.predict`` and writes the result to
    ``output_file`` as parquet.

    Args:
        input_file: path of the parquet file to read.
        output_file: path of the parquet file to write.

    Returns:
        None. The predictions are written to ``output_file``.
    """
    import pandas as pd

    # Load parameters
    model_name, feature_train = load_params()

    # Load the input data
    print('Carga de datos')
    data = pd.read_parquet(input_file)

    # Load preprocessor and model
    print('Carga modelo y preprocesador')
    preprocessor, pca = load_preprocessor()
    model = load_model(model_name)

    # Apply the preprocessor; PCA is applied only when the preprocessed data
    # has more columns than feature_train.
    X = preprocessor.transform(data)
    if X.shape[1] > feature_train:
        X = pca.transform(X)

    # Run the predictions
    print('Realizando predicciones')
    predictions = model.predict(X)

    output = pd.DataFrame(predictions)
    # A DataFrame built from an array gets integer column labels (0, 1, ...).
    # Some pandas/engine combinations refuse to write those to parquet
    # ("parquet must have string column names"), so use string labels.
    output.columns = output.columns.map(str)
    output.to_parquet(output_file)
    print('Predicciones guardadas')

In [7]:
# Parquet batch read by the exploratory cells below
input_file = "data/input/batch1.parquet"

In [8]:
import pandas as pd

# Keep only the first 50 rows of the input batch as a small sample
df = (
    pd.read_parquet(input_file)
    .head(50)
)

# Same sample as a list of per-row dicts
data_json = df.to_dict(orient="records")

In [19]:
import pandas as pd

# Read the model name and the training feature count
model_name, feature_train = load_params()

print('Carga de datos')
data = pd.read_parquet(input_file)

# Load the preprocessor, the PCA object and the model
print('Carga modelo y preprocesador')
preprocessor, pca = load_preprocessor()
model = load_model(model_name)

# Apply the preprocessor; PCA runs only when the preprocessed data has more
# columns than feature_train
X = preprocessor.transform(data)
if feature_train < X.shape[1]:
    X = pca.transform(X)

# Get the predict_proba output for every row
print('Realizando predicciones')
predictions = model.predict_proba(X)

# Label the columns 'Clase 1', 'Clase 2', ... in column order
a = pd.DataFrame(
    predictions,
    columns=[f'Clase {idx}' for idx in range(1, predictions.shape[1] + 1)],
)

Carga de datos
Carga modelo y preprocesador
Realizando predicciones


In [24]:
# Display the probability table as a list of per-row dicts
a.to_dict(orient='records')

[{'Clase 1': 0.7263084152288903,
  'Clase 2': 0.07762405624152383,
  'Clase 3': 0.03275787586635839,
  'Clase 4': 0.03374320732873364,
  'Clase 5': 0.02937244786882468,
  'Clase 6': 0.029658695349484822,
  'Clase 7': 0.07053530211618449},
 {'Clase 1': 0.8176845037362938,
  'Clase 2': 0.03977173620875666,
  'Clase 3': 0.03619896443425855,
  'Clase 4': 0.03523231954178246,
  'Clase 5': 0.013552179030120208,
  'Clase 6': 0.01155491851144025,
  'Clase 7': 0.04600537853734785},
 {'Clase 1': 0.17382534294337698,
  'Clase 2': 0.17997898699466874,
  'Clase 3': 0.08065956539924549,
  'Clase 4': 0.34045043261061003,
  'Clase 5': 0.05016933212985844,
  'Clase 6': 0.08402335371836651,
  'Clase 7': 0.0908929862038737},
 {'Clase 1': 0.7263084152288903,
  'Clase 2': 0.07762405624152383,
  'Clase 3': 0.03275787586635839,
  'Clase 4': 0.03374320732873364,
  'Clase 5': 0.02937244786882468,
  'Clase 6': 0.029658695349484822,
  'Clase 7': 0.07053530211618449},
 {'Clase 1': 0.9405409717632514,
  'Clase 2':

In [35]:
# Sample path for trying out file-name extraction.
# NOTE: this rebinds input_file, which earlier cells use for the parquet batch.
input_file = "/Hola/my/mnt/Balmo/nombre.doc"

In [36]:
# File name = text after the last "/"; a path with no "/" is returned whole
# (a slice based on a reversed find() would yield '' in that case).
input_file.rsplit("/", 1)[-1]

'nombre.doc'

In [29]:
# NOTE(review): `string` is not defined in any visible cell — presumably a
# leftover kernel variable from an earlier run; confirm before re-running.
# Looks up the position of the first "/" in it.
string.find("/")

4