# Proyecto Industrialización: creación de K-API-BARA

<img src="capibara.jpg" alt="Icono de API y Capibara" width="150">

In [308]:
# Importamos todos los paquetes que vamos a necesitar
from flask import Flask, request, jsonify
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import json
import sqlite3

In [309]:
# Creamos la aplicación Flask 
app = Flask(__name__)
app.config['DEBUG'] = False

In [310]:
# Creamos la variable model a la que luego asinaremos nuestro modelo; hacemos lo mismo con nuestras variables que almacenarán nuestros datos de entrenamiento y de prueba/test
model = None
X_train, X_test, y_train, y_test = None, None, None, None

In [311]:
# Cargamos datos de nuestro modelo
general_data = pd.read_csv('DataSet/general_data.csv')
employee_survey = pd.read_csv('DataSet/employee_survey_data.csv')
manager_survey = pd.read_csv('DataSet/manager_survey_data.csv')

# Unimos los datasets con un merge
data = pd.merge(general_data, employee_survey, on='EmployeeID')
data = pd.merge(data, manager_survey, on='EmployeeID')

# Preprocesar datos quitando los valores na
data = data.dropna()

# Transformamos las variables categóricas en booleanas
data = pd.get_dummies(data, drop_first=True)

In [312]:
# Variable y variable a predecir
X = data.drop(['Attrition_Yes', 'EmployeeID', 'EmployeeCount', 'StandardHours','YearsSinceLastPromotion','JobSatisfaction','JobInvolvement'], axis=1)
y = data['Attrition_Yes']
# Dividimos entre train y test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True) # 20% para el conjunto de test y 80% para el entrenamiento

In [313]:
# Hacer el fit
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [314]:
# Hacer predicciones
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

In [315]:
# Guardar el modelo en un pickle
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [316]:
# Guardar los datos de entrenamiento y prueba en un json
train_data = {
    'X_train': X_train.to_dict(orient='list'),
    'y_train': y_train.to_list()
}
test_data = {
    'X_test': X_test.to_dict(orient='list'),
    'y_test': y_test.to_list()
}

with open('train_data.json', 'w') as f:
        json.dump(train_data, f)
    
with open('test_data.json', 'w') as f:
    json.dump(test_data, f)

In [317]:
# Configuramos la home de nuestra API

@app.route('/', methods=['GET'])
def home():
	return "<h1>La K-API-BARA</p>"

In [318]:
# Endpoint 1 que nos muestre todos los datos

@app.route('/data', methods=['GET'])
def get_data():
    return data.to_json(orient='records'), 200

In [319]:
# Endpoint 2 para obtener los datos de entrenamiento

def load_train_data():
    global train_data
    with open('train_data.json', 'r') as f:
        train_data = json.load(f)

In [320]:
@app.route('/train', methods=['GET'])
def get_train_data():
    global train_data
    if train_data is None:
        return jsonify({"message": "Datos de entrenamiento no disponibles"}), 400

    return jsonify(train_data), 200

In [321]:
# Endpoint 3 para obtener los datos del test

def load_test_data():
    global test_data
    with open('test_data.json', 'r') as f:
        test_data = json.load(f)

In [322]:
@app.route('/test', methods=['GET'])
def get_test_data():
    global test_data
    if test_data is None:
        return jsonify({"message": "Datos del test no disponibles"}), 400

    return jsonify(test_data), 200

In [323]:
# Endpoint 4 con una observación n de mi dataset

@app.route('/observacion', methods=['GET']) 
def obs():
    return jsonify(data.iloc[34].to_dict())

In [324]:
# Endpoint 5: query

@app.route('/query', methods=['GET'])
def age():
    if 'age' in request.args:
        age = int(request.args['age'])
    else: "No se enuentra una edad válida"

    edad = data[data['Age']==age]
    edad_dict = edad.to_dict(orient= "records")
    return jsonify(edad_dict)


#http://127.0.0.1:5000/query?age=51


In [325]:
# Endpoint 6: predicciones
df = pd.DataFrame(y_pred)

In [326]:
df

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
855,False
856,False
857,False
858,False


In [327]:
@app.route('/predicciones', methods=['GET']) 
def preds():
    return jsonify(df.iloc[2].to_dict())

In [328]:
app.run(port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [28/Jul/2024 20:23:01] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [28/Jul/2024 20:23:07] "GET /predicciones HTTP/1.1" 200 -
