# Clasificacion de la calidad del vino

Se entrenará un modelo que clasifique la calidad del vino, ya sea tinto o blanco, según sus características.

In [None]:
# librerias
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split as tts

## Datos

In [None]:
# red wine data: semicolon-separated CSV fetched from archive.ics.uci.edu
url_tinto = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
tinto = pd.read_csv(url_tinto, sep=';')

# rows/columns, then a preview of the first records
print(tinto.shape)
tinto.head()

In [None]:
# white wine data: semicolon-separated CSV fetched from archive.ics.uci.edu
url_blanco = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'
blanco = pd.read_csv(url_blanco, sep=';')

# rows/columns, then a preview of the first records
print(blanco.shape)
blanco.head()

In [None]:
# tag each frame with its wine colour (0 for blanco, 1 for tinto),
# then stack both into a single dataframe with a fresh index
for frame, flag in ((blanco, 0), (tinto, 1)):
    frame['color'] = flag

vino = pd.concat([blanco, tinto], ignore_index=True)
vino.head()

**exploracion**

In [None]:
# column names of the combined dataframe
vino.columns

In [None]:
# summary statistics of the combined dataframe
vino.describe()

In [None]:
# dataframe overview, with memory usage computed in 'deep' mode
vino.info(memory_usage='deep')

In [None]:
# target variable (quality): number of rows per quality score

vino.quality.value_counts()

In [None]:
# histogram of the quality column, labelled through the returned Axes
ax = vino.quality.hist()
ax.set_title('Histograma Calidad del Vino')
ax.set_xlabel('Grupos de Calidad')
ax.set_ylabel('Numero de elementos')
plt.show()

Los datos están desbalanceados, tanto en tinto-blanco como en las calidades.

Se hace un nuevo binning: calidad=3 agrupa las calidades originales (3,4,5), calidad=6 corresponde a (6) y calidad=9 agrupa (7,8,9). De esta manera quedan balanceados según la calidad. Es una aproximación simple a este problema; la idea es llegar a un modelo funcional para poner en producción.


In [None]:
# collapse quality into three classes: 3 <- (3,4,5), 6 <- (6), 9 <- (7,8,9)
# .assign() returns a new dataframe, so the relabelling never writes to a
# filtered selection of `vino` (which raises SettingWithCopyWarning)
vino_malo = vino[vino.quality.isin([3, 4, 5])].assign(quality=3)
vino_normal = vino[vino.quality == 6].assign(quality=6)
vino_bueno = vino[vino.quality.isin([7, 8, 9])].assign(quality=9)

# stack the three groups in the order malo, normal, bueno with a fresh index
vino = pd.concat([vino_malo, vino_normal, vino_bueno], ignore_index=True)

# save the re-binned data; the web app reads this file at startup
vino.to_csv('data/vino_data.csv', index=False)

# rows per class after the re-binning
vino.quality.value_counts()

In [None]:
# histogram of the quality column, labelled through the returned Axes
ax = vino.quality.hist()
ax.set_title('Histograma Calidad del Vino')
ax.set_xlabel('Grupos de Calidad')
ax.set_ylabel('Numero de elementos')
plt.show()

In [None]:
# preview of the first rows
vino.head()

## Modelo

In [None]:
# features and target

# every column except the target; taken before 'quality_c' is added below,
# so the encoded target is not part of X
X = vino.drop(columns='quality')

# encode the quality labels as integer category codes and use them as target
vino['quality'] = pd.Categorical(vino['quality'])
vino['quality_c'] = vino['quality'].cat.codes
y = vino['quality_c']

In [None]:
# hold out 20% of the rows for testing; random_state fixes the shuffle
X_train, X_test, y_train, y_test=tts(X, y, test_size=0.2, random_state=42)

# shapes of the train and test feature sets
print (X_train.shape, X_test.shape)

In [None]:
# classifier: gradient boosting fitted on the training split
gbc = GBC(random_state=10, learning_rate=0.1, max_depth=10)
gbc.fit(X_train, y_train)

# persist the fitted model with pickle so the web app does not have to train
with open('data/gbc.p', 'wb') as f:
    pickle.dump(gbc, f, 2)

# reload the model from disk; the context manager guarantees that the file
# handle is closed once the model has been read
with open('data/gbc.p', 'rb') as f:
    gbc = pickle.load(f)

In [None]:
# predicted class for every row of the test set
y_pred=gbc.predict(X_test)
y_pred

In [None]:
# per-class probabilities for every row of the test set
y_prob=gbc.predict_proba(X_test)
y_prob

**Evaluacion**

In [None]:
# accuracy on the held-out test set

gbc.score(X_test, y_test)

**Matriz de Confusion**

In [None]:
# plot the confusion matrix (raw counts)

confusion = confusion_matrix(y_test, y_pred)

# fmt='d' prints the integer counts in full; the default '.2g' annotation
# format abbreviates counts of three or more digits to scientific notation
ax = sns.heatmap(confusion,
                 cmap=plt.cm.Blues,
                 annot=True,
                 fmt='d')

# move the bottom y-limit by +0.5 and the top y-limit by -0.5
# NOTE(review): presumably a workaround for heatmap rows being clipped in
# some matplotlib releases -- confirm it is still needed
b, t = ax.get_ylim()
ax.set_ylim(b+.5, t-.5)

plt.xlabel('Prediction')
plt.ylabel('Truth')
plt.title('Confusion Matrix(clases)')
plt.show()

In [None]:
# confusion matrix expressed as a percentage of all test predictions
confusion_pct = confusion / len(y_pred) * 100

ax = sns.heatmap(confusion_pct, annot=True, cmap=plt.cm.Blues)

# move the bottom y-limit by +0.5 and the top y-limit by -0.5
# NOTE(review): presumably a workaround for heatmap rows being clipped in
# some matplotlib releases -- confirm it is still needed
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + .5, top - .5)

plt.title('Confusion Matrix(%)')
plt.xlabel('Prediction')
plt.ylabel('Truth')
plt.show()

## Prediciendo a mano

In [None]:
# new data: the mean of every feature

# Select the feature columns by name before averaging. Zipping the feature
# names against vino.mean() only lines up by position, and only when the
# categorical 'quality' column is silently left out of the mean; recent
# pandas versions raise a TypeError for that column instead.
n_data = vino[X_train.columns].mean().to_dict()
n_data['color'] = 0
n_data

In [None]:
# convert to a dataframe: the dict keys become the index of a single column,
# and the transpose turns them into the columns of a one-row frame

X_pred=pd.DataFrame.from_dict(n_data, orient='index').T
X_pred

In [None]:
# class probabilities for the hand-built row, classes=[0,1,2]

pred=gbc.predict_proba(X_pred)
pred

# Web-App 

## main.py

In [None]:
# librerias
from flask import Flask
from flask import render_template
from flask import flash
from flask import request
from flask import jsonify
from flask import Markup

import logging
import io
import os
import sys

import pandas as pd
import numpy as np
import scipy
import pickle

from sklearn.ensemble import GradientBoostingClassifier as GBC



# start flask
app=Flask(__name__)



# model; None until startup() loads it from disk
gbc=None


# variables, features; None until startup() reads the column names
var=None
 
    

# builds the path of the image to load
def imagen_vino(color, calidad):
    """Return the static image path for a wine colour and quality.

    A ``color`` equal to 0 selects 'blanco'; any other value selects 'tinto'.
    ``calidad`` is converted to text and placed at the end of the file name.
    """
    color_str = 'blanco' if color == 0 else 'tinto'
    return '/static/images/vino_{}_{!s}.jpg'.format(color_str, calidad)




# before the first request
@app.before_first_request
def startup():
    """Load the pickled model and the feature names into the module globals.

    Sets ``gbc`` to the object unpickled from ``data/gbc.p`` and ``var`` to
    the column names of ``data/vino_data.csv`` without ``quality``.
    """
    global gbc, var

    # NOTE: pickle executes arbitrary code on load; data/gbc.p must only ever
    # be a file produced by this project
    with open('data/gbc.p', 'rb') as f:
        gbc = pickle.load(f)

    # only the header is needed, so nrows=0 avoids parsing the data rows
    var = pd.read_csv('data/vino_data.csv', nrows=0).drop('quality', axis=1).columns



# error handling
@app.errorhandler(500)
def server_error(e):
    """Log the error and return a small HTML message with status 500.

    Args:
        e: the error passed in by Flask for the failed request.

    Returns:
        A ``(body, 500)`` tuple; the body embeds the escaped text of ``e``.
    """
    # local import so the block does not depend on a new file-level import
    from html import escape

    logging.exception('algun error...')
    # the error text may contain markup characters, so it is escaped before
    # being placed inside the HTML response
    return """
    An internal error <pre>{}</pre>
    """.format(escape(str(e))), 500



# connection through a route
@app.route('/backend', methods=['POST', 'GET'])
def backend():
    """Predict the wine quality class from the request parameters.

    One numeric parameter is read for every feature name in ``var`` (spaces
    in the name replaced by underscores), plus an integer ``color``.

    Returns:
        A JSON response with ``prediccion`` (3, 6 or 9) and ``imagen`` (the
        path built by ``imagen_vino``); or a JSON ``error`` with status 400
        when a parameter is missing, not numeric or not finite.
    """
    # request.values combines the query string and the form body, so the POST
    # method declared on the route is served as well as GET
    params = request.values

    # new data
    try:
        n_data = {k: float(params.get(k.replace(' ', '_'))) for k in var}
        n_data['color'] = int(params.get('color'))
    except (TypeError, ValueError):
        # a missing parameter gives None -> TypeError; bad text -> ValueError
        return jsonify({'error': 'parametros invalidos o faltantes'}), 400

    # float() accepts 'nan' and 'inf'; reject them before they reach the model
    if not all(np.isfinite(v) for v in n_data.values()):
        return jsonify({'error': 'parametros invalidos o faltantes'}), 400

    X_pred = pd.DataFrame.from_dict(n_data, orient='index').T

    # prediction: the index of the most probable class selects the label
    prob = gbc.predict_proba(X_pred)
    pred = [3, 6, 9][np.argmax(prob[0])]

    return jsonify({'prediccion': pred,
                    'imagen': imagen_vino(n_data['color'], pred)})


# main page
@app.route("/", methods=['POST', 'GET'])
def main():
    """Render ``index.html`` with the default prediction and image."""
    logging.warning('¡main!')

    # values shown on the default load
    contexto = {
        'prediccion': 1,
        'imagen': '/static/images/vino_tinto_6.jpg',
    }
    return render_template('index.html', **contexto)





# local runs only
if __name__=='__main__':
    # start the Flask server with debug mode off
    app.run(debug=False)


 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [04/Jan/2020 08:38:49] "[37mGET / HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Jan/2020 08:38:49] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Jan/2020 08:38:51] "[37mGET /backend?fixed_acidity=7.5&volatile_acidity=0.4&citric_acid=0.3&residual_sugar=5.5&chlorides=0.05&free_sulfur_dioxide=31&total_sulfur_dioxide=115&density=0.99&pH=3.2&sulphates=0.5&alcohol=10.5&color=0 HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Jan/2020 08:38:51] "[37mGET /backend?fixed_acidity=7.5&volatile_acidity=0.4&citric_acid=0.3&residual_sugar=5.5&chlorides=0.05&free_sulfur_dioxide=31&total_sulfur_dioxide=115&density=0.99&pH=3.2&sulphates=0.5&alcohol=10.5&color=0 HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Jan/2020 08:38:51] "[36mGET /static/images/vino_blanco_6.jpg HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [04/Jan/2020 08:38:51] "[36mGET /static/images/vino_blanco_6.jpg HTTP/1.1[0m" 304 -
127.0.0.1 - - [04/Jan/20

127.0.0.1 - - [04/Jan/2020 08:39:12] "[37mGET /backend?fixed_acidity=11&volatile_acidity=1.28&citric_acid=1.3&residual_sugar=29.5&chlorides=0.05&free_sulfur_dioxide=1&total_sulfur_dioxide=440&density=1.02&pH=3.5&sulphates=1.2&alcohol=10.5&color=1 HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Jan/2020 08:39:12] "[37mGET /backend?fixed_acidity=11&volatile_acidity=1.28&citric_acid=1.3&residual_sugar=29.5&chlorides=0.05&free_sulfur_dioxide=1&total_sulfur_dioxide=440&density=1.02&pH=3.5&sulphates=1.2&alcohol=10.5&color=1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Jan/2020 08:39:13] "[37mGET /backend?fixed_acidity=11&volatile_acidity=1.28&citric_acid=1.3&residual_sugar=29.5&chlorides=0.05&free_sulfur_dioxide=1&total_sulfur_dioxide=440&density=1.02&pH=3.5&sulphates=1.2&alcohol=10.5&color=1 HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Jan/2020 08:39:13] "[37mGET /backend?fixed_acidity=11&volatile_acidity=1.28&citric_acid=1.3&residual_sugar=29.5&chlorides=0.05&free_sulfur_dioxide=1&

## Codigo HTML