In [13]:
#!pip install -U scikit-learn --user

In [14]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
import numpy as np
import requests

In [15]:
# Raw white-wine quality table; the file is semicolon-delimited.
df_original = pd.read_csv('winequality-white.csv', delimiter=';')

In [16]:
# Work on an independent (deep) copy so the raw frame stays untouched.
df = df_original.copy(deep=True)

In [17]:
# Preview the first rows of the working copy.
df.head()


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [18]:
# Split the frame into the feature matrix (every column except the target)
# and the target vector (the 'quality' score).
X, Y = df.drop(columns='quality'), df['quality']

In [19]:
from sklearn.impute import SimpleImputer

# Learn the per-column means on the feature matrix, then fill missing values
# with them. Assigning back through the column labels keeps X a DataFrame.
Imputer = SimpleImputer(strategy='mean').fit(X[X.columns])
X[X.columns] = Imputer.transform(X[X.columns])

In [20]:
# The notebook (and the pickled artefact) use the name `MinMaxScaler` for the
# fitted *instance*, so the class itself is imported under a private alias to
# avoid rebinding the class name to one of its own objects.
from sklearn.preprocessing import MinMaxScaler as _MinMaxScalerClass

MinMaxScaler = _MinMaxScalerClass()
# Fit the scaler on the features and rescale them in one pass; assigning back
# through the column labels keeps X a DataFrame.
X[X.columns] = MinMaxScaler.fit_transform(X[X.columns])


In [21]:
# Log-transform the features that were min-max scaled in the previous cell.
# The same expression is applied in WineProcessing.pre_processing, so the two
# must stay in sync.
# NOTE(review): this rebinds X from itself, so running the cell twice applies
# the log twice — re-run the preceding cells first.
X = np.log(X+1)

In [22]:
# 3-fold shuffled cross-validation of the random forest, reporting the MSE of
# each fold followed by the average over the folds.
# NOTE(review): X was imputed and scaled on the full dataset in the earlier
# cells, so the held-out rows of each fold contributed to those statistics.
n_splits = 3
kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
valor = 0  # running sum of the per-fold MSE
for tr, ts in kf.split(X, Y):
    X_train, Y_train = X.iloc[tr], Y.iloc[tr]
    X_test, Y_test = X.iloc[ts], Y.iloc[ts]

    mdl = RandomForestRegressor(random_state=0, min_samples_leaf=1, n_estimators=250)
    mdl.fit(X_train, Y_train)
    valores = mdl.predict(X_test)

    # Score the fold once and reuse the value for both the report and the sum
    # (the metric used to be computed twice per fold).
    fold_mse = mean_squared_error(Y_test, valores)
    print(fold_mse)
    valor += fold_mse
print()
print(valor / n_splits)

0.4318954072259645
0.3709080514390692
0.38360116666666666

0.39546820844390007


In [23]:
# Final estimator: same hyper-parameters as the cross-validated model above,
# this time fitted on every available row.
rf_params = dict(n_estimators=250, min_samples_leaf=1, random_state=0)
Model = RandomForestRegressor(**rf_params)
Model.fit(X, Y)


RandomForestRegressor(n_estimators=250, random_state=0)

In [24]:
import pickle as pkl

# Persist the fitted preprocessing objects and the model for the prediction
# service. Each file is written inside a `with` block so the handle is flushed
# and closed deterministically (the handles used to be opened inline and never
# closed explicitly).
for artifact, target in (
    (Imputer, "C:/Users/Guilherme/Desktop/Deploy/Imputer.pkl"),
    (MinMaxScaler, "C:/Users/Guilherme/Desktop/Deploy/MinMaxScaler.pkl"),
    (Model, "C:/Users/Guilherme/Desktop/Deploy/Model.pkl"),
):
    with open(target, "wb") as fh:
        pkl.dump(artifact, fh)

In [25]:
# Draw a single random row from the working frame to use as a request example.
# No seed is passed, so a different row is drawn on every run.
ex = df.sample(n=1)
ex

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4226,5.6,0.255,0.57,10.7,0.056,66.0,171.0,0.99464,3.25,0.61,10.4,7


In [26]:
# Strip the target so only the feature columns remain in the example row.
ex = ex.drop('quality', axis=1)
# `teste` refers to the same DataFrame object; it is preprocessed locally
# further down to cross-check the values returned by the service.
teste = ex
ex

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
4226,5.6,0.255,0.57,10.7,0.056,66.0,171.0,0.99464,3.25,0.61,10.4


In [27]:
import json

# Serialise the example row as a JSON array of records. From here on `ex` is
# a str (the JSON text), not a DataFrame.
ex = ex.to_json(orient='records')

# `ex` is already JSON text: dumping it directly would encode it a second time
# and leave data.json holding one quoted string instead of the records array.
# Parse it back first so the file contains the array itself.
with open('data.json', 'w') as f:
    json.dump(json.loads(ex), f)

In [28]:
# Show the JSON text that is posted to the prediction endpoint in the next cell.
ex

'[{"fixed acidity":5.6,"volatile acidity":0.255,"citric acid":0.57,"residual sugar":10.7,"chlorides":0.056,"free sulfur dioxide":66.0,"total sulfur dioxide":171.0,"density":0.99464,"pH":3.25,"sulphates":0.61,"alcohol":10.4}]'

In [29]:
# POST the JSON example to the locally running prediction service.
url = 'http://127.0.0.1:5000/predict'
data = ex
headers = {'Content-type': 'application/json'}
# requests waits indefinitely unless a timeout is given; bound the call so an
# unresponsive service fails the cell instead of hanging the kernel.
r = requests.post(url=url, data=data, headers=headers, timeout=10)
# Surface HTTP errors (4xx/5xx) as requests.HTTPError here rather than as a
# confusing JSON-decoding failure on an error page.
r.raise_for_status()
r.json()

[{'fixed acidity': 0.1596301456,
  'volatile acidity': 0.1583435581,
  'citric acid': 0.2951839831,
  'residual sugar': 0.1440206659,
  'chlorides': 0.1305596222,
  'free sulfur dioxide': 0.2013040077,
  'total sulfur dioxide': 0.3190863089,
  'density': 0.135553638,
  'pH': 0.393269835,
  'sulphates': 0.373966441,
  'alcohol': 0.3272129112,
  'prediction': 6.496}]

In [30]:
# Decode the response body once and tabulate it, taking the column order from
# the keys of the first returned record.
records = r.json()
resultado = pd.DataFrame(records, columns=records[0].keys())
resultado

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,prediction
0,0.15963,0.158344,0.295184,0.144021,0.13056,0.201304,0.319086,0.135554,0.39327,0.373966,0.327213,6.496


In [31]:
# Reproduce the preprocessing locally (impute -> min-max scale -> log(x + 1))
# so the result can be compared with the features echoed back by the service.
columns = teste.columns
for step in (Imputer, MinMaxScaler):
    # Assigning through the column labels keeps `teste` a DataFrame even
    # though each transformer returns a plain array.
    teste[columns] = step.transform(teste[columns])
teste = np.log(teste + 1)
teste

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
4226,0.15963,0.158344,0.295184,0.144021,0.13056,0.201304,0.319086,0.135554,0.39327,0.373966,0.327213


In [1]:
class WineProcessing(object):
    """Apply the training-time feature preprocessing to new wine samples.

    Loads the pickled imputer and min-max scaler and replays the same steps
    used before fitting the model: impute, scale, then ``log(x + 1)``.

    Relies on ``pkl`` (pickle) and ``np`` (numpy) being imported in the
    enclosing module.
    """

    def __init__(self,
                 imputer_path="C:/Users/Guilherme/Desktop/Deploy/Imputer.pkl",
                 scaler_path="C:/Users/Guilherme/Desktop/Deploy/MinMaxScaler.pkl"):
        """Load the fitted transformers from disk.

        Args:
            imputer_path: Path of the pickled imputer. Defaults to the
                location the training notebook writes to.
            scaler_path: Path of the pickled min-max scaler. Defaults to the
                location the training notebook writes to.

        Raises:
            OSError: If either file cannot be opened.
        """
        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # only point these paths at artefacts you produced yourself.
        # `with` guarantees the handles are closed once loading finishes.
        with open(imputer_path, "rb") as fh:
            self.Imputer = pkl.load(fh)
        with open(scaler_path, "rb") as fh:
            self.MinMaxScaler = pkl.load(fh)

    def pre_processing(self, df):
        """Return a preprocessed copy of ``df``.

        Args:
            df: DataFrame holding the feature columns the transformers were
                fitted on.

        Returns:
            A new DataFrame with the same index and columns, after
            imputation, min-max scaling and ``log(x + 1)``. The caller's
            frame is left unmodified.
        """
        # Work on a copy: the column assignments below would otherwise
        # overwrite the caller's DataFrame in place.
        df = df.copy()
        # Assign back through the column labels so the Imputer / MinMax output
        # stays in a DataFrame instead of becoming a bare NumPy array.
        columns = df.columns
        df[columns] = self.Imputer.transform(df[columns])
        df[columns] = self.MinMaxScaler.transform(df[columns])
        df = np.log(df + 1)

        return df
