## Model Persistence

**First, let's load the dataset that we want to make predictions for**

In [24]:
# Import
import pandas as pd

In [48]:
# Load the production dataset whose rent ('aluguel') still has to be predicted.
data_base = pd.read_csv('producao.csv', sep=';')
# Keep an independent copy of the raw frame. A plain assignment would only
# bind a second name to the same object, so any in-place change made through
# `data_base` would also show up in the "original".
data_base_original = data_base.copy()

In [49]:
# Preview the first rows of the loaded dataset
data_base.head(n=5)

Unnamed: 0,bairro,condominio,iptu,metragem,quarto,banheiro,vaga_carro,andar,aceita_pet,mobilia,metro_prox,aluguel
0,Perdizes,448.0,34.0,58.0,1.0,1.0,1.0,6.0,1.0,1.0,0.0,
1,Bela Vista,1106.0,218.0,74.0,3.0,2.0,1.0,5.0,1.0,1.0,0.0,
2,Mooca,50.0,83.0,80.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,
3,Santana,230.0,50.0,120.0,2.0,2.0,1.0,2.0,1.0,0.0,1.0,
4,Vila Mariana,1040.0,140.0,85.0,2.0,1.0,2.0,18.0,0.0,0.0,0.0,


**Every preprocessing step that we applied to the dataset used to build the model must also be applied to this dataset.**<br>
**This dataset no longer has the 'Seguro_incendio', 'taxa_servico' and 'Total' columns, so we only need to deal with the neighborhoods ('bairro').**

In [50]:
# Load the neighbourhood -> region lookup table and preview it
region = pd.read_excel(io='bairros.xlsx')
region.head(n=5)

Unnamed: 0,Bairro,Região
0,Sé,Centro
1,Bela Vista,Centro
2,Bom Retiro,Centro
3,Cambuci,Centro
4,Consolação,Centro


In [51]:
# Attach a region to every listing by looking up its neighbourhood ('bairro')
# in the region table ('Bairro'). The left join keeps every listing.
rows_before_merge = len(data_base)
data_base = pd.merge(left=data_base, right=region, how='left', left_on='bairro', right_on='Bairro')
# A neighbourhood listed more than once in the region table would duplicate
# listings here. The predictions are later written back to the original frame
# position by position, so fail loudly instead of exporting misaligned rows.
if len(data_base) != rows_before_merge:
    raise ValueError(
        "Region merge changed the row count from {} to {}; "
        "check the region table for duplicated 'Bairro' entries.".format(
            rows_before_merge, len(data_base)
        )
    )
data_base.head()

Unnamed: 0,bairro,condominio,iptu,metragem,quarto,banheiro,vaga_carro,andar,aceita_pet,mobilia,metro_prox,aluguel,Bairro,Região
0,Perdizes,448.0,34.0,58.0,1.0,1.0,1.0,6.0,1.0,1.0,0.0,,Perdizes,Oeste
1,Bela Vista,1106.0,218.0,74.0,3.0,2.0,1.0,5.0,1.0,1.0,0.0,,Bela Vista,Centro
2,Mooca,50.0,83.0,80.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,,Mooca,Leste
3,Santana,230.0,50.0,120.0,2.0,2.0,1.0,2.0,1.0,0.0,1.0,,Santana,Norte
4,Vila Mariana,1040.0,140.0,85.0,2.0,1.0,2.0,18.0,0.0,0.0,0.0,,Vila Mariana,Sul


In [52]:
# Drop both neighbourhood columns ('bairro' and 'Bairro'); they are no longer
# needed once the region has been attached
data_base = data_base.drop(columns=['Bairro', 'bairro'])

In [53]:
# One-hot encode the region: one indicator column per region found in the data.
# The dtype is pinned because pandas >= 2.0 returns boolean dummies by default,
# while this notebook displays and exports the indicators as 0/1 integers.
# NOTE(review): a region that does not occur in this file gets no column;
# confirm the resulting feature set still matches what the model expects.
region_dummies = pd.get_dummies(data_base['Região'], dtype='uint8')
data_base = pd.concat([data_base, region_dummies], axis=1)
data_base.head()

Unnamed: 0,condominio,iptu,metragem,quarto,banheiro,vaga_carro,andar,aceita_pet,mobilia,metro_prox,aluguel,Região,Centro,Leste,Norte,Oeste,Sul
0,448.0,34.0,58.0,1.0,1.0,1.0,6.0,1.0,1.0,0.0,,Oeste,0,0,0,1,0
1,1106.0,218.0,74.0,3.0,2.0,1.0,5.0,1.0,1.0,0.0,,Centro,1,0,0,0,0
2,50.0,83.0,80.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,,Leste,0,1,0,0,0
3,230.0,50.0,120.0,2.0,2.0,1.0,2.0,1.0,0.0,1.0,,Norte,0,0,1,0,0
4,1040.0,140.0,85.0,2.0,1.0,2.0,18.0,0.0,0.0,0.0,,Sul,0,0,0,0,1


In [54]:
# Drop the text column 'Região' now that it is represented by the indicator columns
data_base = data_base.drop(columns='Região')
data_base.head(n=5)


Unnamed: 0,condominio,iptu,metragem,quarto,banheiro,vaga_carro,andar,aceita_pet,mobilia,metro_prox,aluguel,Centro,Leste,Norte,Oeste,Sul
0,448.0,34.0,58.0,1.0,1.0,1.0,6.0,1.0,1.0,0.0,,0,0,0,1,0
1,1106.0,218.0,74.0,3.0,2.0,1.0,5.0,1.0,1.0,0.0,,1,0,0,0,0
2,50.0,83.0,80.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,,0,1,0,0,0
3,230.0,50.0,120.0,2.0,2.0,1.0,2.0,1.0,0.0,1.0,,0,0,1,0,0
4,1040.0,140.0,85.0,2.0,1.0,2.0,18.0,0.0,0.0,0.0,,0,0,0,0,1


**Now we need to load the model that we developed in data_analysis_project_code**

In [55]:
# importing the load from joblib
from joblib import load

In [56]:
# Restore the persisted regression model from disk.
# joblib files are pickle-based, so only load files from a trusted source.
regression = load(filename='Model_Regression.joblib')

In [57]:
# Separate the rent column ('aluguel', not yet filled in) from the feature columns
y = data_base['aluguel']
x = data_base.drop(columns='aluguel')

In [58]:
# Use the loaded model to predict the rent ('aluguel') for every row of x
aluguel_predict = regression.predict(x)
aluguel_predict


array([3015.77444921, 4004.39480203, 1962.12982564, 2565.44563071,
       2709.07798241])

In [59]:
# Store the predicted rents in the 'aluguel' column of the processed dataset
data_base['aluguel'] = aluguel_predict
data_base.head()

Unnamed: 0,condominio,iptu,metragem,quarto,banheiro,vaga_carro,andar,aceita_pet,mobilia,metro_prox,aluguel,Centro,Leste,Norte,Oeste,Sul
0,448.0,34.0,58.0,1.0,1.0,1.0,6.0,1.0,1.0,0.0,3015.774449,0,0,0,1,0
1,1106.0,218.0,74.0,3.0,2.0,1.0,5.0,1.0,1.0,0.0,4004.394802,1,0,0,0,0
2,50.0,83.0,80.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1962.129826,0,1,0,0,0
3,230.0,50.0,120.0,2.0,2.0,1.0,2.0,1.0,0.0,1.0,2565.445631,0,0,1,0,0
4,1040.0,140.0,85.0,2.0,1.0,2.0,18.0,0.0,0.0,0.0,2709.077982,0,0,0,0,1


In [60]:
# Write the processed dataset, including the predictions, to an Excel file
data_base.to_excel(excel_writer='producao_results.xlsx')

In [63]:
# Optionally store the predicted rents in the original (unprocessed) dataset too
data_base_original['aluguel'] = aluguel_predict
data_base_original.head(n=5)

Unnamed: 0,bairro,condominio,iptu,metragem,quarto,banheiro,vaga_carro,andar,aceita_pet,mobilia,metro_prox,aluguel
0,Perdizes,448.0,34.0,58.0,1.0,1.0,1.0,6.0,1.0,1.0,0.0,3015.774449
1,Bela Vista,1106.0,218.0,74.0,3.0,2.0,1.0,5.0,1.0,1.0,0.0,4004.394802
2,Mooca,50.0,83.0,80.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1962.129826
3,Santana,230.0,50.0,120.0,2.0,2.0,1.0,2.0,1.0,0.0,1.0,2565.445631
4,Vila Mariana,1040.0,140.0,85.0,2.0,1.0,2.0,18.0,0.0,0.0,0.0,2709.077982


In [64]:
# Write the original dataset, now carrying the predicted rents, to an Excel file
data_base_original.to_excel(excel_writer='results_original_base.xlsx')