# 0.0. IMPORTS.

In [None]:
import re
import pickle

import pandas            as pd
import numpy             as np
import seaborn           as sns
import matplotlib.pyplot as plt
import xgboost           as xgb
import lightgbm          as lgb

from sklearn.preprocessing   import MinMaxScaler
from sklearn.ensemble        import RandomForestRegressor
from sklearn.metrics         import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing   import LabelEncoder
from sklearn.linear_model    import LinearRegression, Lasso
from category_encoders       import TargetEncoder
from sklearn.preprocessing   import OneHotEncoder
from category_encoders.count import CountEncoder
from sklearn.model_selection import KFold, cross_val_score

from sklearn               import model_selection   as ms
from sklearn               import ensemble          as en

# from skopt.space        import Real, Integer
# from skopt.utils        import use_named_args
# from skopt              import gp_minimize
# from skopt.callbacks    import CheckpointSaver

## 0.1. Aux Functions

In [None]:
def settings():
    """Configure the global matplotlib/seaborn look used by the notebook plots.

    ``sns.set()`` installs seaborn's own theme defaults (font sizes included),
    so it is applied *before* the explicit ``rcParams`` values; when it ran
    last, the ``font.size`` chosen here was overridden.
    """
    plt.style.use('bmh')
    # Seaborn theme first, so that the explicit values below take effect.
    sns.set()
    plt.rcParams.update({
        'figure.figsize': [25, 12],
        'font.size': 24,
        'figure.dpi': 100,
    })


def ml_error( model_name, ytest, yhat ):
    """Summarise the regression errors of one model as a single-row DataFrame.

    Parameters
    ----------
    model_name : label stored in the 'Model name' column.
    ytest : observed target values.
    yhat : predicted target values.

    Returns
    -------
    pd.DataFrame with the columns 'Model name', 'MAE', 'MAPE' and 'RMSE'
    and the single index label 0.
    """
    metrics = {
        'Model name': model_name,
        'MAE': mean_absolute_error( ytest, yhat ),
        'MAPE': mean_absolute_percentage_error( ytest, yhat ),
        # RMSE: square root of the mean squared error
        'RMSE': np.sqrt( mean_squared_error( ytest, yhat ) ),
    }
    return pd.DataFrame( metrics, index=[0] )

def analise_bivariada(df,column):
    """Draw two stacked bar charts of 'preco' grouped by ``column``.

    The upper chart shows the group means, the lower one the group medians.
    """
    pairs = df[[column,'preco']]
    # Both summaries are computed before any figure is created.
    summaries = [
        pairs.groupby(column).mean().reset_index(),
        pairs.groupby(column).median().reset_index(),
    ]
    plt.figure(figsize=(15,10))
    for position, summary in enumerate(summaries, start=1):
        plt.subplot(2,1,position)
        sns.barplot(x=column,y='preco',data=summary)

## 0.2. Reading Data

In [None]:
# Load the training set and the test set; both paths are relative to the
# notebook's working directory.
df_raw=pd.read_csv('treino.csv')
df_test=pd.read_csv('teste.csv')

# 1.0. DESCRIÇÃO DOS DADOS

In [None]:
# Work on a copy so that df_raw keeps the data exactly as loaded.
df1=df_raw.copy()

## 1.1. Dimensão dos Dados

In [None]:
# (number of rows, number of columns)
df1.shape

## 1.2. NA Check

In [None]:
# Count of missing values in each column.
df1.isna().sum()

### 1.2.1. Fillout NA

In [None]:
# # num_fotos - treat NaN as 0 photos > ordinal encoding
df1['num_fotos']=df1['num_fotos'].fillna(0)

## 1.3. Tipos dos Dados

In [None]:
# Current dtype of every column.
df1.dtypes

## 1.4. Estatística Descritiva

In [None]:
# Split the columns in two groups: int64/float64 columns and all the others.
num_attr=df1.select_dtypes(include=['int64','float64'])
cat_attr=df1.select_dtypes(exclude=['int64','float64'])

### 1.4.1. Atributos Numéricos

In [None]:
# Descriptive statistics of every numeric column, one row per attribute.
# pandas reductions are used for all statistics (instead of a mix of
# ``apply(np.<fn>)`` and the builtin ``min``/``max``) so that missing values
# are skipped in the same way everywhere.
m = pd.DataFrame({
    # Dispersion - min, max, range
    'min': num_attr.min(),
    'max': num_attr.max(),
    'range': num_attr.max() - num_attr.min(),
    # Central Tendency - mean, median
    'mean': num_attr.mean(),
    'median': num_attr.median(),
    # Dispersion - population std (ddof=0, as np.std computes), skew, kurtosis
    'std': num_attr.std(ddof=0),
    'skew': num_attr.skew(),
    'kurtosis': num_attr.kurtosis(),
}).rename_axis('attributes').reset_index()

# Last expression of the cell, so the summary table is rendered.
m

### 1.4.2. Atributos Categóricos

In [None]:
# Number of distinct values found in each non-numeric column.
cat_attr.apply(lambda x: x.unique().shape[0])

In [None]:
# Box plot of 'preco' for each categorical variable on a 5x2 grid; the first
# three variables span both cells of their row.
plt.figure(figsize=(20,30))
panels = [
    ((1,2), 'cambio'),
    ((3,4), 'tipo'),
    ((5,6), 'cor'),
    (7, 'entrega_delivery'),
    (8, 'troca'),
    (9, 'blindado'),
    (10, 'tipo_vendedor'),
]
for slot, feature in panels:
    plt.subplot(5,2,slot)
    sns.boxplot(x=feature,y='preco',data=df1)

## 1.3. Tipos dos Dados

In [None]:
# Dtypes again, before the integer casts done in the next cell.
df1.dtypes

### 1.3.1. Change data types

In [None]:
# Cast the photo count and the model year to integers in a single call.
df1 = df1.astype({'num_fotos': int, 'ano_modelo': int})

# 2.0. FEATURE ENGINEERING

In [None]:
# Independent copy of df1 for the feature engineering stage.
df2 = df1.copy()

In [None]:
# manufacturing year: bucket the year into labelled ranges.
# Each pair is (inclusive upper bound, label), checked in ascending order.
_YEAR_BINS = (
    (2000, 'bin1'),
    (2005, 'bin2'),
    (2010, 'bin3'),
    (2015, 'bin4'),
    (2020, 'bin5'),
    (2025, 'bin6'),
)


def _year_bin(year):
    """Return the label of the first range whose upper bound is >= ``year``.

    Years that fit no range (after 2025, or missing) get a *string* label.
    The fallback used to be the integer 0, which made the column mixed-typed;
    scikit-learn encoders do not accept a mix of strings and numbers.
    """
    for upper_bound, label in _YEAR_BINS:
        if year <= upper_bound:
            return label
    return 'bin_desconhecido'


df2['ano_de_fabricacao_bins']=df2['ano_de_fabricacao'].apply(_year_bin)

# brand category: tier label assigned to each brand
popular_baixo_padrao = ['FIAT','SUZUKI','CHEVROLET','SMART','HYUNDAI','LIFAN','SSANGYONG','RENAULT','DODGE','ALFA ROMEO','CITROËN','CHRYSLER','BRM','EFFA']
popular_alto_padrao = ['JEEP','SUBARU','FORD','KIA','CHERY','PEUGEOT','VOLKSWAGEN','NISSAN','JAC','HONDA','MITSUBISHI']
luxo = ['VOLVO','LEXUS','MERCEDES-BENZ','FERRARI','AUDI','TOYOTA','IVECO','MINI','TROLLER']
superluxo = ['PORSCHE','RAM','LAMBORGHINI','JAGUAR','LAND ROVER','MASERATI','BMW']

# Lookup table brand -> tier; setdefault keeps the first tier if a brand were
# ever listed twice, i.e. the same precedence as checking the lists in order.
_tier_by_brand = {}
for _tier, _brands in ( ('popular_baixo_padrao', popular_baixo_padrao),
                        ('popular_alto_padrao', popular_alto_padrao),
                        ('luxo', luxo),
                        ('superluxo', superluxo) ):
    for _brand in _brands:
        _tier_by_brand.setdefault( _brand, _tier )

# Brands missing from every list are labelled 'nao_identificado'.
df2['categoria_marca'] = df2['marca'].apply( lambda brand: _tier_by_brand.get( brand, 'nao_identificado' ) )

# engine displacement: first "<digit>.<digit>" found in the version text.
# The dot is escaped on purpose: unescaped it matches any character, so
# fragments such as "201" (from a year) or "4X4" were captured and then
# polluted or broke the float conversion.
# Rows with no match, or with a missing version, get 0.
# Assigning the column directly (instead of concatenating a separately
# indexed frame) keeps rows aligned whatever the index of df2 is.
df2['cilindradas'] = (
    df2['versao']
    .str.extract( r"([0-9]\.[0-9])", expand=False )
    .astype( 'float64' )
    .fillna( 0.0 )
)

# turbo: "TURBO" when the version text contains that word, "NÃO TURBO"
# otherwise (missing or non-text versions count as "NÃO TURBO").
# The vectorised test replaces a row loop with a bare ``except`` and a
# concat that depended on df2 having a 0..n-1 index.
_has_turbo = df2['versao'].str.contains( "TURBO", regex=False, na=False )
df2['turbo'] = _has_turbo.map( {True: "TURBO", False: "NÃO TURBO"} )

# 4x4: "4X4" when the version text contains that token, "NÃO 4x4" otherwise
# (missing or non-text versions count as "NÃO 4x4").
# The vectorised test replaces a row loop with a bare ``except`` and a
# concat that depended on df2 having a 0..n-1 index.
_is_offroad = df2['versao'].str.contains( "4X4", regex=False, na=False )
df2['offroad'] = _is_offroad.map( {True: "4X4", False: "NÃO 4x4"} )

# fuel: first fuel keyword found in the version text, tested in this order.
# "ELECTRIC" replaces the misspelled "ELECTIRC", which could never match.
_FUEL_KEYWORDS = ( "GASOLINA", "FLEX", "HYBRID", "DIESEL",
                   "ELECTRIC", "ELÉTRICO", "HÍBRIDO", "GÁS" )


def _detect_fuel( versao ):
    """Return the first keyword of ``_FUEL_KEYWORDS`` contained in ``versao``.

    Falls back to "DESCONHECIDO" when none is present. The keywords hold no
    regex metacharacters, so a plain substring test is equivalent to the
    regex search it replaces, and each keyword is checked only once.
    """
    return next( ( fuel for fuel in _FUEL_KEYWORDS if fuel in versao ), "DESCONHECIDO" )


df2['combustivel'] = df2['versao'].apply( _detect_fuel )

# kilometragem 100 - 400000
# df2['km']=df2['odometro'].apply(lambda x: '0_40000' if x<=40000 else
#                                             '40000_80000'if x<=80000 else
#                                             '80000_120000' if x<=120000  else
#                                             '120000_160000' if x<=160000  else
#                                             '160000_200000' if x<=200000  else
#                                             '200000_240000' if x<=240000  else
#                                             '240000_280000' if x<=280000  else
#                                             '280000_320000' if x<=320000  else
#                                             '320000_360000' if x<=360000  else
#                                             '360000_400000')

# 3.0. FILTRAGEM DE VARIÁVEIS

In [None]:
# Independent copy of df2 for the variable filtering stage.
df3 = df2.copy()

## 3.1. Filtragem das Linhas

## 3.2. Seleção das Colunas

In [None]:
# Columns removed before the analysis; each label is listed once
# ('elegivel_revisao' used to appear twice).
_dropped_columns = [
    'elegivel_revisao',
    'attr_veiculo_aceita_troca',
    'attr_veiculo_único_dono',
    'attr_veiculo_todas_as_revisões_feitas_pela_concessionária',
    'attr_veiculo_todas_as_revisões_feitas_pela_agenda_do_carro',
    'attr_veiculo_ipva_pago',
    'attr_veiculo_licenciado',
    'attr_veiculo_garantia_de_fábrica',
    'attr_veiculo_alienado',
    'entrega_delivery',
]
df3 = df3.drop( columns=_dropped_columns )

# 4.0. EDA

In [None]:
# Independent copy of df3 for the exploratory analysis.
df4 = df3.copy()

## 4.1. Análise Univariada

### 4.1.1. Response Variable

In [None]:
# Distribution of the response variable. ``histplot`` replaces ``distplot``,
# which seaborn deprecated and later removed.
plt.figure(figsize=(20,15))
# raw price
plt.subplot(3,1,1)
sns.histplot(df4['preco'], kde=False);
# log transformation (log1p, the transform applied to 'preco' before modelling)
plt.subplot(3,1,2)
sns.histplot(np.log1p(df4['preco']), kde=False);
# box plot; the data is passed by keyword because recent seaborn releases
# no longer take it positionally
plt.subplot(3,1,3)
sns.boxplot(x=df4['preco']);

### 4.1.2. Numerical Variable

In [None]:
# Histogram (25 bins) of every numeric column of df4.
num_attributes = df4.select_dtypes(include='number')
num_attributes.hist(bins=25);

### 4.1.3. Categorical Variable

In [None]:
# Names of the non-numeric columns of df4.
cat_attributes = df4.select_dtypes(exclude='number')
cat_attributes.columns

## 4.2. Análise Bivariada

In [None]:
# Replace the full state description, e.g. 'São Paulo (SP)', by its
# two-letter code; any value not listed below becomes 0.
_uf_by_state = {
    'São Paulo (SP)': 'SP',
    'Rio Grande do Sul (RS)': 'RS',
    'Minas Gerais (MG)': 'MG',
    'Paraná (PR)': 'PR',
    'Rio de Janeiro (RJ)': 'RJ',
    'Maranhão (MA)': 'MA',
    'Santa Catarina (SC)': 'SC',
    'Alagoas (AL)': 'AL',
    'Bahia (BA)': 'BA',
    'Goiás (GO)': 'GO',
    'Rio Grande do Norte (RN)': 'RN',
    'Pernambuco (PE)': 'PE',
    'Mato Grosso (MT)': 'MT',
    'Pará (PA)': 'PA',
    'Ceará (CE)': 'CE',
    'Amazonas (AM)': 'AM',
    'Espírito Santo (ES)': 'ES',
    'Rondônia (RO)': 'RO',
    'Paraíba (PB)': 'PB',
    'Tocantins (TO)': 'TO',
    'Acre (AC)': 'AC',
    'Sergipe (SE)': 'SE',
    'Mato Grosso do Sul (MS)': 'MS',
    'Roraima (RR)': 'RR',
    'Piauí (PI)': 'PI',
}
df4['estado_vendedor']=df4['estado_vendedor'].apply(lambda state: _uf_by_state.get(state, 0))
analise_bivariada(df4,'estado_vendedor')

In [None]:
# Mean and median 'preco' per value of each feature, one pair of charts per cell.
analise_bivariada(df4,'num_fotos')

In [None]:
analise_bivariada(df4,'ano_de_fabricacao_bins')

In [None]:
analise_bivariada(df4,'ano_de_fabricacao')

In [None]:
analise_bivariada(df4,'ano_modelo')

In [None]:
# #odometer (disabled: the 'km' bins are commented out in the feature engineering step)
# analise_bivariada(df4,'km')

In [None]:
analise_bivariada(df4,'cambio')

In [None]:
analise_bivariada(df4,'tipo')

In [None]:
analise_bivariada(df4,'cor')

In [None]:
analise_bivariada(df4,'tipo_vendedor')

In [None]:
analise_bivariada(df4,'tipo_anuncio')

In [None]:
analise_bivariada(df4,'troca')

## 4.3. Análise Multivariada

In [None]:
# Pearson correlation between the numeric columns selected in section 4.1.2,
# drawn as an annotated heat map.
correlation = num_attributes.corr(method='pearson')
sns.heatmap(correlation, annot=True);

# 5.0. DATA PREPARATION

In [None]:
# Independent copy of df4 for the data preparation stage.
df5 = df4.copy()

## 5.1. Encoding

In [None]:
def _dump_pickle( obj, path ):
    """Pickle ``obj`` to ``path`` and close the file handle afterwards."""
    with open( path, 'wb' ) as handle:
        pickle.dump( obj, handle )


def _true_flag( value ):
    """Return 1 for a true flag (bool ``True`` or the text 'true', any case), else 0.

    Comparing only against the strings "true"/"false" mapped every row to 0
    when the column holds real booleans.
    """
    return int( str( value ).strip().lower() == 'true' )


def _one_hot_encode( df, column, path ):
    """Return ``df`` with the one-hot columns of ``column`` appended.

    A fresh encoder is fitted for each column and pickled to ``path`` right
    after fitting, so every file holds the encoder of its own column.
    """
    column_encoder = OneHotEncoder( handle_unknown='ignore' )
    matrix = column_encoder.fit_transform( df[[column]] ).toarray()
    # Reuse df's index so the join stays row-aligned whatever the index is.
    dummies = pd.DataFrame( matrix, columns=column_encoder.get_feature_names_out(), index=df.index )
    _dump_pickle( column_encoder, path )
    return df.join( dummies )


def _target_encode( df, column, path ):
    """Replace ``column`` of ``df`` in place by its target encoding against 'preco'.

    The fitted encoder is pickled to ``path``.
    """
    column_encoder = TargetEncoder()
    df[column] = column_encoder.fit_transform( df[column], df['preco'] )
    _dump_pickle( column_encoder, path )


# columns to drop: id, and the raw manufacturing year (its binned version is kept)
df5 = df5.drop( columns=['ID','ano_de_fabricacao'] )

# binary (0/1):
df5['offroad'] = df5['offroad'].apply( lambda x: 1 if x == '4X4' else 0 )
df5['turbo'] = df5['turbo'].apply( lambda x: 1 if x == 'TURBO' else 0 )
# NOTE(review): any other coding of these flags (e.g. 'S'/'N') still maps
# to 0 - confirm the raw values of 'blindado' and 'troca'.
df5['blindado'] = df5['blindado'].apply( _true_flag )
df5['troca'] = df5['troca'].apply( _true_flag )

# dummies: one encoder per column, saved as '<column>_encoding'.
# 'combustivel_encoding' used to be written after the shared encoder had been
# refitted on 'ano_de_fabricacao_bins', so it held the wrong encoder and the
# year-bin encoder was never saved under a name of its own.
_one_hot_columns = ['cambio','tipo_vendedor','tipo_anuncio','categoria_marca','combustivel','ano_de_fabricacao_bins']
for _column in _one_hot_columns:
    df5 = _one_hot_encode( df5, _column, f'{_column}_encoding' )

df5 = df5.drop( columns=_one_hot_columns )

# target encoding, column -> pickle file of its encoder.
# NOTE(review): the encoders are fitted on the whole frame, before the
# train/validation split made in section 7.0, so the validation rows
# contribute to their own encoding.
_target_encoded_columns = {
    'versao': 'versao_encoding',
    'marca': 'marca_encoding',
    'cor': 'cor_encoding',
    'tipo': 'tipo_encoding',
    'cidade_vendedor': 'cidade_vendedor_encoding',
    'estado_vendedor': 'estado_vendedor_encoding',
    'modelo': 'modelo_encoding_te',
}
for _column, _path in _target_encoded_columns.items():
    _target_encode( df5, _column, _path )

## 5.2. Normalização

## 5.3. Rescaling

## 5.4. Transformação

In [None]:
# Log-transform the target with log(1 + x); predictions are mapped back with
# np.expm1 when the models are evaluated.
df5['preco'] = np.log1p(df5['preco'])

# 6.0. FEATURE SELECTION

In [None]:
# Independent copy of df5 for the feature selection stage.
df6 = df5.copy()

## 6.1. Manual Selection

## 6.2. Feature Importance

In [None]:
# # model
# forest = en.ExtraTreesRegressor( n_estimators=250, random_state=0, n_jobs=-1 )

# # training
# x_train_fselection = df6.drop( ['preco'], axis=1 )
# y_train_fselection = df6['preco'].values
# forest.fit( x_train_fselection, y_train_fselection )

In [None]:
# importances = forest.feature_importances_
# std = np.std( [tree.feature_importances_ for tree in forest.estimators_], axis=0 )
# indices = np.argsort( importances )[::-1]

# # print the feature ranking
# df = pd.DataFrame()

# print( 'Feature Ranking:\n' )
# for i, j in zip( x_train_fselection,forest.feature_importances_ ):
#     aux = pd.DataFrame( {'feature': i, 'importance': j}, index=[0] )
#     df = pd.concat( [df, aux], axis=0 )
    
# print( df.sort_values( 'importance', ascending=False ) ) 

# # plot the impurity-based feature importances of the forest
# plt.figure(figsize=(10,5))
# plt.title( 'Feature importances' )
# plt.bar( range( x_train_fselection.shape[1] ), importances[indices], color='r', yerr=std[indices], align='center' )
# plt.xticks( range(x_train_fselection.shape[1]), indices )
# plt.xlim( [-1, x_train_fselection.shape[1]] )
# plt.show()

# 7.0. MACHINE LEARNING MODELLING

In [None]:
# Independent copy of df6 for the modelling stage.
df7 = df6.copy()

In [None]:
# Features (every column except the target) and the log-transformed target.
X = df7.drop(columns=['preco'])
Y = df7['preco'].copy()

# Hold out 20% of the rows for validation; the seed is fixed for reproducibility.
X_train, X_val, y_train, y_val = ms.train_test_split( X, Y, test_size=0.2, random_state=42 )

## 7.1. Linear Regression

In [None]:
# model training
lr = LinearRegression()
lr.fit( X_train, y_train )

# prediction (on the log1p scale of the target)
yhat_lr = lr.predict( X_val )

# performance (error), measured after mapping both series back with expm1
lr_results = ml_error( 'Linear Regression', np.expm1( y_val ) , np.expm1( yhat_lr ) )
lr_results

### 7.1.1. Cross Validation

## 7.3. Random Forest Regressor

In [None]:
# model
rf = RandomForestRegressor( n_estimators=150, n_jobs=-1, random_state=42 )
rf.fit( X_train, y_train )

# prediction (on the log1p scale of the target)
yhat_rf = rf.predict( X_val )

# performance, measured after mapping both series back with expm1
rf_results = ml_error( 'Random Forest Regressor', np.expm1(y_val), np.expm1(yhat_rf) )
rf_results

### 7.3.1. Cross Validation

## 7.5. XGBoost Regressor

In [None]:
# model
model_xgb = xgb.XGBRegressor( n_estimators=400 )
model_xgb.fit( X_train, y_train )

# prediction (on the log1p scale of the target)
yhat_xgb = model_xgb.predict( X_val )

# performance, measured after mapping both series back with expm1
model_xgb_results = ml_error( 'XGBoost Regressor',  np.expm1(y_val), np.expm1(yhat_xgb) )
model_xgb_results

### 7.5.1. Cross Validation

## 7.6. LightGBM

In [None]:
# model: hyper-parameters gathered in one mapping and unpacked into the regressor
_lgb_params = {
    'n_jobs': -1,
    'random_state': 42,
    'subsample_freq': 1,
    'max_bin': 500,
    'n_estimators': 1181,
    'max_depth': 10,
    'learning_rate': 0.01027,
    'num_leaves': 128,
    'min_child_samples': 1,
    'subsample': 0.92676,
    'colsample_bytree': 0.68369,
}
model_lgb = lgb.LGBMRegressor( **_lgb_params )
model_lgb.fit( X_train, y_train )

# prediction (on the log1p scale of the target)
yhat_lgb = model_lgb.predict( X_val )

# performance, measured after mapping both series back with expm1
model_lgb_results = ml_error( 'LightGBM Regressor',  np.expm1(y_val), np.expm1(yhat_lgb) )
model_lgb_results

### 7.6.1. Cross Validation

In [None]:
# model_lgb = lgb.LGBMRegressor(n_jobs=-1, random_state=42, subsample_freq=1, max_bin=500,
#                               n_estimators=1181, max_depth=10, learning_rate=0.01027, 
#                               num_leaves=128, min_child_samples=1, subsample=0.92676, 
#                               colsample_bytree=0.68369)
# x = pd.concat([X_train, X_val])
# y = pd.concat([y_train, y_val])
  
# kf = KFold(n_splits=5, random_state=42, shuffle=True)    
# mae_cv = cross_val_score(model_lgb, x, np.expm1(y), scoring='neg_mean_absolute_error', cv=kf.split(x), n_jobs=-1, verbose=1)*-1
# np.mean(mae_cv)

# 8.0 Fine Tuning

## 8.1. Bayesian

In [None]:
# # Search space for hyper parameters
# search_space = [Integer( 100, 1500, name = 'n_estimators'), 
#                 Integer(1, 20, name = 'max_depth'), 
#                 Real(0.001, 0.1, 'log-uniform', name = 'learning_rate'),
#                 Integer(2, 128, name = 'num_leaves'),
#                 Integer(1, 100, name = 'min_child_samples'),
#                 Real(0.05, 1.0, name = 'subsample'),
#                 Real(0.15, 1.0, name = 'colsample_bytree')]

# # model definition
# lgbm_model = LGBMRegressor(n_jobs=-1, random_state=42, subsample_freq=1)

# # applying cross-validation into tunning
# @use_named_args(search_space)
# def model_eval( **params ):
	
#     lgbm_model.set_params(**params)
#     print(lgbm_model)
#     kf = KFold(n_splits=10, random_state=42, shuffle=True)  
#     ft_result = cross_val_score(lgbm_model, x, np.expm1(y), scoring='neg_mean_absolute_error', cv=kf.split(x), n_jobs=-1, verbose=1)*-1
#     return np.mean(ft_result)

# # create checkpoints
# checkpoint_callback = CheckpointSaver('lgbm_ft.pkl')

# # return results and run bayesian optimize
# result = gp_minimize( model_eval, search_space, n_calls = 300, 
#                       n_initial_points = 10, verbose=True, n_jobs=-1, 
#                       random_state= 42, callback=[checkpoint_callback])
# result

In [None]:
# result.x

In [None]:
# # final model

# # model
# model_lgb = lgb.LGBMRegressor(n_jobs=-1, random_state=42, subsample_freq=1, max_bin=500,
#                               n_estimators=1181, max_depth=10, learning_rate=0.01027, 
#                               num_leaves=128, min_child_samples=1, subsample=0.92676, 
#                               colsample_bytree=0.68369).fit( X_train, y_train )

# # prediction
# yhat_lgb = model_lgb.predict( X_val )

# # performance
# model_lgb_results = ml_error( 'LightGBM Regressor',  np.expm1(y_val), np.expm1(yhat_lgb) )
# model_lgb_results

## GridSearchCV

In [None]:
# from sklearn.model_selection import GridSearchCV

In [None]:
# param={'max_bin':[150,200,250],
#         'num_leaves':[20,25,30],
#         'min_sum_hessian_in_leaf':[0.01,0.1],
#         'min_data_in_leaf':[10,15,20],
#         #'bagging_fraction':[1,2],
#         #'bagging_freq':[1,2],
#         'feature_fraction':[0.8,0.4]}

# pre_param={'max_bin':[150,200,250]}

# kf=KFold(n_splits=5,shuffle=True,random_state=42)

In [None]:
# gs = GridSearchCV(model_lgb, param_grid=param, cv=kf.split(X), scoring='neg_mean_absolute_error')
# gs.fit(X,np.expm1(Y))

# maet = mean_absolute_error(np.expm1(y_train), gs.predict(X_train))
# mae = mean_absolute_error(np.expm1(y_val), gs.predict(X_val))

# print(f'MAE train set: {maet}, MAE test set: {mae}')

In [None]:
# gs.best_params_

# 9.0. Production Model

In [None]:
# # join dfs
# Full_X_train = pd.concat( [X_train, X_val], axis=0 )
# Full_Y_train = pd.concat( [y_train, y_val], axis=0 )

# # model
# model_lgb_full = lgb.LGBMRegressor( n_jobs=-1, random_state=42, subsample_freq=1, max_bin=500,
#                               n_estimators=1181, max_depth=10, learning_rate=0.01027, 
#                               num_leaves=128, min_child_samples=1, subsample=0.92676, 
#                               colsample_bytree=0.68369 ).fit( Full_X_train, Full_Y_train )
# # saving trained model
# pickle.dump( model_lgb_full, open( '/Users/mathe/Repos_ComunidadeDS/mobility_cars_hackday/mobility_cars_lgb.pkl', 'wb' ) )

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3039c7b4-bcc6-4ef2-8633-383a22ac2c90' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>