In [4]:
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import pandas as pd
import numpy as np

df = pd.read_excel('df_filtred_with_coordinates.xlsx')
df = df.loc[(df['preco']>49999)&(df['area util por m²']>9)]

In [5]:
# 1 - Alta
# 2 - Média
# 3 - baixa

classe_bairros = {
    'Pontal': 1,
    'Nossa Senhora da Vitória': 2,
    'São Francisco': 1,
    'São Sebastião': 2,
    'Jardim Savóia': 2,
    'Conquista': 2,
    'Centro': 1,
    'Olivença': 2,
    'Boa Vista': 3,
    'Jardim Atlântico': 1,
    'Cidade Nova': 2,
    'Ilhéus II': 2,
    'São Domingos': 2,
    'Teresópolis': 3,
    'Malhado': 2,
    'Nelson Costa': 2,
    'Esperança': 2,
    'Aritaguá': 3,
    'Hernani Sá': 2,
    'Iguape': 3
}

df['classe'] = df['bairro'].map(classe_bairros)

In [6]:
for col in df.columns:
    df[col] = df[col].replace(-1, 0)

df['log_preco'] = np.log(df['preco'])
df['log_area_util'] = np.log(df['area util por m²'])
df['log_distance_ceps'] = np.log(df['distance_between_ceps_in_meters'])

df.drop(columns=['custo condominio','iptu', 'preco', 'area util por m²', 'distance_between_ceps_in_meters'], inplace=True)

# Somente dos Bairros de Ns. Vitoria e São Francisco

## Centro

In [4]:
somente_nsrv_e_sf_p_centro = df.loc[
    (df['Zona'] == 'Sul') &
    (df['bairro'].isin(['Nossa Senhora da Vitória', 'São Francisco'])) &
    (df['cep_dest'] == 'Centro')
].drop(['rua/avenida', 'bairro', 'cidade', 'estado', 'cep', 'Zona', 'cep_dest', 'cep_origin', 'latitude_origin', 'longitude_origin', 'latitude_destino', 'longitude_destino'], axis=1).reset_index(drop=True)

y = somente_nsrv_e_sf_p_centro['preco']
x = somente_nsrv_e_sf_p_centro.drop(['preco'], axis=1)

x = sm.add_constant(x)
model = sm.OLS(y, x).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  preco   R-squared:                       0.804
Model:                            OLS   Adj. R-squared:                  0.796
Method:                 Least Squares   F-statistic:                     107.4
Date:                Sat, 07 Sep 2024   Prob (F-statistic):          2.00e-204
Time:                        17:11:56   Log-Likelihood:                -8945.7
No. Observations:                 655   AIC:                         1.794e+04
Df Residuals:                     630   BIC:                         1.805e+04
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

## BA-001

In [5]:
somente_nsrv_e_sf_p_ba001 = df.loc[
    (df['Zona'] == 'Sul') &
    (df['bairro'].isin(['Nossa Senhora da Vitória', 'São Francisco'])) &
    (df['cep_dest'] == 'BA-001')
].drop(['rua/avenida', 'bairro', 'cidade', 'estado', 'cep', 'Zona', 'cep_dest', 'cep_origin', 'latitude_origin', 'longitude_origin', 'latitude_destino', 'longitude_destino'], axis=1).reset_index(drop=True)

y = somente_nsrv_e_sf_p_ba001['preco']
x = somente_nsrv_e_sf_p_ba001.drop(['preco'], axis=1)

x = sm.add_constant(x)
model = sm.OLS(y, x).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  preco   R-squared:                       0.803
Model:                            OLS   Adj. R-squared:                  0.796
Method:                 Least Squares   F-statistic:                     107.0
Date:                Sat, 07 Sep 2024   Prob (F-statistic):          4.35e-204
Time:                        17:11:56   Log-Likelihood:                -8946.6
No. Observations:                 655   AIC:                         1.794e+04
Df Residuals:                     630   BIC:                         1.806e+04
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

## Ponte Jorge Amado

In [6]:
somente_nsrv_e_sf_p_jorge_amado = df.loc[
    (df['Zona'] == 'Sul') &
    (df['bairro'].isin(['Nossa Senhora da Vitória', 'São Francisco'])) &
    (df['cep_dest'] == 'Ponte Jorge Amado')
].drop(['rua/avenida', 'bairro', 'cidade', 'estado', 'cep', 'Zona', 'cep_dest', 'cep_origin', 'latitude_origin', 'longitude_origin', 'latitude_destino', 'longitude_destino'], axis=1).reset_index(drop=True)

y = somente_nsrv_e_sf_p_jorge_amado['preco']
x = somente_nsrv_e_sf_p_jorge_amado.drop(['preco'], axis=1)

x = sm.add_constant(x)
model = sm.OLS(y, x).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  preco   R-squared:                       0.803
Model:                            OLS   Adj. R-squared:                  0.796
Method:                 Least Squares   F-statistic:                     107.3
Date:                Sat, 07 Sep 2024   Prob (F-statistic):          2.42e-204
Time:                        17:11:56   Log-Likelihood:                -8945.9
No. Observations:                 655   AIC:                         1.794e+04
Df Residuals:                     630   BIC:                         1.805e+04
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

# Todos os Bairros de Ilheus

## Centro

In [7]:
tds_bairros_centro = df.loc[
    (df['cep_dest'] == 'Centro')
].drop(['rua/avenida', 'bairro', 'cidade', 'estado', 'cep', 'Zona', 'cep_dest', 'cep_origin', 'latitude_origin', 'longitude_origin', 'latitude_destino', 'longitude_destino'], axis=1).reset_index(drop=True)

y = tds_bairros_centro['preco']
x = tds_bairros_centro.drop(['preco'], axis=1)

x = sm.add_constant(x)
model = sm.OLS(y, x).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  preco   R-squared:                       0.422
Model:                            OLS   Adj. R-squared:                  0.410
Method:                 Least Squares   F-statistic:                     35.03
Date:                Sat, 07 Sep 2024   Prob (F-statistic):          5.79e-124
Time:                        17:11:56   Log-Likelihood:                -18428.
No. Observations:                1226   AIC:                         3.691e+04
Df Residuals:                    1200   BIC:                         3.704e+04
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

## BA-001

In [8]:
tds_bairros_ba001 = df.loc[
    (df['cep_dest'] == 'BA-001')
].drop(['rua/avenida', 'bairro', 'cidade', 'estado', 'cep', 'Zona', 'cep_dest', 'cep_origin', 'latitude_origin', 'longitude_origin', 'latitude_destino', 'longitude_destino'], axis=1).reset_index(drop=True)

y = tds_bairros_ba001['preco']
x = tds_bairros_ba001.drop(['preco'], axis=1)

x = sm.add_constant(x)
model = sm.OLS(y, x).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  preco   R-squared:                       0.423
Model:                            OLS   Adj. R-squared:                  0.411
Method:                 Least Squares   F-statistic:                     35.14
Date:                Sat, 07 Sep 2024   Prob (F-statistic):          2.68e-124
Time:                        17:11:56   Log-Likelihood:                -18427.
No. Observations:                1226   AIC:                         3.691e+04
Df Residuals:                    1200   BIC:                         3.704e+04
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

## Ponte Jorge Amado

In [9]:
tds_bairros_jorge_amado = df.loc[
    (df['cep_dest'] == 'Ponte Jorge Amado')
].drop(['rua/avenida', 'bairro', 'cidade', 'estado', 'cep', 'Zona', 'cep_dest', 'cep_origin', 'latitude_origin', 'longitude_origin', 'latitude_destino', 'longitude_destino'], axis=1).reset_index(drop=True)

y = tds_bairros_jorge_amado['preco']
x = tds_bairros_jorge_amado.drop(['preco'], axis=1)

x = sm.add_constant(x)
model = sm.OLS(y, x).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  preco   R-squared:                       0.426
Model:                            OLS   Adj. R-squared:                  0.414
Method:                 Least Squares   F-statistic:                     35.59
Date:                Sat, 07 Sep 2024   Prob (F-statistic):          1.15e-125
Time:                        17:11:56   Log-Likelihood:                -18424.
No. Observations:                1226   AIC:                         3.690e+04
Df Residuals:                    1200   BIC:                         3.703e+04
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

In [10]:
somente_nsrv_e_sf_p_jorge_amado.to_excel('somente_nsrv_e_sf_p_jorge_amado.xlsx', index=False)
somente_nsrv_e_sf_p_centro.to_excel('somente_nsrv_e_sf_p_centro.xlsx', index=False)
somente_nsrv_e_sf_p_ba001.to_excel('somente_nsrv_e_sf_p_ba001.xlsx', index=False)
tds_bairros_jorge_amado.to_excel('tds_bairros_p_jorge_amado.xlsx', index=False)
tds_bairros_centro.to_excel('tds_bairros_p_centro.xlsx', index=False)
tds_bairros_ba001.to_excel('tds_bairros_p_ba001.xlsx', index=False)