# REGRESSÃO LINEAR MÚLTIPLA

## Importando as bibliotecas

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

## Importando a base de dados

In [2]:
# Read the credit-limit spreadsheet into a DataFrame and preview its first
# five rows (five is the default for head()).
limite = pd.read_excel(io="/content/Cópia de Limite_Credito.xlsx")
limite.head(n=5)

Unnamed: 0,Idade,RendimentoTotal,Salario,LimitedeCreditoImediato,LimitedoChequeEspecial,Escolaridade
0,70,10736,5214,400,500,Fundamental_Medio
1,68,5000,5000,380,600,Fundamental_Medio
2,44,5800,5800,500,800,Fundamental_Medio
3,72,4300,4300,2000,1000,Fundamental_Medio
4,75,4400,4400,3000,1000,Fundamental_Medio


## Transformando as variáveis categóricas em variáveis indicadoras

In [3]:
# One-hot encode the categorical 'Escolaridade' column into indicator columns
# named 'Escolaridade_<level>'.
# - columns=[...] names the encoded column explicitly, so the one-element
#   prefix list always lines up with exactly one encoded column instead of
#   depending on which columns happen to have object dtype.
# - dtype=int produces 0/1 integers on every pandas version. pandas >= 2.0
#   defaults to bool, which the formula interface treats as a categorical
#   factor (renaming the model terms) rather than as a numeric 0/1 regressor.
limite = pd.get_dummies(
    limite,
    columns=['Escolaridade'],
    prefix=['Escolaridade'],
    dtype=int,
)
limite.head()

Unnamed: 0,Idade,RendimentoTotal,Salario,LimitedeCreditoImediato,LimitedoChequeEspecial,Escolaridade_Fundamental_Medio,Escolaridade_Superior_Pos
0,70,10736,5214,400,500,1,0
1,68,5000,5000,380,600,1,0
2,44,5800,5800,500,800,1,0
3,72,4300,4300,2000,1000,1,0
4,75,4400,4400,3000,1000,1,0


## Verificando a correlação entre as variáveis

In [4]:
# Pairwise Pearson correlation matrix of the encoded frame (Pearson is the
# default method). The two education indicators are exact complements of each
# other, which is why their mutual correlation is -1.0.
limite.corr(method="pearson")

Unnamed: 0,Idade,RendimentoTotal,Salario,LimitedeCreditoImediato,LimitedoChequeEspecial,Escolaridade_Fundamental_Medio,Escolaridade_Superior_Pos
Idade,1.0,0.066396,0.063227,0.064149,0.075883,-0.110892,0.110892
RendimentoTotal,0.066396,1.0,0.960551,0.031041,0.85812,-0.396191,0.396191
Salario,0.063227,0.960551,1.0,0.081792,0.888839,-0.475479,0.475479
LimitedeCreditoImediato,0.064149,0.031041,0.081792,1.0,0.22699,-0.259433,0.259433
LimitedoChequeEspecial,0.075883,0.85812,0.888839,0.22699,1.0,-0.59831,0.59831
Escolaridade_Fundamental_Medio,-0.110892,-0.396191,-0.475479,-0.259433,-0.59831,1.0,-1.0
Escolaridade_Superior_Pos,0.110892,0.396191,0.475479,0.259433,0.59831,-1.0,1.0


## Criando e analisando os resultados da OLS

In [6]:
import statsmodels.formula.api as smf

# Full model: regress the overdraft limit (LimitedoChequeEspecial) on every
# candidate predictor, with the intercept that the formula adds implicitly.
#
# Only ONE of the two education indicators enters the formula. The pair always
# sums to 1, so including both next to the intercept makes the design matrix
# singular (dummy-variable trap): the fit is unchanged, but the intercept and
# education coefficients -- and their p-values -- are not identified.
# 'Escolaridade_Superior_Pos' becomes the reference level absorbed by the
# intercept; the coefficient of 'Escolaridade_Fundamental_Medio' is the shift
# relative to that level.
function = 'LimitedoChequeEspecial~Idade+RendimentoTotal+Salario+LimitedeCreditoImediato+Escolaridade_Fundamental_Medio'
model = smf.ols(formula=function, data=limite).fit()
print(model.summary())

                              OLS Regression Results                              
Dep. Variable:     LimitedoChequeEspecial   R-squared:                       0.848
Model:                                OLS   Adj. R-squared:                  0.830
Method:                     Least Squares   F-statistic:                     46.89
Date:                    Tue, 17 Oct 2023   Prob (F-statistic):           4.07e-16
Time:                            20:36:12   Log-Likelihood:                -441.00
No. Observations:                      48   AIC:                             894.0
Df Residuals:                          42   BIC:                             905.2
Df Model:                               5                                         
Covariance Type:                nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------

## Removendo a variável Idade que possui o maior P-value

In [7]:
# Backward-elimination step: refit without 'Idade'.
#
# Only one education indicator is kept in the formula. The two indicators sum
# to 1, so listing both together with the implicit intercept yields a singular
# design matrix (dummy-variable trap) and meaningless p-values for the
# intercept and the education terms. 'Escolaridade_Superior_Pos' is the
# reference level absorbed by the intercept.
function = 'LimitedoChequeEspecial~RendimentoTotal+Salario+LimitedeCreditoImediato+Escolaridade_Fundamental_Medio'
model = smf.ols(formula=function, data=limite).fit()
print(model.summary())

                              OLS Regression Results                              
Dep. Variable:     LimitedoChequeEspecial   R-squared:                       0.848
Model:                                OLS   Adj. R-squared:                  0.834
Method:                     Least Squares   F-statistic:                     59.99
Date:                    Tue, 17 Oct 2023   Prob (F-statistic):           4.92e-17
Time:                            20:38:24   Log-Likelihood:                -441.01
No. Observations:                      48   AIC:                             892.0
Df Residuals:                          43   BIC:                             901.4
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------

## Removendo o Intercepto, que possui o maior P-value

In [8]:
# Refit with the explicit intercept suppressed ('- 1' in the formula).
# NOTE(review): both education indicators are still present and they sum to 1,
# so together they span a constant column. The two indicator coefficients act
# as per-level intercepts, which is why the reported fit statistics
# (R-squared, F, AIC/BIC) match the model that has an intercept.
function = (
    'LimitedoChequeEspecial~RendimentoTotal+Salario+LimitedeCreditoImediato'
    '+Escolaridade_Fundamental_Medio+Escolaridade_Superior_Pos - 1'
)
model = smf.ols(function, data=limite).fit()
print(model.summary())

                              OLS Regression Results                              
Dep. Variable:     LimitedoChequeEspecial   R-squared:                       0.848
Model:                                OLS   Adj. R-squared:                  0.834
Method:                     Least Squares   F-statistic:                     59.99
Date:                    Tue, 17 Oct 2023   Prob (F-statistic):           4.92e-17
Time:                            20:46:02   Log-Likelihood:                -441.01
No. Observations:                      48   AIC:                             892.0
Df Residuals:                          43   BIC:                             901.4
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------

## Removendo a variável Escolaridade_Superior_Pos que possui o maior P-value

In [9]:
# Refit without 'Escolaridade_Superior_Pos', still with '- 1'. With a single
# education indicator and no intercept the model has no constant term, so
# statsmodels reports the *uncentered* R-squared; it is not comparable with
# the centered R-squared of the fits that contain a constant.
function = (
    'LimitedoChequeEspecial~'
    + '+'.join([
        'RendimentoTotal',
        'Salario',
        'LimitedeCreditoImediato',
        'Escolaridade_Fundamental_Medio',
    ])
    + ' - 1'
)
model = smf.ols(function, limite).fit()
print(model.summary())

                                   OLS Regression Results                                  
Dep. Variable:     LimitedoChequeEspecial   R-squared (uncentered):                   0.918
Model:                                OLS   Adj. R-squared (uncentered):              0.911
Method:                     Least Squares   F-statistic:                              123.2
Date:                    Tue, 17 Oct 2023   Prob (F-statistic):                    2.66e-23
Time:                            20:47:39   Log-Likelihood:                         -441.06
No. Observations:                      48   AIC:                                      890.1
Df Residuals:                          44   BIC:                                      897.6
Df Model:                               4                                                  
Covariance Type:                nonrobust                                                  
                                     coef    std err          t      P>|t|      

## Removendo a variável RendimentoTotal que possui o maior P-value

In [10]:
# Refit without 'RendimentoTotal' (no-intercept specification kept via '- 1').
# The remaining regressors are salary, the immediate credit limit and the
# single education indicator; R-squared is again the uncentered variant.
function = (
    'LimitedoChequeEspecial~'
    'Salario+LimitedeCreditoImediato+Escolaridade_Fundamental_Medio'
    ' - 1'
)
model = smf.ols(data=limite, formula=function).fit()
print(model.summary())

                                   OLS Regression Results                                  
Dep. Variable:     LimitedoChequeEspecial   R-squared (uncentered):                   0.915
Model:                                OLS   Adj. R-squared (uncentered):              0.909
Method:                     Least Squares   F-statistic:                              160.9
Date:                    Tue, 17 Oct 2023   Prob (F-statistic):                    4.57e-24
Time:                            20:48:58   Log-Likelihood:                         -442.01
No. Observations:                      48   AIC:                                      890.0
Df Residuals:                          45   BIC:                                      895.6
Df Model:                               3                                                  
Covariance Type:                nonrobust                                                  
                                     coef    std err          t      P>|t|      