# Linear Regression

## Import libraries and data

In [19]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns
from seaborn import regplot

import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats.multicomp as multi

import scipy
from scipy import stats
from scipy.stats import pearsonr

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

import researchpy as rp

import plotly.express as px

In [20]:
# Read the appointment records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')

In [21]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,patient_id,appointment_id,gender,scheduled_day,appointment_day,age,neighbourhood,scholarship,hipertension,diabetes,alcoholism,handicap,sms,show_up
0,29872499824296,5642903,0,2016-04-29 18:38:08+00:00,2016-04-29 00:00:00+00:00,62,JARDIM DA PENHA,0,1,0,0,0,0,1
1,558997776694438,5642503,1,2016-04-29 16:08:27+00:00,2016-04-29 00:00:00+00:00,56,JARDIM DA PENHA,0,0,0,0,0,0,1
2,4262962299951,5642549,0,2016-04-29 16:19:04+00:00,2016-04-29 00:00:00+00:00,62,MATA DA PRAIA,0,0,0,0,0,0,1
3,867951213174,5642828,0,2016-04-29 17:29:31+00:00,2016-04-29 00:00:00+00:00,8,PONTAL DE CAMBURI,0,0,0,0,0,0,1
4,8841186448183,5642494,0,2016-04-29 16:07:23+00:00,2016-04-29 00:00:00+00:00,56,JARDIM DA PENHA,0,1,1,0,0,0,1


## Implement linear regression model

In [22]:
# Predictors: every column except the identifiers, the raw timestamps,
# the neighbourhood name and the target itself.
X = df.drop(
    labels=[
        'patient_id',
        'appointment_id',
        'scheduled_day',
        'appointment_day',
        'neighbourhood',
        'show_up',
    ],
    axis='columns',
)

In [23]:
# Target: the show_up column.
Y = df['show_up']

### Model with a constant (intercept) term

In [38]:
# Put an intercept column in front of the predictors; it shows up as
# `const` in the regression summary.
X = sm.add_constant(X, prepend=True)

In [39]:
# OLS specification: show_up regressed on the predictors plus the constant.
model_constant = sm.OLS(endog=Y, exog=X)

In [40]:
# Fit the model with statsmodels' default settings.
# NOTE(review): show_up looks like a 0/1 indicator, which would make this a
# linear probability model; consider heteroskedasticity-robust standard
# errors (e.g. cov_type='HC1') or a logit model -- confirm the intent.
results_constant = model_constant.fit()

In [41]:
# Display the coefficient table and fit diagnostics for the model with a constant.
results_constant.summary()

0,1,2,3
Dep. Variable:,show_up,R-squared:,0.021
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,289.9
Date:,"Fri, 24 Jun 2022",Prob (F-statistic):,0.0
Time:,22:17:19,Log-Likelihood:,-54800.0
No. Observations:,110519,AIC:,109600.0
Df Residuals:,110510,BIC:,109700.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.7971,0.003,286.544,0.000,0.792,0.803
gender,0.0026,0.003,1.013,0.311,-0.002,0.008
age,0.0010,6.1e-05,16.797,0.000,0.001,0.001
scholarship,-0.0309,0.004,-7.585,0.000,-0.039,-0.023
hipertension,0.0094,0.004,2.516,0.012,0.002,0.017
diabetes,-0.0127,0.005,-2.459,0.014,-0.023,-0.003
alcoholism,-0.0210,0.007,-2.972,0.003,-0.035,-0.007
handicap,-0.0043,0.007,-0.582,0.560,-0.019,0.010
sms,-0.1095,0.003,-42.712,0.000,-0.115,-0.105

0,1,2,3
Omnibus:,22029.026,Durbin-Watson:,1.803
Prob(Omnibus):,0.0,Jarque-Bera (JB):,38528.048
Skew:,-1.442,Prob(JB):,0.0
Kurtosis:,3.211,Cond. No.,272.0


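### Model without a constant (intercept) term

Note: without a constant, statsmodels reports the *uncentered* R-squared, so the value below is not directly comparable with the R-squared of the model that includes a constant.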

In [45]:
# Rebuild the predictors from df so that X no longer carries the constant
# column added for the previous model.
X = df.drop(
    labels=[
        'patient_id',
        'appointment_id',
        'scheduled_day',
        'appointment_day',
        'neighbourhood',
        'show_up',
    ],
    axis='columns',
)

In [46]:
# Target: the show_up column (same response as in the model with a constant).
Y = df['show_up']

In [47]:
# OLS specification without an intercept: X has no constant column here.
model_no_constant = sm.OLS(endog=Y, exog=X)

In [48]:
# Fit the no-intercept model with statsmodels' default settings.
results_no_constant = model_no_constant.fit()

In [49]:
# Display the coefficient table and fit diagnostics for the no-intercept model.
results_no_constant.summary()

0,1,2,3
Dep. Variable:,show_up,R-squared (uncentered):,0.655
Model:,OLS,Adj. R-squared (uncentered):,0.655
Method:,Least Squares,F-statistic:,26260.0
Date:,"Fri, 24 Jun 2022",Prob (F-statistic):,0.0
Time:,22:19:59,Log-Likelihood:,-85502.0
No. Observations:,110519,AIC:,171000.0
Df Residuals:,110511,BIC:,171100.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
gender,0.3217,0.003,105.541,0.000,0.316,0.328
age,0.0137,5.54e-05,247.459,0.000,0.014,0.014
scholarship,0.2782,0.005,53.655,0.000,0.268,0.288
hipertension,-0.1379,0.005,-28.373,0.000,-0.147,-0.128
diabetes,-0.0539,0.007,-7.907,0.000,-0.067,-0.041
alcoholism,-0.0991,0.009,-10.626,0.000,-0.117,-0.081
handicap,0.0124,0.010,1.261,0.207,-0.007,0.032
sms,0.1123,0.003,34.786,0.000,0.106,0.119

0,1,2,3
Omnibus:,7327.709,Durbin-Watson:,1.527
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8896.632
Skew:,-0.694,Prob(JB):,0.0
Kurtosis:,2.943,Cond. No.,272.0
