In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

# 1. Import data

In [2]:
df = pd.read_csv('regresi-data.csv')

In [3]:
df.head()

Unnamed: 0,X1.1,X1.2,X1.3,X1.4,X1.5,X1,X2.1,X2.2,X2.3,X2.4,...,X2,Y1.1,Y1.2,Y1.3,Y1.4,Y1.5,Y,RES_1,e2,RES_2
0,4,2,4,4,4,18,4,4,4,4,...,20,3,3,4,2,4,16,-1.040131,1.081873,-3.307081
1,5,4,5,4,4,22,5,5,5,5,...,25,5,5,5,3,4,22,1.673318,2.799993,-1.097339
2,4,5,4,3,1,17,4,3,3,3,...,18,3,3,3,1,1,11,-5.080004,25.806436,21.216757
3,4,5,4,4,5,22,5,3,5,5,...,23,4,5,5,3,3,20,0.042624,0.001817,-4.103035
4,5,4,4,2,4,19,4,4,4,4,...,21,4,4,4,5,4,21,3.184394,10.140368,5.84838


# 2. Pendugaan Parameter (Modeling)

In [4]:
X = df[['X1', 'X2']]
y = df['Y']
model = sm.OLS(y, sm.add_constant(X)).fit()

# 3. Uji Asumsi Klasik

## 3.1 Uji Homoskedastisitas
Kita akan menggunakan uji Breusch Pagan

In [5]:
from statsmodels.stats.diagnostic import het_breuschpagan

In [6]:
_, p_value, _, _ = het_breuschpagan(model.resid, X)

In [7]:
"""
Karena pvalue (9.326813654447332e-06) > alpha (5%) maka asumsi homoskedastisitas terpenuhi
"""
p_value

9.326813654447332e-06

## 3.2 Uji Non-Autokorelasi
kita akan menggunakan uji durbin watson

In [8]:
from statsmodels.stats.stattools import durbin_watson

In [9]:
dw_statistic = durbin_watson(model.resid)

In [10]:
"""
karena dw_statistic berada diantara 1.5 dan 2.5 maka asumsi non-autokorelasi terpenuhi
"""
dw_statistic

2.2589635152651244

## 3.3 Uji Normalitas
Kita akan menggunakan uji kolmogorov smirnov.

In [11]:
from scipy.stats import kstest, norm

In [12]:
ks_statistic, p_value = kstest(model.resid, 'norm')

In [13]:
"""
karena pvalue (0.07) > alpha (0.05) maka asumsi normalitas sisaan terpenuhi
"""
p_value

0.007661037699126738

## 3.4 Uji Non-Multikolinieritas
Kita akan menggunakan VIF

In [14]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [15]:
# Tambahkan kolom konstanta untuk menghitung VIF
X = sm.add_constant(X)

# Inisialisasi list untuk menyimpan hasil VIF
vif_data = pd.DataFrame()
vif_data["Variable"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]

In [16]:
"""
karena semua nilai VIF < 10 maka asumsi non-multikolinieritas terpenuhi 
"""
vif_data

Unnamed: 0,Variable,VIF
0,const,51.603337
1,X1,1.531493
2,X2,1.531493


# 4. Goodness of fit

In [17]:
## Uji Simultan
# Uji simultan (F-Test)
f_statistic = model.fvalue
p_value = model.f_pvalue

if p_value < 0.05:
    print("Setidaknya satu variabel independen signifikan secara simultan.")
else:
    print("Tidak ada yang signifikan secara simultan.")

Setidaknya satu variabel independen signifikan secara simultan.


In [18]:
## Uji Parsial
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.567
Model:                            OLS   Adj. R-squared:                  0.552
Method:                 Least Squares   F-statistic:                     37.38
Date:                Tue, 17 Oct 2023   Prob (F-statistic):           4.25e-11
Time:                        00:12:27   Log-Likelihood:                -128.60
No. Observations:                  60   AIC:                             263.2
Df Residuals:                      57   BIC:                             269.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          2.7123      1.963      1.381      0.1

In [19]:
## R-Square
r_square = model.rsquared
print("R-Square:", r_square)

R-Square: 0.5674188566834188
