In [1]:
import pandas as pd
import numpy as np 
from scipy.stats import boxcox
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_breuschpagan
from linearmodels.panel import PanelOLS

In [3]:
# Load the "Inclusive Growth Determinants" CSV and preview its first rows.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs where the file exists at exactly this location.
csv_path = r'E:\COOLYEAH\smt_5\EKT\Ekonometrika-DataPanel\Data\Inclusive Growth Determinants.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,Province,Year,GRDP Capita,GRDP Employed,Health Complaint,Formal Sector,Unemployed Percent,HDI,Gov Spending,Gov CapEx,Poverty,Gini,Decent Sanitation,Clean Water,HS Graduation,Agriculture,Life Expectancy,GRDP Nominal,Population Thousand
0,ACEH,2015,22524.31,53.9906,27.92,0.395,8.83,69.45,37549.69,8553.03,17.095,0.3365,54.68,61.23,68.16,37598.849177,69.54,129092700.0,5018.7
1,ACEH,2016,22835.29,56.680773,25.78,0.4283,7.85,70.0,42180.53,9870.07,16.58,0.337,62.68,63.31,74.46,40218.310849,69.565,136843800.0,5096.2
2,ACEH,2017,23362.9,56.179526,24.85,0.4042,6.98,70.6,43580.96,8920.15,16.405,0.329,63.38,64.85,70.64,43363.35429,69.585,145806900.0,5189.5
3,ACEH,2018,24013.79,56.608468,30.18,0.4056,6.44,71.19,38767.1,6320.93,15.825,0.3215,67.09,66.48,70.68,46365.383165,69.7,155911000.0,5281.3
4,ACEH,2019,24842.3,55.897635,29.33,0.4288,5.825,71.9,45620.69,8743.71,15.165,0.3205,73.16,85.81,69.96,48438.86287,69.915,164163000.0,5371.5


In [4]:
# Count the missing values in each column.
missing_per_column = data.isna().sum()
print(missing_per_column)

Province               0
Year                   0
GRDP Capita            0
GRDP Employed          0
Health Complaint       0
Formal Sector          0
Unemployed Percent     0
HDI                    0
Gov Spending           0
Gov CapEx              0
Poverty                0
Gini                   0
Decent Sanitation      0
Clean Water            0
HS Graduation          0
Agriculture            0
Life Expectancy        0
GRDP Nominal           0
Population Thousand    0
dtype: int64


In [9]:
# Show the column labels currently present in `data`.
# NOTE(review): the original comment here said this cell lists the regions to
# confirm that "INDONESIA" is absent, but the code prints column labels, not
# region values -- confirm which check was intended.
column_labels = data.columns
print(column_labels)

Index(['GRDP Capita', 'GRDP Employed', 'Health Complaint', 'Formal Sector',
       'Unemployed Percent', 'HDI', 'Gov Spending', 'Gov CapEx', 'Poverty',
       'Gini', 'Decent Sanitation', 'Clean Water', 'HS Graduation',
       'Agriculture', 'Life Expectancy', 'GRDP Nominal',
       'Population Thousand'],
      dtype='object')


In [6]:
# Move Province and Year out of the columns into a two-level row index.
# Guarded so the cell can be re-run safely: once the columns are in the index,
# calling set_index on them again raises KeyError.
panel_keys = ['Province', 'Year']
if list(data.index.names) != panel_keys:
    data = data.set_index(panel_keys)

In [32]:
# Split the data into regressors (X) and the response (y).
ols_columns = [
    'GRDP Nominal', 'GRDP Employed', 'Formal Sector', 'HDI',
    'Gov Spending', 'Gov CapEx', 'Poverty', 'Gini',
    'Life Expectancy', 'Population Thousand',
]
y = data['GRDP Capita']

# Add a constant (intercept) column to the regressors.
X = sm.add_constant(data[ols_columns])

# Fit the OLS regression.
model = sm.OLS(y, X).fit()

# Breusch-Pagan test on the OLS residuals, using the same design matrix.
bp_test = het_breuschpagan(model.resid, X)
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
results = {label: statistic for label, statistic in zip(labels, bp_test)}

# Print the test results, one statistic per line.
print("Hasil Uji Breusch-Pagan:")
for key, value in zip(labels, bp_test):
    print(f"{key}: {value}")

Hasil Uji Breusch-Pagan:
Lagrange multiplier statistic: 89.60315353726298
p-value: 6.419633350792625e-15
f-value: 13.706434023223078
f p-value: 8.340996516368948e-19


In [20]:
# Print the full regression summary table of the fitted OLS model.
# NOTE(review): the saved output below reports "Dep. Variable: GRDP Nominal",
# while the fit cell in this notebook uses 'GRDP Capita' as the response --
# the output appears to come from an earlier run; re-execute to refresh it.
ols_summary = model.summary()
print(ols_summary)

                            OLS Regression Results                            
Dep. Variable:           GRDP Nominal   R-squared:                       0.937
Model:                            OLS   Adj. R-squared:                  0.934
Method:                 Least Squares   F-statistic:                     338.2
Date:                Fri, 29 Nov 2024   Prob (F-statistic):          2.55e-130
Time:                        18:00:57   Log-Likelihood:                -4824.2
No. Observations:                 238   AIC:                             9670.
Df Residuals:                     227   BIC:                             9709.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                1.563e+09   4

In [26]:
# Box-Cox needs strictly positive input: if any value is zero or negative,
# shift the whole column so that its minimum becomes 1.
if data['GRDP Capita'].le(0).any():
    shift = abs(data['GRDP Capita'].min()) + 1
    data['GRDP Capita'] = data['GRDP Capita'] + shift

# Box-Cox transform of the 'GRDP Capita' column; with no lambda supplied,
# scipy also returns the lambda it estimated.
# NOTE(review): the new column is spelled 'GDRP' (not 'GRDP'); a later cell
# reads it under this exact name, so it is kept as-is.
transformed_values, lambda_GDRP = boxcox(data['GRDP Capita'])
data['GDRP Capita_boxcox'] = transformed_values

# Report the estimated lambda.
print(f"Lambda terbaik untuk transformasi Box-Cox: {lambda_GDRP}")

# Compare the original and transformed values for the first rows.
comparison_columns = ['GRDP Capita', 'GDRP Capita_boxcox']
print(data[comparison_columns].head())

Lambda terbaik untuk transformasi Box-Cox: -0.5474986745375913
               GRDP Capita  GDRP Capita_boxcox
Province Year                                 
ACEH     2015     22524.31            1.818928
         2016     22835.29            1.818985
         2017     23362.90            1.819078
         2018     24013.79            1.819188
         2019     24842.30            1.819323


In [43]:
# Split the data into regressors (X) and the Box-Cox-transformed response (y).
boxcox_columns = [
    'GRDP Nominal', 'GRDP Employed', 'Formal Sector',
    'Unemployed Percent', 'HDI', 'Gov Spending', 'Poverty',
    'Gini', 'Decent Sanitation', 'Clean Water',
    'Population Thousand',
]
y = data['GDRP Capita_boxcox']

# Add a constant (intercept) column to the regressors.
X = sm.add_constant(data[boxcox_columns])

# Fit the OLS regression on the transformed response.
model_boxcox = sm.OLS(y, X).fit()

# Breusch-Pagan test on the residuals, using the same design matrix.
bp_test = het_breuschpagan(model_boxcox.resid, X)
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
results = {label: statistic for label, statistic in zip(labels, bp_test)}

# Print the test results, one statistic per line.
print("Hasil Uji Breusch-Pagan:")
for key, value in zip(labels, bp_test):
    print(f"{key}: {value}")

Hasil Uji Breusch-Pagan:
Lagrange multiplier statistic: 58.51852045379889
p-value: 1.7450388532060648e-08
f-value: 6.698683368839128
f p-value: 1.0725168616197257e-09


In [38]:
# Print the full regression summary table of the Box-Cox OLS model.
# NOTE(review): the saved output below reports "Df Model: 13", while the fit
# cell in this notebook passes 11 regressors -- the output appears to come
# from an earlier run; re-execute to refresh it.
boxcox_summary = model_boxcox.summary()
print(boxcox_summary)

                            OLS Regression Results                            
Dep. Variable:     GDRP Capita_boxcox   R-squared:                       0.851
Model:                            OLS   Adj. R-squared:                  0.842
Method:                 Least Squares   F-statistic:                     98.35
Date:                Fri, 29 Nov 2024   Prob (F-statistic):           8.30e-85
Time:                        19:27:08   Log-Likelihood:                 1411.8
No. Observations:                 238   AIC:                            -2796.
Df Residuals:                     224   BIC:                            -2747.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                   1.8197    