In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from feature_engine.selection import RecursiveFeatureElimination
from feature_engine.selection import SmartCorrelatedSelection
from feature_engine.selection import DropConstantFeatures
from sklearn.preprocessing import StandardScaler

# Variable Selection for Global Linear Regression

In [2]:
# Read the two input tables. DE_INDIKATOR3.csv is semicolon-separated;
# auxiliary.csv uses the default comma separator.
indikator = pd.read_csv(
    '../dataset/indikator/DE_INDIKATOR3.csv',
    sep=';',
)
auxiliary = pd.read_csv(
    '../dataset/auxiliary/auxiliary.csv',
)

In [3]:
# Standardise the auxiliary variables while leaving the 'Kecamatan' merge key
# untouched.
#
# The key is split off *before* NaN filtering and scaling, so it is never fed
# to StandardScaler (meaningless for an identifier, and an error if the codes
# are not numeric) and cannot be removed by dropna(axis=1).
kecamatan = auxiliary['Kecamatan']
features = auxiliary.drop(columns=['Kecamatan']).dropna(axis=1)  # keep only fully observed columns

# NOTE: `column` and `scaler` now describe the feature columns only
# (the key column is no longer part of the fitted scaler).
column = features.columns
scaler = StandardScaler()
auxiliary = pd.DataFrame(
    scaler.fit_transform(features),
    columns=column,
    index=features.index,  # explicit index keeps rows aligned with `kecamatan`
)

# Key column first, followed by the z-scored variables.
auxiliary = pd.concat([kecamatan, auxiliary], axis=1)

In [4]:
# Left-join the auxiliary columns onto the indicator rows by 'Kecamatan';
# every row of `indikator` is kept.
dataset = pd.merge(
    indikator,
    auxiliary,
    how='left',
    left_on='Kecamatan',
    right_on='Kecamatan',
)

# Remove every column that must not reach the model; what remains is the
# target ('Rata2 Kapita') plus the candidate predictors.
# NOTE(review): SE/VAR/CI/RSE/DEFF look like precision outputs of the direct
# estimate and r101-r103 like area codes - confirm.
dataset_1 = dataset.drop(
    columns=[
        'Unnamed: 0', 'Provinsi', 'Kecamatan',
        'SE', 'VAR', 'CI LOWER', 'CI UPPER', 'RSE', 'DEFF',
        'r101', 'r102', 'r103',
    ],
)

In [5]:
# Predictors are every column except the target; the target is 'Rata2 Kapita'.
X = dataset_1.drop(columns=['Rata2 Kapita'])
y = dataset_1['Rata2 Kapita']

In [6]:
# Step 1 of the selection: drop constant / quasi-constant predictors.
# Per the feature_engine docs, tol=0.7 flags a column in which a single value
# covers about 70 % of the rows or more.
tr = DropConstantFeatures(tol=0.7)
tr.fit(X)
Xt = tr.transform(X)

In [7]:
# Step 2 of the selection: within each group of predictors whose pairwise
# Pearson correlation exceeds 0.8, keep one column (chosen by variance).
# NOTE(review): the auxiliary columns were standardised earlier in this
# notebook, so their variances are all close to 1 and the "variance" rule may
# pick the survivor almost arbitrarily - confirm this is intended.
tr = SmartCorrelatedSelection(method="pearson", threshold=0.8, selection_method="variance")
tr.fit(Xt)
Xt = tr.transform(Xt)

In [8]:
# Step 3 of the selection: recursive feature elimination around an ordinary
# linear regression, scored by negative RMSE under 5-fold cross-validation.
regressor = LinearRegression()
tr = RecursiveFeatureElimination(
    estimator=regressor,
    scoring='neg_root_mean_squared_error',
    cv=5,
)
tr.fit(Xt, y)
Xt = tr.transform(Xt)

# Persist the selected predictors.
# NOTE(review): 'Kecamatan' was dropped before selection and index=False is
# used, so the file carries no row key - consumers must rely on row order.
Xt.to_csv(
    '../dataset/auxiliary/auxiliary_linear_transformed.csv',
    index=False,
)

# Variable Selection for Sumsel Linear Regression

In [9]:
# Reload both input tables from disk (the previous section overwrote
# `auxiliary`). DE_INDIKATOR3.csv is semicolon-separated.
indikator = pd.read_csv(
    '../dataset/indikator/DE_INDIKATOR3.csv',
    sep=';',
)
auxiliary = pd.read_csv(
    '../dataset/auxiliary/auxiliary.csv',
)

In [10]:
# Standardise the auxiliary variables while leaving the 'Kecamatan' merge key
# untouched.
#
# The key is split off *before* NaN filtering and scaling, so it is never fed
# to StandardScaler (meaningless for an identifier, and an error if the codes
# are not numeric) and cannot be removed by dropna(axis=1).
kecamatan = auxiliary['Kecamatan']
features = auxiliary.drop(columns=['Kecamatan']).dropna(axis=1)  # keep only fully observed columns

# NOTE: `column` and `scaler` now describe the feature columns only
# (the key column is no longer part of the fitted scaler).
# The scaler is fitted on every row of the file, before any province filter.
column = features.columns
scaler = StandardScaler()
auxiliary = pd.DataFrame(
    scaler.fit_transform(features),
    columns=column,
    index=features.index,  # explicit index keeps rows aligned with `kecamatan`
)

# Key column first, followed by the z-scored variables.
auxiliary = pd.concat([kecamatan, auxiliary], axis=1)

In [11]:
# Left-join the auxiliary columns onto the indicator rows by 'Kecamatan';
# every row of `indikator` is kept.
dataset = pd.merge(
    indikator,
    auxiliary,
    how='left',
    left_on='Kecamatan',
    right_on='Kecamatan',
)

# Keep only the rows whose 'Provinsi' code is 16 (the Sumsel subset).
dataset = dataset.loc[dataset['Provinsi'] == 16]

# Remove every column that must not reach the model; what remains is the
# target ('Rata2 Kapita') plus the candidate predictors.
dataset_1 = dataset.drop(
    columns=[
        'Unnamed: 0', 'Provinsi', 'Kecamatan',
        'SE', 'VAR', 'CI LOWER', 'CI UPPER', 'RSE', 'DEFF',
        'r101', 'r102', 'r103',
    ],
)

In [12]:
# Predictors are every column except the target; the target is 'Rata2 Kapita'.
X = dataset_1.drop(columns=['Rata2 Kapita'])
y = dataset_1['Rata2 Kapita']

In [13]:
# Step 1 of the selection: drop constant / quasi-constant predictors.
# Per the feature_engine docs, tol=0.7 flags a column in which a single value
# covers about 70 % of the rows or more.
tr = DropConstantFeatures(tol=0.7)
tr.fit(X)
Xt = tr.transform(X)

In [14]:
# Step 2 of the selection: within each group of predictors whose pairwise
# Pearson correlation exceeds 0.8, keep one column (chosen by variance).
tr = SmartCorrelatedSelection(method="pearson", threshold=0.8, selection_method="variance")
tr.fit(Xt)
Xt = tr.transform(Xt)

In [15]:
# Step 3 of the selection: recursive feature elimination around an ordinary
# linear regression, scored by negative RMSE under 5-fold cross-validation.
regressor = LinearRegression()
tr = RecursiveFeatureElimination(
    estimator=regressor,
    scoring='neg_root_mean_squared_error',
    cv=5,
)
tr.fit(Xt, y)
Xt = tr.transform(Xt)

# Persist the selected predictors for the Sumsel subset.
# NOTE(review): 'Kecamatan' was dropped before selection and index=False is
# used, so the file carries no row key - consumers must rely on row order.
Xt.to_csv(
    '../dataset/auxiliary/auxiliary_sumsel_linear_transformed.csv',
    index=False,
)

# Variable Selection for Kalsel Linear Regression

In [23]:
# Reload both input tables from disk (the previous section overwrote
# `auxiliary`). DE_INDIKATOR3.csv is semicolon-separated.
indikator = pd.read_csv(
    '../dataset/indikator/DE_INDIKATOR3.csv',
    sep=';',
)
auxiliary = pd.read_csv(
    '../dataset/auxiliary/auxiliary.csv',
)

In [24]:
# Standardise the auxiliary variables while leaving the 'Kecamatan' merge key
# untouched.
#
# The key is split off *before* NaN filtering and scaling, so it is never fed
# to StandardScaler (meaningless for an identifier, and an error if the codes
# are not numeric) and cannot be removed by dropna(axis=1).
kecamatan = auxiliary['Kecamatan']
features = auxiliary.drop(columns=['Kecamatan']).dropna(axis=1)  # keep only fully observed columns

# NOTE: `column` and `scaler` now describe the feature columns only
# (the key column is no longer part of the fitted scaler).
# The scaler is fitted on every row of the file, before any province filter.
column = features.columns
scaler = StandardScaler()
auxiliary = pd.DataFrame(
    scaler.fit_transform(features),
    columns=column,
    index=features.index,  # explicit index keeps rows aligned with `kecamatan`
)

# Key column first, followed by the z-scored variables.
auxiliary = pd.concat([kecamatan, auxiliary], axis=1)

In [25]:
# Left-join the auxiliary columns onto the indicator rows by 'Kecamatan';
# every row of `indikator` is kept.
dataset = pd.merge(
    indikator,
    auxiliary,
    how='left',
    left_on='Kecamatan',
    right_on='Kecamatan',
)

# Keep only the rows whose 'Provinsi' code is 63 (the Kalsel subset).
dataset = dataset.loc[dataset['Provinsi'] == 63]

# Remove every column that must not reach the model; what remains is the
# target ('Rata2 Kapita') plus the candidate predictors.
dataset_1 = dataset.drop(
    columns=[
        'Unnamed: 0', 'Provinsi', 'Kecamatan',
        'SE', 'VAR', 'CI LOWER', 'CI UPPER', 'RSE', 'DEFF',
        'r101', 'r102', 'r103',
    ],
)

In [26]:
# Predictors are every column except the target; the target is 'Rata2 Kapita'.
X = dataset_1.drop(columns=['Rata2 Kapita'])
y = dataset_1['Rata2 Kapita']

In [27]:
# Step 1 of the selection: drop constant / quasi-constant predictors.
# Per the feature_engine docs, tol=0.7 flags a column in which a single value
# covers about 70 % of the rows or more.
tr = DropConstantFeatures(tol=0.7)
tr.fit(X)
Xt = tr.transform(X)

In [28]:
# Step 2 of the selection: within each group of predictors whose pairwise
# Pearson correlation exceeds 0.7, keep one column (chosen by variance).
# NOTE(review): the threshold is 0.7 here, whereas the Global and Sumsel
# sections of this notebook use 0.8 - confirm the difference is deliberate.
tr = SmartCorrelatedSelection(method="pearson", threshold=0.7, selection_method="variance")
tr.fit(Xt)
Xt = tr.transform(Xt)

In [29]:
# Step 3 of the selection: recursive feature elimination around an ordinary
# linear regression, scored by negative RMSE under 5-fold cross-validation.
regressor = LinearRegression()
tr = RecursiveFeatureElimination(
    estimator=regressor,
    scoring='neg_root_mean_squared_error',
    cv=5,
)
tr.fit(Xt, y)
Xt = tr.transform(Xt)

# Persist the selected predictors for the Kalsel subset.
# NOTE(review): 'Kecamatan' was dropped before selection and index=False is
# used, so the file carries no row key - consumers must rely on row order.
Xt.to_csv(
    '../dataset/auxiliary/auxiliary_kalsel_linear_transformed.csv',
    index=False,
)