In [7]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.datasets import load_wine

# Load the wine dataset and pull out the feature matrix and the class labels.
data = load_wine()
X = data.data
y = data.target

# Tabular view of the same features, with the class label appended as 'target'.
df = pd.DataFrame(X, columns=data.feature_names).assign(target=y)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Filter method: keep the 2 features with the highest chi-squared score.
# The selector is fitted on the training split only and then applied to both splits.
filter_selector = SelectKBest(score_func=chi2, k=2)
filter_selector.fit(X_train, y_train)
X_train_filter = filter_selector.transform(X_train)
X_test_filter = filter_selector.transform(X_test)

selected_by_filter = filter_selector.get_support(indices=True)
print("Fitur yang dipilih dengan Filter methods: ", selected_by_filter)

Fitur yang dipilih dengan Filter methods:  [ 9 12]


In [8]:
# Wrapper method: recursive feature elimination driven by a logistic-regression
# estimator, stopping when 2 features remain.
model = LogisticRegression(solver='lbfgs', max_iter=5000)
rfe_selector = RFE(estimator=model, n_features_to_select=2)

# Fit on the training split only, then project both splits onto the kept columns.
rfe_selector.fit(X_train, y_train)
X_train_rfe = rfe_selector.transform(X_train)
X_test_rfe = rfe_selector.transform(X_test)

selected_by_rfe = rfe_selector.get_support(indices=True)
print("Fitur yang dipilih dengan wrapper methods ", selected_by_rfe)

Fitur yang dipilih dengan wrapper methods  [0 6]


In [9]:
# Embedded method: fit a random forest on all features and read its importances.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

importance = rf_model.feature_importances_
# Feature positions ordered from most to least important (not used further in this cell).
indices = np.argsort(importance)[::-1]
threshold = 0.05

# Column positions whose importance reaches the threshold, in original column order.
importance_features_indices = np.flatnonzero(importance >= threshold).tolist()
X_important = X_train[:, importance_features_indices]
X_test_important = X_test[:, importance_features_indices]

for feature_idx in importance_features_indices:
    feature_name = data.feature_names[feature_idx]
    feature_score = importance[feature_idx]
    print(f"{feature_name}: {feature_score}")

print("\nDimensi data pelatihan dengan fitur penting ", X_important.shape)
print("Dimensi data pengujian  dengan fitur  penting ", X_test_important.shape)

alcohol: 0.11239773542143083
flavanoids: 0.20229341635663617
color_intensity: 0.17120218308649574
hue: 0.07089132259413941
od280/od315_of_diluted_wines: 0.11156431672604968
proline: 0.13904586955351153

Dimensi data pelatihan dengan fitur penting  (142, 6)
Dimensi data pengujian  dengan fitur  penting  (36, 6)


In [10]:
def evaluate_model(X_train, X_test, y_train, y_test, model):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        X_train: Training features, passed to ``model.fit``.
        X_test: Test features, passed to ``model.score``.
        y_train: Training labels, passed to ``model.fit``.
        y_test: Test labels, passed to ``model.score``.
        model: Any object exposing ``fit(X, y)`` and ``score(X, y)``.

    Returns:
        Whatever ``model.score(X_test, y_test)`` returns.

    Note:
        ``model`` is fitted in place, so the caller's object is modified.
    """
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

In [11]:
# Score each feature subset on the held-out split.

# Filter-selected features, scored with a fresh logistic regression.
logistic_model_filter = LogisticRegression(max_iter=200)
accuracy_filter = evaluate_model(X_train_filter, X_test_filter, y_train, y_test, logistic_model_filter)

# RFE-selected features, scored with a fresh logistic regression.
logistic_model_rfe = LogisticRegression(max_iter=200)
accuracy_rfe = evaluate_model(X_train_rfe, X_test_rfe, y_train, y_test, logistic_model_rfe)

# Importance-selected features, scored with a random forest.
# evaluate_model() calls fit() on the model it receives, so passing rf_model here
# would refit it on the reduced matrix and overwrite the full-feature forest whose
# importances picked these columns. Evaluate an unfitted copy with the same
# hyperparameters instead, leaving rf_model intact.
rf_model_important = RandomForestClassifier(**rf_model.get_params())
accuracy_rf = evaluate_model(X_important, X_test_important, y_train, y_test, rf_model_important)

print(f"\nAccuracy Filter {accuracy_filter:.2f}")
# The original label began with "\A", an invalid escape that printed a literal backslash.
print(f"Accuracy RFE {accuracy_rfe:.2f}")
print(f"Accuracy RF {accuracy_rf:.2f}")


Accuracy Filter 0.89
\Accuracy RFE 0.94
Accuracy RF 1.00


In [12]:
# Toy single-feature column used to compare the two scalers.
# Deliberately not named `data`: that name holds the wine dataset object that an
# earlier cell reads via `data.feature_names`, and rebinding it to a list would
# break that cell when it is re-run.
sample_values = [[10], [2], [30], [40], [50]]

# Min-max scaling: the printed result maps the smallest value to 0 and the largest to 1.
min_max_scaler = MinMaxScaler()
scaled_min_max = min_max_scaler.fit_transform(sample_values)
print('Min-Max Scaling:\n ', scaled_min_max)

# Standardization of the same column with StandardScaler.
standard_scaler = StandardScaler()
scaled_standard = standard_scaler.fit_transform(sample_values)
print('\nStandarization:\n ', scaled_standard)

Min-Max Scaling:
  [[0.16666667]
 [0.        ]
 [0.58333333]
 [0.79166667]
 [1.        ]]

Standarization:
  [[-0.91133616]
 [-1.35589038]
 [ 0.2000494 ]
 [ 0.75574218]
 [ 1.31143496]]
