In [None]:
import pandas as pd

In [None]:
# Load the WineQT CSV into a DataFrame.
# The path uses forward slashes: written with backslashes in a normal string
# literal, the "\n" in "ML\notebook" is interpreted as a newline escape and
# corrupts the path. pandas accepts "/" separators on Windows as well.
# NOTE(review): the name `load_wine` is rebound by the
# `from sklearn.datasets import load_wine` import further down, so this
# DataFrame is not what the later cells operate on -- confirm which dataset
# is intended.
load_wine = pd.read_csv('ML/notebook/data/WineQT.csv')

In [None]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, RandomizedSearchCV, ShuffleSplit
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from scipy.stats import randint

# Step 1: Load the wine dataset bundled with scikit-learn
wine_data = load_wine()
X = wine_data.data
y = wine_data.target

# Step 2: Split the dataset into train and test dataset (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Hyperparameter tuning using random search CV
# scipy's randint(low, high) samples integers from low up to high - 1.
param_dist = {
    'criterion': ['gini', 'entropy'],
    'max_depth': randint(1, 20),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 20),
    # 'auto' is deliberately not listed: it was deprecated in scikit-learn 1.1
    # and removed in 1.3, where every candidate using it fails to fit. For
    # classification trees it was only an alias of 'sqrt'.
    'max_features': ['sqrt', 'log2', None],
}

# The base estimator is seeded so that repeated runs of the search pick the
# same best parameters; the search itself and the split are already seeded.
random_search = RandomizedSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Best candidate, refit on the full training set by the search.
best_dt = random_search.best_estimator_

# Step 4: Evaluate the model on the held-out test set
y_pred = best_dt.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of Decision Tree Classifier:", accuracy)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Step 1: Create 10 subsets of the training dataset
# Each split yields the indices of a random 80% of the training rows; the
# remaining 20% of every split is ignored below.
shuffle_split = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

# Step 2: Train 1 decision tree on each subset using the best hyperparameters
forest = []
for train_index, _ in shuffle_split.split(X_train):
    # Seed each tree so the whole cell is reproducible; the trees still differ
    # because each one is fitted on a different subset of rows.
    tree = DecisionTreeClassifier(**random_search.best_params_, random_state=42)
    tree.fit(X_train[train_index], y_train[train_index])
    forest.append(tree)

# Step 3: Evaluate all the trees on the test dataset
# Scores are computed without rebinding `y_pred` / `accuracy`, so the results
# of the single tuned tree from the previous cell are not overwritten.
forest_accuracy = [accuracy_score(y_test, tree.predict(X_test)) for tree in forest]

# Mean of the individual trees' accuracies (not a majority-vote ensemble score).
average_accuracy = sum(forest_accuracy) / len(forest_accuracy)
print("Average Accuracy of Random Forest:", average_accuracy)
