In [19]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

In [37]:
# Load the Iris table; 'Species' is the label to predict.
df = pd.read_csv('IRIS.csv')

# Feature matrix: everything except the label and the row-identifier columns.
# 'Id' simply counts rows, and the rows are grouped by species, so leaving it
# in would let the models read the label straight off the row number.
# 'Unnamed: 0' is dropped too in case the CSV carries a saved index column of
# that name; errors='ignore' keeps this working when either is absent.
# The label is dropped in a separate, strict call so that a missing or
# misspelled 'Species' column still raises instead of being silently ignored.
X = df.drop(columns=['Id', 'Unnamed: 0'], errors='ignore').drop(columns='Species')
y = df['Species']

In [38]:
# Show the loaded frame as the cell output to eyeball its columns and labels.
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [44]:
# Four independent hold-out splits of the same X / y, one per
# (test fraction, seed) pair, unpacked into the numbered names used by the
# evaluation cells further down.
(
    (X_train1, X_test1, y_train1, y_test1),
    (X_train2, X_test2, y_train2, y_test2),
    (X_train3, X_test3, y_train3, y_test3),
    (X_train4, X_test4, y_train4, y_test4),
) = (
    train_test_split(X, y, test_size=fraction, random_state=seed)
    for fraction, seed in [(0.3, 42), (0.4, 123), (0.3, 456), (0.3, 789)]
)

In [45]:
# Random-forest pipeline: standardise the features, then fit the forest.
# The step names ('scaler', 'rf') are the prefixes used by the keys of the
# hyper-parameter grid, so they must not be renamed.
rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    # Fixed seed: the forest is randomised, and without one every run of the
    # notebook could select different hyper-parameters and report different scores.
    ('rf', RandomForestClassifier(random_state=42)),
])

In [46]:
# Candidate values for the grid search over the 'rf' pipeline step
# (3 x 3 x 3 = 27 combinations). Keys follow the '<step>__<parameter>' form.
rf_param_grid = dict([
    ('rf__n_estimators', [50, 100, 200]),
    ('rf__max_depth', [2, 3, 4]),
    ('rf__min_samples_split', [2, 3, 4]),
])

In [47]:
from sklearn.model_selection import StratifiedKFold

# Cross-validation scheme: three stratified folds, shuffled with a fixed seed
# so that fold membership is the same on every run.
cv = StratifiedKFold(
    n_splits=3,
    shuffle=True,
    random_state=42,
)

In [48]:
# Grid search over the random-forest pipeline.
# NOTE(review): no cell in the notebook creates `rf_grid`, so it is built here
# from the `rf_pipeline`, `rf_param_grid` and `cv` objects defined in the
# preceding cells, with n_jobs=-1 to match `gb_grid` -- confirm these are the
# intended settings.
rf_grid = GridSearchCV(rf_pipeline, param_grid=rf_param_grid, cv=cv, n_jobs=-1)

# One (train features, train labels, test features, test labels) tuple per split.
rf_splits = [
    (X_train1, y_train1, X_test1, y_test1),
    (X_train2, y_train2, X_test2, y_test2),
    (X_train3, y_train3, X_test3, y_test3),
    (X_train4, y_train4, X_test4, y_test4),
]

# Re-run the search on each training split and score the refitted best model
# on that split's hold-out set. After the loop `rf_grid` holds the fit from
# the last split, exactly as with the unrolled version.
rf_predictions = []
for split_no, (X_fit, y_fit, X_eval, y_eval) in enumerate(rf_splits, start=1):
    rf_grid.fit(X_fit, y_fit)
    y_hat = rf_grid.predict(X_eval)
    print(f"Accuracy for split {split_no}: {accuracy_score(y_eval, y_hat)}")
    rf_predictions.append(y_hat)

# Keep the per-split prediction names this cell has always exposed.
y_pred1, y_pred2, y_pred3, y_pred4 = rf_predictions


Accuracy for split 1: 1.0
Accuracy for split 2: 1.0
Accuracy for split 3: 1.0
Accuracy for split 4: 1.0


In [50]:
# Gradient-boosting pipeline: standardise the features, then fit the booster.
# The step names ('scaler', 'gb') are the prefixes used by the keys of the
# hyper-parameter grid, so they must not be renamed.
gb_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    # Fixed seed so repeated runs of the notebook give the same model and scores.
    ('gb', GradientBoostingClassifier(random_state=42)),
])

# Candidate values for the grid search over the 'gb' pipeline step
# (3 x 3 x 3 = 27 combinations). Keys follow the '<step>__<parameter>' form.
gb_param_grid = dict([
    ('gb__n_estimators', [50, 100, 200]),
    ('gb__learning_rate', [0.1, 0.01, 0.001]),
    ('gb__max_depth', [2, 3, 4]),
])

# Grid search over the gradient-boosting pipeline with 5-fold cross-validation,
# using all available cores.
gb_grid = GridSearchCV(gb_pipeline, param_grid=gb_param_grid, cv=5, n_jobs=-1)

# One (train features, train labels, test features, test labels) tuple per split.
gb_splits = [
    (X_train1, y_train1, X_test1, y_test1),
    (X_train2, y_train2, X_test2, y_test2),
    (X_train3, y_train3, X_test3, y_test3),
    (X_train4, y_train4, X_test4, y_test4),
]

# Re-run the search on each training split and score the refitted best model
# on that split's hold-out set. After the loop `gb_grid` holds the fit from
# the last split, exactly as with the unrolled version.
gb_predictions = []
for split_no, (X_fit, y_fit, X_eval, y_eval) in enumerate(gb_splits, start=1):
    gb_grid.fit(X_fit, y_fit)
    y_hat = gb_grid.predict(X_eval)
    print(f"Accuracy for split {split_no}: {accuracy_score(y_eval, y_hat)}")
    gb_predictions.append(y_hat)

# Keep the per-split prediction names this cell has always exposed.
y_pred1, y_pred2, y_pred3, y_pred4 = gb_predictions


Accuracy for split 1: 1.0
Accuracy for split 2: 1.0
Accuracy for split 3: 0.9777777777777777
Accuracy for split 4: 1.0
