## 1 Užduotis

Panaudokite Grid Search, Random Search ir Bayes optimizavimą vienam iš savo anksčiau naudotų datasetų, jog surastumėte didžiausią tikslumą ir geriausius hiperparametrus

In [12]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.ensemble  import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.datasets import load_iris


In [22]:
# Nuskaityti duomenų rinkinį
df = pd.read_csv('diabetes.csv')

# Atskirti požymius ir tikslinį kintamąjį
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Padalinti duomenis į mokymo ir testavimo rinkinius
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Skalavimas duomenų
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Logistinė regresija
model = LogisticRegression(max_iter=10000)

# 1. Grid Search
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear', 'saga']
}

grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Grid Search geriausi parametrai:", grid_search.best_params_)
print("Grid Search geriausias tikslumas:", grid_search.best_score_)

Grid Search geriausi parametrai: {'C': 10, 'solver': 'liblinear'}
Grid Search geriausias tikslumas: 0.7655071304811408


In [25]:
model = LogisticRegression(C=10, solver='liblinear')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.7532467532467533


In [27]:
# 2. Random Search
param_dist = {
    'C': np.logspace(-4, 4, 20),
    'solver': ['liblinear', 'saga']
}

random_search = RandomizedSearchCV(model, param_dist, n_iter=50, cv=5, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

print("Random Search geriausi parametrai:", random_search.best_params_)
print("Random Search geriausias tikslumas:", random_search.best_score_)




Random Search geriausi parametrai: {'solver': 'liblinear', 'C': 4.281332398719396}
Random Search geriausias tikslumas: 0.7655071304811408


In [30]:
model = LogisticRegression(C=4, solver='liblinear')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.7532467532467533


In [18]:
# Įkelti Iris duomenų rinkinį
iris = load_iris()
X = iris.data
y = iris.target

# Padalinti duomenis į mokymo ir testavimo rinkinius
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 1. Grid Search
model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

print(f'Grid Search geriausi parametrai: {grid_search.best_params_}')
print(f'Grid Search geriausias tikslumas: {grid_search.best_score_}')

Grid Search geriausi parametrai: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 10}
Grid Search geriausias tikslumas: 0.95


In [19]:
model = RandomForestClassifier(max_depth=None, min_samples_split=2, n_estimators=10)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(accuracy_score(y_test, y_pred))

1.0


In [20]:
# 2. Random Search
param_dist = {
    'n_estimators': np.arange(10, 201, 10),
    'max_depth': [None] + list(np.arange(5, 21, 5)),
    'min_samples_split': np.arange(2, 11)
}

random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=50, cv=5, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

print(f'Random Search geriausi parametrai: {random_search.best_params_}')
print(f'Random Search geriausias tikslumas: {random_search.best_score_}')


Random Search geriausi parametrai: {'n_estimators': 140, 'min_samples_split': 10, 'max_depth': 10}
Random Search geriausias tikslumas: 0.9583333333333334


In [21]:
model = RandomForestClassifier(max_depth=10, min_samples_split=10, n_estimators=140)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(accuracy_score(y_test, y_pred))

1.0
