In [22]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

In [2]:
url = r"C:\Users\bunyo\OneDrive\Desktop\AI_Course\FirstWeekProject\data\preprocessed_data\preprocessed_data.csv"
df = pd.read_csv(url)
df.head()

Unnamed: 0,WIND,IND,RAIN,IND.1,T.MAX,IND.2,T.MIN,T.MIN.G,Year,Month,Day,T_AVG,IS_RAINIY,IS_FROST
0,13.67,0.0,0.002985,0.0,0.356877,0.0,0.515254,0.443709,0.0,0.0,0.0,0.439104,1.0,0.0
1,11.5,0.0,0.076119,0.0,0.271375,0.0,0.532203,0.513245,0.0,0.0,0.033333,0.320144,1.0,0.0
2,11.25,0.0,0.00597,0.0,0.208178,0.0,0.40678,0.460265,0.0,0.0,0.066667,0.392398,1.0,0.0
3,8.63,0.0,0.002985,0.0,0.211896,0.0,0.40339,0.370861,0.0,0.0,0.1,0.40065,1.0,0.0
4,11.92,0.0,0.155224,0.0,0.271375,0.25,0.338983,0.228477,0.0,0.0,0.133333,0.5445,1.0,0.0


In [17]:
x = df.drop('WIND',axis=1)
y = df['WIND']

x_train, x_test, y_train, y_test = train_test_split(x,y,random_state=99,test_size=0.21)

# Manual

In [19]:
RF = RandomForestRegressor(max_depth=4,n_estimators=40)
RF.fit(x_train,y_train)

RF_pred = RF.predict(x_test)
accuracy = r2_score(y_test,RF_pred)
print(f": R2 SCORE: {accuracy}")

: R2 SCORE: 0.18400405612873605


# GRID SEARCH

In [21]:
#

parametr = {
    'n_estimators':[5, 100, 102, 150],
    'max_depth':[4, 7, 9, 12]
}

grid_search = GridSearchCV(RandomForestRegressor(), param_grid=parametr, cv=3)
grid_search.fit(x_train,y_train)

pred = grid_search.predict(x_test)
gr_accuracy = r2_score(y_test,pred)
print(f"Grid search r2 score: {gr_accuracy}")

Grid search r2 score: 0.2597131471459191


# Randomized Search

In [25]:
parametr = {
    'n_estimators':[5,15, 145, 102, 200],
    'max_depth':[4, 7, 9, 12,15]
}

Random_search = RandomizedSearchCV(RandomForestRegressor(), parametr,random_state=99, cv=3,n_iter=7)
Random_search.fit(x_train,y_train)


print(f"Random search best parametrlari: {Random_search.best_params_}")

RF_r = RandomForestRegressor(**Random_search.best_params_)
RF_r.fit(x_train,y_train)

pred2 = RF_r.predict(x_test)
random_r2 = r2_score(y_test,pred2)

print(f"Eng yaxshi parametrlar: {Random_search.best_params_}")
print(f"Random search r2 score: {random_r2}")

Random search best parametrlari: {'n_estimators': 145, 'max_depth': 12}
Eng yaxshi parametrlar: {'n_estimators': 145, 'max_depth': 12}
Random search r2 score: 0.2611389639354368


# Bayesian optimization

In [30]:
# 
from skopt import BayesSearchCV

param = {
    "n_estimators":(60,100),
    "max_depth":(5,10)
}

bayes = BayesSearchCV(RandomForestRegressor(), param, cv=4,random_state=88,n_iter=5)
bayes.fit(x_train,y_train)

print(f"best parametrs: {bayes.best_params_}")

RF_b = RandomForestRegressor(**bayes.best_params_)
RF_b.fit(x_train,y_train)

pred3 = RF_b.predict(x_test)
bayes_r2 = r2_score(y_test,pred3)

print(f"Eng yaxshi parametrlar: {bayes.best_params_}")
print(f"Random search r2 score: {bayes_r2}")

best parametrs: OrderedDict({'max_depth': 9, 'n_estimators': 69})
Eng yaxshi parametrlar: OrderedDict({'max_depth': 9, 'n_estimators': 69})
Random search r2 score: 0.2501763192335005


# optuna

In [33]:
import optuna
from sklearn.model_selection import cross_val_score

def objective(trial):
  
    n_estimators = trial.suggest_int('n_estimators', 10, 200)
    max_depth = trial.suggest_int('max_depth', 2, 32, log=True)
    
    
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)
    
   
    model = RandomForestRegressor(
        n_estimators=n_estimators, 
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42,
        n_jobs=-1 
    )
    
    
    score = cross_val_score(model, x, y, cv=6).mean()
    
    return score


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

print(f"Eng yaxshi parametrlar: {study.best_params}")
print(f"Eng yaxshi natija (R^2): {study.best_value}")

[32m[I 2026-02-18 23:16:37,681][0m A new study created in memory with name: no-name-c26310e9-370f-44d1-b337-f301bc4e3f40[0m
[32m[I 2026-02-18 23:16:38,512][0m Trial 0 finished with value: 0.14838322246694804 and parameters: {'n_estimators': 16, 'max_depth': 27, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.14838322246694804.[0m
[32m[I 2026-02-18 23:16:41,758][0m Trial 1 finished with value: 0.1948820628924014 and parameters: {'n_estimators': 164, 'max_depth': 16, 'min_samples_split': 16, 'min_samples_leaf': 10}. Best is trial 1 with value: 0.1948820628924014.[0m
[32m[I 2026-02-18 23:16:43,876][0m Trial 2 finished with value: 0.1930156298784513 and parameters: {'n_estimators': 99, 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.1948820628924014.[0m
[32m[I 2026-02-18 23:16:45,268][0m Trial 3 finished with value: 0.19329542016717713 and parameters: {'n_estimators': 68, 'max_depth': 9, 'min_samples

Eng yaxshi parametrlar: {'n_estimators': 123, 'max_depth': 8, 'min_samples_split': 14, 'min_samples_leaf': 8}
Eng yaxshi natija (R^2): 0.19526473287185753
