In [1]:
import pandas as pd
import time
import warnings

from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('seattle-weather.csv')
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015-12-27,8.6,4.4,1.7,2.9,rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun


In [3]:
from sklearn.preprocessing import LabelEncoder

le=LabelEncoder()
df['weather']=le.fit_transform(df['weather'])

In [4]:
x = df[['temp_min', 'temp_max', 'precipitation', 'wind']]
y=df['weather']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

start_time = time.time()

base_model = DecisionTreeRegressor(max_depth=8, random_state=1)

bagging_regressor = BaggingRegressor(base_model, n_estimators=10, random_state=1)

bagging_regressor.fit(X_train, y_train)

predictions = bagging_regressor.predict(X_test)

r2 = r2_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
print(f"R2 score для случайного леса: {r2}")
print("mean_squared_error score", mse)
end_time = time.time()

# Вычисляем время выполнения программы
execution_time = end_time - start_time

print(f"Время выполнения программы: {execution_time} секунд")

R2 score для случайного леса: 0.2582725390406979
mean_squared_error score 1.055969554432153
Время выполнения программы: 0.09188699722290039 секунд


In [5]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

start_time = time.time()

base_model = DecisionTreeRegressor(max_depth=8, random_state=1)

base_model.fit(X_train, y_train)

predictions = base_model.predict(X_test)

r2 = r2_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
print(f"R2 score: {r2}")
print("mean_squared_error score", mse)

R2 score: 0.16209622628805642
mean_squared_error score 1.192892162087779


In [6]:
end_time = time.time()
execution_time = end_time - start_time
print(f"Время выполнения программы: {execution_time} секунд")

Время выполнения программы: 0.012945175170898438 секунд


In [7]:
from sklearn.ensemble import RandomForestClassifier
import catboost as cb
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV, train_test_split

In [8]:
A_train, A_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

In [9]:
start_time = time.time()

In [10]:
random_forest = RandomForestClassifier(max_depth=15, min_samples_split=10).fit(A_train, y_train)

In [11]:
y_preds_d = random_forest.predict(A_train)
print('F1 мера for train', f1_score(y_preds_d, y_train, average='macro'))

F1 мера for train 0.6561140714955148


In [12]:
y_preds = random_forest.predict(A_test)
print('F1 мера for test', f1_score(y_preds, y_test, average='macro'))

F1 мера for test 0.44006992477004203


In [13]:
end_time = time.time()
execution_time = end_time - start_time
print(f"Время выполнения программы: {execution_time} секунд")

Время выполнения программы: 0.28180694580078125 секунд


In [14]:
start_time = time.time()

In [15]:
random_forest = RandomForestClassifier()

params_grid = {
    "max_depth": [12, 18],
    "min_samples_leaf": [3, 10],
    "min_samples_split": [6, 12]
}

grid_search_random_forest = GridSearchCV(estimator=random_forest, param_grid=params_grid, scoring='f1_macro', cv=4)

In [16]:
grid_search_random_forest.fit(A_train, y_train)

In [17]:
best_mdoel = grid_search_random_forest.best_estimator_

In [18]:
y_preds_d = best_mdoel.predict(A_train)
print('F1 мера for train', f1_score(y_preds_d, y_train, average='macro'))

F1 мера for train 0.6511534699480099


In [19]:
y_preds = best_mdoel.predict(A_test)
print('F1 мера for test', f1_score(y_preds, y_test, average='macro'))

F1 мера for test 0.41290422551999517


In [20]:
end_time = time.time()
execution_time = end_time - start_time
print(f"Время выполнения программы: {execution_time} секунд")

Время выполнения программы: 4.920030355453491 секунд


In [21]:
start_time = time.time()

In [22]:
model_catboost_clf = cb.CatBoostClassifier(iterations=3000, task_type='CPU', devices='0')
model_catboost_clf.fit(A_train, y_train)

Learning rate set to 0.031521
0:	learn: 1.5337548	total: 168ms	remaining: 8m 24s
1:	learn: 1.4659992	total: 170ms	remaining: 4m 15s
2:	learn: 1.4063640	total: 172ms	remaining: 2m 51s
3:	learn: 1.3562355	total: 174ms	remaining: 2m 10s
4:	learn: 1.3050368	total: 176ms	remaining: 1m 45s
5:	learn: 1.2601797	total: 177ms	remaining: 1m 28s
6:	learn: 1.2154930	total: 179ms	remaining: 1m 16s
7:	learn: 1.1758558	total: 180ms	remaining: 1m 7s
8:	learn: 1.1417953	total: 182ms	remaining: 1m
9:	learn: 1.1087446	total: 184ms	remaining: 54.9s
10:	learn: 1.0775298	total: 186ms	remaining: 50.4s
11:	learn: 1.0486431	total: 187ms	remaining: 46.7s
12:	learn: 1.0209104	total: 189ms	remaining: 43.4s
13:	learn: 0.9947693	total: 191ms	remaining: 40.7s
14:	learn: 0.9720112	total: 192ms	remaining: 38.3s
15:	learn: 0.9497334	total: 194ms	remaining: 36.2s
16:	learn: 0.9286537	total: 196ms	remaining: 34.4s
17:	learn: 0.9085598	total: 198ms	remaining: 32.8s
18:	learn: 0.8893549	total: 199ms	remaining: 31.3s
19:	lea

<catboost.core.CatBoostClassifier at 0x243e98a94b0>

In [23]:
y_preds_t = model_catboost_clf.predict(A_train)
print('F1 мера for train', f1_score(y_preds_t, y_train, average='macro'))

F1 мера for train 0.9341012020973857


In [24]:
y_preds = model_catboost_clf.predict(A_test)
print('F1 мера for test', f1_score(y_preds, y_test, average='macro'))

F1 мера for test 0.4431280347559417


In [25]:
end_time = time.time()
execution_time = end_time - start_time
print(f"Время выполнения программы: {execution_time} секунд")

Время выполнения программы: 7.083932638168335 секунд
