In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

# Load data
train_data = pd.read_csv(r'/content/train_AIC.csv')

# Data preprocessing
exclude_columns = ['Месяц3', 'Количество позиций']
X = train_data.drop(columns=exclude_columns + ['y'])  # Drop specified columns
y = train_data['y']

# Splitting into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(estimator=XGBClassifier(random_state=42), param_grid=param_grid, scoring='f1_macro', cv=3)
grid_search.fit(X_train_scaled, y_train)

# Get best model
best_model = grid_search.best_estimator_

# Predictions on the validation set
val_predictions = best_model.predict(X_val_scaled)
f1_macro = f1_score(y_val, val_predictions, average='macro')
print("F1-Score (Validation):", f1_macro)

F1-Score (Validation): 0.859880514339685
