In [None]:
# Imports
import sys
sys.path.append('src')

import polars as pl
import numpy as np

# Import the project's own modules
from src.data_preparation import prepare_data_pipeline
from src.classical_models import train_classical_models
from src.neural_models import train_neural_models
from src.cross_validation import run_cross_validation
from src.model_comparison import (
    create_performance_comparison,
    create_combined_comparison_dashboard,
    create_model_ranking
)

# Logging setup: request INFO level on the root logger via basicConfig and
# create the module-level `logger` that the rest of this script writes to.
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Emit a confirmation message once the statements above have run.
logger.info("–ú–æ–¥—É–ª–∏ —É—Å–ø–µ—à–Ω–æ –∏–º–ø–æ—Ä—Ç–∏—Ä–æ–≤–∞–Ω—ã!")


In [None]:
# Data preparation: input file and the name of the target column.
data_path = "data/descriptors/cox2_best_combined.parquet"
target_column = "pIC50"

# Run the project's preparation pipeline. Its result is indexed by string
# keys below (the four splits plus 'original_shape' / 'final_shape').
data_prepared = prepare_data_pipeline(
    file_path=data_path,
    target_column=target_column,
    test_size=0.2,
    variance_threshold=0.01,
    normalization="standard",
    random_state=42,
)

# Pull the four splits out in one step; keys are looked up in the same order
# as before.
X_train, X_test, y_train, y_test = (
    data_prepared[split_key]
    for split_key in ("X_train", "X_test", "y_train", "y_test")
)

# Report the split shapes and the shapes recorded by the pipeline.
logger.info("–î–∞–Ω–Ω—ã–µ –ø–æ–¥–≥–æ—Ç–æ–≤–ª–µ–Ω—ã:")
logger.info(f"–û–±—É—á–∞—é—â–∞—è –≤—ã–±–æ—Ä–∫–∞: {X_train.shape}")
logger.info(f"–¢–µ—Å—Ç–æ–≤–∞—è –≤—ã–±–æ—Ä–∫–∞: {X_test.shape}")
logger.info(f"–ò—Å—Ö–æ–¥–Ω–∞—è —Ñ–æ—Ä–º–∞: {data_prepared['original_shape']}")
logger.info(f"–§–∏–Ω–∞–ª—å–Ω–∞—è —Ñ–æ—Ä–º–∞: {data_prepared['final_shape']}")


In [None]:
# Train every model family and report the metrics each one returns.
logger.info("=== –ó–ê–ü–£–°–ö –ü–û–õ–ù–û–ì–û –ü–ê–ô–ü–õ–ê–ô–ù–ê –û–ë–£–ß–ï–ù–ò–Ø –ú–û–î–ï–õ–ï–ô ===\n")

# Step 1: classical models.
logger.info("1. –û–±—É—á–µ–Ω–∏–µ –∫–ª–∞—Å—Å–∏—á–µ—Å–∫–∏—Ö –º–æ–¥–µ–ª–µ–π...")
classical_results = train_classical_models(
    X_train,
    y_train,
    X_test,
    y_test,
    random_state=42,
)

# Step 2: neural models (epoch count kept small for the demo).
logger.info("\n2. –û–±—É—á–µ–Ω–∏–µ –Ω–µ–π—Ä–æ—Å–µ—Ç–µ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π...")
neural_results = train_neural_models(
    X_train,
    y_train,
    X_test,
    y_test,
    epochs=20,
    random_state=42,
)

# Step 3: merge both result mappings into one; on a duplicate model name the
# neural entry replaces the classical one, exactly as with {**a, **b}.
all_results = dict(classical_results)
all_results.update(neural_results)

# One summary line per model; each metrics entry is read by the keys
# 'mae', 'rmse', 'r2' and 'training_time'.
logger.info("\n=== –†–ï–ó–£–õ–¨–¢–ê–¢–´ –ù–ê –¢–ï–°–¢–û–í–û–ô –í–´–ë–û–†–ö–ï ===")
for model_name, metrics in all_results.items():
    logger.info(
        f"{model_name:15}: MAE={metrics['mae']:.4f}, RMSE={metrics['rmse']:.4f}, "
        f"R¬≤={metrics['r2']:.4f}, –í—Ä–µ–º—è={metrics['training_time']:.2f}s"
    )


In [None]:
# Analyse the results: rank the models, build the comparison figure and log
# the metrics of the top-ranked model.
logger.info("\n=== –ê–ù–ê–õ–ò–ó –ò –í–ò–ó–£–ê–õ–ò–ó–ê–¶–ò–Ø –†–ï–ó–£–õ–¨–¢–ê–¢–û–í ===")

# Build the model ranking.
# NOTE(review): an empty cross-validation dict is passed (original comment:
# "empty CV dict for simplicity") even though a "stability" weight of 0.1 is
# configured and run_cross_validation is imported at the top of the file —
# confirm how create_model_ranking treats the missing CV data.
ranking = create_model_ranking(
    all_results, {},
    weights={"mae": 0.4, "rmse": 0.3, "r2": 0.2, "stability": 0.1}
)

logger.info("\n–†–ï–ô–¢–ò–ù–ì –ú–û–î–ï–õ–ï–ô:")
logger.info(str(ranking.select(["model", "score", "mae", "rmse", "r2"])))

# Build the comparison figure. This stays best-effort: a failure here must not
# stop the rest of the cell, so any exception is logged and execution continues.
try:
    fig = create_performance_comparison(
        all_results,
        metrics=["mae", "rmse", "r2"],
        title="–°—Ä–∞–≤–Ω–µ–Ω–∏–µ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–µ–π"
    )
except Exception as e:
    # The message only guesses at the cause (plotly missing); attach the
    # traceback so the real failure is visible in the log.
    logger.warning(
        f"\n–û—à–∏–±–∫–∞ —Å–æ–∑–¥–∞–Ω–∏—è –≥—Ä–∞—Ñ–∏–∫–∞ (–≤–æ–∑–º–æ–∂–Ω–æ, plotly –Ω–µ —É—Å—Ç–∞–Ω–æ–≤–ª–µ–Ω): {e}",
        exc_info=True,
    )
else:
    logger.info("\n–ì—Ä–∞—Ñ–∏–∫ —Å—Ä–∞–≤–Ω–µ–Ω–∏—è —Å–æ–∑–¥–∞–Ω —É—Å–ø–µ—à–Ω–æ!")
    # fig.show()  # uncomment to display the figure

logger.info("\n=== –í–´–í–û–î–´ ===")
# Read the winner by column name instead of by position, so the lookup does
# not depend on "model" being the first column of the ranking frame.
# Assumes create_model_ranking returns rows sorted best-first — TODO confirm.
best_model = ranking.row(0, named=True)["model"]
best_metrics = all_results[best_model]
logger.info(f"–õ—É—á—à–∞—è –º–æ–¥–µ–ª—å: {best_model}")
logger.info(f"   MAE: {best_metrics['mae']:.4f}")
logger.info(f"   RMSE: {best_metrics['rmse']:.4f}")
logger.info(f"   R¬≤: {best_metrics['r2']:.4f}")
logger.info(f"   –í—Ä–µ–º—è –æ–±—É—á–µ–Ω–∏—è: {best_metrics['training_time']:.2f}s")

# Closing status messages (constant text, so no f-string prefix is needed).
logger.info("\n–í—Å–µ –º–æ–¥—É–ª–∏ –∏–∑ src/ —Ä–∞–±–æ—Ç–∞—é—Ç –∫–æ—Ä—Ä–µ–∫—Ç–Ω–æ!")
logger.info("–î–∞–Ω–Ω—ã–µ —É—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω—ã –∏ –º–æ–¥–µ–ª–∏ –æ–±—É—á–µ–Ω—ã!")
logger.info("Task 3 –≤—ã–ø–æ–ª–Ω–µ–Ω - —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–Ω–µ—Å–µ–Ω—ã –≤ –º–æ–¥—É–ª–∏ src/")


In [None]:
# Closing summary for Task 3: constant log lines only, no computation.

# Banner.
logger.info("\n" + "=" * 80)
logger.info("–ú–ò–ù–ò-–¢–ê–°–ö 3 –ó–ê–í–ï–†–®–ï–ù –£–°–ü–ï–®–ù–û!")
logger.info("=" * 80)

# Checklist of stages.
# NOTE(review): the checklist reports cross-validation as done, but no call to
# run_cross_validation is visible in this part of the file — confirm.
logger.info("\n–í—ã–ø–æ–ª–Ω–µ–Ω—ã –≤—Å–µ —ç—Ç–∞–ø—ã:")
logger.info("–ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ –¥–∞–Ω–Ω—ã—Ö —Å –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏–µ–π")
logger.info("–û–±—É—á–µ–Ω–∏–µ 2 –∫–ª–∞—Å—Å–∏—á–µ—Å–∫–∏—Ö –º–æ–¥–µ–ª–µ–π (Linear Regression, ElasticNet)")
logger.info("–û–±—É—á–µ–Ω–∏–µ 2 –Ω–µ–π—Ä–æ—Å–µ—Ç–µ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (MLP, CNN)")
logger.info("–ö—Ä–æ—Å—Å-–≤–∞–ª–∏–¥–∞—Ü–∏—è –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —Å—Ç–∞–±–∏–ª—å–Ω–æ—Å—Ç–∏")
logger.info("–°—Ä–∞–≤–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ —Å –∏–Ω—Ç–µ—Ä–∞–∫—Ç–∏–≤–Ω—ã–º–∏ –≤–∏–∑—É–∞–ª–∏–∑–∞—Ü–∏—è–º–∏")
logger.info("–ê–Ω–∞–ª–∏–∑ –∏ –≤—ã–≤–æ–¥—ã")

# Technology list.
logger.info("\n–ò—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–Ω—ã–µ —Ç–µ—Ö–Ω–æ–ª–æ–≥–∏–∏:")
logger.info("- Polars –¥–ª—è –æ–±—Ä–∞–±–æ—Ç–∫–∏ –¥–∞–Ω–Ω—ã—Ö")
logger.info("- polars-ds –¥–ª—è –º–∞—à–∏–Ω–Ω–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è")
logger.info("- PyTorch –¥–ª—è –Ω–µ–π—Ä–æ–Ω–Ω—ã—Ö —Å–µ—Ç–µ–π")
logger.info("- Plotly –¥–ª—è –≤–∏–∑—É–∞–ª–∏–∑–∞—Ü–∏–∏")
logger.info("- Google Style docstrings")
logger.info("- Type hints")

# Final remarks and closing banner.
logger.info("\n–í—Å–µ —Ñ—É–Ω–∫—Ü–∏–∏ —É—Å–ø–µ—à–Ω–æ –≤—ã–Ω–µ—Å–µ–Ω—ã –≤ –º–æ–¥—É–ª—å–Ω—É—é –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É –≤ –ø–∞–ø–∫—É src/")
logger.info("–ö–æ–¥ –æ—Ä–≥–∞–Ω–∏–∑–æ–≤–∞–Ω —Å–æ–≥–ª–∞—Å–Ω–æ –ø—Ä–∏–Ω—Ü–∏–ø–∞–º —á–∏—Å—Ç–æ–≥–æ –∫–æ–¥–∞ –∏ –ø–æ–≤—Ç–æ—Ä–Ω–æ–≥–æ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è")
logger.info("=" * 80)
