In [1]:
import os
import sys
from pathlib import Path


def find_project_root(start: Path, marker: str = "src") -> Path:
    """Return the nearest directory at or above ``start`` that contains ``marker``.

    The notebook imports from the ``src`` package, so the directory holding
    ``src`` is treated as the project root. Searching for it (instead of
    blindly taking the parent of the working directory) keeps this cell
    idempotent: after the ``os.chdir`` below, re-running the cell resolves to
    the same root rather than climbing one level higher each time.

    Args:
        start: Directory where the upward search begins.
        marker: Name of a sub-directory that identifies the project root.

    Returns:
        The first matching directory, or ``start.parent`` when no ancestor
        contains ``marker`` (the behaviour this cell had originally).
    """
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    return start.parent


project_root = find_project_root(Path.cwd())

# Keep exactly one entry for the root, at the front so project modules win.
_root_entry = str(project_root)
sys.path[:] = [entry for entry in sys.path if entry != _root_entry]
sys.path.insert(0, _root_entry)

# Relative paths used later in the notebook (e.g. data/raw/...) resolve against the root.
os.chdir(project_root)


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.utils.config import load_config
from src.data.loader import load_and_filter_dataset

In [6]:
from datasets import load_dataset
from pathlib import Path
import pandas as pd
import zipfile
import io

print("Сваляне на dataset от Hugging Face...")
dataset_name = "pmoe7/SP_500_Stocks_Data-ratios_news_price_10_yrs"
archive_name = "sp500_daily_ratios_20yrs.zip"
output_path = Path("data/raw/sp500_stocks_data.parquet")


def save_raw_dataset(frame, destination):
    """Write ``frame`` to ``destination`` as parquet and report the result.

    Shared by both download strategies below so they create the target
    directory, save, and report in exactly the same way.

    Args:
        frame: pandas DataFrame to store.
        destination: ``pathlib.Path`` of the parquet file; missing parent
            directories are created.
    """
    destination.parent.mkdir(parents=True, exist_ok=True)
    frame.to_parquet(destination, index=False)
    print(f"\nDataset запазен локално в: {destination}")
    print(f"Размер на файла: {destination.stat().st_size / 1024 / 1024:.2f} MB")


try:
    # Primary strategy: let `datasets` fetch and parse the archive.
    print("Опитвам се да заредя само price данните...")
    dataset = load_dataset(
        dataset_name,
        data_files=archive_name,
        # NOTE(review): this requests a fresh download on every run of the
        # cell; drop it once the local cache is known to be healthy.
        download_mode="force_redownload",
    )
    print("Dataset зареден успешно!")
    print(f"Splits: {list(dataset.keys())}")

    # Only the first split is converted.
    split_name = list(dataset.keys())[0]
    df = dataset[split_name].to_pandas()

    print(f"Конвертирано в pandas. Размер: {df.shape}")
    print(f"Колони: {list(df.columns)[:15]}...")

    save_raw_dataset(df, output_path)

except Exception as e:
    # Best-effort fallback: download the raw zip and read a CSV from it directly.
    print(f"Error сваляне с data_files: {e}")
    print("\nОпитвам се алтернативен метод - директно от zip файла...")
    try:
        # Imported here so a missing huggingface_hub is reported by the handler below.
        from huggingface_hub import hf_hub_download

        zip_path = hf_hub_download(
            repo_id=dataset_name,
            filename=archive_name,
            repo_type="dataset",
        )
        print(f"Zip файл свалени в: {zip_path}")

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            csv_files = [f for f in zip_ref.namelist() if f.endswith('.csv')]
            print(f"Намерени CSV файлове: {csv_files[:5]}...")

            if csv_files:
                # Only the first CSV in the archive is loaded.
                first_csv = csv_files[0]
                with zip_ref.open(first_csv) as f:
                    df = pd.read_csv(f)
                print(f"Зареден CSV файл: {first_csv}")
                print(f"Размер: {df.shape}")
                print(f"Колони: {list(df.columns)[:15]}...")

                save_raw_dataset(df, output_path)
            else:
                # Previously this case ended silently with nothing saved.
                print("В zip архива не са намерени CSV файлове - нищо не е запазено.")
    except Exception as e2:
        print(f"Error алтернативен метод: {e2}")
        print("\nМоля, провери интернет връзката и опитай отново.")


In [7]:
import torch
from torch.utils.data import DataLoader
from pathlib import Path
import matplotlib.pyplot as plt
from IPython.display import clear_output
import time

from src.data.pipeline import get_datasets
from src.models.transformer_model import StockTransformer
from src.training.trainer import Trainer
from src.utils.config import load_config
from src.evaluation.visualizations import plot_training_curves

# Load the configuration and echo the settings that matter for this run
config = load_config()
model_cfg = config.model
training_cfg = config.training

print(f" Dataset: {config.data.dataset_name}")
print(f" Features: simplified={config.data.features.simplified}")
print(f" Model: {model_cfg.d_model}d, {model_cfg.n_heads} heads, {model_cfg.n_layers} layers")
print(f"  Training: {training_cfg.num_epochs} epochs, batch_size={training_cfg.batch_size}")

print()

# Build the train/val/test datasets
print(" Зареждане на данни...")
train_dataset, val_dataset, test_dataset, feature_columns = get_datasets(config)

print("\n Данни заредени успешно!")
for split_label, split_dataset in (
    ("Train", train_dataset),
    ("Val", val_dataset),
    ("Test", test_dataset),
):
    print(f"   • {split_label} samples: {len(split_dataset):,}")
print(f"   • Features: {len(feature_columns)}")
print()

# DataLoaders: identical options for both; only the training split is shuffled
shared_loader_options = dict(
    batch_size=training_cfg.batch_size,
    num_workers=0,
    pin_memory=False,
)
train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_options)

# Instantiate the model from the architecture settings in the config
print(" Създаване на модел...")
architecture = {
    option: getattr(model_cfg, option)
    for option in ("d_model", "n_heads", "n_layers", "d_ff", "dropout", "activation")
}
model = StockTransformer(
    input_dim=len(feature_columns),
    prediction_horizon=config.data.prediction_horizon,
    **architecture,
)

total_params = sum(parameter.numel() for parameter in model.parameters())
print(f"   • Total parameters: {total_params:,}")
# Size estimate counts 4 bytes per parameter
print(f"   • Model size: {total_params * 4 / 1024 / 1024:.2f} MB")
print()

# Wire the model and both loaders into the trainer
print("  Създаване на trainer...")
trainer = Trainer(
    model=model,
    config=config,
    train_loader=train_loader,
    val_loader=val_loader,
)
print(" Trainer създаден!")
print()

In [None]:
# Run the training loop, timing it and reporting how it ended
separator = "=" * 60
print(" Training...")
print(separator)
print()

start_time = time.time()

try:
    history = trainer.train()

    # Break the wall-clock duration into whole hours / minutes / seconds
    elapsed_time = time.time() - start_time
    whole_hours, remainder = divmod(elapsed_time, 3600)
    hours = int(whole_hours)
    minutes = int(remainder // 60)
    seconds = int(elapsed_time % 60)

    print()
    print(separator)
    print(" ОБУЧЕНИЕТО ЗАВЪРШИ УСПЕШНО!")
    print(separator)
    print(f"  Общо време: {hours}h {minutes}m {seconds}s")
    print(f" Best validation loss: {history['best_val_loss']:.6f}")
    print(f" Моделът е записан в: {config.paths.models_dir}/best_model.pt")
    print(separator)

except KeyboardInterrupt:
    # Manual interruption: report how long training ran before it was stopped
    elapsed_time = time.time() - start_time
    print(f"\n  Обучението е прекъснато от потребителя след {elapsed_time/60:.1f} минути.")
    print(f" Последният checkpoint е запазен в: {config.paths.models_dir}/best_model.pt")
except Exception as e:
    # Any other failure is printed with its traceback instead of propagating
    import traceback

    print(f"\n Error: {e}")
    traceback.print_exc()

In [None]:
# Visualise the training results (only when a non-empty `history` exists)
if 'history' in locals() and len(history['train_losses']) > 0:
    print("\n Генериране на графики...")

    train_losses = list(history['train_losses'])
    val_losses = list(history['val_losses'])

    results_dir = Path(config.paths.results_dir)
    results_dir.mkdir(parents=True, exist_ok=True)

    # Save the curves to disk through the project helper
    plot_training_curves(
        history["train_losses"],
        history["val_losses"],
        save_path=results_dir / "training_curves.png",
    )

    # Show the curves inline; explicit axes instead of the pyplot state machine
    fig, (ax_progress, ax_epochs) = plt.subplots(1, 2, figsize=(12, 5))

    ax_progress.plot(train_losses, label='Train Loss', linewidth=2)
    ax_progress.plot(val_losses, label='Val Loss', linewidth=2)
    ax_progress.set_xlabel('Epoch')
    ax_progress.set_ylabel('Loss')
    ax_progress.set_title('Training Progress')
    ax_progress.legend()
    ax_progress.grid(True, alpha=0.3)

    # Each series gets its own 1-based epoch axis, so a shorter val series
    # no longer makes the plot call fail on a length mismatch.
    ax_epochs.plot(range(1, len(train_losses) + 1), train_losses,
                   'o-', label='Train Loss', markersize=4)
    ax_epochs.plot(range(1, len(val_losses) + 1), val_losses,
                   's-', label='Val Loss', markersize=4)
    ax_epochs.set_xlabel('Epoch')
    ax_epochs.set_ylabel('Loss')
    ax_epochs.set_title('Loss per Epoch')
    ax_epochs.legend()
    ax_epochs.grid(True, alpha=0.3)

    fig.tight_layout()
    plt.show()

    # Summary statistics
    print("\n Статистики:")
    print(f"   • Общо epochs: {len(train_losses)}")
    print(f"   • Начален train loss: {train_losses[0]:.6f}")
    print(f"   • Финален train loss: {train_losses[-1]:.6f}")
    if val_losses:
        first_val_loss = val_losses[0]
        last_val_loss = val_losses[-1]
        print(f"   • Начален val loss: {first_val_loss:.6f}")
        print(f"   • Финален val loss: {last_val_loss:.6f}")
        if first_val_loss != 0:
            improvement = (first_val_loss - last_val_loss) / first_val_loss * 100
            print(f"   • Подобрение: {improvement:.2f}%")
        else:
            # Relative improvement is undefined for a zero baseline
            print("   • Подобрение: n/a (началният val loss е 0)")
    else:
        print("   • Няма записани val losses.")

else:
    print("  Няма данни за визуализация. Обучението не е завършило успешно.")

In [4]:
# Reload the configuration and echo the data-selection settings
config = load_config()
for caption, attribute in (
    ("Dataset", "dataset_name"),
    ("Tickers", "tickers"),
    ("Start date", "start_date"),
    ("End date", "end_date"),
):
    print(f"{caption}: {getattr(config.data, attribute)}")

In [5]:
# Load the dataset through the project loader, driven by the active config
print("Зареждане на данни...")
df = load_and_filter_dataset(config=config)
row_count = len(df)
print(f"Заредени {row_count} реда")

In [None]:
# Shape of the frame followed by the full list of column names
print(f"Размери: {df.shape}", "\nКолони:", df.columns.tolist(), sep="\n")

In [None]:
# List the distinct tickers when the frame has a 'symbol' column
if 'symbol' in df.columns:
    unique_tickers = df['symbol'].unique()
    print(f"Уникални тикери ({len(unique_tickers)}):")
    print(sorted(unique_tickers))
else:
    unique_tickers = None
    print("Не е намерена колона 'symbol'")

In [None]:
# Report the date span covered by the data; nothing is printed without a 'date' column
date_col = 'date' if 'date' in df.columns else None
if date_col is not None:
    # Convert the column in place so min/max and the subtraction work on datetimes
    df[date_col] = pd.to_datetime(df[date_col])
    first_day = df[date_col].min()
    last_day = df[date_col].max()
    print("Период:")
    print(f"  От: {first_day}")
    print(f"  До: {last_day}")
    print(f"  Дни: {(last_day - first_day).days}")

In [None]:
# Preview the first rows; the trailing expression is rendered as the cell output
preview_size = 10
print("Първи 10 реда:")
df.head(preview_size)

In [None]:
# Descriptive statistics; the trailing expression is rendered as the cell output
print("Базова статистика:")
summary_stats = df.describe()
summary_stats

In [None]:
# Count missing values per column and list only the columns that have any
missing = df.isna().sum()
columns_with_gaps = missing[missing > 0]
if columns_with_gaps.empty:
    print("Няма липсващи стойности")
else:
    print("Липсващи стойности:")
    print(columns_with_gaps)

In [3]:
import matplotlib.pyplot as plt
import numpy as np
def plot_adc_conversion():
    """Plot a sine wave, its sampled-and-quantised version, and the quantisation error.

    Draws a figure with two stacked axes: the top one overlays the densely
    sampled "analog" curve with the quantised samples as a stem plot; the
    bottom one shows the difference between each exact sample and its
    quantised value. The figure is shown with ``plt.show()``; nothing is
    returned.
    """
    # Densely sampled curve standing in for the analog signal
    t = np.linspace(0, 1, 1000)
    analog_signal = 2.5 + 1.5 * np.sin(2 * np.pi * 5 * t)

    # ADC parameters
    resolution = 8
    v_ref = 5.0
    sampling_rate = 50
    max_code = 2**resolution - 1  # largest output code for this resolution

    # Sampling and quantisation: scale to codes, round, then map back to voltage
    sample_times = np.arange(0, 1, 1 / sampling_rate)
    samples = 2.5 + 1.5 * np.sin(2 * np.pi * 5 * sample_times)
    digital_samples = np.round((samples / v_ref) * max_code)
    quantized_voltage = (digital_samples / max_code) * v_ref

    # Visualisation
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
    ax1.plot(t, analog_signal, 'b-', label='Аналогов сигнал', alpha=0.7)
    ax1.stem(sample_times, quantized_voltage, 'r-', markerfmt='ro',
             label='Дискретизирани проби')
    ax1.legend()
    ax1.grid(True)

    quantization_error = samples - quantized_voltage
    ax2.stem(sample_times, quantization_error, 'g-', markerfmt='go')
    ax2.set_title('Квантова грешка')
    ax2.grid(True)

    plt.tight_layout()
    plt.show()
# Render the ADC sampling/quantisation demo figure.
plot_adc_conversion()

In [6]:
import ipywidgets as widgets
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import numpy as np
def calculate_adc_parameters(resolution, v_ref, analog_input):
    """Compute and print the basic figures of an ideal ADC.

    Args:
        resolution: Number of bits of the converter.
        v_ref: Reference voltage (printed with the unit V).
        analog_input: Input voltage to convert (printed with the unit V).

    Returns:
        Tuple ``(steps, lsb, digital_value, quantization_error)`` where
        ``steps`` is ``2 ** resolution``, ``lsb`` is ``v_ref / steps``,
        ``digital_value`` is the output code limited to ``0 .. steps - 1``,
        and ``quantization_error`` is half an LSB.
    """
    steps = 2 ** resolution
    lsb = v_ref / steps
    raw_code = int((analog_input / v_ref) * steps)
    # An N-bit converter has codes 0 .. 2**N - 1; without the clamp an input
    # equal to (or above) v_ref produced 2**N, and a negative input went below 0.
    digital_value = min(max(raw_code, 0), steps - 1)
    quantization_error = lsb / 2
    print(f"=== Резултати за {resolution}-битов АЦП ===")
    print(f"Брой стъпки: {steps}")
    print(f"Разделителна способност (LSB): {lsb} V")
    print(f"Цифрова стойност за {analog_input}V: {digital_value}")
    print(f"Максимална квантова грешка: ±{quantization_error} V")
    return steps, lsb, digital_value, quantization_error
# Interactive controls
resolution_slider = widgets.IntSlider(value=10, min=8, max=16,
                                      description='Битове:')
vref_slider = widgets.FloatSlider(value=3.3, min=1.8, max=5.0,
                                  step=0.1, description='Vref:')
input_slider = widgets.FloatSlider(value=1.65, min=0, max=3.3,
                                   step=0.01, description='Vвход:')

# Callback prints are routed into this widget so every slider move replaces
# the previous results instead of appending to (or bypassing) the cell output.
results_output = widgets.Output()


def on_parameter_change(change):
    """Recompute and re-print the ADC figures for the current slider values.

    Args:
        change: Change notification passed by ``observe``; unused, the
            values are always read from the sliders themselves.
    """
    with results_output:
        results_output.clear_output(wait=True)
        calculate_adc_parameters(
            resolution_slider.value,
            vref_slider.value,
            input_slider.value,
        )


for slider in (resolution_slider, vref_slider, input_slider):
    slider.observe(on_parameter_change, names='value')

display(resolution_slider, vref_slider, input_slider, results_output)

# Initial render from the slider defaults (10 bits, 3.3 V, 1.65 V)
on_parameter_change(None)