In [1]:
import warnings
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from core.tools.create_submission import create_submission

warnings.filterwarnings("ignore")

In [2]:
def load_and_prepare_data():
    df = pd.read_csv("frames_errors.csv", header=None)
    df.columns = [
        "block_id", "frame_idx", "E_mu_Z", "E_mu_phys_est", "E_mu_X", "E_nu1_X", "E_nu2_X", 
        "E_nu1_Z", "E_nu2_Z", "N_mu_X", "M_mu_XX", "M_mu_XZ", "M_mu_X", "N_mu_Z", "M_mu_ZZ", 
        "M_mu_Z", "N_nu1_X", "M_nu1_XX", "M_nu1_XZ", "M_nu1_X", "N_nu1_Z", "M_nu1_ZZ", 
        "M_nu1_Z", "N_nu2_X", "M_nu2_XX", "M_nu2_XZ", "M_nu2_X", "N_nu2_Z", "M_nu2_ZZ", 
        "M_nu2_Z", "nTot", "bayesImVoltage", "opticalPower", "polarizerVoltages[0]", 
        "polarizerVoltages[1]", "polarizerVoltages[2]", "polarizerVoltages[3]", "temp_1", 
        "biasVoltage_1", "temp_2", "biasVoltage_2", "synErr", "N_EC_rounds", "maintenance_flag", 
        "estimator_name", "f_EC", "E_mu_Z_est", "R", "s", "p"
    ]
    
    df_base = df.drop(["E_mu_phys_est", "f_EC"], axis=1)
    print(f"Количество пропусков: {df.isna().sum().sum()}")
    
    return df_base

def create_features(df):
    """Создание расширенных признаков"""
    df_featured = df.copy()
    
    # Основные QBER признаки
    df_featured['E_mu_avg'] = (df_featured['E_mu_Z'] + df_featured['E_mu_X']) / 2
    df_featured['E_mu_diff'] = df_featured['E_mu_Z'] - df_featured['E_mu_X']
    
    # Статистики по состояниям
    df_featured['total_sent'] = df_featured[['N_mu_X', 'N_mu_Z']].sum(axis=1)
    df_featured['mu_ratio'] = (df_featured['N_mu_X'] + df_featured['N_mu_Z']) / (df_featured['total_sent'] + 1e-8)
    
    # Физические параметры
    df_featured['temp_avg'] = (df_featured['temp_1'] + df_featured['temp_2']) / 2
    df_featured['bias_avg'] = (df_featured['biasVoltage_1'] + df_featured['biasVoltage_2']) / 2
    
    return df_featured

def prepare_time_series_data(df, target_column='E_mu_Z', sequence_length=160, horizon=8):
    """Подготовка данных для временных рядов"""
    
    # Переименование
    df = df.rename(columns={"block_id": "id", "frame_idx": "date", target_column: "value"})
    
    # Выбор признаков
    feature_columns = [
        'value', 'E_mu_X', 'E_mu_avg', 'E_mu_diff', 
        'total_sent', 'mu_ratio', 'temp_avg', 'bias_avg', 'opticalPower'
    ]
    existing_features = [col for col in feature_columns if col in df.columns]
    
    print(f"Используется {len(existing_features)} признаков: {existing_features}")
    
    # Создание датафрейма с выбранными признаками
    df_for_ts = df[['id', 'date'] + existing_features].dropna(subset=['value'], how='any')
    
    # Обработка временных рядов
    df_for_ts = df_for_ts.set_index(['id', 'date']).unstack().ffill().stack().reset_index()
    df_for_ts = df_for_ts.groupby('id').filter(lambda x: len(x) == 400)
    
    print(f"Оставшиеся сегменты: {df_for_ts['id'].nunique()}")
    
    return df_for_ts, existing_features

In [3]:
class TimeSeriesDataset(Dataset):
    """Кастомный Dataset для временных рядов"""
    
    def __init__(self, data, sequence_length=160, horizon=8, target_col='value', scale=True):
        self.data = data
        self.sequence_length = sequence_length
        self.horizon = horizon
        self.target_col = target_col
        self.scale = scale
        
        self.groups = []
        self.scalers = {}
        
        for device_id in data['id'].unique():
            device_data = data[data['id'] == device_id].sort_values('date')
            
            feature_cols = [col for col in device_data.columns if col not in ['id', 'date', target_col]]
            features = device_data[feature_cols].values
            target = device_data[target_col].values
            
            if scale:
                feature_scaler = StandardScaler()
                target_scaler = StandardScaler()
                
                features_scaled = feature_scaler.fit_transform(features)
                target_scaled = target_scaler.fit_transform(target.reshape(-1, 1)).flatten()
                
                self.scalers[device_id] = (feature_scaler, target_scaler)
            else:
                features_scaled = features
                target_scaled = target
                self.scalers[device_id] = (None, None)
            
            sequences = []
            targets = []
            
            for i in range(len(device_data) - sequence_length - horizon + 1):
                seq_features = features_scaled[i:i + sequence_length]
                seq_target = target_scaled[i + sequence_length:i + sequence_length + horizon]
                
                sequences.append(seq_features)
                targets.append(seq_target)
            
            if sequences:
                self.groups.append({
                    'device_id': device_id,
                    'sequences': np.array(sequences, dtype=np.float32),
                    'targets': np.array(targets, dtype=np.float32)
                })

        self.all_sequences = np.concatenate([group['sequences'] for group in self.groups])
        self.all_targets = np.concatenate([group['targets'] for group in self.groups])
        
        print(f"Всего последовательностей: {len(self.all_sequences)}")
        print(f"Форма sequences: {self.all_sequences.shape}")
        print(f"Форма targets: {self.all_targets.shape}")
    
    def __len__(self):
        return len(self.all_sequences)
    
    def __getitem__(self, idx):
        sequence = self.all_sequences[idx] 
        target = self.all_targets[idx]      
        
        sequence_tensor = torch.FloatTensor(sequence)
        target_tensor = torch.FloatTensor(target)
        
        return sequence_tensor, target_tensor


In [None]:
from core.tools.metrics import *

def train_model(model, train_loader, val_loader, device, epochs=10, lr=1e-3):
    """Обучение модели с метриками BER, SNR, SE и Decoding Complexity"""
    
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.96, nesterov=True)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.5)
    
    train_losses = []
    val_losses = []
    
    # Метрики
    train_ber_values = []
    val_ber_values = []
    train_snr_values = []
    val_snr_values = []
    train_se_values = []
    val_se_values = []
    
    best_val_loss = float('inf')
    patience = 5
    patience_counter = 0
    
    sample_batch = next(iter(train_loader))
    sequence_length = sample_batch[0].shape[1]
    decoding_complexity = calculate_decoding_complexity(model, sequence_length)
    
    print(f"Decoding Complexity Metrics:")
    print(f"  Total Parameters: {decoding_complexity['total_parameters']:,}")
    print(f"  Computational Complexity: {decoding_complexity['computational_complexity']:,}")
    print(f"  Complexity Score: {decoding_complexity['complexity_score']:,}")
    
    for epoch in range(epochs):
        # Обучение
        model.train()
        train_loss = 0.0
        train_ber = 0.0
        train_snr = 0.0
        train_se = 0.0
        
        for sequences, targets in train_loader:
            sequences = sequences.to(device)
            targets = targets.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(sequences)
            predictions = outputs[:, :, 0]
            
            loss = criterion(predictions, targets)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
            with torch.no_grad():
                ber = calculate_ber(predictions, targets)
                snr = calculate_snr(predictions, targets)
                se = calculate_spectral_efficiency(predictions, targets)
                
                train_ber += ber
                train_snr += snr
                train_se += se
        
        model.eval()
        val_loss = 0.0
        val_ber = 0.0
        val_snr = 0.0
        val_se = 0.0
        
        with torch.no_grad():
            for sequences, targets in val_loader:
                sequences = sequences.to(device)
                targets = targets.to(device)
                
                outputs = model(sequences)
                predictions = outputs[:, :, 0]
                
                loss = criterion(predictions, targets)
                val_loss += loss.item()
                
                ber = calculate_ber(predictions, targets)
                snr = calculate_snr(predictions, targets)
                se = calculate_spectral_efficiency(predictions, targets)
                
                val_ber += ber
                val_snr += snr
                val_se += se
        
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        train_ber /= len(train_loader)
        val_ber /= len(val_loader)
        train_snr /= len(train_loader)
        val_snr /= len(val_loader)
        train_se /= len(train_loader)
        val_se /= len(val_loader)
        
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_ber_values.append(train_ber)
        val_ber_values.append(val_ber)
        train_snr_values.append(train_snr)
        val_snr_values.append(val_snr)
        train_se_values.append(train_se)
        val_se_values.append(val_se)
        
        scheduler.step(val_loss)
        
        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
        
        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'  Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}')
        print(f'  Train BER: {train_ber:.6f}, Val BER: {val_ber:.6f}')
        print(f'  Train SNR: {train_snr:.4f} dB, Val SNR: {val_snr:.4f} dB')
        print(f'  Train SE: {train_se:.6f}, Val SE: {val_se:.6f}')
        print(f'  LR: {optimizer.param_groups[0]["lr"]:.2e}')
        
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch+1}')
            break
    
    model.load_state_dict(torch.load('best_model.pth'))
    
    metrics = {
        'train_ber': train_ber_values,
        'val_ber': val_ber_values,
        'train_snr': train_snr_values,
        'val_snr': val_snr_values,
        'train_se': train_se_values,
        'val_se': val_se_values,
        'decoding_complexity': decoding_complexity
    }
    
    return model, train_losses, val_losses

def predict(model, test_loader, device):
    """Прогнозирование на тестовых данных"""
    model.eval()
    predictions = []
    
    with torch.no_grad():
        for sequences, targets in test_loader:
            sequences = sequences.to(device)
            
            outputs = model(sequences)
            preds = outputs[:, :, 0].cpu().numpy()
            
            predictions.extend(preds)
    
    return np.array(predictions)


In [5]:
from model import DLinear_NN

SEQUENCE_LENGTH = 160
HORIZON = 8
BATCH_SIZE = 1280
EPOCHS = 55
LEARNING_RATE = 1e-3

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

df = load_and_prepare_data()
df = create_features(df)
df_for_ts, feature_columns = prepare_time_series_data(df)

device_ids = df_for_ts['id'].unique()
train_devices = device_ids[:int(0.7 * len(device_ids))]
val_devices = device_ids[int(0.7 * len(device_ids)):int(0.85 * len(device_ids))]
test_devices = device_ids[int(0.85 * len(device_ids)):]

train_data = df_for_ts[df_for_ts['id'].isin(train_devices)]
val_data = df_for_ts[df_for_ts['id'].isin(val_devices)]
test_data = df_for_ts[df_for_ts['id'].isin(test_devices)]

train_dataset = TimeSeriesDataset(train_data, SEQUENCE_LENGTH, HORIZON)
val_dataset = TimeSeriesDataset(val_data, SEQUENCE_LENGTH, HORIZON)
test_dataset = TimeSeriesDataset(test_data, SEQUENCE_LENGTH, HORIZON)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Модель
num_features = len(feature_columns) - 1
model = DLinear_NN(
    pred_len=8,        
    seq_len=160,       
    num_features=num_features,
).to(device)

print(f"Количество параметров: {sum(p.numel() for p in model.parameters())}")

model, train_losses, val_losses = train_model(
    model, train_loader, val_loader, device, EPOCHS, LEARNING_RATE
)

print("3. Прогнозирование...")
test_predictions = predict(model, test_loader, device)

test_predictions_original = []
for i, device_id in enumerate(test_devices):
    if device_id in test_dataset.scalers:
        _, target_scaler = test_dataset.scalers[device_id]
        if target_scaler:
            device_preds = test_predictions[i * (400 - SEQUENCE_LENGTH - HORIZON + 1): 
                                            (i + 1) * (400 - SEQUENCE_LENGTH - HORIZON + 1)]
            last_preds = device_preds[:, -1].reshape(-1, 1)
            original_scale = target_scaler.inverse_transform(last_preds).flatten()
            test_predictions_original.extend(original_scale)

print("4. Создание submission...")
create_submission(test_predictions_original)

print("Готово!")

Количество пропусков: 579
Используется 9 признаков: ['value', 'E_mu_X', 'E_mu_avg', 'E_mu_diff', 'total_sent', 'mu_ratio', 'temp_avg', 'bias_avg', 'opticalPower']
Оставшиеся сегменты: 815
Всего последовательностей: 132810
Форма sequences: (132810, 160, 8)
Форма targets: (132810, 8)
Всего последовательностей: 28426
Форма sequences: (28426, 160, 8)
Форма targets: (28426, 8)
Всего последовательностей: 28659
Форма sequences: (28659, 160, 8)
Форма targets: (28659, 8)
Количество параметров: 22256
Decoding Complexity Metrics:
  Total Parameters: 22,256
  Computational Complexity: 3,560,960
  Complexity Score: 3,583,216
Epoch 1/55:
  Train Loss: 0.995537, Val Loss: 0.903030
  Train BER: 0.273973, Val BER: 0.266751
  Train SNR: 0.0116 dB, Val SNR: 0.3899 dB
  Train SE: 0.081288, Val SE: 0.081102
  LR: 1.00e-03
Epoch 2/55:
  Train Loss: 0.916952, Val Loss: 0.871749
  Train BER: 0.265671, Val BER: 0.265053
  Train SNR: 0.3517 dB, Val SNR: 0.5377 dB
  Train SE: 0.084599, Val SE: 0.084523
  LR: 1.0

ValueError: too many values to unpack (expected 3)