In [None]:
pip install joblib

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

# --- Configuration ----------------------------------------------------------
INPUT_STEPS = 72   # number of consecutive rows fed to the model as history
OUTPUT_STEPS = 72  # number of consecutive rows the model has to forecast
BATCH_SIZE = 16

LR = 1e-4
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed the RNGs so that weight initialisation and DataLoader shuffling repeat
# between runs (GPU kernels may still introduce small nondeterminism).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Columns that are modelled (both as inputs and as forecast targets).
features = ['Температура, °С', 'Давление, мм рт. ст.', 'Влажность, %', 'Скорость ветра, м/с', 'Направление ветра, °']

# --- Load and clean the training data ----------------------------------------
df = pd.read_csv('/kaggle/input/u-novus/data/all.csv')
df['Период'] = pd.to_datetime(df['Период'], format='%Y-%m-%d %H:%M:%S')

# Drop the unused pollutant columns BEFORE removing incomplete rows: otherwise
# a row with a missing pollutant reading is thrown away even though its
# weather features (the only values used below) are complete.
df = df.drop(columns=['NO2', 'O3', 'H2S', 'CO', 'SO2'])
df = df.sort_values('Период').dropna(subset=['Период', 'Пост мониторинга'] + features)
df['Пост мониторинга'] = LabelEncoder().fit_transform(df['Пост мониторинга'])

# Scale every feature into [0, 1].
# NOTE(review): the scaler is fitted on the whole frame, including the tail
# rows that later cells slice off for validation — consider fitting on the
# training rows only.
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

In [None]:
# Hold out the tail of the frame: the last OUTPUT_STEPS + test_gap rows are
# excluded from training.
test_gap = 72
train_data = df.iloc[:-OUTPUT_STEPS - test_gap]

# Offset (relative to the end of the frame) where the test window starts.
test_input_start = -OUTPUT_STEPS - test_gap + INPUT_STEPS

# Slice with a non-negative position: with the default settings the stop
# offset `test_input_start + OUTPUT_STEPS` equals 0, and `df.iloc[-72:0]`
# selects nothing, so the original expression always produced an empty frame.
test_start_pos = test_input_start if test_input_start >= 0 else max(len(df) + test_input_start, 0)
test_data = df.iloc[test_start_pos:test_start_pos + OUTPUT_STEPS]

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over the rows of a DataFrame.

    Sample ``idx`` is the pair ``(x, y)`` where ``x`` holds ``input_steps``
    consecutive rows starting at ``idx`` and ``y`` holds the ``output_steps``
    rows that immediately follow. Only the ``features`` columns are used and
    they are stored as ``float32``.

    Args:
        data: DataFrame containing at least the ``features`` columns.
        input_steps: number of rows in the input window.
        output_steps: number of rows in the target window.
        features: list of column names to extract.
    """

    def __init__(self, data, input_steps, output_steps, features):
        self.data = data[features].values.astype(np.float32)
        self.input_steps = input_steps
        self.output_steps = output_steps

    def __len__(self):
        """Return the number of complete windows (0 when the data is too short)."""
        # Clamp at zero: a negative value would make ``len()`` raise ValueError.
        return max(0, len(self.data) - self.input_steps - self.output_steps + 1)

    def __getitem__(self, idx):
        """Return ``(x, y)`` tensors for window ``idx`` (negative indices allowed).

        Raises:
            IndexError: if ``idx`` does not address a complete window. Without
                this, out-of-range indices returned truncated/empty tensors
                and plain iteration over the dataset never terminated.
        """
        n_windows = len(self)
        position = idx + n_windows if idx < 0 else idx
        if not 0 <= position < n_windows:
            raise IndexError(f"window index {idx} out of range for {n_windows} windows")

        split = position + self.input_steps
        x = self.data[position:split]
        y = self.data[split:split + self.output_steps]
        return torch.tensor(x), torch.tensor(y)

# Wrap the training rows in sliding windows and serve them in shuffled batches.
train_dataset = TimeSeriesDataset(
    data=train_data,
    input_steps=INPUT_STEPS,
    output_steps=OUTPUT_STEPS,
    features=features,
)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)


In [None]:
# Disabled experiment: the code below is wrapped in a bare string literal, so
# it is never executed (the cell merely evaluates to that string).
# It holds an earlier model variant whose decoder emits one step per sequence
# position; training rolls the forecast out autoregressively for OUTPUT_STEPS
# steps, feeding each predicted step back into the input window, and optimises
# MSELoss for 20 epochs while printing the per-feature R2 on the training data.
'''
class TransformerModel(nn.Module):
    def __init__(self, input_dim, model_dim=64, num_heads=4, num_layers=2, dropout=0.1):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, model_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.decoder = nn.Sequential(
            nn.Linear(model_dim, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim)
        )

    def forward(self, x):
        x = self.input_proj(x)
        x = self.encoder(x)
        out = self.decoder(x)
        return out

model = TransformerModel(len(features)).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.MSELoss()
EPOCHS = 20
for epoch in range(EPOCHS):
    model.train()
    epoch_losses = []
    preds_all = []
    targets_all = []

    for xb, yb in train_loader:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        pred_seq = []
        input_seq = xb.clone()
        for _ in range(OUTPUT_STEPS):
            output = model(input_seq)
            next_step = output[:, -1]
            pred_seq.append(next_step.unsqueeze(1))
            input_seq = torch.cat((input_seq[:, 1:], next_step.unsqueeze(1)), dim=1)
        pred = torch.cat(pred_seq, dim=1)

        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_losses.append(loss.item())
        preds_all.append(pred.detach().cpu().numpy())
        targets_all.append(yb.detach().cpu().numpy())

    preds_all = np.concatenate(preds_all, axis=0).reshape(-1, len(features))
    targets_all = np.concatenate(targets_all, axis=0).reshape(-1, len(features))
    r2_scores = [r2_score(targets_all[:, i], preds_all[:, i]) for i in range(len(features))]
    r2_str = ', '.join([f'R2_{features[i]}: {r2_scores[i]:.3f}' for i in range(len(features))])
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {np.mean(epoch_losses):.4f}, {r2_str}")'''

In [None]:
# Disabled evaluation: wrapped in a bare string literal, so it is never
# executed. It reads test.csv, rolls a one-step-per-call model forward for
# OUTPUT_STEPS steps starting from the last INPUT_STEPS rows of `df`,
# inverse-transforms the forecast with `scaler` and plots it against the
# values taken from test.csv, one subplot per feature.
'''df_test = pd.read_csv('/kaggle/input/u-novus/data/test.csv')
df_test['Период'] = pd.to_datetime(df_test['Период'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
df_test = df_test.dropna(subset=['Период'])
df_test['Пост мониторинга'] = LabelEncoder().fit_transform(df_test['Пост мониторинга'])
df_test = df_test.drop(columns=['NO2', 'O3', 'H2S', 'CO', 'SO2'])

TEST_X = df_test[['Пост мониторинга', 'Период']]
TEST_Y = df_test[features].values

X_input = df[features].values[-INPUT_STEPS:].astype(np.float32)
X_input = torch.tensor(X_input).unsqueeze(0).to(DEVICE)

model.eval()
predictions = []
with torch.no_grad():
    for _ in range(OUTPUT_STEPS):
        out_seq = model(X_input)
        next_step = out_seq[:, -1, :]
        predictions.append(next_step.cpu().numpy())
        X_input = torch.cat((X_input[:, 1:], next_step.unsqueeze(1)), dim=1)

predicted = np.stack(predictions, axis=1).squeeze(0)
predicted = scaler.inverse_transform(predicted)
true = TEST_Y

plt.figure(figsize=(18, 12))
time_index = df_test['Период'].values
for i, feature in enumerate(features):
    plt.subplot(3, 2, i+1)
    plt.plot(time_index, true[:, i], label='Истинные')
    plt.plot(time_index, predicted[:, i], label='Предсказанные')
    plt.title(feature)
    plt.xlabel('Время')
    plt.ylabel(feature)
    plt.legend()

plt.tight_layout()
plt.show()'''

In [None]:
# Disabled: bare string literal, never executed. If enabled it would write the
# current model's state dict to /kaggle/working/m1.pth.
'''torch.save(model.state_dict(), "/kaggle/working/m1.pth")'''

In [None]:
class TransformerModel(nn.Module):
    """Encoder-only Transformer that forecasts the whole horizon in one pass.

    The input sequence is projected to ``model_dim``, passed through a stack
    of ``nn.TransformerEncoderLayer`` blocks, and the encoding of the LAST
    time step is mapped by an MLP to ``output_steps * input_dim`` values,
    which are reshaped into ``(batch, output_steps, input_dim)``.

    Args:
        input_dim: number of features per time step (also the number of
            features predicted per forecast step).
        model_dim: width of the encoder (``d_model``).
        num_heads: attention heads per encoder layer.
        num_layers: number of encoder layers.
        dropout: dropout probability inside the encoder layers.
        output_steps: forecast horizon. ``None`` (default) falls back to the
            notebook-level ``OUTPUT_STEPS`` constant, which was previously the
            only (implicit) way to set it.

    NOTE(review): no positional encoding is added before the encoder.
    """

    def __init__(self, input_dim, model_dim=128, num_heads=4, num_layers=4, dropout=0.2,
                 output_steps=None):
        super().__init__()
        if output_steps is None:
            # Backward-compatible default: read the global notebook constant.
            output_steps = OUTPUT_STEPS
        self.input_dim = input_dim
        self.output_steps = output_steps

        # Sub-module names and creation order are unchanged, so existing
        # state dicts still load.
        self.input_proj = nn.Linear(input_dim, model_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.decoder = nn.Sequential(
            nn.Linear(model_dim, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim * output_steps)
        )

    def forward(self, x):
        """Map ``(batch, seq_len, input_dim)`` to ``(batch, output_steps, input_dim)``."""
        x = self.input_proj(x)   # (batch, seq_len, model_dim)
        x = self.encoder(x)      # (batch, seq_len, model_dim)
        x = x[:, -1, :]          # keep only the last time step: (batch, model_dim)
        x = self.decoder(x)      # (batch, output_steps * input_dim)
        return x.view(-1, self.output_steps, self.input_dim)

# Build the direct multi-step model and its optimisation objects.
model = TransformerModel(len(features)).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.L1Loss()

EPOCHS = 20
for epoch in range(EPOCHS):
    model.train()
    # Per-epoch accumulators: batch losses plus every prediction/target,
    # kept so that R2 can be computed over the whole epoch.
    epoch_losses, preds_all, targets_all = [], [], []

    for xb, yb in train_loader:
        xb = xb.to(DEVICE)
        yb = yb.to(DEVICE)

        pred = model(xb)
        loss = criterion(pred, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_losses.append(loss.item())
        preds_all.append(pred.detach().cpu().numpy())
        targets_all.append(yb.detach().cpu().numpy())

    # Flatten (batch, step) so that each column is one feature.
    preds_all = np.concatenate(preds_all, axis=0).reshape(-1, len(features))
    targets_all = np.concatenate(targets_all, axis=0).reshape(-1, len(features))

    r2_scores = [
        r2_score(target_col, pred_col)
        for target_col, pred_col in zip(targets_all.T, preds_all.T)
    ]
    r2_str = ', '.join(f'R2_{name}: {score:.3f}' for name, score in zip(features, r2_scores))
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {np.mean(epoch_losses):.4f}, {r2_str}")

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

INPUT_STEPS = 72
OUTPUT_STEPS = 72
test_gap = 72
VAL_TOTAL = INPUT_STEPS + OUTPUT_STEPS  # 144

# Validation window: VAL_TOTAL rows that end `test_gap` rows before the end
# of the frame (offsets are relative to the end of `df`).
val_start = - (VAL_TOTAL + test_gap)
val_end = val_start + VAL_TOTAL
# A stop offset of 0 would select nothing, so map it to "until the end".
val_data = df.iloc[val_start:val_end if val_end != 0 else None]

assert len(val_data) == VAL_TOTAL, f"val_data слишком короткий: {len(val_data)} < {VAL_TOTAL}"

# First INPUT_STEPS rows are the model input, the following OUTPUT_STEPS rows
# are the ground truth for the forecast.
val_seq = val_data[features].values.astype(np.float32)
X_val = torch.tensor(val_seq[:INPUT_STEPS]).unsqueeze(0).to(DEVICE)
y_true = val_seq[INPUT_STEPS:INPUT_STEPS + OUTPUT_STEPS]

model.eval()
with torch.no_grad():
    y_pred = model(X_val).squeeze(0).cpu().numpy()

# Report metrics in the original units when the fitted scaler is available.
if 'scaler' in globals():
    y_pred = scaler.inverse_transform(y_pred)
    y_true = scaler.inverse_transform(y_true)

print("\n------- Метрики по каждому признаку (валидация) -------")
rmse_per_feature = []
for i, feat in enumerate(features):
    mae = mean_absolute_error(y_true[:, i], y_pred[:, i])
    # `squared=False` is deprecated and removed in recent scikit-learn
    # releases; taking the square root of the MSE works on every version.
    rmse = np.sqrt(mean_squared_error(y_true[:, i], y_pred[:, i]))
    r2 = r2_score(y_true[:, i], y_pred[:, i])
    rmse_per_feature.append(rmse)
    print(f"{feat:<30}: MAE = {mae:.3f}, RMSE = {rmse:.3f}, R² = {r2:.3f}")


mae_total = mean_absolute_error(y_true, y_pred)
# Mean of the per-feature RMSE values — the same aggregate that
# `mean_squared_error(..., squared=False)` returned for multi-output input.
rmse_total = np.mean(rmse_per_feature)
r2_total = r2_score(y_true, y_pred)
print("\n------- Общие метрики (валидация) -------")
print(f"MAE = {mae_total:.3f}, RMSE = {rmse_total:.3f}, R² = {r2_total:.3f}")


In [None]:
# Load the hold-out file: parse timestamps (unparseable ones become NaT and
# are dropped), encode the station column and remove the pollutant columns.
df_test = (
    pd.read_csv('/kaggle/input/u-novus/data/test.csv')
    .assign(**{
        'Период': lambda frame: pd.to_datetime(
            frame['Период'], format='%Y-%m-%d %H:%M:%S', errors='coerce'
        )
    })
    .dropna(subset=['Период'])
    .assign(**{
        'Пост мониторинга': lambda frame: LabelEncoder().fit_transform(frame['Пост мониторинга'])
    })
    .drop(columns=['NO2', 'O3', 'H2S', 'CO', 'SO2'])
)

TEST_X = df_test.loc[:, ['Пост мониторинга', 'Период']]
TEST_Y = df_test[features].to_numpy()

# The model input is the last INPUT_STEPS (already scaled) rows of `df`,
# with a leading batch dimension of 1.
X_input = (
    torch.tensor(df[features].values[-INPUT_STEPS:].astype(np.float32))
    .unsqueeze(0)
    .to(DEVICE)
)

model.eval()
with torch.no_grad():
    y_pred_scaled = model(X_input).squeeze(0).cpu().numpy()

# Bring the forecast back to the original units before comparing.
predicted = scaler.inverse_transform(y_pred_scaled)
true = TEST_Y

# One subplot per feature on a 3x2 grid: observed vs. forecast values.
fig = plt.figure(figsize=(18, 12))
time_index = df_test['Период'].values
for i, feature in enumerate(features):
    ax = fig.add_subplot(3, 2, i + 1)
    ax.plot(time_index, true[:, i], label='Истинные')
    ax.plot(time_index, predicted[:, i], label='Предсказанные')
    ax.set_title(feature)
    ax.set_xlabel('Время')
    ax.set_ylabel(feature)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Per-feature quality of the test forecast (values in original units).
r2_scores, mse_scores, rmse_scores = [], [], []

for i, feature in enumerate(features):
    y_true, y_pred = true[:, i], predicted[:, i]

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    r2_scores.append(r2)
    mse_scores.append(mse)
    rmse_scores.append(rmse)

    print(f"{feature}:")
    print(f"  R²:   {r2:.4f}")
    print(f"  MSE:  {mse:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    print()

# Unweighted averages across the features.
print("Средние метрики по всем признакам:")
print(f"  R²:   {np.mean(r2_scores):.4f}")
print(f"  MSE:  {np.mean(mse_scores):.4f}")
print(f"  RMSE: {np.mean(rmse_scores):.4f}")

In [None]:
import joblib

# Persist the fitted scaler and the trained weights; both are needed to run
# inference later.
joblib.dump(value=scaler, filename="/kaggle/working/scaler.pkl")
torch.save(obj=model.state_dict(), f="/kaggle/working/m2.pth")

In [None]:
import torch
import joblib
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
class TransformerModel(nn.Module):
    """Encoder-only Transformer that forecasts the whole horizon in one pass.

    Inference-side copy of the architecture; layer names and sizes must match
    the checkpoint that is loaded into it.

    The input sequence is projected to ``model_dim``, passed through a stack
    of ``nn.TransformerEncoderLayer`` blocks, and the encoding of the LAST
    time step is mapped by an MLP to ``output_steps * input_dim`` values,
    which are reshaped into ``(batch, output_steps, input_dim)``.

    Args:
        input_dim: number of features per time step (also the number of
            features predicted per forecast step).
        model_dim: width of the encoder (``d_model``).
        num_heads: attention heads per encoder layer.
        num_layers: number of encoder layers.
        dropout: dropout probability inside the encoder layers.
        output_steps: forecast horizon. ``None`` (default) falls back to the
            notebook-level ``OUTPUT_STEPS`` constant, which was previously the
            only (implicit) way to set it.
    """

    def __init__(self, input_dim, model_dim=128, num_heads=4, num_layers=4, dropout=0.2,
                 output_steps=None):
        super().__init__()
        if output_steps is None:
            # Backward-compatible default: read the global notebook constant.
            output_steps = OUTPUT_STEPS
        self.input_dim = input_dim
        self.output_steps = output_steps

        # Sub-module names are unchanged so saved state dicts still load.
        self.input_proj = nn.Linear(input_dim, model_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.decoder = nn.Sequential(
            nn.Linear(model_dim, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim * output_steps)
        )

    def forward(self, x):
        """Map ``(batch, seq_len, input_dim)`` to ``(batch, output_steps, input_dim)``."""
        x = self.input_proj(x)   # (batch, seq_len, model_dim)
        x = self.encoder(x)      # (batch, seq_len, model_dim)
        x = x[:, -1, :]          # keep only the last time step: (batch, model_dim)
        x = self.decoder(x)      # (batch, output_steps * input_dim)
        return x.view(-1, self.output_steps, self.input_dim)

In [None]:
# Inference set-up: pick the device, restore the fitted scaler and rebuild
# the network with the trained weights.
DEVICE = 'cpu' if not torch.cuda.is_available() else 'cuda'
features = [
    'Температура, °С',
    'Давление, мм рт. ст.',
    'Влажность, %',
    'Скорость ветра, м/с',
    'Направление ветра, °',
]

scaler = joblib.load("/kaggle/working/scaler.pkl")

model = TransformerModel(input_dim=len(features)).to(DEVICE)
model.load_state_dict(torch.load("/kaggle/working/m2.pth", map_location=DEVICE))
# Switch to evaluation mode (disables dropout); as the last expression of the
# cell this also echoes the module.
model.eval()

In [None]:
new_data = pd.read_csv('/kaggle/input/test-transormer/test/real.csv')  # path to the new file
new_data['Период'] = pd.to_datetime(new_data['Период'])

# Drop the unused pollutant columns BEFORE removing incomplete rows, so that
# recent rows are not discarded only because a pollutant reading is missing
# (those columns are not model inputs).
new_data = new_data.drop(columns=['NO2', 'O3', 'H2S', 'CO', 'SO2'])
new_data = new_data.sort_values('Период').dropna(subset=['Период', 'Пост мониторинга'] + features)
new_data['Пост мониторинга'] = LabelEncoder().fit_transform(new_data['Пост мониторинга'])

# Apply the scaler fitted during training (transform only, no re-fitting).
new_data[features] = scaler.transform(new_data[features])

INPUT_STEPS = 72
# Fail loudly instead of silently feeding the model a shorter window than
# the INPUT_STEPS rows used for every training sample.
if len(new_data) < INPUT_STEPS:
    raise ValueError(f"Недостаточно строк для прогноза: {len(new_data)} < {INPUT_STEPS}")

X_input = new_data[features].values[-INPUT_STEPS:].astype(np.float32)
X_input = torch.tensor(X_input).unsqueeze(0).to(DEVICE)  # [1, 72, 5]

In [None]:
OUTPUT_STEPS = 72

# Single forward pass without gradient tracking, then drop the batch axis.
with torch.no_grad():
    raw_forecast = model(X_input)
y_pred_scaled = raw_forecast.squeeze(0).cpu().numpy()

# Undo the scaling to get values in the original units.
predicted = scaler.inverse_transform(y_pred_scaled)

# Timestamps for the forecast rows: a 20-minute grid that starts one step
# after the last timestamp in `new_data`.
future_periods = pd.date_range(
    start=new_data['Период'].iloc[-1] + pd.Timedelta(minutes=20),
    freq='20min',
    periods=OUTPUT_STEPS,
)

# Forecast table with the timestamp as the first column.
df_pred = pd.DataFrame(data=predicted, columns=features)
df_pred.insert(loc=0, column='Период', value=future_periods)

df_pred.to_csv("/kaggle/working/prediction.csv", index=False)

In [None]:
# Keep only forecast rows whose time of day lies between 08:00 and 10:00
# (both bounds included) and write them to a separate file.
df_pred['Период'] = pd.to_datetime(df_pred['Период'])

time_of_day = df_pred['Период'].dt.time
mask = time_of_day.between(
    pd.to_datetime("08:00").time(),
    pd.to_datetime("10:00").time(),
)
df_filtered = df_pred.loc[mask]

df_filtered.to_csv("/kaggle/working/prediction_08_10.csv", index=False)