In [17]:
# --- Model architecture ---------------------------------------------------
input_size = 288       # number of features fed to the GRU per time step
hidden_size = 64       # width of the GRU hidden state
num_layers = 2         # number of stacked GRU layers
sequence_length = 1    # time steps assumed when sizing the linear head
num_classes = 1        # number of output values produced by the model

# --- Optimisation ---------------------------------------------------------
learning_rate = 0.001  # step size handed to the optimizer
num_epochs = 10        # full passes over the training data
batch_size = 64        # mini-batch size

# --- Checkpointing --------------------------------------------------------
best_loss = 1000       # a loss must be at or below this before a "best" model is saved

In [18]:
import torch
import torch.nn as nn
import numpy as np
import os
import pandas as pd
import dask.dataframe as dd
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F
from IPython.display import clear_output
import time

In [19]:
class MyDataset(Dataset):
    """Map-style dataset that serves rows of a large CSV file chunk by chunk.

    The CSV is never loaded in full. Rows are read in blocks of ``chunk_size``
    and the most recently read block is kept in memory, so consecutive indices
    that fall in the same block are served without touching the disk again.
    Random (shuffled) access still works but will reload a block whenever the
    requested index leaves the cached one.

    The ``'Reward'`` column is the target; every other column is a feature.

    Args:
        file_path: Path to a UTF-8 CSV file with a header row.
        chunk_size: Number of data rows read from disk at a time (>= 1).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        KeyError: If the CSV header has no ``'Reward'`` column.
    """

    def __init__(self, file_path, chunk_size):
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

        self.file_path = file_path
        self.chunk_size = chunk_size

        # Read only the header; normalise names (trim, replace non-breaking spaces).
        header = pd.read_csv(self.file_path, nrows=0, encoding='utf-8')
        self.columns = header.columns.str.strip().str.replace('\xa0', ' ').tolist()

        # Every chunk is given these column names, so validating once here is
        # equivalent to validating each chunk and fails before training starts.
        if 'Reward' not in self.columns:
            raise KeyError(f"{self.file_path!r} does not contain a 'Reward' column.")
        self.train_columns = [col for col in self.columns if col != 'Reward']

        # Count data rows (all lines minus the header); `with` closes the handle.
        with open(self.file_path, encoding='utf-8') as csv_file:
            self.num_samples = sum(1 for _ in csv_file) - 1

        # Single-chunk cache, filled lazily by _load_chunk().
        self._cached_chunk_start = None
        self._cached_features = None
        self._cached_rewards = None

    def _load_chunk(self, chunk_start):
        """Read the block of rows beginning at data row ``chunk_start`` into the cache."""
        df = pd.read_csv(
            self.file_path,
            skiprows=chunk_start + 1,  # skip the header plus all earlier data rows
            nrows=self.chunk_size,
            header=None,
            encoding='utf-8',
        )
        df.columns = self.columns

        # Split once per chunk instead of once per sample.
        self._cached_features = df[self.train_columns]
        self._cached_rewards = df['Reward']
        self._cached_chunk_start = chunk_start

    def __getitem__(self, idx):
        """Return ``(features, reward)`` for data row ``idx`` as float32 tensors.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.num_samples
        if not 0 <= idx < self.num_samples:
            raise IndexError(f"index {idx} is out of range for dataset of size {self.num_samples}")

        chunk_start = idx // self.chunk_size * self.chunk_size
        if chunk_start != self._cached_chunk_start:
            self._load_chunk(chunk_start)

        sample_idx = idx - chunk_start
        train_data = self._cached_features.iloc[sample_idx].values
        value_data = self._cached_rewards.iloc[sample_idx]

        return torch.tensor(train_data, dtype=torch.float32), torch.tensor(value_data, dtype=torch.float32)

    def __len__(self):
        """Number of data rows in the CSV (header excluded)."""
        return self.num_samples

In [20]:
"""
讀取csv
"""
# Source CSV; MyDataset reads it lazily, 50000 rows at a time.
file_path = 'E:/專題/data/2021/PredictorData.csv'
dataset = MyDataset(file_path, chunk_size=50000)

# Use the batch_size constant from the configuration cell instead of a second
# hard-coded 64, so changing the constant actually changes the batches.
# shuffle=False keeps indices sequential, so consecutive samples come from the
# same CSV chunk.
train_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

In [21]:
# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


In [22]:
class MyGRU(nn.Module):
    """Stacked GRU followed by two linear layers.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of values produced per sample.
        sequence_length: Used only to size the linear layers.

    Note:
        ``forward`` always feeds the GRU a length-1 sequence and keeps the last
        time step, whose width is ``hidden_size``. The linear layers are sized
        ``hidden_size * sequence_length``, so the shapes only agree when
        ``sequence_length == 1``.
        NOTE(review): confirm whether other sequence lengths are meant to be
        supported before changing the layer sizes (it would alter checkpoints).
    """

    def __init__(self, input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes, sequence_length=sequence_length):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # No non-linearity is applied between fc1 and fc2.
        self.fc1 = nn.Linear(hidden_size * sequence_length, hidden_size * sequence_length)
        self.fc2 = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Run one batch through the network.

        Args:
            x: Tensor of shape ``(batch, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # Insert a time axis: (batch, input_size) -> (batch, 1, input_size).
        x = x.unsqueeze(1)

        # Allocate the initial hidden state on the input's own device/dtype
        # instead of a module-level `device` global, so the model keeps working
        # after being moved to another device.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device, dtype=x.dtype)

        out, _ = self.gru(x, h0)
        out = out[:, -1, :]  # keep the output of the last time step
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [23]:
# Build the network with its default hyperparameters and place it on `device`.
model = MyGRU()
model = model.to(device)

# Mean-squared-error objective, optimised with Adam.
loss_criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Seconds between unconditional, time-stamped checkpoints (8 hours).
CHECKPOINT_INTERVAL_SECONDS = 8 * 60 * 60

# The original path was a plain string containing "{best_accuracy}" (no f-prefix,
# and no such variable), so the file name held literal braces. One stable
# "best" file is overwritten each time the loss improves.
best_model_path = 'E:/專題/predictor__reward_model/GRU_predictor_model_best.pth'

current_loss = 0
start_time = time.time()
last_saved_time = start_time

for epoch in range(num_epochs):
    for train_data, value_data in train_dataloader:
        train_data = train_data.to(device)
        # Targets arrive as (batch,); add a trailing axis to match the
        # (batch, 1) model output, otherwise MSELoss would broadcast.
        value_data = value_data.to(device).unsqueeze(1)

        output = model(train_data)
        loss = loss_criterion(output, value_data)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep a plain float: holding the loss tensor in current_loss/best_loss
        # would keep its autograd graph alive and compare tensors instead of numbers.
        current_loss = loss.item()

        # wait=True defers clearing until new output arrives, avoiding flicker.
        clear_output(wait=True)
        print(f"At epoch: {epoch}, loss: {current_loss}")

        # NOTE(review): this compares the loss of a single mini-batch, which is
        # noisy; a running or per-epoch average may be a better criterion.
        if current_loss <= best_loss:
            best_loss = current_loss
            torch.save(model.state_dict(), best_model_path)
            # The value is the training loss, not a validation accuracy.
            print(f'模型在 epoch {epoch} 之後被儲存，訓練損失: {best_loss}')

        current_time = time.time()
        if current_time - last_saved_time >= CHECKPOINT_INTERVAL_SECONDS:
            timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime(current_time))
            model_path = f'E:/專題/predictor__reward_model/GRU_predictor_model_{timestamp}.pth'
            # NOTE(review): this pickles the whole module, unlike the state_dict
            # saved above; left unchanged in case existing loaders rely on it.
            torch.save(model, model_path)
            last_saved_time = current_time
            print(f'模型已基於時間間隔存儲，時間: {timestamp}')

At epoch: 0, loss: 72213.0078125
