# **Load the Data and Import Libraries**

In [None]:
# Mount Google Drive inside the Colab runtime; every dataset path used below
# lives under the /content/gdrive/ mount point.
from google.colab import drive
drive.mount('/content/gdrive/')

In [None]:
import numpy as np
import torch
from torch import nn, optim
from torch.utils import data
from torchsummary import summary
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import math
import cv2

import os
import gc
# CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous so a CUDA error is
# reported at the call that caused it (debugging aid, slows training down).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Restrict this process to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Global device used by the training / evaluation helpers defined below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load preset batch 0; later cells merge batch 1 into this array and use it
# as the source of the training data.
data_path = "/content/gdrive/MyDrive/Asteroid RL dataset/new_RL_preset/data_pole_axis_RL_preset_batch_0.npy"
data_RL_preset0 = np.load(data_path)

In [None]:
# Load preset batch 1 and append it to batch 0.
data_path1 = "/content/gdrive/MyDrive/Asteroid RL dataset/new_RL_preset/data_pole_axis_RL_preset_batch_1.npy"
data_RL_preset1 = np.load(data_path1)

# Row 0 is handled as a header row: its [0, 0] entries are summed here
# (presumably a sample count -- TODO confirm against the dataset writer), and
# only the rows after batch 1's header are appended.
data_RL_preset0[0, 0] = data_RL_preset0[0, 0] + data_RL_preset1[0, 0]
data_RL_preset0 = np.concatenate((data_RL_preset0, data_RL_preset1[1:, :]), axis=0)
# Drop the now-redundant batch 1 array before loading more data.
del data_RL_preset1
gc.collect()

In [None]:
# Load preset batch 2. It is kept separate from batch 0/1; the preprocessing
# cells below draw their test images from it.
data_path2 = "/content/gdrive/MyDrive/Asteroid RL dataset/new_RL_preset/data_pole_axis_RL_preset_batch_2.npy"
data_RL_preset2 = np.load(data_path2)

In [None]:
# Print the first three entries of the header row (row 0) of the merged array.
# Elsewhere in this file [0, 0] is accumulated across batches and [0, 2] is
# used by scale_reward() as an "already scaled" flag; the meaning of [0, 1]
# is not visible here.
print(data_RL_preset0[0, 0])
print(data_RL_preset0[0, 1])
print(data_RL_preset0[0, 2])

gc.collect()

In [None]:
class RewardMapModifier():
    """Extend and smooth reward maps laid out as (..., rows, cols, channels).

    The horizontal axis (axis -2) is extended by wrap-around, the vertical
    axis (axis -3) by mirroring the border rows, and ``blur`` applies a
    Gaussian blur followed by an arctan squashing.

    Args:
        extends: ``(vertical, horizontal)`` extension fractions. An axis of
            ``n`` cells receives ``int(n * fraction / 2)`` extra cells on
            EACH side; ``0`` disables the extension for that axis.
        blur_coef: ``(kernel_size, sigma)`` handed to ``cv2.GaussianBlur``
            (a square ``kernel_size x kernel_size`` kernel is used).
    """

    def __init__(self, extends=(0, 1), blur_coef=(5, 3)):
        self.extends = extends
        self.blur_coef = blur_coef

    @staticmethod
    def _pad_width(size, fraction):
        """Return how many cells are added on each side of an axis of ``size`` cells."""
        return int(size * fraction / 2)

    def extend_hori(self, reward_map, action_maps):
        """Extend both maps along axis -2 by wrapping around.

        The left strip is a copy of the right-most columns and vice versa.
        Channel 0 of the copied action strips is shifted by -1 (left strip)
        and +1 (right strip).

        Args:
            reward_map: array of shape (..., rows, cols, channels).
            action_maps: array with the same grid layout, or ``None``.

        Returns:
            ``(extended_reward, extended_actions)``; the inputs are returned
            unchanged when the computed pad width is zero.
        """
        pad = self._pad_width(reward_map.shape[-2], self.extends[1])
        if pad == 0:
            # Returning early matters: ``x[-0:]`` selects the WHOLE axis, so a
            # tiny non-zero fraction used to duplicate the entire map and
            # disagree with ext_N_set().
            return reward_map, action_maps

        left_reward = reward_map[..., :, -pad:, :]
        right_reward = reward_map[..., :, :pad, :]
        extended_reward = np.concatenate((left_reward, reward_map, right_reward), axis=-2)

        if action_maps is None:
            return extended_reward, action_maps

        action_pad = self._pad_width(action_maps.shape[-2], self.extends[1])
        if action_pad == 0:
            return extended_reward, action_maps

        # Copies, so shifting channel 0 does not write into the caller's array.
        left_actions = action_maps[..., :, -action_pad:, :].copy()
        right_actions = action_maps[..., :, :action_pad, :].copy()
        left_actions[..., :, :, 0] = left_actions[..., :, :, 0] - 1
        right_actions[..., :, :, 0] = right_actions[..., :, :, 0] + 1
        extended_actions = np.concatenate((left_actions, action_maps, right_actions), axis=-2)

        return extended_reward, extended_actions

    def extend_vert(self, reward_map, action_maps):
        """Extend both maps along axis -3 by mirroring the border rows.

        The border rows of the reward map are rolled by 20 cells along axis
        -2 and flipped along axis -3 before being attached. The action strips
        are flipped the same way (not rolled) and their channel 1 is
        reflected about 0 (top strip) or about 1 (bottom strip).

        NOTE(review): the roll of 20 is hard-coded; it equals half of the
        40-column grid used in this notebook -- confirm before using another
        grid width.

        Args:
            reward_map: array of shape (..., rows, cols, channels).
            action_maps: array with the same grid layout, or ``None``.

        Returns:
            ``(extended_reward, extended_actions)``; the inputs are returned
            unchanged when the computed pad width is zero.
        """
        pad = self._pad_width(reward_map.shape[-3], self.extends[0])
        if pad == 0:
            # Same ``-0:`` pitfall as in extend_hori().
            return reward_map, action_maps

        top_reward = np.roll(reward_map[..., :pad, :, :], 20, axis=-2)
        bottom_reward = np.roll(reward_map[..., -pad:, :, :], 20, axis=-2)
        top_reward = np.flip(top_reward, axis=-3)
        bottom_reward = np.flip(bottom_reward, axis=-3)
        extended_reward = np.concatenate((top_reward, reward_map, bottom_reward), axis=-3)

        if action_maps is None:
            return extended_reward, action_maps

        action_pad = self._pad_width(action_maps.shape[-3], self.extends[0])
        if action_pad == 0:
            return extended_reward, action_maps

        top_actions = np.flip(action_maps[..., :action_pad, :, :].copy(), -3)
        bottom_actions = np.flip(action_maps[..., -action_pad:, :, :].copy(), -3)
        # Reflect channel 1 about the border value (0 at the top, 1 at the bottom).
        top_actions[..., :, :, 1] = 2*0 - top_actions[..., :, :, 1]
        bottom_actions[..., :, :, 1] = 2*1 - bottom_actions[..., :, :, 1]
        extended_actions = np.concatenate((top_actions, action_maps, bottom_actions), axis=-3)

        return extended_reward, extended_actions

    def blur(self, reward_map):
        """Gaussian-blur channel 0 and squash all values with an arctan.

        For a 3-D input (rows, cols, channels) channel 0 is blurred; for a
        4-D input (batch, rows, cols, channels) channel 0 of every batch item
        is blurred. Inputs of any other rank are only squashed.

        NOTE: the blur is written back into ``reward_map`` in place, so the
        caller's array is modified; the squashed result is a new array.

        Returns:
            ``6 * (2/pi) * arctan(reward_map / 2)``, i.e. values in (-6, 6).
        """
        kernel = (self.blur_coef[0], self.blur_coef[0])
        sigma = self.blur_coef[1]
        if len(reward_map.shape) == 3:
            reward_map[:, :, 0] = cv2.GaussianBlur(reward_map[:, :, 0], kernel, sigma)
        elif len(reward_map.shape) == 4:
            for i in range(reward_map.shape[0]):
                reward_map[i, :, :, 0] = cv2.GaussianBlur(reward_map[i, :, :, 0], kernel, sigma)
        reward_map = 6 * (2/np.pi) * np.arctan(reward_map/2)
        return reward_map

    def operation(self, reward_map, action_maps, order=('extend_hori', 'extend_vert', 'blur')):
        """Apply the named operations in sequence.

        Args:
            reward_map: reward array passed to the first operation.
            action_maps: action array (or ``None``); ``blur`` leaves it untouched.
            order: iterable of ``'extend_hori'``, ``'extend_vert'``, ``'blur'``.

        Returns:
            ``(reward, actions)`` after all operations.

        Raises:
            NotImplementedError: if ``order`` contains an unknown name.
        """
        result_reward = reward_map
        result_action = action_maps
        for op in order:
            if op == 'extend_hori':
                result_reward, result_action = self.extend_hori(result_reward, result_action)
            elif op == 'extend_vert':
                result_reward, result_action = self.extend_vert(result_reward, result_action)
            elif op == 'blur':
                result_reward = self.blur(result_reward)
            else:
                raise NotImplementedError("unknown operation: {!r}".format(op))
        return result_reward, result_action

    def ext_N_set(self, N_set):
        """Return the grid size after extension.

        ``N_set[0]`` is scaled with the horizontal fraction (``extends[1]``)
        and ``N_set[1]`` with the vertical fraction (``extends[0]``).
        """
        return (N_set[0]+2*int(N_set[0]*self.extends[1]/2), N_set[1]+2*int(N_set[1]*self.extends[0]/2))

class EarlyStopping():
    """Flag a run for early stopping once the score stops improving.

    Call the instance with the latest score after every epoch and check
    ``early_stop`` afterwards.
    """

    def __init__(self, patience, delta, mode='min'):
        """
        patience : number of consecutive non-improving calls after which
                   ``early_stop`` is set
        delta : minimum change that counts as an improvement
        mode : 'min' if lower scores are better, 'max' if higher are better

        Raises ValueError for any other ``mode``.
        """
        if mode not in ('min', 'max'):
            raise ValueError("mode must be 'min' or 'max', got {!r}".format(mode))

        self.patience = patience
        self.delta = delta
        self.mode = mode
        # Start from the worst possible score so the first call always counts
        # as an improvement. (A start value of 0 in 'max' mode would reject
        # every non-positive score.)
        self.best_score = np.inf if mode == 'min' else -np.inf
        self.count = 0
        self.early_stop = False

    def __call__(self, score):
        """Record ``score``; set ``early_stop`` once patience is exhausted."""
        if self.mode == 'min':
            improvement = self.best_score - score
        else:
            improvement = score - self.best_score

        if improvement < self.delta:
            self.count += 1
        else:
            self.best_score = score
            self.count = 0

        if self.count >= self.patience:
            self.early_stop = True

def data_split(dataset, train_ratio=0.7, shuffle=True, copy=False):
    """Split ``dataset`` along its first axis into a train and a test part.

    Args:
        dataset: array indexed along axis 0.
        train_ratio: fraction of rows (truncated to an int) placed in the
            train part.
        shuffle: permute the rows with NumPy's global RNG before splitting.
        copy: return independent copies instead of slices.

    Returns:
        ``(trainset, testset)``.
    """
    n_rows = dataset.shape[0]

    if shuffle:
        order = np.arange(0, n_rows)
        np.random.shuffle(order)
        # Fancy indexing materialises the permuted rows as a new array.
        dataset = dataset[order]

    boundary = int(train_ratio*n_rows)
    trainset, testset = dataset[:boundary], dataset[boundary:]

    if not copy:
        return trainset, testset
    return trainset.copy(), testset.copy()

# **Training with Regression Model**

In [None]:
class QValueNet(nn.Module):
    """Fully connected regressor mapping a flat feature vector to one value.

    Layer widths: input_dim -> hidden -> hidden -> hidden -> hidden//4 ->
    hidden//8 -> 1. Every layer except the last is followed by the activation
    and a dropout layer.

    Args:
        input_dim: number of input features.
        hidden_dim: width of the first three hidden layers.
        activation: activation module class, instantiated once per layer.
        dropout: dropout probability.
    """

    def __init__(self, input_dim, hidden_dim=512, activation=nn.ReLU, dropout=0.3):
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.activation = activation

        widths = [
            self.input_dim,
            self.hidden_dim,
            self.hidden_dim,
            self.hidden_dim,
            self.hidden_dim//4,
            self.hidden_dim//8,
        ]

        # Linear layers are created in the same order as a hand-written
        # Sequential, so module indices and seeded initialisation match.
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend((nn.Linear(fan_in, fan_out), activation(), nn.Dropout(dropout)))
        stack.append(nn.Linear(widths[-1], 1))

        self.model = nn.Sequential(*stack)

    def forward(self, X):
        """Return the prediction for ``X``, one output feature per sample."""
        return self.model(X)


class QValueNet_CNN(nn.Module):
    """Multi-branch regressor over a flat feature vector.

    ``forward`` slices its input into four parts, encodes each with its own
    branch and feeds the concatenated features to a fully connected head:

    * columns 0..799    -> 40x20 map, 2-D conv branch        (1024 features)
    * columns 800..899  -> length-100 sequence, 1-D conv     (256 features)
    * columns 900..905  -> 6 scalars, MLP                    (64 features)
    * columns 906..     -> remaining scalars (the encoder expects 4), MLP (64 features)

    ``input_dim`` and ``hidden_dim`` are stored but not used to size any layer.
    """

    def __init__(self, input_dim, hidden_dim=512, activation=nn.ReLU, dropout=0.3):
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.activation = activation

        # R_arr encoders. forward() transposes the map to [B, 1, 20, 40] and
        # pads it with r_padding() before each conv, so the convs themselves
        # use no padding.
        self.r_arr_encoder1 = nn.Sequential(
            nn.Conv2d(1, 8, (9, 5)),  # padded input [B, 1, 28, 44] -> [B, 8, 20, 40]
            self.activation(),
            nn.MaxPool2d(2)  # -> [B, 8, 10, 20]
        )

        self.r_arr_encoder2 = nn.Sequential(
            nn.Conv2d(8, 16, (5, 3)),  # padded input [B, 8, 14, 22] -> [B, 16, 10, 20]
            self.activation(),
            nn.Flatten(),     # -> 16*10*20 = 3200
            nn.Linear(3200, 1024)
        )

        # Info encoder (input: 6 features per sample)
        self.info_encoder = nn.Sequential(
            nn.Linear(6, 32),
            self.activation(),
            nn.Linear(32, 64)
        )

        # RL encoder (input: 4 features per sample)
        self.rl_encoder = nn.Sequential(
            nn.Linear(4, 32),
            self.activation(),
            nn.Linear(32, 64)
        )

        # Lightcurves encoder (input: [B, 1, 100], padded by lc_padding() in forward())
        self.lc_encoder1 = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=15),  # padded length 114 -> 100
            self.activation(),
            nn.MaxPool1d(2),   # -> 50
        )

        self.lc_encoder2 = nn.Sequential(
            nn.Conv1d(16, 32, kernel_size=9),  # padded length 58 -> 50
            self.activation(),
            nn.Flatten(),      # -> 32*50
            nn.Linear(32*50, 256)
        )

        # Fusion & Head: concatenated branch features -> one scalar
        self.head = nn.Sequential(
            nn.Linear(1024 + 256 + 64 + 64, 1024),
            self.activation(),
            nn.Dropout(dropout),

            #nn.Linear(1024, 1024),
            #self.activation(),
            #nn.Dropout(dropout), #//// newly added

            nn.Linear(1024, 256),
            self.activation(),
            nn.Dropout(dropout),

            nn.Linear(256, 1)  # e.g., class count or regression value
        )

    def r_padding(self, x, pad=(1, 1)):
        """Pad a [N, C, H, W] tensor by ``pad = (pad_H, pad_W)`` on every side.

        The statements below depend on their order:

        1. ``x`` is copied into the centre of a zero tensor.
        2. The left/right ``pad_W`` columns are filled, over the full padded
           height, from the adjacent interior columns flipped along H and
           rolled by 20 along W. The top/bottom padding rows are still zero
           at this point, so the flip also moves zeros. The rolled slice is
           only ``pad_W`` wide, so for the widths used in forward() (2 and 1)
           the roll by 20 has no effect.
        3. The top/bottom ``pad_H`` rows (interior columns only) are filled
           by wrap-around from the opposite edge of ``x``.

        NOTE(review): RewardMapModifier wraps the horizontal axis and
        flips+rolls the vertical one; here, after the transpose in forward(),
        H is the 20-long axis and receives the wrap-around while W receives
        the flip. The roles look swapped and the roll amount is hard-coded --
        confirm the intended boundary condition.
        """
        N, C, H, W = x.shape
        pad_H = pad[0]
        pad_W = pad[1]

        out = torch.full((N, C, H + 2*pad_H, W + 2*pad_W), fill_value=0.0, dtype=x.dtype, device=x.device)
        out[:, :, pad_H:pad_H+H, pad_W:pad_W+W] = x
        out[:, :, :, :pad_W] = torch.roll(torch.flip(out[:, :, :, pad_W:pad_W+pad_W], (-2,)), 20, -1)
        out[:, :, :, -pad_W:] = torch.roll(torch.flip(out[:, :, :, -pad_W-pad_W:-pad_W], (-2,)), 20, -1)
        out[:, :, :pad_H, pad_W:pad_W+W] = x[:, :, -pad_H:, :]
        out[:, :, -pad_H:, pad_W:pad_W+W] = x[:, :, :pad_H, :]
        return out

    def lc_padding(self, x, pad=1):
        """Circularly pad a [N, C, W] tensor by ``pad`` samples on each side."""
        N, C, W = x.shape

        out = torch.full((N, C, W + 2*pad), fill_value=0.0, dtype=x.dtype, device=x.device)
        out[:, :, pad:pad+W] = x
        # Wrap-around: the left pad comes from the end of x, the right pad from its start.
        out[:, :, :pad] = x[:, :, -pad:]
        out[:, :, -pad:] = x[:, :, :pad]
        return out

    def forward(self, X):
        """Return one bounded value per sample, shape [B, 1].

        ``X`` is sliced along its last axis as described in the class
        docstring. The head output is squashed with ``6 * 2/pi * atan(2 * out)``,
        so predictions lie in the open interval (-6, 6).
        """
        # Split the flat feature vector into its four parts.
        r_arr = X[..., :800].reshape((X.shape[0], 1, 40, 20))
        lc_arr = X[..., 800:900].reshape((X.shape[0], 1, 100))
        lc_info = X[..., 900:906]
        rl_info = X[..., 906:]

        # 2-D branch: [B, 1, 40, 20] -> [B, 1, 20, 40], then pad by half the
        # kernel size before each conv so the spatial size is preserved.
        r_arr_feat = torch.transpose(r_arr, -2, -1)
        r_arr_feat = self.r_padding(r_arr_feat, pad=(4, 2))
        r_arr_feat = self.r_arr_encoder1(r_arr_feat)
        r_arr_feat = self.r_padding(r_arr_feat, pad=(2, 1))
        r_arr_feat = self.r_arr_encoder2(r_arr_feat)

        # 1-D branch: circular padding by half the kernel size before each conv.
        lc_feat = self.lc_padding(lc_arr, pad=7)
        lc_feat = self.lc_encoder1(lc_feat)
        lc_feat = self.lc_padding(lc_feat, pad=4)
        lc_feat = self.lc_encoder2(lc_feat)

        # squeeze(dim=1) only removes dim 1 when it has size 1; for a 2-D X
        # the encoder output is already [B, 64] and is left unchanged.
        info_feat = self.info_encoder(lc_info)
        info_feat = torch.squeeze(info_feat, dim=1)

        rl_feat = self.rl_encoder(rl_info)
        rl_feat = torch.squeeze(rl_feat, dim=1)

        fusion_feat = torch.cat((r_arr_feat, lc_feat, info_feat, rl_feat), dim=1)
        out = self.head(fusion_feat)

        # Bound the output to (-6, 6).
        PI = 3.14159265358979
        out = 6 * 2 / PI * torch.atan(2 * out) #out/0.8
        #out = 7 * 2 / PI * torch.atan(1.5 * out)

        return out

class CustomLoss(nn.Module):
    """Root-mean-square error, optionally relative and/or in percent.

    Args:
        relative: if true, both tensors are divided by ``target + 1e-6``
            before the MSE is taken and ``1e-6`` is added under the root.
        percent: if true, the loss is multiplied by 100.
    """

    def __init__(self, relative, percent):
        super().__init__()
        self.relative = relative
        self.percent = percent

    def forward(self, input, target):
        """Return the scalar loss between ``input`` and ``target``."""
        mse = nn.functional.mse_loss
        if self.relative:
            scale = target + 1e-6
            rms = torch.sqrt(mse(input/scale, target/scale) + 1e-6)
        else:
            rms = torch.sqrt(mse(input, target))
        return 100 * rms if self.percent else rms

class CustomLoss1(nn.Module):
    """MSE between neighbourhood-spread versions of two 2-D maps, times 1e6."""

    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        """Return ``1e6 * MSE(processer(input), processer(target))``."""
        spread_input = self.processer(input)
        spread_target = self.processer(target)
        return 1e+6 * nn.functional.mse_loss(spread_input, spread_target)

    def processer(self, reward_map):
        """Spread the positive part of a 2-D map over its neighbours.

        For every cell the squared positive values of the cell itself and of
        the cells up to 3 steps away along each axis are summed, divided by
        5.5 and square-rooted. Negative values are then added back unchanged.
        """
        hori_prop, vert_prop = 3, 3
        exp = 2
        div = hori_prop + vert_prop - 0.5

        positive = torch.where(reward_map > 0, reward_map, 0)
        negative = torch.where(reward_map < 0, reward_map, 0)

        powered = positive**exp
        # Accumulate into a separate tensor; the additions happen in the same
        # order as shifting one step at a time along dim 1, then dim 0.
        spread = powered.clone()
        for shift in range(1, hori_prop+1):
            spread[:, :-shift] += powered[:, shift:]
            spread[:, shift:] += powered[:, :-shift]
        for shift in range(1, vert_prop+1):
            spread[:-shift, :] += powered[shift:, :]
            spread[shift:, :] += powered[:-shift, :]

        return (spread / div)**(1/exp) + negative

class CustomLoss2(nn.Module):
    """MSE between neighbourhood-spread versions of two 2-D maps.

    ``epoch`` can be set through ``setepoch`` but is not read by the loss.
    """

    def __init__(self):
        super().__init__()
        self.epoch = 0

    def forward(self, input, target):
        """Return ``MSE(processer(input), processer(target))``.

        NOTE(review): an amplitude-normalised copy of the spread input used
        to be computed here but was never passed to the loss. That dead
        computation has been removed; the returned value is unchanged. If the
        normalisation was meant to take effect, it must be fed to the MSE
        explicitly.
        """
        input_prop = self.processer(input)
        target_prop = self.processer(target)
        return nn.functional.mse_loss(input_prop, target_prop)

    def processer(self, reward_map):
        """Spread the positive part of a 2-D map over its neighbours.

        For every cell the squared positive values of the cell itself and of
        the cells up to 3 steps away along each axis are summed, divided by
        5.5 and square-rooted. Negative values are then added back unchanged.
        """
        hori_prop, vert_prop = 3, 3
        reward_map_pos = torch.where(reward_map > 0, reward_map, 0)
        reward_map_neg = torch.where(reward_map < 0, reward_map, 0)

        exp = 2
        div = hori_prop + vert_prop - 0.5
        reward_map_prop = reward_map_pos**exp
        for i in range(1, hori_prop+1):
            reward_map_prop[:, :-i] = reward_map_prop[:, :-i] + reward_map_pos[:, i:]**exp
            reward_map_prop[:,  i:] = reward_map_prop[:,  i:] + reward_map_pos[:, :-i]**exp
        for j in range(1, vert_prop+1):
            reward_map_prop[:-j, :] = reward_map_prop[:-j, :] + reward_map_pos[j:, :]**exp
            reward_map_prop[ j:, :] = reward_map_prop[ j:, :] + reward_map_pos[:-j, :]**exp
        reward_map_prop = (reward_map_prop / div)**(1/exp)
        reward_map_prop = reward_map_prop + reward_map_neg

        return reward_map_prop

    def setepoch(self, epoch):
        """Store the current epoch number."""
        self.epoch = epoch

def train_loop(dataloader, model, loss_fn, optimizer, train_loss, es:EarlyStopping):
    """Run one training epoch.

    Args:
        dataloader: iterable yielding ``(X, y)`` batches.
        model: module to optimise (switched to train mode).
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimiser stepping the model parameters.
        train_loss: list to which the sample-weighted mean loss is appended.
        es: early-stopping tracker, called with the epoch's mean TRAIN loss.

    Batches are moved to the module-level ``device``.
    """
    model.train()

    weighted_sum = 0
    n_seen = 0
    for X_train, y_train in dataloader:
        X_train = X_train.to(device)
        y_train = y_train.to(device)
        n_batch = X_train.size(0)

        loss = loss_fn(model(X_train), y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller last batch does not skew the mean.
        weighted_sum += loss.item()*n_batch
        n_seen += n_batch

    mean_loss = weighted_sum / n_seen
    train_loss.append(mean_loss)
    es(mean_loss)

def test_loop(dataloader, model, loss_fn, test_loss, epoch):
    """Evaluate the model for one epoch and print train/test losses.

    Every batch is reshaped to a (40, 20) map and transposed before the loss
    is taken, so each batch must contain exactly 800 samples.

    Args:
        dataloader: iterable yielding ``(X, y)`` batches of 800 samples.
        model: module to evaluate (switched to eval mode).
        loss_fn: callable taking two 2-D maps and returning a scalar tensor.
        test_loss: list to which the sample-weighted mean loss is appended.
        epoch: unused.

    NOTE: reads the module-level names ``device`` and ``train_loss`` (the
    latter only for printing its last entry).
    """
    model.eval()

    weighted_sum = 0
    n_seen = 0
    with torch.no_grad():
        for X_test, y_test in dataloader:
            X_test = X_test.to(device)
            y_test = y_test.to(device)
            n_batch = X_test.size(0)

            pred_map = model(X_test).reshape(40, 20).transpose(0, 1)
            target_map = y_test.reshape(40, 20).transpose(0, 1)

            weighted_sum += loss_fn(pred_map, target_map).item()*n_batch
            n_seen += n_batch

    mean_loss = weighted_sum / n_seen
    test_loss.append(mean_loss)

    print("train_loss : {:9.4g}".format(train_loss[-1]), end=' ')
    print("| test_loss : {:9.4g}".format(mean_loss), end=' ')
    print("\n", end=' ')

# Data Processing : scaling data
param = [6, 2] #[6, 2.5]
def scale_reward(data):
    """Squash ``data`` with ``param[0] * (2/pi) * arctan(data / param[1])``.

    The header entry ``data_RL_preset0[0, 2]`` is used as an "already scaled"
    flag: if it equals 1, ``data`` is returned untouched; otherwise the flag
    is set to 1 and the scaled values are returned as a new array.

    NOTE: the flag lives in the global ``data_RL_preset0``, not in ``data``,
    so only the first call after loading actually scales anything.
    """
    if data_RL_preset0[0, 2] == 1: # already scaled
        return data

    data_RL_preset0[0, 2] = 1

    # Computed directly: no full-size zero buffer is allocated first.
    return param[0]*(2/np.pi)*np.arctan(data/param[1])

def test_img_show(i_img, loss_fn):
    """Plot test image ``i_img`` next to the model's predicted reward map.

    The left panel shows ``test_img_list[i_img]`` with a red frame around the
    non-extended area. The right panel shows the model output, evaluated one
    grid cell at a time with the state of the test image and the action
    ``(phi, theta, 0.1, 0.1)``. The loss between both maps is put in the
    title and printed.

    Args:
        i_img: index into ``test_img_list`` / ``test_img_data``.
        loss_fn: callable taking two 2-D tensors and returning a scalar tensor.

    NOTE: relies on the module-level names ``N_set``, ``ext_N_set``,
    ``resol``, ``param``, ``test_img_list``, ``test_img_data``, ``model`` and
    ``device``. The loss is only meaningful when no map extension is used.
    """
    fig = plt.figure(figsize=(16, 8), dpi=300)
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    extent = ( (N_set[0]-ext_N_set[0])/2, (N_set[0]+ext_N_set[0])/2, (N_set[1]+ext_N_set[1])/2, (N_set[1]-ext_N_set[1])/2 )

    # Left panel: reference image.
    ax1.clear()
    im1 = ax1.imshow(test_img_list[i_img], vmin=-param[0], vmax=param[0], extent=extent)
    ax1.set_title("TEST_IMAGE_"+str(i_img))
    plt.colorbar(im1, ax=ax1, fraction=0.026, pad=0.04)
    # Red frame around the original (non-extended) grid.
    ax1.plot([0, N_set[0]],        [0, 0],               color='red', linestyle='solid')
    ax1.plot([0, N_set[0]],        [N_set[1], N_set[1]], color='red', linestyle='solid')
    ax1.plot([0, 0],               [0, N_set[1]],        color='red', linestyle='solid')
    ax1.plot([N_set[0], N_set[0]], [0, N_set[1]],        color='red', linestyle='solid')

    # Right panel: model prediction for every grid cell.
    reward_map_temp = np.zeros((resol*N_set[0], resol*N_set[1]))
    loss_test_img_temp = 0
    # The state is the same for every cell of one image, so read it once.
    state = test_img_data[i_img*resol*N_set[0]*N_set[1], :906]
    n_cols = int(resol*N_set[1])
    model.eval()
    with torch.no_grad():
        for idx in range(N_set[0]*N_set[1]*resol*resol):
            i = idx//n_cols
            j = idx%n_cols
            phi_action = (i/(resol*N_set[0]))%1
            theta_action = (j/(resol*N_set[1]))%1

            actions = np.array([phi_action, theta_action, 0.1, 0.1])

            model_input = torch.tensor(np.concatenate((state, actions))).float().to(device)
            model_input = torch.unsqueeze(model_input, 0)
            # .item() gives a plain Python float, so the assignment does not
            # depend on NumPy converting a (possibly CUDA) tensor.
            reward_map_temp[i, j] = model(model_input).item()
        loss_test_img_temp += loss_fn(torch.tensor(reward_map_temp.T).cpu(), torch.tensor(test_img_list[i_img][:, :, 0])).item() # only valid if no extend
    ax2.clear()
    # Symmetric colour range around zero.
    im2 = ax2.imshow(reward_map_temp.T, vmin=-np.max(np.abs(reward_map_temp)), vmax=np.max(np.abs(reward_map_temp)))
    ax2.set_title("MODEL_OUTPUT_"+str(i_img)+"(Loss="+str(int(1000*loss_test_img_temp)/1000)+")")
    plt.colorbar(im2, ax=ax2, fraction=0.026, pad=0.04)

    plt.show()
    print("Test Img "+str(i_img)+" Loss = "+str(int(1000*loss_test_img_temp)/1000))

### Data Preprocessing with Small Size Dataset

In [None]:
class Dataset(data.Dataset):
    """In-memory dataset of ``(x, y)`` pairs stored as float32 tensors.

    Args:
        x_tensor: features, either a tensor or anything ``torch.tensor`` accepts.
        y_tensor: targets, same options; converted independently of
            ``x_tensor`` so mixed tensor / array inputs work.
    """

    def __init__(self, x_tensor, y_tensor):
        super().__init__()

        self.x = self._to_float_tensor(x_tensor)
        self.y = self._to_float_tensor(y_tensor)

    @staticmethod
    def _to_float_tensor(values):
        """Return ``values`` as a float32 tensor."""
        if torch.is_tensor(values):
            return values.float()
        return torch.tensor(values).float()

    def __getitem__(self, index): return self.x[index], self.y[index]

    def __len__(self): return self.x.shape[0]

#### (src for showing reward map examples)

In [None]:
# Preprocessing cell (variant that keeps state/action/reward arrays for the
# map-preview cell that follows). It consumes the globals data_RL_preset0 and
# data_RL_preset2 and deletes both, so it can only be run once per load.

# seed
seed = 722
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# hyperparameters
batch_size = 1024
learning_rate = 6e-5
max_epoch = 1000

# other parameters
# N_set[0]*N_set[1] = 800 rows of the dataset form one reward map.
N_set = (40, 20)
resol = 1

map_modifier = RewardMapModifier(extends=(0, 0), blur_coef=(3, 2)) #if you use CNN, do not use extend method
# Number of maps processed per chunk, and the matching number of dataset rows.
chunk_size = 256
chunk_set_size = chunk_size*N_set[0]*N_set[1]
online_dataset_path = "/content/gdrive/MyDrive/Asteroid RL dataset/online_dataset/"


# Header entry [0, 0] of batch 2 is used as the length of the row mask below
# (presumably the number of rows in the array -- TODO confirm).
data_len2 = int(data_RL_preset2[0, 0])
test_img_num = 10
# NOTE(review): randint samples with replacement, so the same map index can be
# drawn twice; the mask would then select fewer than test_img_num maps.
test_img_idx_choice = np.random.randint(0, (data_len2-1)//800, test_img_num)
dataset_img_idx = np.full(data_len2, False)
for i in test_img_idx_choice:
    # +1 skips the header row; each map occupies 800 consecutive rows.
    dataset_img_idx[i*800+1:(i+1)*800+1] = True

print("test_img_idx (in RL_preset_batch_2) :", test_img_idx_choice)
print("--------------------------------")
print("")

# Training rows: everything after the header row of the merged batch 0/1.
data_RL_preset = data_RL_preset0[1:, :]
# Boolean masking returns the selected maps in ascending row order, not in
# the order printed above.
test_img_data = data_RL_preset2[dataset_img_idx, :].copy()
del data_RL_preset2
gc.collect()

# Reference images: last column (reward) of each selected map, reshaped to
# (N_set[0], N_set[1]) and transposed.
test_img_list = []
for i in range(test_img_num):
    test_img_list.append(test_img_data[i*resol*N_set[0]*N_set[1]:(i+1)*resol*N_set[0]*N_set[1], -1].reshape((N_set[0], N_set[1])).T)

# Apply the same extension / blur pipeline as the training maps; a trailing
# channel axis is added because the modifier indexes channel 0.
for i in range(len(test_img_list)):
    test_img_list[i], _ = map_modifier.operation(np.expand_dims(test_img_list[i], axis=-1), None, order=['extend_vert', 'extend_hori', 'blur'])
    #test_img_list[i] = test_img_list[i][:, :, 0]
    gc.collect()


#cut = N_set[0]*N_set[1]*2093 + 1 #1040
#state_data = data_RL_preset[:cut, :-5]
#action_data = data_RL_preset[:cut, -5:-1]
#reward_data = data_RL_preset[:cut, -1:]
# Column layout of each row: [state ..., 4 action columns, 1 reward column].
state_data = data_RL_preset[:, :-5]
action_data = data_RL_preset[:, -5:-1]
reward_data = data_RL_preset[:, -1:]


# Single all-zero placeholder rows used to start the concatenation below;
# they are removed again with np.delete after the loop.
new_action_data = 0 * np.array([action_data[0, ...].copy()])
new_reward_data = 0 * np.array([reward_data[0, ...].copy()])

print("Data Shapes Before Map Modifying")
print("--------------------------------")
print("state_data  | "+str(state_data.shape)+", "+str(int(1000*state_data.itemsize*state_data.size/(2**30))/1000)+"GB")
print("action_data | "+str(action_data.shape)+"  , "+str(int(1000*action_data.itemsize*action_data.size/(2**30))/1000)+"GB")
print("reward_data | "+str(reward_data.shape)+"  , "+str(int(1000*reward_data.itemsize*reward_data.size/(2**30))/1000)+"GB")

print("\n--------------------------------")
# Process the maps in chunks of chunk_size maps; the final chunk takes
# whatever rows remain.
for i in range(math.ceil(state_data.shape[0]/chunk_set_size)):
    if i != state_data.shape[0]//(chunk_size*N_set[0]*N_set[1]):
        reward_map = reward_data[chunk_set_size*i:chunk_set_size*(i+1)]
        action_maps = action_data[chunk_set_size*i:chunk_set_size*(i+1)]
    else:
        reward_map = reward_data[chunk_set_size*i:]
        action_maps = action_data[chunk_set_size*i:]

    print("Batch Shape : reward / action | "+str(reward_map.shape)+", "+str(action_maps.shape)+" --> ", end='')
    # (rows, C) -> (maps, N_set[0], N_set[1], C) -> (maps, N_set[1], N_set[0], C)
    reward_map = np.swapaxes(reward_map.reshape((-1, N_set[0], N_set[1], 1)), -2, -3)
    action_maps = np.swapaxes(action_maps.reshape((-1, N_set[0], N_set[1], 4)), -2, -3)
    reward_map, action_maps = map_modifier.operation(reward_map, action_maps, order=['extend_vert', 'extend_hori', 'blur'])
    print(str(reward_map.shape)+", "+str(action_maps.shape))

    # Number of cells per (possibly extended) map.
    extended_size = reward_map.shape[-2] * reward_map.shape[-3]
    # NOTE(review): concatenating inside the loop re-copies the accumulated
    # array on every iteration.
    new_action_data = np.concatenate((new_action_data, action_maps.reshape(-1, 4)), axis=0)
    new_reward_data = np.concatenate((new_reward_data, reward_map.reshape(-1, 1)), axis=0)
print("--------------------------------\n")

# Keep one state row per map and repeat it once for every cell of the
# modified map so states line up with the flattened action / reward rows.
state_data = np.repeat(state_data[::N_set[0]*N_set[1]], repeats=extended_size, axis=0)
# Drop the placeholder first rows.
action_data = np.delete(new_action_data, 0, axis=0)
reward_data = np.delete(new_reward_data, 0, axis=0)

del new_action_data, new_reward_data, reward_map, action_maps
del data_RL_preset, data_RL_preset0
gc.collect()

print("Data Shapes After Map Mpdifying")
print("--------------------------------")
print("state_data  | "+str(state_data.shape)+", "+str(int(1000*state_data.itemsize*state_data.size/(2**30))/1000)+"GB")
print("action_data | "+str(action_data.shape)+"  , "+str(int(1000*action_data.itemsize*action_data.size/(2**30))/1000)+"GB")
print("reward_data | "+str(reward_data.shape)+"  , "+str(int(1000*reward_data.itemsize*reward_data.size/(2**30))/1000)+"GB")

# Grid size after extension (equal to N_set when extends=(0, 0)).
ext_N_set = map_modifier.ext_N_set(N_set)

In [None]:
# Preview the processed reward maps with indices 1, 31, 61 and 91. Each map
# is 800 consecutive rows of reward_data, shown as a 20x40 image (this
# hard-codes the non-extended grid size).
for i in range(1, 120, 30):
    plt.imshow(reward_data[i*800:(i+1)*800, 0].reshape(20, 40))
    plt.colorbar()
    plt.show()

In [None]:
# Join state, action and reward columns into one table, then split it into
# train / test sets. The shuffle in data_split works on individual rows here,
# so cells belonging to the same map can land in both sets.
total_data = np.concatenate((state_data, action_data, reward_data), axis=1)
# Number of state columns, needed to slice the table apart again.
state_shape = state_data.shape[1]
del state_data, action_data, reward_data
gc.collect()

train_data, test_data = data_split(total_data, train_ratio=0.85, shuffle=True, copy=True)
del total_data
gc.collect()

# Slice the train table back into [state | 4 action columns | reward].
train_state_data = train_data[:, :state_shape].copy()
train_action_data = train_data[:, state_shape:state_shape+4].copy()
train_reward_data = train_data[:, -1].reshape(-1, 1)
del train_data
gc.collect()

# Same slicing for the test table.
test_state_data = test_data[:, :state_shape].copy()
test_action_data = test_data[:, state_shape:state_shape+4].copy()
test_reward_data = test_data[:, -1].reshape(-1, 1)
del test_data
gc.collect()

#### (src for skipping showing reward map examples)

In [None]:
# Preprocessing cell (variant that goes straight to the train / test split
# and splits by whole maps). It consumes the globals data_RL_preset0 and
# data_RL_preset2 and deletes both, so it is an alternative to -- not a
# follow-up of -- any other cell that deletes them.

# seed
seed = 722
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# hyperparameters
batch_size = 1024
learning_rate = 6e-5
max_epoch = 1000

# other parameters
# N_set[0]*N_set[1] = 800 rows of the dataset form one reward map.
N_set = (40, 20)
resol = 1

map_modifier = RewardMapModifier(extends=(0, 0), blur_coef=(3, 2)) #if you use CNN, do not use extend method
# Number of maps processed per chunk, and the matching number of dataset rows.
chunk_size = 256
chunk_set_size = chunk_size*N_set[0]*N_set[1]
online_dataset_path = "/content/gdrive/MyDrive/Asteroid RL dataset/online_dataset/"


# Header entry [0, 0] of batch 2 is used as the length of the row mask below
# (presumably the number of rows in the array -- TODO confirm).
data_len2 = int(data_RL_preset2[0, 0])
test_img_num = 10
# NOTE(review): randint samples with replacement, so the same map index can be
# drawn twice; the mask would then select fewer than test_img_num maps.
test_img_idx_choice = np.random.randint(0, (data_len2-1)//800, test_img_num)
dataset_img_idx = np.full(data_len2, False)
for i in test_img_idx_choice:
    # +1 skips the header row; each map occupies 800 consecutive rows.
    dataset_img_idx[i*800+1:(i+1)*800+1] = True

print("test_img_idx (in RL_preset_batch_2) :", test_img_idx_choice)
print("--------------------------------")
print("")

# Training rows: everything after the header row of the merged batch 0/1.
data_RL_preset = data_RL_preset0[1:, :]
# Boolean masking returns the selected maps in ascending row order, not in
# the order printed above.
test_img_data = data_RL_preset2[dataset_img_idx, :].copy()
del data_RL_preset2
gc.collect()

# Reference images: last column (reward) of each selected map, reshaped to
# (N_set[0], N_set[1]) and transposed.
test_img_list = []
for i in range(test_img_num):
    test_img_list.append(test_img_data[i*resol*N_set[0]*N_set[1]:(i+1)*resol*N_set[0]*N_set[1], -1].reshape((N_set[0], N_set[1])).T)

# Apply the same extension / blur pipeline as the training maps; a trailing
# channel axis is added because the modifier indexes channel 0.
for i in range(len(test_img_list)):
    test_img_list[i], _ = map_modifier.operation(np.expand_dims(test_img_list[i], axis=-1), None, order=['extend_vert', 'extend_hori', 'blur'])
    #test_img_list[i] = test_img_list[i][:, :, 0]
    gc.collect()


#cut = N_set[0]*N_set[1]*2093 + 1 #1040
#state_data = data_RL_preset[:cut, :-5]
#action_data = data_RL_preset[:cut, -5:-1]
#reward_data = data_RL_preset[:cut, -1:]
# Column layout of each row: [state ..., 4 action columns, 1 reward column].
state_data = data_RL_preset[:, :-5]
action_data = data_RL_preset[:, -5:-1]
reward_data = data_RL_preset[:, -1:]


# Single all-zero placeholder rows used to start the concatenation below;
# they are removed again with np.delete after the loop.
new_action_data = 0 * np.array([action_data[0, ...].copy()])
new_reward_data = 0 * np.array([reward_data[0, ...].copy()])

print("Data Shapes Before Map Modifying")
print("--------------------------------")
print("state_data  | "+str(state_data.shape)+", "+str(int(1000*state_data.itemsize*state_data.size/(2**30))/1000)+"GB")
print("action_data | "+str(action_data.shape)+"  , "+str(int(1000*action_data.itemsize*action_data.size/(2**30))/1000)+"GB")
print("reward_data | "+str(reward_data.shape)+"  , "+str(int(1000*reward_data.itemsize*reward_data.size/(2**30))/1000)+"GB")

print("\n--------------------------------")
# Process the maps in chunks of chunk_size maps; the final chunk takes
# whatever rows remain.
for i in range(math.ceil(state_data.shape[0]/chunk_set_size)):
    if i != state_data.shape[0]//(chunk_size*N_set[0]*N_set[1]):
        reward_map = reward_data[chunk_set_size*i:chunk_set_size*(i+1)]
        action_maps = action_data[chunk_set_size*i:chunk_set_size*(i+1)]
    else:
        reward_map = reward_data[chunk_set_size*i:]
        action_maps = action_data[chunk_set_size*i:]

    print("Batch Shape : reward / action | "+str(reward_map.shape)+", "+str(action_maps.shape)+" --> ", end='')
    # (rows, C) -> (maps, N_set[0], N_set[1], C) -> (maps, N_set[1], N_set[0], C)
    reward_map = np.swapaxes(reward_map.reshape((-1, N_set[0], N_set[1], 1)), -2, -3)
    action_maps = np.swapaxes(action_maps.reshape((-1, N_set[0], N_set[1], 4)), -2, -3)
    reward_map, action_maps = map_modifier.operation(reward_map, action_maps, order=['extend_vert', 'extend_hori', 'blur'])
    print(str(reward_map.shape)+", "+str(action_maps.shape))

    # Number of cells per (possibly extended) map.
    extended_size = reward_map.shape[-2] * reward_map.shape[-3]
    # NOTE(review): concatenating inside the loop re-copies the accumulated
    # array on every iteration.
    new_action_data = np.concatenate((new_action_data, action_maps.reshape(-1, 4)), axis=0)
    new_reward_data = np.concatenate((new_reward_data, reward_map.reshape(-1, 1)), axis=0)
print("--------------------------------\n")

# Keep one state row per map and repeat it once for every cell of the
# modified map so states line up with the flattened action / reward rows.
state_data = np.repeat(state_data[::N_set[0]*N_set[1]], repeats=extended_size, axis=0)
# Drop the placeholder first rows.
action_data = np.delete(new_action_data, 0, axis=0)
reward_data = np.delete(new_reward_data, 0, axis=0)

del new_action_data, new_reward_data, reward_map, action_maps
del data_RL_preset, data_RL_preset0
gc.collect()

print("Data Shapes After Map Mpdifying")
print("--------------------------------")
print("state_data  | "+str(state_data.shape)+", "+str(int(1000*state_data.itemsize*state_data.size/(2**30))/1000)+"GB")
print("action_data | "+str(action_data.shape)+"  , "+str(int(1000*action_data.itemsize*action_data.size/(2**30))/1000)+"GB")
print("reward_data | "+str(reward_data.shape)+"  , "+str(int(1000*reward_data.itemsize*reward_data.size/(2**30))/1000)+"GB")

# Grid size after extension (equal to N_set when extends=(0, 0)).
ext_N_set = map_modifier.ext_N_set(N_set)


# Group the rows by map -- shape (maps, cells_per_map, features) -- so that
# data_split shuffles and splits whole maps instead of individual cells.
total_data = np.concatenate((state_data.reshape(-1, ext_N_set[0]*ext_N_set[1], state_data.shape[-1]),
                             action_data.reshape(-1, ext_N_set[0]*ext_N_set[1], action_data.shape[-1]),
                             reward_data.reshape(-1, ext_N_set[0]*ext_N_set[1], reward_data.shape[-1])), axis=-1)
# Number of state columns, needed to slice the table apart again.
state_shape = state_data.shape[1]
del state_data, action_data, reward_data
gc.collect()

train_data, test_data = data_split(total_data, train_ratio=0.85, shuffle=True, copy=True)
del total_data
gc.collect()

# Flatten back to one row per cell: [state | 4 action columns | reward].
train_data = train_data.reshape(-1, state_shape+4+1)
test_data = test_data.reshape(-1, state_shape+4+1)

train_state_data = train_data[:, :state_shape].copy()
train_action_data = train_data[:, state_shape:state_shape+4].copy()
train_reward_data = train_data[:, -1].reshape(-1, 1)
del train_data
gc.collect()

test_state_data = test_data[:, :state_shape].copy()
test_action_data = test_data[:, state_shape:state_shape+4].copy()
test_reward_data = test_data[:, -1].reshape(-1, 1)
del test_data
gc.collect()

#### Use data_RL_preset2 as Testset

In [None]:
# seed
seed = 722
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# hyperparameters
batch_size = 1024
learning_rate = 6e-5
max_epoch = 1000

# other parameters
N_set = (40, 20)
resol = 1

map_modifier = RewardMapModifier(extends=(0, 0), blur_coef=(3, 2)) #if you use CNN, do not use extend method
chunk_size = 256
chunk_set_size = chunk_size*N_set[0]*N_set[1]
online_dataset_path = "/content/gdrive/MyDrive/Asteroid RL dataset/online_dataset/"


data_len2 = int(data_RL_preset2[0, 0])
test_img_num = 10
test_img_idx_choice = np.random.randint(0, (data_len2-1)//800, test_img_num)
dataset_img_idx = np.full(data_len2, False)
for i in test_img_idx_choice:
    dataset_img_idx[i*800+1:(i+1)*800+1] = True

print("test_img_idx (in RL_preset_batch_2) :", test_img_idx_choice)
print("--------------------------------")
print("")

data_RL_preset = data_RL_preset0[1:, :]
test_img_data = data_RL_preset2[dataset_img_idx, :].copy()
del data_RL_preset2
gc.collect()

test_img_list = []
for i in range(test_img_num):
    test_img_list.append(test_img_data[i*resol*N_set[0]*N_set[1]:(i+1)*resol*N_set[0]*N_set[1], -1].reshape((N_set[0], N_set[1])).T)

for i in range(len(test_img_list)):
    test_img_list[i], _ = map_modifier.operation(np.expand_dims(test_img_list[i], axis=-1), None, order=['extend_vert', 'extend_hori', 'blur'])
    #test_img_list[i] = test_img_list[i][:, :, 0]
    gc.collect()


#cut = N_set[0]*N_set[1]*2093 + 1 #1040
#state_data = data_RL_preset[:cut, :-5]
#action_data = data_RL_preset[:cut, -5:-1]
#reward_data = data_RL_preset[:cut, -1:]
# Column layout of a data row: [state ... | 4 action values | 1 reward].
state_data = data_RL_preset[:, :-5]
action_data = data_RL_preset[:, -5:-1]
reward_data = data_RL_preset[:, -1:]

print("Data Shapes Before Map Modifying")
print("--------------------------------")
print("state_data  | "+str(state_data.shape)+", "+str(int(1000*state_data.itemsize*state_data.size/(2**30))/1000)+"GB")
print("action_data | "+str(action_data.shape)+"  , "+str(int(1000*action_data.itemsize*action_data.size/(2**30))/1000)+"GB")
print("reward_data | "+str(reward_data.shape)+"  , "+str(int(1000*reward_data.itemsize*reward_data.size/(2**30))/1000)+"GB")

# Modified chunks are collected in lists and concatenated once at the end
# (growing an array with np.concatenate inside the loop re-copies everything
# on every iteration).  The empty seed arrays keep the result dtype identical
# to what promotion against the input dtype would give.
action_chunks = [np.empty((0, 4), dtype=action_data.dtype)]
reward_chunks = [np.empty((0, 1), dtype=reward_data.dtype)]

print("\n--------------------------------")
for i in range(math.ceil(state_data.shape[0]/chunk_set_size)):
    # NumPy clamps a slice that runs past the end, so the last (shorter)
    # chunk needs no special case.
    reward_map = reward_data[chunk_set_size*i:chunk_set_size*(i+1)]
    action_maps = action_data[chunk_set_size*i:chunk_set_size*(i+1)]

    print("Batch Shape : reward / action | "+str(reward_map.shape)+", "+str(action_maps.shape)+" --> ", end='')
    # rows -> (n_maps, N_set[0], N_set[1], C), then swap the two grid axes
    reward_map = np.swapaxes(reward_map.reshape((-1, N_set[0], N_set[1], 1)), -2, -3)
    action_maps = np.swapaxes(action_maps.reshape((-1, N_set[0], N_set[1], 4)), -2, -3)
    reward_map, action_maps = map_modifier.operation(reward_map, action_maps, order=['extend_vert', 'extend_hori', 'blur'])
    print(str(reward_map.shape)+", "+str(action_maps.shape))

    # number of samples per map after modification
    extended_size = reward_map.shape[-2] * reward_map.shape[-3]
    action_chunks.append(action_maps.reshape(-1, 4))
    reward_chunks.append(reward_map.reshape(-1, 1))
print("--------------------------------\n")

# One state row per original map (every N_set[0]*N_set[1]-th row), repeated for
# each sample of the modified map.
state_data = np.repeat(state_data[::N_set[0]*N_set[1]], repeats=extended_size, axis=0)
action_data = np.concatenate(action_chunks, axis=0)
reward_data = np.concatenate(reward_chunks, axis=0)

del action_chunks, reward_chunks, reward_map, action_maps
del data_RL_preset, data_RL_preset0
gc.collect()

print("Data Shapes After Map Modifying")
print("--------------------------------")
print("state_data  | "+str(state_data.shape)+", "+str(int(1000*state_data.itemsize*state_data.size/(2**30))/1000)+"GB")
print("action_data | "+str(action_data.shape)+"  , "+str(int(1000*action_data.itemsize*action_data.size/(2**30))/1000)+"GB")
print("reward_data | "+str(reward_data.shape)+"  , "+str(int(1000*reward_data.itemsize*reward_data.size/(2**30))/1000)+"GB")

ext_N_set = map_modifier.ext_N_set(N_set)



# ------------------------------
# Shifted Data Generation
# ------------------------------
# Augmentation: every image is copied 20//shift_unit times and copy i gets its
# light-curve columns (800:900 of the state) and its reward map rolled by i
# shift steps.  Copy 0 stays unshifted.
shift_unit = 1 #1 is minimum unit --> most data generation (20 times)
shift_copies = 20//shift_unit

# Group the rows per image (800 rows each).  The state width is taken from
# state_data itself because `state_shape` is only assigned further down.
new_state_data = state_data.reshape(-1, 800, state_data.shape[1])
new_action_data = action_data.reshape(-1, 800, 4)
# NOTE(review): reward_data holds one value per row, so this reshape only works
# when the number of images is a multiple of 800; a per-image (40, 20) map was
# probably intended -- confirm before relying on this section.
new_reward_data = np.swapaxes(reward_data.reshape(-1, 800, 40, 20), 2, 3)

original_data_img_num = new_state_data.shape[0]

new_state_data = np.tile(new_state_data, (shift_copies, 1, 1))
new_action_data = np.tile(new_action_data, (shift_copies, 1, 1))
new_reward_data = np.tile(new_reward_data, (shift_copies, 1, 1, 1))
for i in range(1, shift_copies):
    copy_rows = slice(i*original_data_img_num, (i+1)*original_data_img_num)
    # The shift grows with the copy index; a constant shift would make all
    # copies identical.
    new_state_data[copy_rows, :, 800:900] = np.roll(new_state_data[copy_rows, :, 800:900], i*5*shift_unit, axis=-1)
    new_reward_data[copy_rows, :, :, :] = np.roll(new_reward_data[copy_rows, :, :, :], i*2*shift_unit, axis=-1)
# NOTE(review): new_state_data / new_action_data / new_reward_data are not used
# by the train/test split that follows in this cell.





# Pack state, action and reward side by side so the split shuffles them together.
total_data = np.concatenate((state_data, action_data, reward_data), axis=-1)
state_shape = state_data.shape[1]  # number of state columns, needed to unpack rows again
del state_data, action_data, reward_data
gc.collect()

# Only the train part is kept here; the test part is discarded immediately
# (the test arrays for this run are built from test_img_data in the next cell).
train_data, test_data = data_split(total_data, train_ratio=0.85, shuffle=True, copy=True)
del total_data, test_data
gc.collect()

# Row layout: [state (state_shape) | action (4) | reward (1)]
train_data = train_data.reshape(-1, state_shape+4+1)

train_state_data = train_data[:, :state_shape].copy()
train_action_data = train_data[:, state_shape:state_shape+4].copy()
train_reward_data = train_data[:, -1].reshape(-1, 1)
del train_data
gc.collect()

In [None]:
# Build the test arrays from the held-out images selected from data_RL_preset2.
test_data = test_img_data.copy()
print("\n"+("-"*20))
print("Test Data Shape : "+str(test_data.shape))

test_state_data = test_data[:, :state_shape].copy()
test_action_data = test_data[:, state_shape:state_shape+4].copy()
# Rewards go through the same map modification as the training rewards:
# rows -> (n_img, 40, 20, 1) maps, swap the grid axes, modify, swap back, flatten.
test_reward_data = np.swapaxes(test_data[:, -1].reshape(-1, 40, 20, 1), -2, -3)
test_reward_data, _ = map_modifier.operation(test_reward_data, None, order=['extend_vert', 'extend_hori', 'blur'])
# NOTE(review): states/actions are not modified here, so the row counts only
# stay aligned if the modifier leaves the map size unchanged -- confirm for
# non-zero `extends`.
test_reward_data = np.swapaxes(test_reward_data, -2, -3).reshape(-1, 1)

del test_data
gc.collect()

#### Convert to a PyTorch Dataset

In [None]:
# Model input is [state | action]; the target is the reward column.
train_dataset = Dataset(np.concatenate((train_state_data, train_action_data), axis=1), train_reward_data)
test_dataset = Dataset(np.concatenate((test_state_data, test_action_data), axis=1), test_reward_data)

train_dataloader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Test batches are 800 rows and unshuffled (800 is also the rows-per-image count used above).
test_dataloader = data.DataLoader(dataset=test_dataset, batch_size=800, shuffle=False)

# The NumPy sources are no longer referenced by name after this point.
del train_state_data, train_action_data, train_reward_data
del test_state_data, test_action_data, test_reward_data
gc.collect()

### Data Preprocessing with Large Size Dataset

In [None]:
class Dataset(data.Dataset):
    """Dataset whose inputs live in per-chunk ``.npz`` files on disk.

    Targets (``y_tensor``) are kept in memory; the input row for a sample is
    read from ``<online_dataset_path><usage>_chunk_<k>.npz`` (array ``'x'``)
    every time the sample is requested.

    Args:
        x_tensor: list of chunk file paths. Stored as ``chunk_list`` but not
            used for lookup; file names are rebuilt from ``usage`` and the
            chunk index.
        y_tensor: NumPy array of targets, indexed by sample.
        chunk_size: number of maps per chunk file.
        online_dataset_path: directory prefix of the chunk files (must end
            with a path separator because it is joined by concatenation).
        usage: ``'train'`` or ``'test'``; selects the file prefix and the
            per-chunk sample count.
        N_set: grid size of one (modified) map.
        train_ratio: fraction of each chunk that went to the train split.
        mode: accepted for callers that pass ``mode='online_dataset'``; it is
            stored and otherwise unused.

    NOTE(review): the index -> (chunk, row) mapping assumes every chunk file
    except possibly the last holds exactly ``chunk_len`` rows (see
    ``__getitem__``) -- confirm this matches how ``data_split`` sizes the splits.
    """

    def __init__(self, x_tensor, y_tensor, chunk_size=None, online_dataset_path=None, usage=None, N_set=(40, 20), train_ratio=0.7, mode=None):
        super(Dataset, self).__init__()

        # x_tensor is used as (ndarray)data path list
        # it includes y_data --> delete after loading

        self.chunk_list = x_tensor
        self.y = y_tensor

        self.chunk_size = chunk_size
        self.online_dataset_path = online_dataset_path
        self.usage = usage
        self.N_set = N_set
        self.train_ratio = train_ratio
        self.mode = mode

    def __getitem__(self, index):
        # Rows per chunk file: the train share of a chunk, or the remainder for 'test'.
        chunk_len = int(self.train_ratio*self.chunk_size*self.N_set[0]*self.N_set[1])
        if self.usage == 'test':
            chunk_len = self.chunk_size*self.N_set[0]*self.N_set[1] - chunk_len
        chunk_idx = index//chunk_len

        # The context manager closes the archive deterministically.  The row is
        # copied into its own float32 array: a plain row view would keep the
        # whole decompressed chunk alive for as long as the sample exists.
        with np.load(self.online_dataset_path+self.usage+"_chunk_"+str(chunk_idx)+".npz") as chunk:
            x = np.array(chunk['x'][index%chunk_len], dtype=np.float32)
        x = torch.from_numpy(x)
        y = torch.from_numpy(self.y[index]).float()

        return x, y

    def __len__(self): return self.y.shape[0]


def generate_dataset(datasets, online_dataset_path, map_modifier, chunk_size, starting_idx=0, train_ratio=0.7, shuffle=True, N_set=(40, 20)):
    """Write the data to disk as per-chunk train/test ``.npz`` files.

    The rows are processed ``chunk_size`` maps at a time: rewards and actions
    are reshaped to maps, passed through ``map_modifier.operation``, flattened
    again, joined with the matching state rows, split with ``data_split`` and
    saved as ``train_chunk_<k>.npz`` / ``test_chunk_<k>.npz`` (array ``'x'`` =
    state and action columns).  Rewards are not written; they are returned.

    Args:
        datasets: ``(state_data, action_data, reward_data)`` NumPy arrays with
            one row per sample, ``N_set[0]*N_set[1]`` consecutive rows per map.
        online_dataset_path: directory prefix for the output files (joined by
            string concatenation, so it must end with a path separator).
        map_modifier: object providing ``operation(reward_map, action_maps, order=...)``.
        chunk_size: number of maps per chunk file.
        starting_idx: offset added to the chunk number in the file names.
        train_ratio: forwarded to ``data_split``.
        shuffle: forwarded to ``data_split``.
        N_set: grid size of one map.

    Returns:
        ``(train_chunk_list, train_y, test_chunk_list, test_y)``: the written
        file paths and the reward columns of both splits stacked over all
        chunks (empty ``(0, 1)`` arrays when there is no input row).
    """
    # Setting
    state_data = datasets[0]
    action_data = datasets[1]
    reward_data = datasets[2]

    map_size = N_set[0]*N_set[1]
    chunk_set_size = chunk_size*map_size
    train_chunk_list = []
    test_chunk_list = []
    # Per-chunk rewards are collected and concatenated once after the loop.
    # The empty seed keeps shape/dtype defined even without any chunk and
    # gives the same dtype promotion as stacking onto a reward_data row.
    train_y_parts = [np.empty((0, 1), dtype=reward_data.dtype)]
    test_y_parts = [np.empty((0, 1), dtype=reward_data.dtype)]

    for i in range(math.ceil(state_data.shape[0]/chunk_set_size)):
        # Map Modifying
        # NumPy clamps slices that run past the end, so the final (shorter)
        # chunk is handled by the same expressions.
        start = chunk_set_size*i
        stop = chunk_set_size*(i+1)
        reward_map = reward_data[start:stop]
        action_maps = action_data[start:stop]

        print("\n--------------------------------")
        print("Data Shape : reward / action | "+str(reward_map.shape)+", "+str(action_maps.shape)+" --> ", end='')

        reward_map = np.swapaxes(reward_map.reshape((-1, N_set[0], N_set[1], 1)), -2, -3)
        action_maps = np.swapaxes(action_maps.reshape((-1, N_set[0], N_set[1], 4)), -2, -3)
        reward_map, action_maps = map_modifier.operation(reward_map, action_maps, order=['extend_hori', 'extend_vert', 'blur'])

        print(str(reward_map.shape)+", "+str(action_maps.shape))

        # samples per map after modification
        extended_size = reward_map.shape[-2] * reward_map.shape[-3]
        chunk_reward_data = reward_map.reshape(-1, 1)
        chunk_action_data = action_maps.reshape(-1, 4)
        # one state row per original map, repeated for every sample of the modified map
        chunk_state_data = np.repeat(state_data[start:stop:map_size], repeats=extended_size, axis=0)

        del reward_map, action_maps
        gc.collect()


        # Generate Dataset for working on Pytorch
        total_data = np.concatenate((chunk_state_data, chunk_action_data, chunk_reward_data), axis=1)
        state_shape = chunk_state_data.shape[1]
        del chunk_state_data, chunk_action_data, chunk_reward_data
        gc.collect()

        chunk_train_data, chunk_test_data = data_split(total_data, train_ratio=train_ratio, shuffle=shuffle)

        chunk_train_state_data = chunk_train_data[:, :state_shape]
        chunk_train_action_data = chunk_train_data[:, state_shape:state_shape+4]
        chunk_train_reward_data = chunk_train_data[:, -1].reshape(-1, 1)

        chunk_test_state_data = chunk_test_data[:, :state_shape]
        chunk_test_action_data = chunk_test_data[:, state_shape:state_shape+4]
        chunk_test_reward_data = chunk_test_data[:, -1].reshape(-1, 1)

        chunk_id = str(i+starting_idx)
        train_chunk_path = online_dataset_path+"train_chunk_"+chunk_id+".npz"
        test_chunk_path = online_dataset_path+"test_chunk_"+chunk_id+".npz"

        np.savez_compressed(train_chunk_path,
                            x = np.concatenate((chunk_train_state_data, chunk_train_action_data), axis=1))
                            #y = chunk_train_reward_data)
        np.savez_compressed(test_chunk_path,
                            x = np.concatenate((chunk_test_state_data, chunk_test_action_data), axis=1))
                            #y = chunk_test_reward_data)

        # Reported sizes are the uncompressed in-memory sizes (including rewards).
        print("File Generated : train_chunk_"+chunk_id, end='')
        print(" | Size : "+str(int(1000*chunk_train_state_data.itemsize*(chunk_train_state_data.size+chunk_train_action_data.size+chunk_train_reward_data.size)/(2**30))/1000)+"GB")
        print("File Generated : test_chunk_"+chunk_id, end='')
        print(" | Size : "+str(int(1000*chunk_test_state_data.itemsize*(chunk_test_state_data.size+chunk_test_action_data.size+chunk_test_reward_data.size)/(2**30))/1000)+"GB")

        # Copy the reward columns so the large split arrays can be released below.
        train_y_parts.append(chunk_train_reward_data.copy())
        test_y_parts.append(chunk_test_reward_data.copy())

        del total_data, chunk_train_data, chunk_test_data
        del chunk_train_state_data, chunk_train_action_data, chunk_train_reward_data
        del chunk_test_state_data, chunk_test_action_data, chunk_test_reward_data
        gc.collect()

        train_chunk_list.append(train_chunk_path)
        test_chunk_list.append(test_chunk_path)

    train_y = np.concatenate(train_y_parts, axis=0)
    test_y = np.concatenate(test_y_parts, axis=0)

    print("\n--------------------------------")
    print("File Generated : train_y_from"+str(starting_idx), end='')
    print(" | Size : "+str(int(1000*train_y.itemsize*train_y.size/(2**30))/1000)+"GB")
    print("File Generated : test_y_from"+str(starting_idx), end='')
    print(" | Size : "+str(int(1000*test_y.itemsize*test_y.size/(2**30))/1000)+"GB")

    print("\n[Generation Finished]")
    return train_chunk_list, train_y, test_chunk_list, test_y

In [None]:
# Configuration for the on-disk ("online") dataset workflow.

# seed: fix all three RNGs (torch, NumPy, Python) so the run is repeatable
seed = 722
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# hyperparameters
batch_size = 256
learning_rate = 6e-5
max_epoch = 1000

# other parameters
N_set = (40, 20)
resol = 1

# info about generating online datasets
# NOTE(review): blur_coef is a scalar 0 here but a 2-tuple in the other cells --
# confirm RewardMapModifier accepts both forms.
map_modifier = RewardMapModifier(extends=(0, 1), blur_coef=0)
chunk_size = 256
online_dataset_path = "/content/gdrive/MyDrive/Asteroid RL dataset/online_dataset/"
starting_idx = 0  # offset for the chunk numbers in the generated file names

In [None]:
# Data Generation
# Scale the reward column of all data rows in place (row 0 is the header row).
data_RL_preset0[1:, -1:] = scale_reward(data_RL_preset0[1:, -1:])

# Header fields: [0, 0] is read as the data length, [0, 1] as the number of
# trailing rows that hold the test images.
# NOTE(review): if [0, 1] is 0, `-int(...)` is 0 and the slices below select
# the whole array / nothing -- confirm the header always carries test rows.
data_len = data_RL_preset0[0, 0]
test_img_data = data_RL_preset0[-int(data_RL_preset0[0, 1]):, :]
test_img_num = int(data_RL_preset0[0, 1]/(resol*N_set[0]*N_set[1]))
test_img_list = []
for i in range(test_img_num):
    # reward column of image i as an (N_set[1], N_set[0]) map
    test_img_list.append(test_img_data[i*resol*N_set[0]*N_set[1]:(i+1)*resol*N_set[0]*N_set[1], -1].reshape((N_set[0], N_set[1])).T)

for i in range(len(test_img_list)):
    # same modifier/order as generate_dataset applies to the training rewards
    test_img_list[i], _ = map_modifier.operation(np.expand_dims(test_img_list[i], axis=-1), None, order=['extend_hori', 'extend_vert', 'blur'])
    test_img_list[i] = test_img_list[i][:, :, 0]  # drop the trailing channel axis
    gc.collect()


# Training rows: everything between the header row and the test-image rows.
data_RL_preset = data_RL_preset0[1:-int(data_RL_preset0[0, 1]), :]
state_data = data_RL_preset[:, :-5]
action_data = data_RL_preset[:, -5:-1]
reward_data = data_RL_preset[:, -1:]

# Writes train/test chunk files to online_dataset_path and returns the rewards.
train_data_list, train_y, test_data_list, test_y = generate_dataset(datasets = (state_data, action_data, reward_data),
                                                                    online_dataset_path = online_dataset_path,
                                                                    map_modifier = map_modifier,
                                                                    chunk_size = chunk_size,
                                                                    starting_idx = starting_idx,
                                                                    train_ratio = 0.7,
                                                                    shuffle = True,
                                                                    N_set = N_set)
del data_RL_preset, data_RL_preset0
del state_data, action_data, reward_data
gc.collect()

# The targets are kept in separate .npy files next to the chunk files.
np.save(online_dataset_path+"train_y_from"+str(starting_idx)+".npy", train_y)
np.save(online_dataset_path+"test_y_from"+str(starting_idx)+".npy", test_y)

# The datasets index samples by the modified map size, and must use the same
# train_ratio that generate_dataset was called with.
ext_N_set = map_modifier.ext_N_set(N_set)
train_dataset = Dataset(train_data_list, train_y, chunk_size=chunk_size, online_dataset_path=online_dataset_path, usage='train', N_set=ext_N_set, train_ratio=0.7)
test_dataset = Dataset(test_data_list, test_y, chunk_size=chunk_size, online_dataset_path=online_dataset_path, usage='test', N_set=ext_N_set, train_ratio=0.7)

train_dataloader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Rebuild the datasets from chunk files that already exist on disk.
# Requires train_y / test_y to be in memory already.
# NOTE(review): os.listdir returns names in arbitrary order; sort the lists if
# their order is ever relied upon.
online_data_list = os.listdir(online_dataset_path)
train_data_list = [file for file in online_data_list if 'train_chunk' in file]
test_data_list = [file for file in online_data_list if 'test_chunk' in file]
ext_N_set = map_modifier.ext_N_set(N_set)

# NOTE(review): `mode='online_dataset'` is not a parameter of the chunked
# Dataset.__init__ defined in the "Large Size Dataset" cell; with that class
# active these calls raise TypeError -- confirm which Dataset definition is
# meant to be in scope here.
train_dataset = Dataset(train_data_list, train_y, mode='online_dataset', chunk_size=chunk_size, online_dataset_path=online_dataset_path, usage='train', N_set=ext_N_set, train_ratio=0.7)
test_dataset = Dataset(test_data_list, test_y, mode='online_dataset', chunk_size=chunk_size, online_dataset_path=online_dataset_path, usage='test', N_set=ext_N_set, train_ratio=0.7)

train_dataloader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Scratch cell: inspect the rewards and overwrite header field [0, 2].
# NOTE(review): the data-generation cell deletes reward_data and
# data_RL_preset0, so this only runs before that cell (or after reloading).
print(reward_data)
print(np.max(reward_data))
data_RL_preset0[0, 2] = 1

In [None]:
# List the chunk files currently present in the online dataset directory.
online_data_list = os.listdir(online_dataset_path)
train_data_list = [name for name in online_data_list if 'train_chunk' in name]
test_data_list = [name for name in online_data_list if 'test_chunk' in name]
print(train_data_list)
print(test_data_list)
print()




## **Training Part**

In [None]:
# Train the regression model from scratch; every 10 epochs plot the loss curves,
# render the test images and write a checkpoint.

# hyperparameters blur
learning_rate = 8e-5
max_epoch = 200
print(torch.__file__)

model = QValueNet_CNN(input_dim=910, hidden_dim=1024, activation=nn.ELU, dropout=0.15).to(device)
summary(model, (1, model.input_dim))

optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
#loss_fn = CustomLoss(relative=False, percent=False)
#loss_fn = CustomLoss1()
loss_fn = CustomLoss2()

# per-epoch histories, appended to by train_loop / test_loop
train_loss = []
test_loss = []

es = EarlyStopping(patience=2000, delta=0.1)
for epoch in tqdm(range(max_epoch)):
    #print("EPOCH "+str(epoch)+" TRAINING...")
    train_loop(train_dataloader, model, loss_fn, optimizer, train_loss, es)
    #print("EPOCH "+str(epoch)+" TESTING...")
    test_loop(test_dataloader, model, loss_fn, test_loss, epoch)
    #print("")

    if es.early_stop:
        print("EarlyStop Triggered : Bestscore = {:7.4g}".format(es.best_score))
        break

    # every 10th epoch (epoch is 0-based, so this fires after epochs 10, 20, ...)
    if (epoch+1)%10 == 0 and epoch != 0:
        plt.figure(figsize=(8, 6), dpi=300)
        # the first two entries are left out of the plot
        plt.plot(train_loss[2:], label='train_loss')
        plt.plot(test_loss[2:], label='test_loss')
        plt.legend()
        plt.title("Train/Test Loss (MSE)")
        plt.show()

        for i in range(test_img_num):
            #if (i > 3 and i < 15) or i > 19:
            #    continue
            test_img_show(i, loss_fn)

        # checkpoint named after the number of completed epochs; 'epoch' stores
        # that same count (epoch+1)
        PATH = str(epoch+1)+"model.pt"
        torch.save({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'test_loss': test_loss,
        }, PATH)

    # label printed ahead of the next epoch's output
    print("[epochs:{:2}]".format(epoch+2), end='')

print("DONE")

plt.figure(dpi=300)
plt.plot(train_loss[2:], label='train_loss')
plt.plot(test_loss[2:], label='test_loss')
plt.legend()
plt.title("Train/Test Loss (MSE)")
plt.show()

Save the result of the run without reward scaling.

In [None]:
# Manually checkpoint the current model/optimizer state and loss histories.
# save_epoch is entered by hand; it names the file and is stored as 'epoch'.
save_epoch = 179
PATH = str(save_epoch)+"model.pt"

torch.save({
    'epoch': save_epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'train_loss': train_loss,
    'test_loss': test_loss,
}, PATH)

Continue training from a saved checkpoint.

In [None]:
# Resume training from the checkpoint "<save_epoch>model.pt".
save_epoch = 162
PATH = str(save_epoch)+"model.pt"

# hyperparameters
learning_rate = 8e-5
max_epoch = 400
print(torch.__file__)

# The architecture must match the one the checkpoint was saved from.
model = QValueNet_CNN(input_dim=910, hidden_dim=1024, activation=nn.ELU, dropout=0.15).to(device)
summary(model, (1, model.input_dim))

optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
loss_fn = CustomLoss(relative=False, percent=False)

# map_location lets a checkpoint written on a GPU runtime be restored on
# whatever device this session uses (e.g. a CPU-only runtime).
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch0 = checkpoint['epoch']
train_loss = checkpoint['train_loss']
test_loss = checkpoint['test_loss']

es = EarlyStopping(patience=2000, delta=0.1)
# The checkpoints store the number of COMPLETED epochs, so the next 0-based
# epoch index is epoch0 itself (starting at epoch0+1 would skip one epoch).
for epoch in tqdm(range(epoch0, max_epoch)):
    #print("EPOCH "+str(epoch)+" TRAINING...")
    train_loop(train_dataloader, model, loss_fn, optimizer, train_loss, es)
    #print("EPOCH "+str(epoch)+" TESTING...")
    test_loop(test_dataloader, model, loss_fn, test_loss, epoch)
    #print("")

    if es.early_stop:
        print("EarlyStop Triggered : Bestscore = {:7.4g}".format(es.best_score))
        break

    # every 10th epoch: plot the loss curves and render the test images
    if (epoch+1)%10 == 0 and epoch != 0:
        plt.figure(figsize=(8, 6), dpi=300)
        plt.plot(train_loss[2:], label='train_loss')
        plt.plot(test_loss[2:], label='test_loss')
        plt.legend()
        plt.title("Train/Test Loss (MSE)")
        plt.show()

        for i in range(test_img_num):
            #if (i > 3 and i < 15) or i > 19:
            #    continue
            # NOTE(review): the from-scratch training cell calls
            # test_img_show(i, loss_fn); confirm which signature is in scope.
            test_img_show(i)

    print("[epochs:{:2}]".format(epoch+2), end='')

print("DONE")

plt.figure(dpi=300)
plt.plot(train_loss[2:], label='train_loss')
plt.plot(test_loss[2:], label='test_loss')
plt.legend()
plt.title("Train/Test Loss (MSE)")
plt.show()

In [None]:
# Render every held-out test image with the current model.
for i in range(test_img_num):
    test_img_show(i)

# **Training with Classification Model**

In [None]:
class QValueClassifierNet_CNN(nn.Module):
    """Multi-branch classifier over a flat input vector.

    ``forward`` slices the last axis of the input into four parts:
    ``[:800]`` (reshaped to a 1x40x20 map), ``[800:900]`` (a 1x100 sequence),
    ``[900:906]`` (6 values) and ``[906:]`` (fed to a 4-input linear layer).
    Each part has its own encoder; the encoded features are concatenated and
    passed to a fully connected head producing ``output_dim`` logits.

    Args:
        input_dim: stored on the instance; the layer sizes are fixed and do
            not depend on it.
        hidden_dim: stored on the instance; not used for layer sizes.
        output_dim: number of output logits.
        activation: activation *class*, instantiated once per use.
        dropout: dropout probability used in the head.
    """

    def __init__(self, input_dim, hidden_dim=512, output_dim=3, activation=nn.ReLU, dropout=0.3):
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.activation = activation
        act = self.activation

        # Map branch. The convolutions have no built-in padding: forward()
        # pads with r_padding first, so each convolution keeps the spatial
        # size and only the pooling halves it.
        map_stage1 = [
            nn.Conv2d(1, 8, 9),
            act(),
            nn.MaxPool2d(2),
        ]
        self.r_arr_encoder1 = nn.Sequential(*map_stage1)

        map_stage2 = [
            nn.Conv2d(8, 16, 5),
            act(),
            nn.Flatten(),            # 16 channels x 200 positions = 3200
            nn.Linear(3200, 1024),
        ]
        self.r_arr_encoder2 = nn.Sequential(*map_stage2)

        # Branch for the 6 values at [900:906]
        self.info_encoder = nn.Sequential(
            nn.Linear(6, 32), act(), nn.Linear(32, 64),
        )

        # Branch for the values at [906:]
        self.rl_encoder = nn.Sequential(
            nn.Linear(4, 32), act(), nn.Linear(32, 64),
        )

        # Sequence branch (length 100); padded with lc_padding in forward()
        seq_stage1 = [
            nn.Conv1d(1, 16, kernel_size=15),
            act(),
            nn.MaxPool1d(2),         # 100 -> 50
        ]
        self.lc_encoder1 = nn.Sequential(*seq_stage1)

        seq_stage2 = [
            nn.Conv1d(16, 32, kernel_size=9),
            act(),
            nn.Flatten(),            # 32 channels x 50
            nn.Linear(32*50, 256),
        ]
        self.lc_encoder2 = nn.Sequential(*seq_stage2)

        # Fusion head over the concatenated branch features
        fused_dim = 1024 + 256 + 64 + 64
        self.head = nn.Sequential(
            nn.Linear(fused_dim, 1024),
            act(),
            nn.Dropout(dropout),

            nn.Linear(1024, 256),
            act(),
            nn.Dropout(dropout),

            nn.Linear(256, self.output_dim),
        )

    def r_padding(self, x, pad=(1, 1)):
        """Pad a (N, C, H, W) tensor by ``pad[0]`` rows and ``pad[1]`` columns per side.

        Left/right borders are filled from the adjacent ``pad[1]`` columns of
        the (still zero-bordered) canvas, flipped along the row axis and
        rolled by 20 along the last axis.  Top/bottom borders wrap around:
        the top gets the last rows of ``x``, the bottom the first rows.
        The four corners stay zero.

        NOTE(review): the roll acts on the ``pad[1]``-wide last axis, i.e. the
        shift of 20 is effectively taken modulo ``pad[1]`` -- confirm that
        this is the intended wrap.
        """
        batch, channels, height, width = x.shape
        ph = pad[0]
        pw = pad[1]

        canvas = x.new_zeros((batch, channels, height + 2*ph, width + 2*pw))
        canvas[:, :, ph:ph+height, pw:pw+width] = x

        left_src = canvas[:, :, :, pw:pw+pw]
        canvas[:, :, :, :pw] = left_src.flip(-2).roll(20, -1)
        right_src = canvas[:, :, :, -pw-pw:-pw]
        canvas[:, :, :, -pw:] = right_src.flip(-2).roll(20, -1)

        canvas[:, :, :ph, pw:pw+width] = x[:, :, -ph:, :]
        canvas[:, :, -ph:, pw:pw+width] = x[:, :, :ph, :]
        return canvas

    def lc_padding(self, x, pad=1):
        """Circularly pad a (N, C, W) tensor by ``pad`` elements on both ends."""
        batch, channels, length = x.shape

        canvas = x.new_zeros((batch, channels, length + 2*pad))
        canvas[:, :, pad:pad+length] = x
        canvas[:, :, :pad] = x[:, :, -pad:]     # head <- tail of x
        canvas[:, :, -pad:] = x[:, :, :pad]     # tail <- head of x
        return canvas

    def forward(self, X):
        """Return the ``output_dim`` logits for a batch of flat input vectors."""
        n_batch = X.shape[0]
        r_arr = X[..., :800].reshape((n_batch, 1, 40, 20))
        lc_arr = X[..., 800:900].reshape((n_batch, 1, 100))
        lc_info = X[..., 900:906]
        rl_info = X[..., 906:]

        # map branch: transpose, pad, conv+pool, pad again, conv+linear
        r_arr_feat = r_arr.transpose(-2, -1)
        r_arr_feat = self.r_arr_encoder1(self.r_padding(r_arr_feat, pad=(4, 4)))
        r_arr_feat = self.r_arr_encoder2(self.r_padding(r_arr_feat, pad=(2, 2)))

        # sequence branch: pad widths are half the kernel sizes (15 -> 7, 9 -> 4)
        lc_feat = self.lc_encoder1(self.lc_padding(lc_arr, pad=7))
        lc_feat = self.lc_encoder2(self.lc_padding(lc_feat, pad=4))

        # squeeze(1) removes a singleton axis 1 if the input carried one
        info_feat = self.info_encoder(lc_info).squeeze(1)
        rl_feat = self.rl_encoder(rl_info).squeeze(1)

        fusion_feat = torch.cat((r_arr_feat, lc_feat, info_feat, rl_feat), dim=1)
        return self.head(fusion_feat)


def train_loop(dataloader, model, loss_fn, optimizer, train_loss, es:EarlyStopping):
    """Run one training epoch of the classifier.

    Only samples whose two leading action values (input columns -4 and -3)
    lie in [0, 1) contribute to the loss; rows outside that range (the
    "extended" samples) are masked out.

    Args:
        dataloader: yields ``(X, y)`` batches; ``y`` holds class indices with
            a trailing singleton axis.
        model: network returning class logits; moved/used on the global ``device``.
        loss_fn: classification loss called as ``loss_fn(logits, targets)``.
            The epoch average assumes it returns the mean over its inputs.
        optimizer: optimizer stepping ``model``'s parameters.
        train_loss: list; the epoch loss is appended to it.
        es: early-stopping callable, invoked with the epoch loss.

    Raises:
        ValueError: if no batch contained a non-extended sample.
    """
    epoch_loss = 0
    n_train = 0

    model.train()
    #with torch.autograd.detect_anomaly(True):
    for X_train, y_train in dataloader:
        X_train = X_train.to(device)
        y_train = y_train.squeeze(dim=-1).long().to(device)

        non_extended = torch.logical_and((X_train[:, -4] >= 0), (X_train[:, -4] < 1))
        non_extended = torch.logical_and(non_extended, (X_train[:, -3] >= 0))
        non_extended = torch.logical_and(non_extended, (X_train[:, -3] < 1))

        # A batch without any valid sample would give a NaN mean loss and a
        # NaN gradient step, so it is skipped entirely.
        n_valid = int(non_extended.sum().item())
        if n_valid == 0:
            continue

        pred_logit = model(X_train)
        loss = loss_fn(pred_logit[non_extended], y_train[non_extended])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # weight by the number of samples that actually produced this loss
        epoch_loss += loss.item()*n_valid
        n_train += n_valid

    if n_train == 0:
        raise ValueError("train_loop: dataloader produced no non-extended samples")

    epoch_loss /= n_train
    train_loss.append(epoch_loss)

    es(epoch_loss)
    #print("train_loss : {:9.4g}".format(epoch_loss), end=' ')

def test_loop(dataloader, model, loss_fn, test_loss, epoch):
    total = 0
    correct = 0
    conf_mat = np.zeros((class_num, class_num)) # confusion matrix counts (index : [y0, y_pred])

    model.eval()
    with torch.no_grad():
        for X_test, y_test in dataloader:
            X_test = X_test.to(device)
            y_test = y_test.squeeze(dim=-1).type(torch.LongTensor)
            pred_logit = model(X_test)
            _, pred = torch.max(pred_logit, 1)

            non_extended = torch.logical_and((X_test[:, -4] >= 0), (X_test[:, -4] < 1))
            non_extended = torch.logical_and(non_extended, (X_test[:, -3] >= 0))
            non_extended = torch.logical_and(non_extended, (X_test[:, -3] < 1)).cpu()
            for i in range(class_num):
                for j in range(class_num):
                    conf_mat[i, j] += (torch.logical_and((y_test[non_extended] == i), (pred.cpu()[non_extended] == j))).sum().item()

    total += np.sum(conf_mat)
    correct += np.trace(conf_mat)
    test_loss.append(correct/total)

    print("train_loss : {:9.4g}".format(train_loss[-1]), end=' ')
    print("| test_score(%) : {:9.4g}".format(100*correct/total), end=' ')
    print("\n", end=' ')

    return conf_mat

# Data Processing : into classification data
def classify_reward(data, split_ref=(-2, 2)):
    """Convert continuous rewards into class indices.

    Args:
        data: array-like of reward values.
        split_ref: increasing sequence of class boundaries.  A value ``v``
            gets class ``k`` when ``split_ref[k-1] <= v < split_ref[k]``
            (class 0 below the first boundary, the last class from the last
            boundary upwards).  The default is an immutable tuple so it
            cannot be altered between calls.

    Returns:
        ``(class_idx, class_num)``: integer array with the shape of ``data``
        and the number of classes (``len(split_ref) + 1``).
    """
    class_num = len(split_ref)+1
    class_idx = np.digitize(data, split_ref)

    return class_idx, class_num

# show training result
def train_res_show():
    """Plot the training curves and the row-normalised confusion matrix.

    Reads the globals ``train_loss``, ``test_loss``, ``conf_mat`` and
    ``class_num``.  Left panel: train loss and test score on twin y-axes
    (the first two entries of each history are left out).  Right panel:
    ``conf_mat`` with every row scaled to percent and annotated per cell.
    """
    fig = plt.figure(figsize=(14.5, 4))
    loss_ax = fig.add_subplot(1, 2, 1)
    conf_ax = fig.add_subplot(1, 2, 2)

    # --- left: histories on two y-axes ---
    loss_ax.plot(train_loss[2:], label='train_loss', color='blue')
    score_ax = loss_ax.twinx()
    score_ax.plot(test_loss[2:], label='test_score', color='orange')
    loss_ax.legend()
    score_ax.legend()
    loss_ax.set_title("Train/Test Loss (MSE)")

    # --- right: confusion matrix in percent of each actual-class row ---
    conf_per = np.zeros((class_num, class_num))
    for row in range(class_num):
        row_counts = conf_mat[row, :]
        conf_per[row, :] = 100*row_counts/np.sum(row_counts)

    im = conf_ax.imshow(conf_per, vmin=0, vmax=100)
    conf_ax.set_title("Confusion Matrix")
    conf_ax.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)
    conf_ax.xaxis.set_label_position('top')
    conf_ax.set_xlabel("Predicted Class")
    conf_ax.set_ylabel("Actual Class")
    class_ticks = np.arange(class_num)
    conf_ax.set_xticks(class_ticks)
    conf_ax.set_yticks(class_ticks)
    conf_ax.set_aspect(1)
    plt.colorbar(im, ax=conf_ax, fraction=0.036, pad=0.04)

    # cell labels, truncated to two decimals
    for row, col in np.ndindex(class_num, class_num):
        label = str(int(100*conf_per[row, col])/100)+"%"
        conf_ax.text(col, row, label, horizontalalignment='center', verticalalignment='center', color='white')

    plt.show()

# show test image examples
def test_img_show(i_img, test_conf):
    """Show a test image next to the model's class map and update ``test_conf``.

    The model is queried once per grid cell: the state is the first 906
    columns of the image's first data row, the action is
    ``[phi, theta, 0.1, 0.1]`` with ``phi``/``theta`` the normalised grid
    position.  The arg-max class of each query forms the predicted map.

    Args:
        i_img: index into the globals ``test_img_list`` / ``test_img_data``.
        test_conf: ``(class_num, class_num)`` array indexed
            ``[actual, predicted]``; updated in place.

    Returns:
        ``test_conf`` with this image's counts added and every non-empty row
        rescaled to percent.

    NOTE(review): rows are rescaled to percent on every call, so when the
    same array is passed for several images, percentages from earlier images
    are mixed with raw counts of the current one -- confirm this weighting is
    intended.
    """
    fig = plt.figure(figsize=(16, 8))
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    ax1.clear()
    im1 = ax1.imshow(test_img_list[i_img], vmin=0, vmax=class_num-1)
    ax1.set_title("TEST_IMAGE_"+str(i_img))
    plt.colorbar(im1, ax=ax1, fraction=0.026, pad=0.04)

    class_map_temp = np.zeros((resol*N_set[0], resol*N_set[1]))
    model.eval()
    with torch.no_grad():
        # the state is the same for every cell of the image
        state = test_img_data[i_img*resol*N_set[0]*N_set[1], :906]
        for idx in range(N_set[0]*N_set[1]*resol*resol):
            i = idx//int(resol*N_set[1])
            j = idx%int(resol*N_set[1])
            phi_action = (i/(resol*N_set[0]))%1
            theta_action = (j/(resol*N_set[1]))%1

            actions = np.array([phi_action, theta_action, 0.1, 0.1])

            model_input = torch.unsqueeze(torch.tensor(np.concatenate((state, actions))).float().to(device), dim=0)
            score = model(model_input)
            class_map_temp[i, j] = np.argmax(score.cpu().numpy())

    ax2.clear()
    im2 = ax2.imshow(class_map_temp.T, vmin=0, vmax=class_num-1)
    ax2.set_title("MODEL_OUTPUT_"+str(i_img))
    plt.colorbar(im2, ax=ax2, fraction=0.026, pad=0.04)

    plt.show()


    # Width of the border added by the map modifier on each side.
    extent = [int((ext_N_set[0]-N_set[0])/2), int((ext_N_set[1]-N_set[1])/2)]

    # Crop the border with explicit end indices: a[e:-e] would be EMPTY for e == 0.
    full_map = test_img_list[i_img]
    actual_map = full_map[extent[1]:full_map.shape[0]-extent[1], extent[0]:full_map.shape[1]-extent[0], 0]
    predicted_map = class_map_temp.T

    for i in range(class_num):
        for j in range(class_num):
            test_conf[i, j] += (np.logical_and((actual_map == i), (predicted_map == j))).sum().item()
        # Leave empty rows untouched: dividing by zero would store NaN, which
        # could never be recovered by later calls.
        row_total = np.sum(test_conf[i, :])
        if row_total > 0:
            test_conf[i, :] = 100*test_conf[i, :]/row_total
    return test_conf

def test_conf_show(test_conf):
    """Draw ``test_conf`` as an annotated heat map.

    ``test_conf`` is plotted as given on a fixed 0..100 colour scale with
    actual classes on the y-axis and predicted classes on the x-axis (labels
    on top).  Each cell is labelled with its value truncated to two decimals
    followed by "%".  Uses the global ``class_num`` for ticks and labels.
    """
    fig = plt.figure(figsize=(7, 7))
    conf_ax = fig.add_subplot(1, 1, 1)

    im = conf_ax.imshow(test_conf, vmin=0, vmax=100)
    conf_ax.set_title("Confusion Matrix")

    # put the predicted-class axis on top of the matrix
    conf_ax.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)
    conf_ax.xaxis.set_label_position('top')
    conf_ax.set_xlabel("Predicted Class")
    conf_ax.set_ylabel("Actual Class")

    class_ticks = np.arange(class_num)
    conf_ax.set_xticks(class_ticks)
    conf_ax.set_yticks(class_ticks)
    conf_ax.set_aspect(1)
    plt.colorbar(im, ax=conf_ax, fraction=0.036, pad=0.04)

    for row, col in np.ndindex(class_num, class_num):
        label = str(int(100*test_conf[row, col])/100)+"%"
        conf_ax.text(col, row, label, horizontalalignment='center', verticalalignment='center', color='white')

    plt.show()

In [None]:
class Dataset(data.Dataset):
    """In-memory dataset of ``(x, y)`` pairs stored as float32 tensors.

    Args:
        x_tensor: inputs, one sample per row; a tensor or anything
            ``torch.tensor`` accepts (e.g. a NumPy array).
        y_tensor: targets, one row per sample; same accepted types.

    Each argument is converted on its own, so a tensor and an array can be
    mixed freely.
    """

    def __init__(self, x_tensor, y_tensor):
        super(Dataset, self).__init__()

        self.x = self._to_float_tensor(x_tensor)
        self.y = self._to_float_tensor(y_tensor)

    @staticmethod
    def _to_float_tensor(values):
        """Return ``values`` as a float32 tensor (non-tensors are copied)."""
        if torch.is_tensor(values):
            return values.float()
        return torch.tensor(values).float()

    def __getitem__(self, index): return self.x[index], self.y[index]

    def __len__(self): return self.x.shape[0]

In [None]:
# seed
seed = 722
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# hyperparameters
batch_size = 1024
learning_rate = 6e-5
max_epoch = 1000

# other parameters
N_set = (40, 20)
resol = 1

map_modifier = RewardMapModifier(extends=(0.2, 0.2), blur_coef=(3, 2))
chunk_size = 256
chunk_set_size = chunk_size*N_set[0]*N_set[1]
online_dataset_path = "/content/gdrive/MyDrive/Asteroid RL dataset/online_dataset/"

split_ref = [-2.5, -1, 1, 2.5]


data_len2 = int(data_RL_preset2[0, 0])
test_img_num = 10
test_img_idx_choice = np.random.randint(0, (data_len2-1)//800, test_img_num)
dataset_img_idx = np.full(data_len2, False)
for i in test_img_idx_choice:
    dataset_img_idx[i*800+1:(i+1)*800+1] = True

print("test_img_idx (in RL_preset_batch_2) :", test_img_idx_choice)
print("--------------------------------")
print("")

data_RL_preset = data_RL_preset0[1:, :]
test_img_data = data_RL_preset2[dataset_img_idx, :].copy()
del data_RL_preset2
gc.collect()

test_img_list = []
for i in range(test_img_num):
    test_img_list.append(test_img_data[i*resol*N_set[0]*N_set[1]:(i+1)*resol*N_set[0]*N_set[1], -1].reshape((N_set[0], N_set[1])).T)

for i in range(len(test_img_list)):
    test_img_list[i], _ = map_modifier.operation(np.expand_dims(test_img_list[i], axis=-1), None, order=['extend_vert', 'extend_hori', 'blur'])
    test_img_list[i], _ = classify_reward(test_img_list[i], split_ref=split_ref)
    #test_img_list[i] = test_img_list[i][:, :, 0]
    gc.collect()


def _shape_and_size(arr, pad=""):
    """Format ``arr``'s shape and memory footprint (GiB, truncated to 3 decimals)."""
    size_gb = int(1000*arr.nbytes/(2**30))/1000
    return str(arr.shape)+pad+", "+str(size_gb)+"GB"


# Column layout of the preset table: every column but the last five is state,
# the next four are the action and the final one is the reward.
state_data = data_RL_preset[:, :-5]
action_data = data_RL_preset[:, -5:-1]
reward_data = data_RL_preset[:, -1:]

print("Data Shapes Before Map Modifying")
print("--------------------------------")
print("state_data  | "+_shape_and_size(state_data))
print("action_data | "+_shape_and_size(action_data, "  "))
print("reward_data | "+_shape_and_size(reward_data, "  "))

print("\n--------------------------------")
# Process the maps chunk by chunk and collect the results in lists; a single
# concatenate at the end avoids re-copying the growing result on every
# iteration (and the dummy first row that then had to be deleted again).
action_chunks = []
reward_chunks = []
for chunk_start in range(0, state_data.shape[0], chunk_set_size):
    # NumPy clips slices that run past the end, so the final (possibly
    # shorter) chunk needs no special case.
    reward_map = reward_data[chunk_start:chunk_start + chunk_set_size]
    action_maps = action_data[chunk_start:chunk_start + chunk_set_size]

    print("Batch Shape : reward / action | "+str(reward_map.shape)+", "+str(action_maps.shape)+" --> ", end='')
    # Rows -> (maps, N_set[0], N_set[1], channels), then swap the two grid axes.
    reward_map = np.swapaxes(reward_map.reshape((-1, N_set[0], N_set[1], 1)), -2, -3)
    action_maps = np.swapaxes(action_maps.reshape((-1, N_set[0], N_set[1], 4)), -2, -3)
    reward_map, action_maps = map_modifier.operation(reward_map, action_maps, order=['extend_vert', 'extend_hori', 'blur'])
    print(str(reward_map.shape)+", "+str(action_maps.shape))

    # Number of grid cells per map after the modifier has run.
    extended_size = reward_map.shape[-2] * reward_map.shape[-3]
    action_chunks.append(action_maps.reshape(-1, 4))
    reward_chunks.append(reward_map.reshape(-1, 1))
print("--------------------------------\n")

# One state row per original map (every N_set[0]*N_set[1]-th row), repeated
# once for each cell of the modified map so rows line up with actions/rewards.
state_data = np.repeat(state_data[::N_set[0]*N_set[1]], repeats=extended_size, axis=0)
action_data = np.concatenate(action_chunks, axis=0)
reward_data = np.concatenate(reward_chunks, axis=0)

del action_chunks, reward_chunks, reward_map, action_maps
del data_RL_preset, data_RL_preset0
gc.collect()

print("Data Shapes After Map Mpdifying")
print("--------------------------------")
print("state_data  | "+_shape_and_size(state_data))
print("action_data | "+_shape_and_size(action_data, "  "))
print("reward_data | "+_shape_and_size(reward_data, "  "))

ext_N_set = map_modifier.ext_N_set(N_set)


# Join state, action and reward columns into one table so that the row-wise
# train/test split keeps them aligned.
total_data = np.concatenate((state_data, action_data, reward_data), axis=1)
state_shape = state_data.shape[1]
del state_data, action_data, reward_data
gc.collect()

train_data, test_data = data_split(total_data, train_ratio=0.85, shuffle=True, copy=True)
del total_data
gc.collect()

# Every column group is copied out of the split table. A plain slice is only a
# view that keeps the whole table alive, which would make the `del` below a
# no-op for memory; this applies to the reward column as well.
train_state_data = train_data[:, :state_shape].copy()
train_action_data = train_data[:, state_shape:state_shape+4].copy()
train_reward_data = train_data[:, -1:].copy()
del train_data
gc.collect()

test_state_data = test_data[:, :state_shape].copy()
test_action_data = test_data[:, state_shape:state_shape+4].copy()
test_reward_data = test_data[:, -1:].copy()
del test_data
gc.collect()


# Replace the reward values by the output of classify_reward() for `split_ref`;
# `class_num` is taken from its second return value.
train_reward_data, class_num = classify_reward(train_reward_data, split_ref=split_ref)
test_reward_data, class_num = classify_reward(test_reward_data, split_ref=split_ref)
train_reward_data = train_reward_data.astype(np.float32)
test_reward_data = test_reward_data.astype(np.float32)
gc.collect()

In [None]:
# Inputs are the state columns followed by the action columns; targets are the
# classified rewards. The NumPy sources are dropped once each Dataset exists.
train_features = np.concatenate((train_state_data, train_action_data), axis=1)
train_dataset = Dataset(train_features, train_reward_data)
del train_features, train_state_data, train_action_data, train_reward_data
gc.collect()

test_features = np.concatenate((test_state_data, test_action_data), axis=1)
test_dataset = Dataset(test_features, test_reward_data)
del test_features, test_state_data, test_action_data, test_reward_data
gc.collect()

# Only the training loader reshuffles; the test loader keeps a fixed order.
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Hyperparameters for this run (these override the values assigned in the
# data-preparation cell).
learning_rate = 8e-5
max_epoch = 300

model = QValueClassifierNet_CNN(input_dim=910, hidden_dim=1024, output_dim=class_num, activation=nn.ELU, dropout=0.15).to(device)
summary(model, (1, model.input_dim))

# Per-class loss weights: 60 for classes below the middle index, 30 for the
# middle class and 100 for every class above it.
mid_class = class_num // 2
class_weight = torch.full((class_num,), 60.0)
class_weight[mid_class] = 30
class_weight[mid_class + 1:] = 100
class_weight = class_weight.float().to(device)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss(weight=class_weight)

# Loss histories; both lists are handed to train_loop/test_loop.
train_loss = []
test_loss = []

# NOTE(review): `es` is passed into train_loop, so its `early_stop` flag is
# presumably updated there -- confirm against train_loop.
es = EarlyStopping(patience=2000, delta=0.1)
for epoch in tqdm(range(max_epoch)):
    train_loop(train_dataloader, model, loss_fn, optimizer, train_loss, es)
    conf_mat = test_loop(test_dataloader, model, loss_fn, test_loss, epoch)

    if es.early_stop:
        print("EarlyStop Triggered : Bestscore = {:7.4g}".format(es.best_score))
        break

    # Every 10th epoch: show training results and the confusion matrix
    # accumulated over the held-out test images.
    if (epoch + 1) % 10 == 0:
        train_res_show()

        test_conf = np.zeros((class_num, class_num))
        for i in range(test_img_num):
            test_conf = test_img_show(i, test_conf)
        test_conf_show(test_conf)

    print("[epochs:{:2}]".format(epoch+2), end='')

print("DONE")

# The first two recorded values are left out of the plot. The title names the
# loss that is actually configured above (cross-entropy, not MSE).
plt.plot(train_loss[2:], label='train_loss')
plt.plot(test_loss[2:], label='test_loss')
plt.legend()
plt.title("Train/Test Loss (Cross-Entropy)")
plt.show()