In [1]:
import os
import random
import time
import numpy as np
import json
import logging
import argparse
import torch
import torch.backends.cudnn as cudnn
from torch.nn.functional import logsigmoid
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data
import torch.multiprocessing as mp
import torch.distributed as dist
# from torch.utils.tensorboard import SummaryWriter

import csv
from torch.optim import Adam
from sys import argv
import json
import pdb
from torch.nn import *
from collections import defaultdict
from tqdm import tqdm
import pandas as pd
from datetime import datetime
import shutil
import yaml

from data.utility import Dataset
# from trainer.TransMatch_pretrain import TransMatch
from trainer.TransE import TransE
from util.eval_utils import *

In [2]:
def get_logger():
    logger_name = "main-logger"
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)
    handler = logging.StreamHandler()
    fmt = "[%(asctime)s %(levelname)s %(filename)s line %(lineno)d %(process)d] %(message)s"
    handler.setFormatter(logging.Formatter(fmt))
    logger.addHandler(handler)
    return logger
conf = yaml.safe_load(open("./config/train_model_config.yaml"))
conf["dataset"] = "iqon_s"
conf["gpu"] = 0
conf["device"] = torch.device("cuda:%s"%conf["gpu"] if torch.cuda.is_available() else "cpu")
dataset = Dataset(conf)
global logger
logger = get_logger()

### 用预训练好的 TransE模型，为每一个U，提取 topk<I,J> pairs;同理为每一个head提取topk <relation, tail>pairs; 为每一个tail提取topk <relation, head>pairs

In [3]:
conf["user_num"] = len(dataset.user_map)
conf["item_num"] = len(dataset.item_map)
conf["cate_num"] = len(dataset.cate_items)
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
conf['pretrained_model'] = 'TransE'
pretrain_model_file = f"{conf['pretrained_model']}.pth.tar"
pretrain_model_dir = "model/iqon_s/pretrained_model/"
pretrain_model_path = os.path.join(pretrain_model_dir, pretrain_model_file)
if os.path.exists(pretrain_model_path):
    logger.info("=> loading model ...")
    model = torch.load(pretrain_model_path)
    print("Testing with existing model...")
    conf['use_pretrain'] = True
    model.to(conf["device"])
    logger.info(model)

[2024-01-15 14:15:54,247 INFO 2687879523.py line 10 327079] => loading model ...


Testing with existing model...


[2024-01-15 14:15:59,245 INFO 2687879523.py line 15 327079] TransE(
  (u_embeddings_l): Embedding(1770, 32, padding_idx=1769)
  (i_bias_l): Embedding(94250, 1, padding_idx=94249)
  (i_embeddings_i): Embedding(94250, 32, padding_idx=94249)
  (visual_nn_comp): Sequential(
    (0): Linear(in_features=2048, out_features=32, bias=True)
    (1): Sigmoid()
  )
  (visual_nn_per): Sequential(
    (0): Linear(in_features=2048, out_features=32, bias=True)
    (1): Sigmoid()
  )
  (i_bias_v): Embedding(94250, 1, padding_idx=94249)
  (u_embeddings_v): Embedding(1770, 32, padding_idx=1769)
)


In [3]:
train_df = pd.read_csv("data/iqon_s/train.csv", header=None).astype('int')
train_df.columns=["user_idx", "top_idx", "pos_bottom_idx", "neg_bottom_idx"]
test_df = pd.read_csv("data/iqon_s/test.csv", header=None).astype('int')
test_df.columns=["user_idx", "top_idx", "pos_bottom_idx", "neg_bottom_idx"]
valid_df = pd.read_csv("data/iqon_s/val.csv", header=None).astype('int')
valid_df.columns=["user_idx", "top_idx", "pos_bottom_idx", "neg_bottom_idx"]
all_bottoms_id = pd.concat([train_df["pos_bottom_idx"], test_df["pos_bottom_idx"], valid_df["pos_bottom_idx"],
    train_df["neg_bottom_idx"], test_df["neg_bottom_idx"], valid_df["neg_bottom_idx"]], ignore_index=True).unique()

In [4]:
all_user = pd.concat([train_df["user_idx"], test_df["user_idx"], valid_df["user_idx"]], ignore_index=True).unique()
all_user

array([1506, 1598, 1748, ...,  350, 1261,  772])

In [6]:
all_tops = pd.concat([train_df["top_idx"], test_df["top_idx"], valid_df["top_idx"]], ignore_index=True).unique()
all_bottoms = pd.concat([train_df["pos_bottom_idx"], test_df["pos_bottom_idx"], valid_df["pos_bottom_idx"],
    train_df["neg_bottom_idx"], test_df["neg_bottom_idx"], valid_df["neg_bottom_idx"]], ignore_index=True).unique()

all_tops = list(set(list(all_tops)))
all_bottoms = list(set(list(all_bottoms)))
print(len(all_tops), len(all_bottoms))

56702 78350


In [8]:
print(len(list(set(list(all_tops) + list(all_bottoms)))))

94249


In [4]:
train_ij_pairs = train_df[['top_idx', 'pos_bottom_idx']].drop_duplicates().values.tolist()
train_ij_pairs

[[33369, 55036],
 [32593, 16447],
 [15739, 58334],
 [22463, 11177],
 [9183, 86612],
 [57917, 48237],
 [45135, 13483],
 [39982, 40063],
 [72132, 28499],
 [66205, 25554],
 [7442, 81704],
 [46207, 47145],
 [85516, 18247],
 [74925, 62290],
 [45707, 5728],
 [67239, 4779],
 [78981, 31783],
 [53465, 21087],
 [66282, 62507],
 [15366, 647],
 [71121, 6021],
 [87273, 49570],
 [31679, 62023],
 [31883, 73614],
 [51870, 15916],
 [83971, 40068],
 [18712, 63934],
 [41210, 66873],
 [37886, 29255],
 [78986, 88701],
 [34271, 91698],
 [41686, 38406],
 [16917, 46096],
 [49838, 90556],
 [81906, 54949],
 [40610, 75525],
 [71732, 55515],
 [85275, 61313],
 [63582, 33562],
 [61202, 8413],
 [19221, 61268],
 [10007, 2661],
 [90509, 89790],
 [6308, 69793],
 [52367, 92910],
 [67071, 85598],
 [10568, 16054],
 [28646, 30315],
 [34271, 34533],
 [13672, 82366],
 [11807, 42654],
 [4593, 9062],
 [85323, 39669],
 [34687, 24743],
 [46876, 31619],
 [11232, 43569],
 [20743, 65981],
 [26625, 71003],
 [37093, 65555],
 [71082, 

In [5]:
train_ui_pairs = train_df[['user_idx', 'top_idx']].drop_duplicates().values.tolist()

In [6]:
train_uj_pairs = train_df[['user_idx', 'pos_bottom_idx']].drop_duplicates().values.tolist()
train_uj_pairs

[[1506, 55036],
 [1598, 16447],
 [1748, 58334],
 [359, 11177],
 [308, 86612],
 [753, 48237],
 [514, 13483],
 [521, 40063],
 [1411, 28499],
 [512, 25554],
 [24, 81704],
 [300, 47145],
 [45, 18247],
 [103, 62290],
 [1411, 5728],
 [59, 4779],
 [554, 31783],
 [639, 21087],
 [1755, 62507],
 [1468, 647],
 [1133, 6021],
 [546, 49570],
 [912, 62023],
 [209, 73614],
 [1405, 15916],
 [1052, 40068],
 [508, 63934],
 [1608, 66873],
 [1749, 29255],
 [436, 88701],
 [111, 91698],
 [1140, 38406],
 [1646, 46096],
 [1453, 90556],
 [1499, 54949],
 [1601, 75525],
 [690, 55515],
 [526, 61313],
 [1682, 33562],
 [1132, 8413],
 [390, 61268],
 [1689, 2661],
 [1301, 89790],
 [343, 69793],
 [738, 92910],
 [1560, 85598],
 [1523, 16054],
 [865, 30315],
 [1395, 34533],
 [1527, 82366],
 [1496, 42654],
 [1032, 9062],
 [33, 39669],
 [152, 24743],
 [616, 31619],
 [1393, 43569],
 [1204, 65981],
 [751, 71003],
 [1432, 65555],
 [744, 17337],
 [363, 80421],
 [714, 93687],
 [499, 73757],
 [575, 24556],
 [1448, 30174],
 [1301

In [7]:
def to_tensor(data):
    return torch.tensor(data, dtype=torch.int64).to(conf["device"])

In [46]:
a=0
new_u_ij_dict = {}
dataset.visual_features = dataset.visual_features.to(conf['device'])

ij_pairs = to_tensor(train_ij_pairs)
Is = ij_pairs[:, 0]
Js = ij_pairs[:, 1]
i_rep = model.i_embeddings_i(Is)
j_rep = model.i_embeddings_i(Js)
j_bias = model.i_bias_l(Js)
vis_I = dataset.visual_features[Is]
vis_J = dataset.visual_features[Js]
I_visual = model.visual_nn_comp(vis_I) #bs, hidden_dim
J_visual = model.visual_nn_comp(vis_J)
J_bias_v = model.i_bias_v(Js)

# for user_idx in all_user:
for user_idx in range(len(dataset.user_map)):  
    u_idx = to_tensor(user_idx)   #key
    u_rep = model.u_embeddings_l(u_idx.expand(Is.size(0))) #Is.size(0), hd
    distances = model.transE_predict(u_rep, i_rep, j_rep, j_bias)
    u_rep_v = model.u_embeddings_v(u_idx.expand(Is.size(0))) #Is.size(0), hd
    distances_v = model.transE_predict(u_rep_v, I_visual, J_visual, J_bias_v)
    distances += distances_v

    topk_scores, topk_indices = torch.topk(distances.view(-1), conf['top_k_u'], dim=-1)
    topk_i_j_pairs = ij_pairs[topk_indices]
    new_u_ij_dict[int(user_idx)] = topk_i_j_pairs.cpu().numpy().tolist()
    if a < 1:
        print(new_u_ij_dict)
        a+= 1
    else:
        continue
with open('data/iqon_s/u_topk_ijs_dict.json', 'w') as json_file:
    json.dump(new_u_ij_dict, json_file)  

{0: [[24292, 62023], [82533, 58038], [78103, 93990], [43350, 62023], [40943, 67990]]}


In [47]:
a=0
new_u = {}
for key, value in new_u_ij_dict.items():
    i_values = [item[0] for item in value]  # 获取 'i' 的值
    j_values = [item[1] for item in value]  # 获取 'j' 的值

    new_u[key] = [i_values, j_values]
    if a < 1:
        print(new_u)
        print(key, value)
        a+= 1
    else:
        continue
with open('data/iqon_s/u_topk_Is_Js_dict.json', 'w') as json_file:
    json.dump(new_u, json_file) 

{0: [[24292, 82533, 78103, 43350, 40943], [62023, 58038, 93990, 62023, 67990]]}
0 [[24292, 62023], [82533, 58038], [78103, 93990], [43350, 62023], [40943, 67990]]


In [48]:
tensor_list = []
for key, value in new_u.items():
    tensor = torch.tensor(value, dtype=torch.int32)  # 将列表转换为张量
    tensor_list.append(tensor)

stacked_tensor = torch.stack(tensor_list)
stacked_tensor[0]

tensor([[24292, 82533, 78103, 43350, 40943],
        [62023, 58038, 93990, 62023, 67990]], dtype=torch.int32)

In [53]:
a=0
new_i_uj_dict = {}
dataset.visual_features = dataset.visual_features.to(conf['device'])

uj_pairs = to_tensor(train_uj_pairs)
Us = uj_pairs[:, 0]
Js = uj_pairs[:, 1]
u_rep = model.u_embeddings_l(Us)
j_rep = model.i_embeddings_i(Js)
j_bias = model.i_bias_l(Js)
vis_U = model.u_embeddings_v(Us)
vis_J = dataset.visual_features[Js]

J_visual = model.visual_nn_comp(vis_J)
J_bias_v = model.i_bias_v(Js)

# for Ihead in all_items:
for I_idx in range(len(dataset.item_map)):  
    head_idx = to_tensor(I_idx)   #key
    i_rep = model.i_embeddings_i(head_idx.expand(Us.size(0))) #Us.size(0), hd
    distances = model.transE_predict(u_rep, i_rep, j_rep, j_bias)
    I_visual = dataset.visual_features[head_idx] #2048
    I_visual = model.visual_nn_comp(I_visual)
    distances_v = model.transE_predict(vis_U, I_visual.unsqueeze(0).expand(Us.size(0),-1), J_visual, J_bias_v)
    distances += distances_v

    topk_scores, topk_indices = torch.topk(distances.view(-1), k=5, dim=-1) #k=conf['top_k_i']
    topk_u_j_pairs = uj_pairs[topk_indices]
    new_i_uj_dict[int(I_idx)] = topk_u_j_pairs.cpu().numpy().tolist()
    if a < 1:
        print(new_i_uj_dict)
        a+= 1
    else:
        continue
with open('data/iqon_s/I_topk_UJs_dict.json', 'w') as json_file:
    json.dump(new_i_uj_dict, json_file)  

{0: [[1556, 62023], [1451, 71989], [937, 1344], [1398, 83042], [701, 51298]]}


In [55]:
a=0
new_i = {}
for key, value in new_i_uj_dict.items():
    i_values = [item[0] for item in value]  # 获取 'i' 的值
    j_values = [item[1] for item in value]  # 获取 'j' 的值

    new_i[key] = [i_values, j_values]
    if a < 1:
        print(new_i)
        print(key, value)
        a+= 1
    else:
        continue
with open('data/iqon_s/i_topk_Us_Js_dict.json', 'w') as json_file:
    json.dump(new_i, json_file) 

{0: [[1556, 1451, 937, 1398, 701], [62023, 71989, 1344, 83042, 51298]]}
0 [[1556, 62023], [1451, 71989], [937, 1344], [1398, 83042], [701, 51298]]


In [54]:
a=0
new_j_ui_dict = {}
dataset.visual_features = dataset.visual_features.to(conf['device'])

ui_pairs = to_tensor(train_ui_pairs)
Us = ui_pairs[:, 0]
Is = ui_pairs[:, 1]
u_rep = model.u_embeddings_l(Us)
i_rep = model.i_embeddings_i(Is)

vis_U = model.u_embeddings_v(Us)
vis_I = dataset.visual_features[Is]

I_visual = model.visual_nn_comp(vis_I)


# for Itail in all_items:
for J_idx in range(len(dataset.item_map)):  
    tail_idx = to_tensor(J_idx)   #key
    j_rep = model.i_embeddings_i(tail_idx.expand(Us.size(0))) #Us.size(0), hd
    j_bias = model.i_bias_l(tail_idx.expand(Us.size(0)))
    J_bias_v = model.i_bias_v(tail_idx.expand(Us.size(0)))
    distances = model.transE_predict(u_rep, i_rep, j_rep, j_bias)
    J_visual = dataset.visual_features[tail_idx] #2048
    J_visual = model.visual_nn_comp(J_visual)
    distances_v = model.transE_predict(vis_U, I_visual, J_visual.unsqueeze(0).expand(Us.size(0),-1), J_bias_v)
    distances += distances_v

    topk_scores, topk_indices = torch.topk(distances.view(-1), k=5, dim=-1) #k=conf['top_k_i']
    topk_u_i_pairs = ui_pairs[topk_indices]
    new_j_ui_dict[int(J_idx)] = topk_u_i_pairs.cpu().numpy().tolist()
    if a < 1:
        print(new_j_ui_dict)
        a+= 1
    else:
        continue
with open('data/iqon_s/J_topk_UIs_dict.json', 'w') as json_file:
    json.dump(new_j_ui_dict, json_file)  

{0: [[852, 64210], [212, 26453], [1416, 26751], [1311, 90010], [614, 30468]]}


In [56]:
a=0
new_j = {}
for key, value in new_j_ui_dict.items():
    i_values = [item[0] for item in value]  # 获取 'i' 的值
    j_values = [item[1] for item in value]  # 获取 'j' 的值

    new_j[key] = [i_values, j_values]
    if a < 1:
        print(new_j)
        print(key, value)
        a+= 1
    else:
        continue
with open('data/iqon_s/j_topk_Us_Is_dict.json', 'w') as json_file:
    json.dump(new_j, json_file) 

{0: [[852, 212, 1416, 1311, 614], [64210, 26453, 26751, 90010, 30468]]}
0 [[852, 64210], [212, 26453], [1416, 26751], [1311, 90010], [614, 30468]]


In [12]:
!python pretrain.py -d=iqon_s -g=2

use_selfatt: 0 top_k_u: 3 context: 1 use_hard_neg: 0 use_Nor: 0 use_topk_ij_for_u: 1
data prepared, 1769 users, 94249 items, 65663 train, 8208 test, 8208 validation data
Continuing training with existing model...
2024-01-11 00:44:04 Epoch 0 Loss: 0.054628
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 00:44:04   AUC: 0.7271
va 2024-01-11 00:44:07   AUC: 0.7329
2024-01-11 00:44:14 Epoch 1 Loss: 0.051718
2024-01-11 00:44:18 Epoch 2 Loss: 0.043138
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 00:44:19   AUC: 0.7282
va 2024-01-11 00:44:19   AUC: 0.7381
2024-01-11 00:44:26 Epoch 3 Loss: 0.038228
2024-01-11 00:44:30 Epoch 4 Loss: 0.030242
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 00:44:30   AUC: 0.7355
va 2024-01-11 00:44:30   AUC: 0.7390
2024-01-11 00:44:37 Epoch 5 Loss: 0.025514
2024-01-11 00:44:41 Epoch 6 Loss: 0.023443
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 00:44:42   AUC: 0.7339
va 2024-01-11 00:44:42   AUC: 0.7346
EarlyStopping 

In [1]:
!python pretrain.py -d=iqon_s -g=2

use_selfatt: 1 top_k_u: 3 context: 1 use_hard_neg: 0 use_Nor: 1 use_topk_ij_for_u: 0
data prepared, 1769 users, 94249 items, 65663 train, 8208 test, 8208 validation data
2024-01-11 16:30:27 Epoch 0 Loss: 0.769108
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 16:30:27   AUC: 0.5155
va 2024-01-11 16:30:35   AUC: 0.5113
2024-01-11 16:30:36 Epoch 1 Loss: 0.728146
2024-01-11 16:30:37 Epoch 2 Loss: 0.677836
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 16:30:37   AUC: 0.5437
va 2024-01-11 16:30:45   AUC: 0.5461
2024-01-11 16:30:53 Epoch 3 Loss: 0.630826
2024-01-11 16:30:54 Epoch 4 Loss: 0.587808
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 16:30:54   AUC: 0.5584
va 2024-01-11 16:31:04   AUC: 0.5570
2024-01-11 16:31:04 Epoch 5 Loss: 0.550516
2024-01-11 16:31:05 Epoch 6 Loss: 0.515955
iqon_s_TransMatch_transE_pcc_3_4_mean_0.20
tes 2024-01-11 16:31:05   AUC: 0.5682
va 2024-01-11 16:31:17   AUC: 0.5643
2024-01-11 16:31:18 Epoch 7 Loss: 0.486033
2024-01-11 16:

### 用预训练好的 TransR模型，为每一个U，提取 topk<I,J> pairs;同理为每一个head提取topk <relation, tail>pairs; 为每一个tail提取topk <relation, head>pairs

In [8]:
conf["user_num"] = len(dataset.user_map)
conf["item_num"] = len(dataset.item_map)
conf["cate_num"] = len(dataset.cate_items)
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
conf['gpu'] = 0
conf['pretrained_model'] = 'TransR'
pretrain_model_file = f"{conf['pretrained_model']}.pth.tar"
pretrain_model_dir = "model/iqon_s/pretrained_model/"
pretrain_model_path = os.path.join(pretrain_model_dir, pretrain_model_file)
if os.path.exists(pretrain_model_path):
    logger.info("=> loading model ...")
    model = torch.load(pretrain_model_path)
    print("Testing with existing model...")
    conf['use_pretrain'] = True
    model.to(conf["device"])
    logger.info(model)

[2024-01-16 18:25:32,056 INFO 703855973.py line 11 3899228] => loading model ...


Testing with existing model...


[2024-01-16 18:25:34,912 INFO 703855973.py line 16 3899228] TransR(
  (u_embeddings_l): Embedding(1770, 32, padding_idx=1769)
  (i_bias_l): Embedding(94250, 1, padding_idx=94249)
  (i_embeddings_i): Embedding(94250, 32, padding_idx=94249)
  (projection_matrix): Embedding(1770, 1024)
  (visual_nn_comp): Sequential(
    (0): Linear(in_features=2048, out_features=32, bias=True)
    (1): Sigmoid()
  )
  (visual_nn_per): Sequential(
    (0): Linear(in_features=2048, out_features=32, bias=True)
    (1): Sigmoid()
  )
  (i_bias_v): Embedding(94250, 1, padding_idx=94249)
  (u_embeddings_v): Embedding(1770, 32, padding_idx=1769)
  (projection_matrix_v): Embedding(1770, 1024)
)


In [9]:
a=0
new_u_ij_dict = {}
conf['gpu'] = 0
dataset.visual_features = dataset.visual_features.to(conf['device'])

ij_pairs = to_tensor(train_ij_pairs)
Is = ij_pairs[:, 0]
Js = ij_pairs[:, 1]
i_rep = model.i_embeddings_i(Is)
j_rep = model.i_embeddings_i(Js)
j_bias = model.i_bias_l(Js)
vis_I = dataset.visual_features[Is]
vis_J = dataset.visual_features[Js]
I_visual = model.visual_nn_comp(vis_I) #bs, hidden_dim
J_visual = model.visual_nn_comp(vis_J)
J_bias_v = model.i_bias_v(Js)


# for user_idx in all_user:
for user_idx in range(len(dataset.user_map)):  
    u_idx = to_tensor(user_idx)   #key
    u_rep = model.u_embeddings_l(u_idx.expand(Is.size(0))) #Is.size(0), hd
    print(u_rep.size())
    projection_matrix = model.projection_matrix(u_idx.expand(Is.size(0))).view(u_rep.size(0), model.hidden_dim, model.hidden_dim).transpose(1,2)
    print(projection_matrix.size())
    i_rep = torch.matmul(i_rep.unsqueeze(1), projection_matrix).squeeze(1)
    j_rep = torch.matmul(j_rep.unsqueeze(1), projection_matrix).squeeze(1)

    distances = model.transE_predict(u_rep, i_rep, j_rep, j_bias)
    u_rep_v = model.u_embeddings_v(u_idx.expand(Is.size(0))) #Is.size(0), hd
    projection_matrix_v = model.projection_matrix_v(u_idx.expand(Is.size(0))).view(u_rep_v.size(0), model.hidden_dim, model.hidden_dim).transpose(1,2)

    I_visual = torch.matmul(I_visual.unsqueeze(1), projection_matrix_v).squeeze(1)
    J_visual = torch.matmul(J_visual.unsqueeze(1), projection_matrix_v).squeeze(1)
    distances_v = model.transE_predict(u_rep_v, I_visual, J_visual, J_bias_v)
    distances += distances_v

    topk_scores, topk_indices = torch.topk(distances.view(-1), conf['top_k_u'], dim=-1)
    topk_i_j_pairs = ij_pairs[topk_indices]
    new_u_ij_dict[int(user_idx)] = topk_i_j_pairs.cpu().numpy().tolist()
    if a < 1:
        print(new_u_ij_dict)
        a+= 1
    else:
        continue
with open('data/iqon_s/TransR_u_topk_ijs_dict.json', 'w') as json_file:
    json.dump(new_u_ij_dict, json_file)

torch.Size([65610, 32])
torch.Size([65610, 32, 32])
{0: [[47535, 74379], [37158, 30329], [42031, 93664], [64146, 61663], [22219, 59785]]}
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.Size([65610, 32, 32])
torch.Size([65610, 32])
torch.

OutOfMemoryError: CUDA out of memory. Tried to allocate 258.00 MiB. GPU 0 has a total capacty of 23.69 GiB of which 165.19 MiB is free. Including non-PyTorch memory, this process has 23.52 GiB memory in use. Of the allocated memory 22.03 GiB is allocated by PyTorch, and 470.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF