In [1]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))
    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/tse_sse_2022.csv", index = False)
    out_data = 0

Start Training and get embeddings


2024-03-28 11:27:49,551 - INFO - Start Training on Domain 0...
100% 1379/1379 [00:26<00:00, 52.56batch/s]
2024-03-28 11:28:15,791 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.1498748452299321
100% 1379/1379 [00:26<00:00, 52.75batch/s]
2024-03-28 11:28:41,937 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.07539469600527496
100% 1379/1379 [00:26<00:00, 52.44batch/s]
2024-03-28 11:29:08,237 - INFO - Task_ID: 0	Epoch: 2	Average Training Loss: 0.050490042698768375
2024-03-28 11:29:08,403 - INFO - Start Training on Domain 1...
100% 1544/1544 [00:28<00:00, 53.90batch/s]
2024-03-28 11:29:37,052 - INFO - Task_ID: 1	Epoch: 0	Average Training Loss: 0.02717698731464503
100% 1544/1544 [00:29<00:00, 52.90batch/s]
2024-03-28 11:30:06,243 - INFO - Task_ID: 1	Epoch: 1	Average Training Loss: 0.014271405028478469
100% 1544/1544 [00:28<00:00, 53.27batch/s]
2024-03-28 11:30:35,230 - INFO - Task_ID: 1	Epoch: 2	Average Training Loss: 0.01051456035743749
2024-03-28 11:30:35,411 - INFO - Start 

Training time: 343.6957540512085


100% 1221/1221 [00:05<00:00, 204.29batch/s]


In [1]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/tse_tse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()


Start Training and get embeddings


2024-03-28 11:53:18,989 - INFO - Start Training on Domain 0...
100% 1379/1379 [03:50<00:00,  5.97batch/s]
2024-03-28 11:57:09,857 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.18841611560733965
100% 1379/1379 [03:57<00:00,  5.79batch/s]
2024-03-28 12:01:07,840 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.09488462933971992
100% 1379/1379 [03:56<00:00,  5.83batch/s]
2024-03-28 12:05:04,502 - INFO - Task_ID: 0	Epoch: 2	Average Training Loss: 0.0635881830022146
2024-03-28 12:05:04,596 - INFO - Start Training on Domain 1...
100% 1544/1544 [04:23<00:00,  5.86batch/s]
2024-03-28 12:09:28,234 - INFO - Task_ID: 1	Epoch: 0	Average Training Loss: 0.0005071090831098007
100% 1544/1544 [04:24<00:00,  5.84batch/s]
2024-03-28 12:13:52,478 - INFO - Task_ID: 1	Epoch: 1	Average Training Loss: 0.0006724774561233703
100% 1544/1544 [04:23<00:00,  5.86batch/s]
2024-03-28 12:18:15,857 - INFO - Task_ID: 1	Epoch: 2	Average Training Loss: 0.0005877116053254185
2024-03-28 12:18:15,954 - INFO - St

Training time: 3144.8468046188354


100% 3592/3592 [01:13<00:00, 49.08batch/s]


In [2]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/tse_szse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 12:48:03,654 - INFO - Start Training on Domain 0...
2024-03-28 12:48:03,654 - INFO - Start Training on Domain 0...
100% 1379/1379 [03:46<00:00,  6.08batch/s]
2024-03-28 12:51:50,476 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.21872177894245443
2024-03-28 12:51:50,476 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.21872177894245443
100% 1379/1379 [03:53<00:00,  5.91batch/s]
2024-03-28 12:55:43,791 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10983961416331428
2024-03-28 12:55:43,791 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10983961416331428
100% 1379/1379 [03:53<00:00,  5.90batch/s]
2024-03-28 12:59:37,327 - INFO - Task_ID: 0	Epoch: 2	Average Training Loss: 0.07349410247897825
2024-03-28 12:59:37,327 - INFO - Task_ID: 0	Epoch: 2	Average Training Loss: 0.07349410247897825
2024-03-28 12:59:37,438 - INFO - Start Training on Domain 1...
2024-03-28 12:59:37,438 - INFO - Start Training on Domain 1...
100% 1544/1544 [04:21<00:00,  5.91batch/s]


Training time: 3105.3434252738953


100% 1599/1599 [00:28<00:00, 55.79batch/s]


In [3]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/tse_nasdaq_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 13:41:34,741 - INFO - Start Training on Domain 0...
2024-03-28 13:41:34,741 - INFO - Start Training on Domain 0...
2024-03-28 13:41:34,741 - INFO - Start Training on Domain 0...
100% 1379/1379 [03:51<00:00,  5.97batch/s]
2024-03-28 13:45:25,919 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20027468511409263
2024-03-28 13:45:25,919 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20027468511409263
2024-03-28 13:45:25,919 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20027468511409263
100% 1379/1379 [03:59<00:00,  5.75batch/s]
2024-03-28 13:49:25,674 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10056276772167207
2024-03-28 13:49:25,674 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10056276772167207
2024-03-28 13:49:25,674 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10056276772167207
100% 1379/1379 [03:59<00:00,  5.76batch/s]
2024-03-28 13:53:24,948 - INFO - Task_ID: 0	Epoch: 2	Average Training Loss: 0.06723204988838574
2024-03-28

Training time: 3193.1717114448547


100% 4660/4660 [01:31<00:00, 51.17batch/s]


In [4]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/nasdaq_nasdaq_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 14:38:00,931 - INFO - Start Training on Domain 0...
2024-03-28 14:38:00,931 - INFO - Start Training on Domain 0...
2024-03-28 14:38:00,931 - INFO - Start Training on Domain 0...
2024-03-28 14:38:00,931 - INFO - Start Training on Domain 0...
100% 1691/1691 [04:49<00:00,  5.84batch/s]
2024-03-28 14:42:50,519 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.18575929325307478
2024-03-28 14:42:50,519 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.18575929325307478
2024-03-28 14:42:50,519 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.18575929325307478
2024-03-28 14:42:50,519 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.18575929325307478
100% 1691/1691 [04:57<00:00,  5.68batch/s]
2024-03-28 14:47:48,220 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10020671693563003
2024-03-28 14:47:48,220 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10020671693563003
2024-03-28 14:47:48,220 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.10020671

Training time: 4124.696045160294


100% 4660/4660 [01:25<00:00, 54.76batch/s]


In [5]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/nasdaq_sse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 15:49:32,419 - INFO - Start Training on Domain 0...
2024-03-28 15:49:32,419 - INFO - Start Training on Domain 0...
2024-03-28 15:49:32,419 - INFO - Start Training on Domain 0...
2024-03-28 15:49:32,419 - INFO - Start Training on Domain 0...
2024-03-28 15:49:32,419 - INFO - Start Training on Domain 0...
100% 1691/1691 [04:57<00:00,  5.69batch/s]
2024-03-28 15:54:29,729 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.17000553802774898
2024-03-28 15:54:29,729 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.17000553802774898
2024-03-28 15:54:29,729 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.17000553802774898
2024-03-28 15:54:29,729 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.17000553802774898
2024-03-28 15:54:29,729 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.17000553802774898
100% 1691/1691 [05:03<00:00,  5.57batch/s]
2024-03-28 15:59:33,246 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.09816078691551101
2024-03-28 15:59:33,246

Training time: 4210.011409282684


100% 1221/1221 [00:22<00:00, 53.59batch/s]


In [6]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/nasdaq_szse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 17:01:11,973 - INFO - Start Training on Domain 0...
2024-03-28 17:01:11,973 - INFO - Start Training on Domain 0...
2024-03-28 17:01:11,973 - INFO - Start Training on Domain 0...
2024-03-28 17:01:11,973 - INFO - Start Training on Domain 0...
2024-03-28 17:01:11,973 - INFO - Start Training on Domain 0...
2024-03-28 17:01:11,973 - INFO - Start Training on Domain 0...
100% 1691/1691 [04:49<00:00,  5.84batch/s]
2024-03-28 17:06:01,379 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20332864772347764
2024-03-28 17:06:01,379 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20332864772347764
2024-03-28 17:06:01,379 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20332864772347764
2024-03-28 17:06:01,379 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20332864772347764
2024-03-28 17:06:01,379 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20332864772347764
2024-03-28 17:06:01,379 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.20332864772347764
100

Training time: 4145.893763303757


100% 1599/1599 [00:29<00:00, 54.49batch/s]


In [7]:
test# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/nasdaq_tse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 18:12:06,318 - INFO - Start Training on Domain 0...
2024-03-28 18:12:06,318 - INFO - Start Training on Domain 0...
2024-03-28 18:12:06,318 - INFO - Start Training on Domain 0...
2024-03-28 18:12:06,318 - INFO - Start Training on Domain 0...
2024-03-28 18:12:06,318 - INFO - Start Training on Domain 0...
2024-03-28 18:12:06,318 - INFO - Start Training on Domain 0...
2024-03-28 18:12:06,318 - INFO - Start Training on Domain 0...
100% 1691/1691 [05:01<00:00,  5.60batch/s]
2024-03-28 18:17:08,178 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.12011917662822781
2024-03-28 18:17:08,178 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.12011917662822781
2024-03-28 18:17:08,178 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.12011917662822781
2024-03-28 18:17:08,178 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.12011917662822781
2024-03-28 18:17:08,178 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.12011917662822781
2024-03-28 18:17:08,178 - INFO - Tas

Training time: 4895.21611905098


100% 3592/3592 [02:16<00:00, 26.38batch/s]


In [8]:
test# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/sse_sse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
2024-03-28 19:36:42,202 - INFO - Start Training on Domain 0...
100% 430/430 [01:10<00:00,  6.12batch/s]
2024-03-28 19:37:52,423 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.6148884445791631
2024-03-28 19:37:52,423 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.6148884445791631
2024-03-28 19:37:52,423 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.6148884445791631
2024-03-28 19:37:52,423 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.6148884445791631
2024-03-28 19:37:52,423 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss:

Training time: 1060.3688349723816


100% 1221/1221 [00:24<00:00, 50.21batch/s]


In [1]:
# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cuda:0'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=64, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/sse_szse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-04-01 18:49:09,811 - INFO - Start Training on Domain 0...
100% 430/430 [00:07<00:00, 58.28batch/s]
2024-04-01 18:49:17,194 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.7454444737455179
100% 430/430 [00:07<00:00, 60.57batch/s]
2024-04-01 18:49:24,297 - INFO - Task_ID: 0	Epoch: 1	Average Training Loss: 0.4420095902510247
100% 430/430 [00:06<00:00, 62.17batch/s]
2024-04-01 18:49:31,216 - INFO - Task_ID: 0	Epoch: 2	Average Training Loss: 0.31093828257231054
2024-04-01 18:49:31,291 - INFO - Start Training on Domain 1...
100% 513/513 [00:08<00:00, 60.45batch/s]
2024-04-01 18:49:39,781 - INFO - Task_ID: 1	Epoch: 0	Average Training Loss: 0.004480823894443452
100% 513/513 [00:08<00:00, 60.28batch/s]
2024-04-01 18:49:48,296 - INFO - Task_ID: 1	Epoch: 1	Average Training Loss: 0.008673737996546497
100% 513/513 [00:08<00:00, 60.24batch/s]
2024-04-01 18:49:56,816 - INFO - Task_ID: 1	Epoch: 2	Average Training Loss: 0.005971750882929258
2024-04-01 18:49:56,877 - INFO - Start Training on 

Training time: 101.84341597557068


100% 1599/1599 [00:07<00:00, 219.32batch/s]


In [10]:
test# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/sse_tse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
2024-03-28 20:14:29,001 - INFO - Start Training on Domain 0...
100% 430/430 [01:11<00:00,  6.00batch/s]
2024-03-28 20:15:40,664 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.6386750003063055
2024-03-28 20:15:40,664 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.6386750003063055
2024-03-28 20:15:40,664 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.6386750003063055
2024-03-28 20:15:40,664 - INFO - Task_ID: 0	

Training time: 1321.904506444931


100% 3592/3592 [01:08<00:00, 52.55batch/s]


In [11]:
test# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/sse_nasdaq_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
2024-03-28 20:38:43,716 - INFO - Start Training on Domain 0...
100% 430/430 [01:13<00:00,  5.88batch/s]
2024-03-28 20:39:56,796 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.40733458674945977
2024-03-28 20:39:56,796 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.40733458674945977
2024-03-28 20:39:56,796 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss

Training time: 1673.729516506195


100% 4660/4660 [02:44<00:00, 28.36batch/s]


In [12]:
test# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_nasdaq", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/szse_nasdaq_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
2024-03-28 21:10:44,195 - INFO - Start Training on Domain 0...
100% 591/591 [01:39<00:00,  5.95batch/s]
2024-03-28 21:12:23,600 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.5282680408386182
2024-03-28 21:12:23,600 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.5282680408386182
2024-03-28 21

Training time: 1699.732830286026


100% 4660/4660 [01:26<00:00, 54.06batch/s]


In [13]:

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/szse_szse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
2024-03-28 21:41:20,133 - INFO - Start Training on Domain 0...
100% 591/591 [01:38<00:00,  6.00batch/s]
2024-03-28 21:42:58,729 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0.5637367855688138
2024-03-28 21:42:58,729 - INFO - Task_ID: 0	E

Training time: 1451.8542320728302


100% 1599/1599 [00:30<00:00, 51.71batch/s]


In [14]:
test# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_sse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/szse_sse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
2024-03-28 22:06:36,110 - INFO - Start Training on Domain 0...
100% 591/591 [01:38<00:00,  6.02batch/s]
2024-03-28 22:08:14,317 - INFO - Task_ID: 0	Epoch: 0	Average Training Loss: 0

Training time: 1455.2580258846283


100% 1221/1221 [00:22<00:00, 54.26batch/s]


In [15]:
test# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import os
import logging
import time
import datetime
from tqdm import tqdm
import argparse

import os
import logging
import time
import pandas as pd

from models.Simstock import model
from utils.helper import make_noise
from utils.prepro import dataset_for_modeling
from exp.training import train, test, test_only_inference


# create logger
logger = logging.getLogger('main')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


def log(str): logger.info(str)


parser = argparse.ArgumentParser(description="SimStock")
device ='cpu'
#'cpu'#
datasets = ['train_nasdaq', 'train_sse', 'train_szse', 'train_tse']
test_datasets = ['test_before_nasdaq', 'tset_before_sse', 'test_before_szse', 'test_before_tse']

# dataset param
parser.add_argument("--train_dataset", default="train_2022_szse", type=str, help="one of: {}".format(", ".join(sorted(datasets))))
parser.add_argument("--test_dataset", default="test_2022_tse", type=str, help="one of: {}".format(", ".join(sorted(test_datasets))))
parser.add_argument("--batch_size", default=512, type=int,      help="the number of epoches for each task.")
parser.add_argument("--data_size", default=25, type=int,      help="the number of input features.")

# model param
parser.add_argument("--noise_dim", default=25, type=float,     help="the dimension of the LSTM input noise.")
parser.add_argument("--latent_dim", default=25, type=float,     help="the latent dimension of RNN variables.")
parser.add_argument("--hidden_dim", default=128, type=float,     help="the latent dimension of RNN variables.") #128
parser.add_argument("--noise_type", choices=["Gaussian", "Uniform"], default="Gaussian", help="The noise type to feed into the generator.")
parser.add_argument("--num_rnn_layer", default=1, type=float,   help="the number of RNN hierarchical layers.")
parser.add_argument("--sector_size", default=138, type=int,help="the number of sector size. WARNING : total + 1")
parser.add_argument("--sector_emb", default=256, type=int,help="the number of sector embedding size")
parser.add_argument("--lambda_values", default=0.7, type=float,help="the number of sector argument")

# training param
parser.add_argument("--learning_rate", default=1e-3, type=float,help="the unified learning rate for each single task.")
parser.add_argument("--epoches", default=3, type=int, help="the number of epoches for each task.") # default 3
parser.add_argument("--save_name", default="test", type=str,help="model save weight")

args = parser.parse_args([])


def main(arsgs):
    train_out = dataset_for_modeling(args, train_type = False)
    test_out = dataset_for_modeling(args, train_type = True)
    
    models =  model(args, device).to(device)
    optimizer = torch.optim.Adam(models.parameters(), lr=args.learning_rate)

    starting_time = time.time()


    Es, hiddens = [None], [None]
    for task_id, dataloader in enumerate(train_out):
        E, hidden, rnn_unit = train(dataloader, optimizer, models, args, log, device, Es[-1], hiddens[-1], task_id)
        Es.append(E)
        hiddens.append(hidden)    
    ending_time = time.time()

    print("Training time:", ending_time - starting_time)

    # Testing 
    representation_ll = test(test_out, models, args, log, device, Es[-1], hiddens[-1], is_repre = True) # ~ May 31, 2022 
    
    return representation_ll


if __name__ == "__main__":
    print("Start Training and get embeddings")
    rll = main(args)
        # For test data
    test = pd.read_csv("./data_ex_fund/{}.csv".format(args.test_dataset))

    df_embedding = pd.DataFrame(np.concatenate([rll[i].reshape(-1, 1).detach().cpu().numpy() for i in range(len(rll))], axis=0))
    test = test.reset_index(drop = True)[["Date","Close","Stock_", "IndustryCode_"]]
    out_data = pd.concat([test, pd.DataFrame({"Label": df_embedding.mean(1).values})], axis = 1)
    out_data.to_csv("./main_result_ex_fund/szse_tse_2022.csv", index = False)
    out_data = 0
    torch.cuda.empty_cache()

Start Training and get embeddings


2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
2024-03-28 22:31:59,849 - INFO - Start Training on Domain 0...
100% 591/591 [01:36<00:00,  6.12batch/s]
2024-03-28 22:

Training time: 1426.2113330364227


100% 3592/3592 [01:05<00:00, 55.16batch/s]
