In [1]:
#General libraries
import os, argparse
import pickle
#from sklearn.decomposition import PCA
import numpy as np

#Torch libraries
import torch 

#Custom libraries
#from load_datasets import load_dataset, create_dataloaders
from IE_source.utils import Train_val_split2, Train_val_split3, Dynamics_Dataset2, Test_Dynamics_Dataset
from IE_source.utils import fix_random_seeds,to_np, count_parameters
#from source.ide_func import NNIDEF, NeuralIDE
#from IE_source.solver import IESolver_monoidal
import IE_source.kernels as kernels
from IE_source.experiments import Full_experiment_AttentionalIE_GeneratedFMRI
from torch.utils.data import SubsetRandomSampler
from IE_source.kernels import model_blocks

from IE_source.Attentional_IE_solver import masking_function

# Run on the first CUDA device when one is visible to PyTorch, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
    

# Default experiment configuration, expressed as an argparse parser so the same
# options could also be supplied from a command line. In the notebook the parser
# is evaluated with an empty argument list, so `args` holds only the defaults
# declared below.
parser = argparse.ArgumentParser(description='Neural IE')
# NOTE(review): machine-specific absolute path used as default; override per environment.
parser.add_argument('-root_path', metavar='DIR', default='/home/ahf38/project/ANIE/',
                    help='path to dataset')
# NOTE(review): the default 'stl10' is not listed in `choices`. Parsing an empty
# argument list still yields it, so the default is kept as-is for compatibility.
parser.add_argument('-dataset-name', default='stl10',
                    help='dataset name', choices=['acrobot_dataset'])
# Help texts use %(default)s so the documented default always matches the real one.
parser.add_argument('-j', '--num_workers', default=0, type=int, metavar='N',
                    help='number of data loading workers (default: %(default)s)')
parser.add_argument('--epochs', default=3000, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('-b', '--batch_size', default=20, type=int,
                    metavar='N',
                    help='mini-batch size (default: %(default)s), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=1e-4, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--disable-cuda', action='store_true',
                    help='Disable CUDA')
parser.add_argument('--fp16-precision', action='store_true',
                    help='Whether or not to use 16-bit precision GPU training.')

parser.add_argument('--out_dim', default=128, type=int,
                    help='feature dimension (default: 128)')
parser.add_argument('--log-every-n-steps', default=100, type=int,
                    help='Log every n steps')
parser.add_argument('--temperature', default=0.07, type=float,
                    help='softmax temperature (default: 0.07)')
parser.add_argument('--n-views', default=2, type=int, metavar='N',
                    help='Number of views for contrastive learning training.')
parser.add_argument('--gpu-index', default=0, type=int, help='Gpu index.')
parser.add_argument('--model', default='simclr', choices=['simclr','lipschitz_simclr','vae','gan'],
                    help='Models to be used')
# Double-quoted so the single quotes around 'evaluate' survive in the help text
# (adjacent '' pairs inside a single-quoted literal are concatenated away).
parser.add_argument('--mode', default='train', choices=['train','evaluate'],
                    help="Set to 'evaluate' if inference is desired")
parser.add_argument('--validation_split', default=0.25,type=float,
                    help='Fraction of the samples that will be used for validation')
parser.add_argument('--resume_from_checkpoint', default=None,
                    help='Give string to run number. Ex: "run12"')
parser.add_argument('--plot_freq', default=1, type=int,help='')
parser.add_argument('--experiment_name', default=None,help='')

# Parse an explicitly empty argument list: inside a notebook sys.argv belongs to
# the kernel, so only the defaults above are wanted.
args = parser.parse_args([])
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Experiment configuration. Settings are written straight into the namespace's
# attribute dictionary, one themed group at a time, in the order the attributes
# are first introduced.
run_settings = vars(args)

# --- Run identity and seeding ---
run_settings.update(
    model='nie',
    mode='train',  # alternative: 'evaluate'
    dataset_name='integral_equations',
    seed=1,
)
fix_random_seeds(args.seed)

# --- Batching, logging, optimiser and scheduler ---
run_settings.update(
    batch_size=32,
    log_per_minibatch=True,
    num_minibatches=5,
    plot_freq=1,
    device=device,
    num_dim_plot=20,
    lr=1e-3,
    min_lr=1e-6,
    T_max=50,
    plat_patience=10,
    factor=0.5,
    warm_up=15,
    exp_mode='Fredholm',  # 'Fredholm'
    # temperature=0.001,
    # lr_scheduler='ReduceLROnPlateau',
    experiment_name='GeneratedFMRI',  # 'Data_RandProj_20pcs_150frames', Data_20pcs_150frames
    data_dim='orig',  # 'Data_2D', 'Data_10D', 'Data_50D', 'Data_orig'
    lr_scheduler='CosineAnnealingLR',  # 'ReduceLROnPlateau'
    # resume_from_checkpoint='run20',
)

# --- Data selection, segmentation and perturbation ---
run_settings.update(
    perturbation_to_obs0=None,
    downsample_orig_data=10,  # factor by which the original data is downsampled
    use_first_n_frames=5000,
    # subtract_mean_per_dim=True,
    segment_len=20,
    validation_split=0.5,
    segment_window_factor=0,
    randomly_drop_n_last_frames=None,
    drop_n_last_frames=None,
    perturbation_to_obs=False,
    perturbation_to_t=False,
    random_sample_n_points=None,
    # perturbation_to_obs_factor=10000,  # scales the std of the data, like (std_data)/factor; the previous test used factor=20
)

# --- Kernel / solver options ---
run_settings.update(
    kernel_split=True,
    free_func_nn=False,
    kernel_type_nn=True,
    G_NN=True,
    num_internal_points=100,  # for the non-attentional model
    plot_F_func=False,
    f_nn=False,
    max_iterations=3,
    sampling_points=100,  # for the attentional model
    time_points=100,  # number of dummy points when support_tensors is True
)

# --- Support points (these options only work with batch_size=1) ---
run_settings.update(
    support_tensors=False,  # dummy points for training
    support_test=False,  # plotting for inference
    combine_points=False,
    output_support_tensors=False,  # False returns just the real coordinates
    use_support=False,
)

# --- Free term c of the integral equation and loss options ---
run_settings.update(
    integral_c='cte_2nd_half',  # pass c as a function fitted on few real points defined by 'num_points_for_c', or None
    num_points_for_c=1,
    c_scaling_factor=1,
    compute_loss_on_unseen_points=False,
    smoothing_factor=0.5,
    one_curve_per_frame=True,
)

# --- Attention model architecture ---
run_settings.update(
    dim=80,
    dim_emb=128,
    n_head=4,
    n_blocks=3,
    n_ff=128,
    attention_type='galerkin',
    final_block=False,
)

In [3]:
# Initial time axis: n_points evenly spaced samples on [t_min, t_max].
t_min, t_max = 0, 1
n_points = 100  # number of samples on the data time grid

# Integer position of every sample, shape (n_points,).
index_np = np.arange(n_points, dtype=int)
# Sample times as a column vector, shape (n_points, 1).
times_np = np.linspace(t_min, t_max, num=n_points)[:, None]

# Same times as a flat float32 tensor on the selected device.
times = torch.from_numpy(times_np).to(device).flatten().float()

# Integration interval handed to the experiment through the config namespace.
args.time_interval = (t_min, t_max)

In [4]:
# Positional hyper-parameters for the attention blocks, taken from the run configuration.
block_hparams = (
    args.dim,
    args.dim_emb,
    args.n_head,
    args.n_blocks,
    args.n_ff,
    args.attention_type,
    args.final_block,
)
model = model_blocks(*block_hparams, dropout=0.1)

# Move the model to the GPU whenever CUDA is available.
if torch.cuda.is_available():
    model = model.cuda()

# Report the parameter count returned by the project helper.
print('Total: ',count_parameters(model))

Total:  321233


In [5]:
# Load the pickled dataset dictionary, select the array for the configured
# dimensionality and, for the original-dimension data, log-transform, centre,
# scale and downsample it.
# Data = pickle.load(open( "30_IE_Spirals_noise.pkl", "rb" )) # Emanuele's data
dataset_path = os.path.join("./datasets",args.experiment_name + ".p")
print('Loading ',dataset_path)
# SECURITY: unpickling can execute arbitrary code, so only load dataset files
# from a trusted source.
# (Original note: this data was saved in GPU, so transform it to CPU first.)
# The context manager closes the file handle that a bare open() would leak.
with open(dataset_path, "rb") as dataset_file:
    Data_dict = pickle.load(dataset_file)
print(Data_dict.keys())
Data = Data_dict['Data_'+args.data_dim]#[:50,:]
print('[imported] Data.shape: ',Data.shape)

if args.data_dim=='orig':
    # from sklearn.preprocessing import StandardScaler
    # args.scaling_factor = StandardScaler()
    # Not used in this cell; kept so `plt` stays bound in the notebook namespace.
    import matplotlib.pyplot as plt
    # `.values` implies a pandas-like container here; work on the log of its values.
    Data = np.log(Data.values)
    # Scale is the 90th percentile of |log data|, measured before centring.
    args.scaling_factor = np.quantile(np.abs(Data),0.90)
    # Subtract the global mean (one scalar over all entries), then divide by the scale.
    Data = (Data-np.mean(Data))/args.scaling_factor
    # Keep every `downsample_orig_data`-th row.
    Data = Data[::args.downsample_orig_data,:]
else:
    # Other dimensionalities: only the scale is recorded; Data itself is left untouched.
    args.scaling_factor = np.quantile(np.abs(Data),0.90)

print('Data.shape: ',Data.shape)
# Noise std is only derived when observation perturbation is enabled. That path
# reads args.perturbation_to_obs_factor, whose assignment is commented out in the
# configuration cell, so it must be set before enabling perturbation_to_obs.
if args.perturbation_to_obs:
    args.std_noise = np.mean(np.std(Data,axis=1))/args.perturbation_to_obs_factor
else:
    args.std_noise = 0
print('scaling_factor: ',args.scaling_factor)


# args.range_imshow = np.array([np.quantile(Data.flatten(), 0.4), np.quantile(Data.flatten(), 0.55)])#np.array([-0.25,0.05]) #
# print('args.range_imshow: ',args.range_imshow)
# args.fitted_pca = Data_dict['pca']
# Data = to_np(Data[:,:4]) #This might be necessary in some cases. Not sure why some of these variables were saved as CUDA.

# Truncate the data, rebuild the per-segment time axis and convert the data to
# a float64 tensor.
train_val = 20000 # Number of frames for train and validation. The remaining will be for test
n_steps = 3000 #number of iterations for training. default=3k epochs
# segment_len = args.segment_len

# Data_test = Data[train_val:,:]
# Only the first `use_first_n_frames` rows are used.
Data = Data[:args.use_first_n_frames,:] #Data[:train_val,:]

n_points = Data.shape[0]
extrapolation_points = Data.shape[0]

t_max = 1 #frames.shape[0]
t_min = 0

# One integer index per retained row, shape (n_points,).
index_np = np.arange(n_points, dtype=int)
# The time axis spans a single segment (segment_len samples on [t_min, t_max]),
# not the whole recording.
# times_np = np.linspace(t_min, t_max, num=n_points) #Original
times_np = np.linspace(t_min, t_max, num=args.segment_len)[:, None]
# print('times_np: ',times_np)

###########################################################
# Flat float64 tensor of segment times; left on the CPU (no .to(args.device)).
times = torch.from_numpy(times_np).flatten()


time_seq = times/t_max
# print('time_seq: ',time_seq)
print('Data.shape: ',Data.shape)
print('times.shape: ',times.shape)
# print('Data_test.shape: ',Data_test.shape)

# scaling_factor = to_np(Data).max()
# args.scaling_factor = np.quantile(Data.flatten(), 0.99) #Data.max()
print('scaling_factor: ',args.scaling_factor)
# if args.data_dim=='orig':
#     Data = args.scaling_factor.fit_transform(Data)
    # Data = torch.Tensor(Data).double()
# else:
#     Data = Data/args.scaling_factor
# Data = torch.Tensor(Data.values).double()
# Convert straight to float64. The previous `torch.Tensor(Data).double()` first
# built a float32 tensor and only then widened it, silently rounding float64
# input to single precision. torch.tensor copies, so the result is contiguous
# and independent of the NumPy buffer.
Data = torch.tensor(np.asarray(Data), dtype=torch.float64)
# Data_test = Data_test/args.scaling_factor

# Data = torch.from_numpy(Data).to(args.device)
# if args.subtract_mean_per_dim:
#     Data = Data-Data.mean(axis=0)

# Data_test = torch.Tensor(Data_test).double()

# Build the train/validation partition and the per-segment number of trailing
# frames to drop.
#Original Dataset setup
# Both split helpers take the same arguments; the flag selects which one is used.
split_builder = Train_val_split3 if args.one_curve_per_frame else Train_val_split2
Data_splitting_indices = split_builder(np.copy(index_np),args.validation_split, args.segment_len,args.segment_window_factor) #Just the first 100 are used for training and validation
# Train_Data_indices = Data_splitting_indices.train_IDs()
# Val_Data_indices = Data_splitting_indices.val_IDs()

# First half for training and second half for validation: only the SIZES of the
# helper's train/val sets are used; the indices themselves are contiguous ranges.
# The calls are kept exactly as in the original because any side effects of
# train_IDs()/val_IDs() are not visible from this notebook.
Train_Data_indices = np.arange(len(Data_splitting_indices.train_IDs()))
Val_Data_indices = np.arange(len(Data_splitting_indices.val_IDs()))+len(Data_splitting_indices.train_IDs())

n_segments = len(Val_Data_indices)+len(Train_Data_indices)

# frames_to_drop = np.random.randint(args.randomly_drop_n_last_frames+1, size=len(Data))
if args.randomly_drop_n_last_frames is not None:
    # Random count in [0, randomly_drop_n_last_frames) per segment.
    frames_to_drop = np.random.randint(args.randomly_drop_n_last_frames, size=n_segments)
elif args.drop_n_last_frames is not None:
    # Same fixed count for every segment.
    frames_to_drop = np.ones(n_segments,dtype=np.int8) * args.drop_n_last_frames
elif args.num_points_for_c is not None:
    # Keep only the first `num_points_for_c` frames of each segment.
    args.drop_n_last_frames = args.segment_len-args.num_points_for_c
    frames_to_drop = np.ones(n_segments,dtype=np.int8) * args.drop_n_last_frames
else:
    # Previously this case left frames_to_drop undefined and the prints below
    # raised NameError. Fall back to dropping nothing; zero is presumably valid
    # because the random branch can already produce it.
    # TODO confirm against Dynamics_Dataset2.
    frames_to_drop = np.zeros(n_segments,dtype=np.int8)

print('\nlen(Train_Data_indices): ',len(Train_Data_indices))
print('Train_Data_indices: ',Train_Data_indices)
print('\nlen(Val_Data_indices): ',len(Val_Data_indices))
print('Val_Data_indices: ',Val_Data_indices)
print('frames_to_drop [for train]: ',frames_to_drop[Train_Data_indices])
print('frames_to_drop [for val]: ',frames_to_drop[Val_Data_indices])
# # #Define frames to drop
# if args.randomly_drop_n_last_frames is not None:
#     args.randomly_drop_n_last_frames = np.random.randint(args.randomly_drop_n_last_frames, size=len(Val_Data_indices)+len(Train_Data_indices))
# print('args.randomly_drop_n_last_frames; ',args.randomly_drop_n_last_frames)
            
# Wrap the data in the project's segment dataset; one dataset instance backs both loaders.
Dataset = Dynamics_Dataset2(
    Data,
    times,
    args.segment_len,
    args.segment_window_factor,
    frames_to_drop,
)

# A held-out test loader (Test_Dynamics_Dataset over Data_test with its own
# linspace time axis) is currently disabled.

# Each sampler shuffles within its own subset of indices.
train_sampler = SubsetRandomSampler(Train_Data_indices)
valid_sampler = SubsetRandomSampler(Val_Data_indices)

# Options shared by both loaders; incomplete final batches are kept.
loader_options = dict(
    batch_size=args.batch_size,
    num_workers=args.num_workers,
    drop_last=False,
)
dataloaders = {
    split_name: torch.utils.data.DataLoader(Dataset, sampler=split_sampler, **loader_options)
    for split_name, split_sampler in (('train', train_sampler), ('val', valid_sampler))
}
# print('Data: ', Data)

# After creating the dataloaders, move the Data tensor to the configured device.
Data = Data.to(args.device)

# Pull one training batch as a shape sanity check.
first_batch = next(iter(dataloaders['train']))
obs_, ts_, ids_, frames_to_drop_ = first_batch
print('obs_.shape: ',obs_.shape)
print('ts_.shape: ',ts_.shape)
print('ids_.shape: ',ids_.shape)
print('frames_to_drop_.shape: ',frames_to_drop_.shape)

Loading  ./datasets/GeneratedFMRI.p


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


dict_keys(['Data_2D', 'Data_10D', 'Data_50D', 'Data_orig', 'pca_proj_2D', 'pca_proj_10D', 'pca_proj_50D', 'time', 'model'])
[imported] Data.shape:  (100000, 80)
Data.shape:  (10000, 80)
scaling_factor:  5.3666900239388795
Data.shape:  (5000, 80)
times.shape:  torch.Size([20])
scaling_factor:  5.3666900239388795
bins:  [   0    1    2 ... 4976 4977 4978]
IDs: [4357  248 4885 ...  905 3980  235]

len(Train_Data_indices):  2489
Train_Data_indices:  [   0    1    2 ... 2486 2487 2488]

len(Val_Data_indices):  2490
Val_Data_indices:  [2489 2490 2491 ... 4976 4977 4978]
frames_to_drop [for train]:  [19 19 19 ... 19 19 19]
frames_to_drop [for val]:  [19 19 19 ... 19 19 19]
obs_.shape:  torch.Size([32, 20, 80])
ts_.shape:  torch.Size([32, 20])
ids_.shape:  torch.Size([32, 20])
frames_to_drop_.shape:  torch.Size([32])


In [6]:
# Select the integration regime and launch the experiment.
exp_mode = args.exp_mode#'Fredholm'

if exp_mode == 'Volterra':
    # Volterra mode: build a mask over the time grid with lower bound 0 and upper bound x.
    masking_map =  masking_function(lambda x: 0.,lambda x: x,n_batch=1)
    mask_times = times
    mask = masking_map.create_mask(mask_times).to(args.device)
else:
    # Fredholm mode: no mask.
    mask = None

# NOTE(review): the recorded run stops with "NameError: name 'save_last_model' is
# not defined" right after the first best-model save. That name is not defined
# in this notebook, so the fix presumably belongs in the experiment code.
Full_experiment_AttentionalIE_GeneratedFMRI(
    model,
    Data,
    dataloaders,
    times,
    index_np,
    mask,
    None,
    args,
    extrapolation_points=None,
)

path_to_experiment:  /home/ahf38/project/ANIE/nie/GeneratedFMRI
Epoch: 0


100%|██████████| 78/78 [00:06<00:00, 11.51it/s]


[plot_dim_vs_time] obs_to_print.shape:  (20, 80)
[plot_dim_vs_time] time_to_print.shape:  (20,)
[plot_dim_vs_time] args.num_dim_plot:  20
[plot_dim_vs_time] dummy_times_to_print.shape:  (20,)
[plot_dim_vs_time] z_all_to_print.shape:  (20, 80)
[plot_dim_vs_time] obs_to_print.shape:  (20, 80)
[plot_dim_vs_time] time_to_print.shape:  (20,)
[plot_dim_vs_time] args.num_dim_plot:  20
[plot_dim_vs_time] dummy_times_to_print.shape:  (20,)
[plot_dim_vs_time] z_all_to_print.shape:  (20, 80)


100%|██████████| 78/78 [00:02<00:00, 38.48it/s]


[plot_dim_vs_time] obs_to_print.shape:  (20, 80)
[plot_dim_vs_time] time_to_print.shape:  (20,)
[plot_dim_vs_time] args.num_dim_plot:  20
[plot_dim_vs_time] dummy_times_to_print.shape:  (20,)
[plot_dim_vs_time] z_all_to_print.shape:  (20, 80)
[plot_dim_vs_time] obs_to_print.shape:  (20, 80)
[plot_dim_vs_time] time_to_print.shape:  (20,)
[plot_dim_vs_time] args.num_dim_plot:  20
[plot_dim_vs_time] dummy_times_to_print.shape:  (20,)
[plot_dim_vs_time] z_all_to_print.shape:  (20, 80)

Best validation loss: 0.14125683526449564
Saving best model for epoch: 1



NameError: name 'save_last_model' is not defined