In [None]:
import numpy as np
import andi_code as andi

In [None]:
# Instantiate the dataset generator from the imported `andi` module; the later
# cells in this notebook use this `AD` object to generate trajectories.
AD = andi.andi_datasets()
# Last expression of the cell, so the notebook displays its value
# (presumably the names of the diffusion models the generator offers).
AD.avail_models_name

In [None]:
# For our use only the first three cells are needed: we never build complete
# datasets here, we only rely on the "save_trajectories" feature to quickly
# generate custom datasets on demand.
path = "datasets/trajectories/2d/validset/"
n_samples = 100
# Values tried previously: 500, or a per-model array
# np.asarray([16000, 16000, 10000, 16000, 10000]).
N_save = 2000
data = AD.andi_dataset(
    N=n_samples,
    tasks=[1, 2],
    dimensions=[2],
    load_dataset=False,
    save_dataset=False,
    save_trajectories=True,
    path_trajectories=path,
    N_save=N_save,
)

# create dataset with labels for alpha, model, uncertainty!

In [None]:
import numpy as np
import andi as andi

In [None]:
#create dataset with labels for alpha, model, uncertainty!
#no weird shuffling is needed, this is done in training anyway
def create_super_dataset(andi_dataset,T,N,dim,exponents,noise_T=None):
    """Build a labelled trajectory dataset with localization noise and random rescaling.

    Trajectories are generated by ``andi_dataset.create_dataset`` at length
    ``noise_T``, normalized with ``andi.normalize``, optionally truncated to
    ``T`` points, perturbed with Gaussian localization error and finally
    rescaled by a random factor per trajectory coordinate.

    Parameters
    ----------
    andi_dataset : object
        Generator providing ``n_models``, ``create_dataset`` and
        ``create_noisy_localization_dataset``. All generation goes through this
        argument; no notebook-global generator is consulted.
    T : int
        Number of points of the returned trajectories.
    N : int
        Target dataset size used to derive the per-(model, exponent) counts.
        Counts are rounded up, so the real number of rows is whatever
        ``create_dataset`` returns for the resulting table.
    dim : int
        Trajectory dimension.
        NOTE(review): the truncation to ``T`` and the single noise-label column
        only line up with the row layout for ``dim == 1`` - confirm before
        using other dimensions.
    exponents : sequence of float
        Anomalous exponents to sample.
    noise_T : int, optional
        Length at which trajectories are generated and normalized before being
        cut down to ``T``; the noise amplitude is therefore relative to a
        trajectory of length ``noise_T``. Defaults to ``T``.

    Returns
    -------
    numpy.ndarray
        One row per trajectory: ``[model, exponent, noise, x_0, x_1, ...]``.
    """
    n_exp = len(exponents)
    # Trajectories per model and exponent. Arbitrarily chosen to obtain balanced classes
    n_per_model = np.ceil(1.6*N/5)
    subdif, superdif = n_exp//2, n_exp//2+1
    n_per_class = np.zeros((andi_dataset.n_models, n_exp))
    # ctrw, attm: only the lower half of the exponent grid
    n_per_class[:2, :subdif] = np.ceil(n_per_model/subdif)
    # fbm
    n_per_class[2, :] = np.ceil(n_per_model/(n_exp-1))
    # FBM can't be ballistic. Exponent grids usually come from np.arange with a
    # float step, so compare with a tolerance instead of exact equality.
    n_per_class[2, np.isclose(exponents, 2)] = 0
    # lw: only the upper half of the exponent grid
    n_per_class[3, subdif:] = np.ceil((n_per_model/superdif)*0.8)
    # sbm
    n_per_class[4, :] = np.ceil(n_per_model/n_exp)

    if noise_T is None:
        noise_T = T
    # Generate and normalize trajectories at noise_T, then cut down to T, so the
    # noise is defined with respect to a trajectory of length noise_T.
    dataset = andi_dataset.create_dataset(T=noise_T, N=n_per_class, exponents=exponents,
                                          dimension=dim, models=np.arange(5))

    # Normalize trajectories (columns 0 and 1 hold the labels, the rest the positions)
    n_traj = dataset.shape[0]
    norm_trajs = andi.normalize(dataset[:, 2:].reshape(n_traj*dim, noise_T))
    dataset[:, 2:] = norm_trajs.reshape(dataset[:, 2:].shape)
    if noise_T != T:
        # Cut trajectories down to length T after normalization
        dataset = dataset[:, 0:T+2]

    # Add localization error: Gaussian noise with sigma drawn from [0.1, 0.5, 1]
    loc_error_amplitude = np.random.choice(np.array([0.1, 0.5, 1]), size=n_traj*dim)
    loc_error = np.random.randn(n_traj*dim, int(T)) * loc_error_amplitude[:, np.newaxis]
    dataset = andi_dataset.create_noisy_localization_dataset(dataset, dimension=dim, T=T,
                                                             noise_func=loc_error)

    # Add random diffusion coefficients: rescale the displacements of every
    # trajectory by a factor drawn from a standard normal distribution.
    trajs = dataset[:, 2:].reshape(n_traj*dim, T)
    displacements = np.diff(trajs, axis=1)
    diffusion_coefficients = np.random.randn(trajs.shape[0])
    new_displacements = displacements * diffusion_coefficients[:, np.newaxis]
    # Rebuild trajectories from the rescaled displacements, starting at zero
    new_trajs = np.cumsum(new_displacements, axis=1)
    new_trajs = np.concatenate((np.zeros((new_trajs.shape[0], 1)), new_trajs), axis=1)
    dataset[:, 2:] = new_trajs.reshape(dataset[:, 2:].shape)

    # Insert the noise amplitude as third column: [model, exponent, noise, x_0, x_1, ...]
    dataset = np.concatenate(
        (dataset[:, :2], loc_error_amplitude.reshape(-1, 1), dataset[:, 2:]), axis=1)

    return dataset

In [None]:
# Generator instance handed to create_super_dataset below.
AD = andi.andi_datasets()

T = 500  # number of points per trajectory in the final dataset
noise_T = T  # generate/normalize at the same length, so no truncation happens
N = 500000  # target dataset size passed to create_super_dataset
dim = 1
# NOTE(review): np.arange with a float step accumulates rounding error, so the
# grid values (including the last one, nominally 2.0) may not be exact.
exponents = np.arange(0.05, 2.01, 0.05)

#output dataset is [model,exponent,noise,x_1,x_2,...]
dataset = create_super_dataset(AD,T,N,dim,exponents,noise_T=noise_T)

In [None]:
import os

# Write the generated dataset to a text file whose folder and file name encode
# the trajectory length T and the requested dataset size N.
path = f"datasets/super/1dim_{T}lenght/"
# exist_ok=True makes this a no-op when the folder is already there and also
# creates missing parent folders. Any other failure (e.g. permissions) now
# raises here instead of being reported as "directory exists" and then
# surfacing later inside np.savetxt.
os.makedirs(path, exist_ok=True)
np.savetxt(path + f"andiset{N}.txt", dataset)

In [None]:
import numpy as np
# Scratch calculation: number of exponent values on the grid times 10000 times 3.5.
# NOTE(review): presumably a rough dataset-size estimate; the meaning of the
# factors 10000 and 3.5 is not documented in this notebook - confirm or remove.
len(np.arange(0.05, 2.01, 0.05))*10000*3.5

#### FBM vs SBM only datasets

In [None]:
import numpy as np
import andi as andi

In [None]:
#create an sbm vs fbm only dataset
#create dataset with labels for alpha, model, uncertainty!
#no weird shuffling is needed, this is done in training anyway
def create_super_dataset_sbmfbm_only(andi_dataset,T,N,dim,exponents,noise_T=None):
    """Build a labelled, noisy trajectory dataset restricted to models 2 and 4.

    Same pipeline as the full dataset builder in this notebook, but
    ``create_dataset`` is called with ``models=[2, 4]`` (the FBM and SBM
    classes this section of the notebook is about) and the same number of
    trajectories for every (model, exponent) pair.

    Parameters
    ----------
    andi_dataset : object
        Generator providing ``create_dataset`` and
        ``create_noisy_localization_dataset``. All generation goes through this
        argument; no notebook-global generator is consulted.
    T : int
        Number of points of the returned trajectories.
    N : int
        Target dataset size; each (model, exponent) pair receives
        ``ceil(N / (2 * len(exponents)))`` trajectories.
    dim : int
        Trajectory dimension.
        NOTE(review): the truncation to ``T`` and the single noise-label column
        only line up with the row layout for ``dim == 1`` - confirm before
        using other dimensions.
    exponents : sequence of float
        Anomalous exponents to sample.
    noise_T : int, optional
        Length at which trajectories are generated and normalized before being
        cut down to ``T``. Defaults to ``T``.

    Returns
    -------
    numpy.ndarray
        One row per trajectory: ``[model, exponent, noise, x_0, x_1, ...]``.
    """
    n_exp = len(exponents)
    # Same number of trajectories for each of the two models and each exponent
    n_per_class = int(np.ceil(N/(2*n_exp)))

    if noise_T is None:
        noise_T = T
    dataset = andi_dataset.create_dataset(T=noise_T, N=n_per_class, exponents=exponents,
                                          dimension=dim, models=[2,4])

    # Normalize trajectories (columns 0 and 1 hold the labels, the rest the positions)
    n_traj = dataset.shape[0]
    norm_trajs = andi.normalize(dataset[:, 2:].reshape(n_traj*dim, noise_T))
    dataset[:, 2:] = norm_trajs.reshape(dataset[:, 2:].shape)
    if noise_T != T:
        # Cut trajectories down to length T after normalization
        dataset = dataset[:, 0:T+2]

    # Add localization error: Gaussian noise with sigma drawn from [0.1, 0.5, 1]
    loc_error_amplitude = np.random.choice(np.array([0.1, 0.5, 1]), size=n_traj*dim)
    loc_error = np.random.randn(n_traj*dim, int(T)) * loc_error_amplitude[:, np.newaxis]
    dataset = andi_dataset.create_noisy_localization_dataset(dataset, dimension=dim, T=T,
                                                             noise_func=loc_error)

    # Add random diffusion coefficients: rescale the displacements of every
    # trajectory by a factor drawn from a standard normal distribution.
    trajs = dataset[:, 2:].reshape(n_traj*dim, T)
    displacements = np.diff(trajs, axis=1)
    diffusion_coefficients = np.random.randn(trajs.shape[0])
    new_displacements = displacements * diffusion_coefficients[:, np.newaxis]
    # Rebuild trajectories from the rescaled displacements, starting at zero
    new_trajs = np.cumsum(new_displacements, axis=1)
    new_trajs = np.concatenate((np.zeros((new_trajs.shape[0], 1)), new_trajs), axis=1)
    dataset[:, 2:] = new_trajs.reshape(dataset[:, 2:].shape)

    # Insert the noise amplitude as third column: [model, exponent, noise, x_0, x_1, ...]
    dataset = np.concatenate(
        (dataset[:, :2], loc_error_amplitude.reshape(-1, 1), dataset[:, 2:]), axis=1)

    return dataset

In [None]:
# Generator instance handed to create_super_dataset_sbmfbm_only below.
AD = andi.andi_datasets()

T = 100  # number of points per trajectory in the final dataset
noise_T = T  # generate/normalize at the same length, so no truncation happens
N = 50000  # target dataset size passed to create_super_dataset_sbmfbm_only
dim = 1
# NOTE(review): np.arange with a float step is sensitive to rounding, so whether
# a value ~2.0 ends up in this grid is not obvious from the code - confirm.
exponents = np.arange(0.05, 2, 0.05)

#output dataset is [model,exponent,noise,x_1,x_2,...]
dataset = create_super_dataset_sbmfbm_only(AD,T,N,dim,exponents,noise_T = noise_T)

In [None]:
import os

# Write the FBM/SBM-only dataset to disk.
# NOTE(review): folder and file name hard-code T=100 and N=50000; keep them in
# sync with the configuration cell if those values change.
path = "datasets/super/1dim_100lenght/"
# np.savetxt does not create folders, so make sure the target exists first
# (no-op when it is already there).
os.makedirs(path, exist_ok=True)
np.savetxt(path + "andiset50000_sbmfbm.txt", dataset)

In [None]:
from load_andi_dataset import *

# Configuration for loading a single-model training set from saved trajectories.
T=100
noise_T = T
N_train = int(3e5)  # total number of training trajectories requested (passed as N_total)
dim = 1
use_increments = True
model = 2  # index of the model to load; also selects the matching entry of N_save
# One N_save value per model index.
# NOTE(review): the last entry is 1000 while the per-model array mentioned in the
# first generation cell ends in 10000 - confirm this is not a typo (it is unused
# for model = 2).
N_save = [16000,16000,10000,16000,1000]

# Loading from saved trajectories means a single set of stored trajectories can be reused for all trajectory lengths
train_path = "datasets/trajectories/"
train_dataset = SingleModel_dataset_from_saved_trajs(path = train_path, task = 1, dim = dim, N_total = N_train,
                                                  T = T, N_save = N_save[model], 
                                                  use_increments = use_increments, model = model)