# Libraries

In [5]:
import os
import sys
import numpy as np
from tqdm import tqdm
import pandas as pd

In [6]:
# Put the parent of the current working directory on the module search path.
# The membership test keeps this cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
_parent_dir = os.path.abspath('..')
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Useful functions

In [7]:
def get_d_points(d,n):
    """Return n points evenly spaced on the circle of radius d centred at the origin.

    Parameters
    ----------
    d : float
        Radius of the circle.
    n : int
        Number of points to place on the circle.

    Returns
    -------
    numpy.ndarray
        Array of shape (n, 2); each row is (d*cos(t), d*sin(t)) for one of n
        equally spaced angles t in [0, 2*pi).
    """
    # linspace(endpoint=False) always yields exactly n angles. arange with a
    # float step may yield n+1 because its length is ceil((stop-start)/step),
    # which is sensitive to floating-point rounding.
    tetas = np.linspace(0, 2*np.pi, n, endpoint=False)
    return np.column_stack((d*np.cos(tetas), d*np.sin(tetas)))

In [8]:
def get_c_points(ri,re,n):
    """Return 2*n points describing two concentric circles centred at the origin.

    Parameters
    ----------
    ri : float
        Radius of the first circle (rows 0 .. n-1 of the result).
    re : float
        Radius of the second circle (rows n .. 2n-1 of the result).
    n : int
        Number of points per circle.

    Returns
    -------
    numpy.ndarray
        Array of shape (2*n, 2). Both circles are sampled at the same n equally
        spaced angles in [0, 2*pi).
    """
    # linspace(endpoint=False) always yields exactly n angles. arange with a
    # float step may yield n+1 because its length is ceil((stop-start)/step),
    # which is sensitive to floating-point rounding.
    tetas = np.linspace(0, 2*np.pi, n, endpoint=False)
    unit_circle = np.column_stack((np.cos(tetas), np.sin(tetas)))
    return np.concatenate((ri*unit_circle, re*unit_circle), axis=0)

# Marginal Datasets

## Generation

Cylinders

In [51]:
# Bounds of the outer radius used for the marginal cylinder dataset.
re_min, re_max = 10, 200
# The inner-radius bounds sit 5 below the matching outer-radius bounds.
ri_min, ri_max = re_min - 5, re_max - 5

In [52]:
def generate_marginal_cylinder(n):
    """Draw 2*n (inner radius, outer radius) pairs for the marginal cylinder dataset.

    The first n pairs draw the outer radius uniformly in [re_min, re_max] and
    then the inner radius uniformly in [ri_min, outer - 5]. The last n pairs
    draw the inner radius uniformly in [ri_min, ri_max] and then the outer
    radius uniformly in [inner + 5, re_max].

    Returns
    -------
    (ri, re) : tuple of numpy.ndarray
        Two 1-D arrays of length 2*n holding inner and outer radii.
    """
    # First half: outer radius first, inner radius conditioned on it.
    outer_first = np.random.uniform(re_min, re_max, n)
    inner_given_outer = np.array(
        [np.random.uniform(ri_min, outer - 5) for outer in outer_first]
    )

    # Second half: inner radius first, outer radius conditioned on it.
    inner_first = np.random.uniform(ri_min, ri_max, n)
    outer_given_inner = np.array(
        [np.random.uniform(inner + 5, re_max) for inner in inner_first]
    )

    ri = np.concatenate((inner_given_outer, inner_first), 0)
    re = np.concatenate((outer_first, outer_given_inner), 0)
    return ri, re

Densities

In [37]:
# Lower and upper bound of the sampled densities.
d_min, d_max = 1, 12

In [38]:
def generate_marginal_density(n):
    """Draw n densities uniformly at random between d_min and d_max."""
    densities = np.random.uniform(low=d_min, high=d_max, size=n)
    return densities

## Save data

In [47]:
# Root folder of the marginal datasets, resolved relative to the parent of the
# current working directory.
marginal_save_path = os.path.join(
    os.path.abspath('..'),
    "src/dataset/marginals",
)

Cylinders

In [53]:
for stage in ['train', 'val', 'test']:
    # One output folder per split, created on demand.
    save_path = os.path.join(marginal_save_path, 'cylinders', stage)
    os.makedirs(save_path, exist_ok=True)

    # Number of cylinders written for this split.
    # NOTE(review): val=1000 / test=5000 here, whereas the meta-dataset cell
    # uses val=5000 / test=1000 -- confirm which split sizes are intended.
    if stage == 'train':
        n = 20000
    elif stage == 'test':
        n = 5000
    else:
        n = 1000

    # generate_marginal_cylinder(k) returns 2*k radius pairs, hence n // 2.
    ri, re = generate_marginal_cylinder(n // 2)

    # Sample every cylinder with 30 points per circle.
    dataset = []
    for i in tqdm(range(n)):
        dataset.append(get_c_points(ri[i], re[i], 30))
    dataset = np.array(dataset)

    # np.save appends the ".npy" suffix to the target name.
    save_path = os.path.join(save_path, "cylinders")
    np.save(save_path, dataset)


100%|██████████| 20000/20000 [00:04<00:00, 4957.87it/s]
100%|██████████| 1000/1000 [00:00<00:00, 4956.59it/s]
100%|██████████| 5000/5000 [00:01<00:00, 4925.30it/s]


Density

In [None]:
for stage in ['train', 'val', 'test']:
    # One output folder per split, created on demand.
    save_path = os.path.join(marginal_save_path, 'densities', stage)
    os.makedirs(save_path, exist_ok=True)

    # Number of densities written for this split.
    # NOTE(review): val=1000 / test=5000 here, whereas the meta-dataset cell
    # uses val=5000 / test=1000 -- confirm which split sizes are intended.
    if stage == 'train':
        n = 20000
    elif stage == 'test':
        n = 5000
    else:
        n = 1000

    d = generate_marginal_density(n)

    # Encode every density as 30 points on a circle of that radius.
    dataset = []
    for i in tqdm(range(n)):
        dataset.append(get_d_points(d[i], 30))
    dataset = np.array(dataset)

    # np.save appends the ".npy" suffix to the target name.
    save_path = os.path.join(save_path, "densities")
    np.save(save_path, dataset)


100%|██████████| 20000/20000 [00:02<00:00, 9451.15it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8786.40it/s]
100%|██████████| 5000/5000 [00:00<00:00, 9316.99it/s]


# Meta Datasets

## Parameters

In [24]:
# Outer radius of cylinder 1.
re1_min, re1_max = 20, 100
# The inner radius of cylinder 1 and the outer radius of cylinder 2 share the
# same bounds, 5 below those of the outer radius of cylinder 1.
ri1_min = re2_min = re1_min - 5
ri1_max = re2_max = re1_max - 5
# Inner radius of cylinder 2, a further 5 below.
ri2_min, ri2_max = re2_min - 5, re2_max - 5
# Density range shared by both cylinders.
d_min, d_max = 1, 12

radii|value
----|----
re1_max | 100
re1_min | 20
ri1_max / re2_max | 95
ri1_min / re2_min | 15
ri2_max | 90
ri2_min | 10

## Generation functions

In [25]:
def generate(n):
    """Sample n random two-cylinder configurations with matching hyper-parameters.

    The n samples are split into three groups of (near-)equal size. Each group
    starts by drawing one radius uniformly at random (r_ext1, r_int1 or r_int2
    respectively) and then draws the remaining radii conditionally on it,
    following the rules detailed in the paper's appendix. In every sample the
    outer radius of cylinder 2 equals the inner radius of cylinder 1, and each
    outer radius is at least 5 above the matching inner radius.

    Parameters
    ----------
    n : int
        Number of samples to generate (must be >= 1). When n is not a multiple
        of 3 the remainder is given to the first groups, so exactly n rows are
        returned.

    Returns
    -------
    hyper_params : numpy.ndarray, shape (n, 3)
        Columns (x, y, m_cube), with x + y == 100 and
        (m1 + m2) * x == m_cube * y up to floating-point rounding.
    optim_results : numpy.ndarray, shape (n, 6)
        Columns (ri1, re1, ri2, re2, d1, d2).

    Raises
    ------
    ValueError
        If n is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    gap = 5  # minimum difference between an outer radius and its inner radius

    def sample_inner(outer, lowest):
        # Inner radius drawn uniformly in [lowest, outer - gap]; the clamp
        # keeps the gap even if the interval happens to be inverted.
        highest = outer - gap
        return np.minimum(np.random.uniform(lowest, highest), highest)

    def sample_outer(inner, highest):
        # Outer radius drawn uniformly in [inner + gap, highest]; the clamp
        # keeps the gap even if the interval happens to be inverted.
        lowest = inner + gap
        return np.maximum(np.random.uniform(lowest, highest), lowest)

    # Spread n over the three groups without dropping the remainder of n / 3.
    base, extra = divmod(n, 3)
    n_a, n_b, n_c = [base + (k < extra) for k in range(3)]

    # Group A: r_ext1 first, then r_int1 (= r_ext2), then r_int2.
    re1_a = np.random.uniform(re1_min, re1_max, (n_a, 1))
    ri1_a = sample_inner(re1_a, ri1_min)
    re2_a = np.copy(ri1_a)
    ri2_a = sample_inner(re2_a, ri2_min)

    # Group B: r_int1 (= r_ext2) first, then r_ext1 and r_int2.
    ri1_b = np.random.uniform(ri1_min, ri1_max, (n_b, 1))
    re1_b = sample_outer(ri1_b, re1_max)
    re2_b = np.copy(ri1_b)
    ri2_b = sample_inner(re2_b, ri2_min)

    # Group C: r_int2 first, then r_ext2 (= r_int1), then r_ext1.
    ri2_c = np.random.uniform(ri2_min, ri2_max, (n_c, 1))
    re2_c = sample_outer(ri2_c, re2_max)
    ri1_c = np.copy(re2_c)
    re1_c = sample_outer(ri1_c, re1_max)

    re1 = np.concatenate((re1_a, re1_b, re1_c), 0)
    ri1 = np.concatenate((ri1_a, ri1_b, ri1_c), 0)
    re2 = np.concatenate((re2_a, re2_b, re2_c), 0)
    ri2 = np.concatenate((ri2_a, ri2_b, ri2_c), 0)
    count = re1.shape[0]

    # Densities d1 and d2 are drawn uniformly at random in [d_min, d_max].
    d1 = np.random.uniform(d_min, d_max, (count, 1))
    d2 = np.random.uniform(d_min, d_max, (count, 1))

    # Surface area of each annulus and the resulting masses.
    s1 = np.pi*(re1**2-ri1**2)
    s2 = np.pi*(re2**2-ri2**2)
    m1 = s1*d1/1000
    m2 = s2*d2/1000
    total_mass = m1+m2

    # Cube mass drawn uniformly between the smallest and largest total mass.
    # ndarray.min()/max() reduce in C instead of comparing rows in Python.
    m_cube = np.random.uniform(total_mass.min(), total_mass.max(), (count, 1))

    # Distances x and y satisfying the equilibrium equation
    # total_mass * x == m_cube * y, rescaled so that x + y == 100.
    x = np.random.uniform(1, 99, (count, 1))
    y = total_mass*x/m_cube
    xy = x+y
    x = x*100/xy
    y = y*100/xy

    hyper_params = np.concatenate((x,y,m_cube),axis=1)
    optim_results = np.concatenate((ri1,re1,ri2,re2,d1,d2),axis=1)
    return hyper_params, optim_results

In [26]:
def generate_dataset(n,return_hparams=False):
    """Build a DataFrame of generated samples together with two derived columns.

    Calls generate(n) and lays out its two result arrays side by side under the
    columns x, y, m_cube, rayon_i_1, rayon_e_1, rayon_i_2, rayon_e_2,
    densite_1, densite_2. Two columns are then added:

    - masse_generee: total mass of both cylinders recomputed from the radii
      and densities.
    - equilibre: squared residual (masse_generee * x - m_cube * y) ** 2.

    Parameters
    ----------
    n : int
        Sample count forwarded to generate.
    return_hparams : bool, default False
        When true, also return the raw hyper-parameter array from generate.

    Returns
    -------
    pandas.DataFrame, or (pandas.DataFrame, numpy.ndarray) when return_hparams is true.
    """
    hyper_params, optim_results = generate(n)

    column_names = ['x','y','m_cube','rayon_i_1','rayon_e_1','rayon_i_2','rayon_e_2','densite_1','densite_2']
    dataset_df = pd.DataFrame(
        np.concatenate((hyper_params,optim_results),axis=1),
        columns=column_names,
    )

    # Annulus areas, then masses, recomputed from the stored columns.
    surface_1 = np.pi*(dataset_df['rayon_e_1']**2-dataset_df['rayon_i_1']**2)
    surface_2 = np.pi*(dataset_df['rayon_e_2']**2-dataset_df['rayon_i_2']**2)
    mass_1 = surface_1*dataset_df['densite_1']/1000
    mass_2 = surface_2*dataset_df['densite_2']/1000
    total_mass = mass_1+mass_2

    dataset_df['masse_generee'] = total_mass
    dataset_df['equilibre'] = (total_mass*dataset_df['x']-dataset_df['m_cube']*dataset_df['y'])**2

    return (dataset_df, hyper_params) if return_hparams else dataset_df

## Create and save dataset

In [27]:
# Root folder of the meta datasets, resolved relative to the parent of the
# current working directory.
save_path = os.path.join(os.path.abspath('..'),"Clean Notebooks/src/dataset/meta")
stages = ['train', 'val', 'test']
# Requested number of samples per split, in the same order as `stages`.
stage_sizes = [20000, 5000, 1000]

for i in tqdm(range(len(stages))):
    stage = stages[i]
    dataset_path = os.path.join(save_path, stage)

    # Create the directories if they don't exist
    os.makedirs(dataset_path, exist_ok=True)

    # dataframe (h holds the raw hyper-parameter array returned alongside it)
    dataset, h = generate_dataset(stage_sizes[i], True)

    # Point clouds are built by iterating over plain column arrays instead of
    # DataFrame.iterrows(), which allocates a Series per row and was run twice
    # per split. Cylinder 2 is stored as the "int" cylinder, cylinder 1 as the
    # "ext" cylinder; every circle uses 30 points.
    int_cylinder = np.array([
        get_c_points(r_in, r_out, 30)
        for r_in, r_out in dataset[['rayon_i_2', 'rayon_e_2']].to_numpy()
    ])
    ext_cylinder = np.array([
        get_c_points(r_in, r_out, 30)
        for r_in, r_out in dataset[['rayon_i_1', 'rayon_e_1']].to_numpy()
    ])

    # densities
    int_density = np.array([
        get_d_points(density, 30) for density in dataset['densite_2'].to_numpy()
    ])
    ext_density = np.array([
        get_d_points(density, 30) for density in dataset['densite_1'].to_numpy()
    ])

    # save data: the CSV is written under the bare name 'dataset' (no file
    # extension); np.save appends ".npy" to each array name.
    dataset.to_csv(os.path.join(dataset_path, 'dataset'))
    np.save(os.path.join(dataset_path, 'int_cylinder'), int_cylinder)
    np.save(os.path.join(dataset_path, 'ext_cylinder'), ext_cylinder)
    np.save(os.path.join(dataset_path, 'int_density'), int_density)
    np.save(os.path.join(dataset_path, 'ext_density'), ext_density)


100%|██████████| 3/3 [00:20<00:00,  6.98s/it]
