# SYNTHETIC DATASET GENERATION 

In [1]:
import sys
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
sys.path.append('../')
from utils import *
#from utils.feature_selection import *
from plot import *
from simulation_setup import *
from models import *
from models.Extended_IF import *
from models.Extended_DIFFI import *
from models.Extended_DIFFI_original import *
import math
import seaborn as sns
sns.set()

import os
import pickle 

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

## Training Set 

In [16]:
def generate_random_points_in_ball(size, dimensions, radius):
    """Sample points inside an origin-centred ball by rejection.

    Candidates are drawn uniformly from the cube ``[-radius, radius]`` in
    ``dimensions`` coordinates; a candidate is kept only when its Euclidean
    norm does not exceed ``radius``. Sampling continues until ``size``
    candidates have been kept.

    Parameters
    ----------
    size : number of points to return.
    dimensions : number of coordinates per point; must be positive.
    radius : radius of the ball; must be positive.

    Returns
    -------
    list of 1-D numpy arrays, each of length ``dimensions``.

    Raises
    ------
    ValueError
        If ``dimensions`` or ``radius`` is not positive.
    """
    if dimensions <= 0 or radius <= 0:
        raise ValueError("Dimensions and radius must be positive values.")

    accepted = []
    while len(accepted) < size:
        # One candidate per iteration, drawn from the enclosing cube.
        candidate = np.random.uniform(low=-radius, high=radius, size=dimensions)
        inside_ball = np.linalg.norm(candidate) <= radius
        if inside_ball:
            accepted.append(candidate)

    return accepted

In [17]:
# Build the training set: 1000 points inside a 6-dimensional ball of radius 5.
size, dimensions, radius = 1000, 6, 5.0

# The sampler returns a list of 1-D arrays; stack them into one 2-D array.
X_train = np.asarray(generate_random_points_in_ball(size, dimensions, radius))

In [32]:
# Store the training set in a dict under the key 'X_train' and pickle it
# into the current working directory.
d={
    'X_train':X_train
}
# NOTE: despite its name, `file_to_read` is the path this cell WRITES to.
# os.path.join inserts the platform's own separator; a hard-coded '\\' only
# forms a valid directory/file split on Windows.
file_to_read = os.path.join(os.getcwd(), 'ball_6_dim.pkl')
with open(file_to_read, 'wb') as file:
    pickle.dump(d,file)

## Synthetic Anomaly Interval

In [None]:
# Base interval [7, 12] and its per-coordinate rescalings.
# For k in {2, 3, 6} each bound b becomes sqrt(b**2 / k), i.e. the value that
# k equal coordinates must take for the resulting vector to have norm b.
# NOTE: a later cell in this notebook reassigns the same four names using
# the interval [5, 10]; whichever cell runs last wins.
anomaly_interval = [7, 12]
anomaly_interval_2 = [np.sqrt(bound**2 / 2) for bound in anomaly_interval]
anomaly_interval_3 = [np.sqrt(bound**2 / 3) for bound in anomaly_interval]
anomaly_interval_6 = [np.sqrt(bound**2 / 6) for bound in anomaly_interval]

In [None]:
# Base interval [5, 10] and its per-coordinate rescalings.
# For k in {2, 3, 6} each bound b becomes sqrt(b**2 / k), i.e. the value that
# k equal coordinates must take for the resulting vector to have norm b.
# NOTE: running this cell overwrites any earlier definition of these names.
anomaly_interval = [5, 10]
anomaly_interval_2 = [np.sqrt(bound**2 / 2) for bound in anomaly_interval]
anomaly_interval_3 = [np.sqrt(bound**2 / 3) for bound in anomaly_interval]
anomaly_interval_6 = [np.sqrt(bound**2 / 6) for bound in anomaly_interval]

## Xaxis

In [2]:
def generate_x_axis(size,min,max,dim=6):
    """Generate points that are spread out along the first coordinate only.

    Column 0 is drawn uniformly between ``min`` and ``max``; each of the
    remaining ``dim - 1`` columns is independent standard-normal noise.

    Parameters
    ----------
    size : int
        Number of points (rows) to generate.
    min, max : float
        Lower and upper bound of the uniform draw for column 0. The names
        shadow the builtins inside this function only; they are kept so that
        existing keyword callers continue to work.
    dim : int, default 6
        Total number of columns. The default reproduces the previously
        hard-coded layout of 1 uniform column + 5 noise columns.

    Returns
    -------
    numpy.ndarray of shape ``(size, dim)``.

    Raises
    ------
    ValueError
        If ``dim`` is smaller than 1.
    """
    if dim < 1:
        raise ValueError("dim must be at least 1.")
    # Draw order (uniform first, then noise) is kept so seeded runs with the
    # default dim give the same numbers as before.
    x=np.random.uniform(min,max,size=size)
    y=np.random.normal(0,1,size=(x.shape[0],dim-1))
    return np.column_stack((x,y))

In [20]:
# Two groups of 50 points each: one with its first coordinate in the positive
# anomaly interval, one in the mirrored negative interval.
xy1=generate_x_axis(50,anomaly_interval[0],anomaly_interval[1])
xy2=generate_x_axis(50,-anomaly_interval[1],-anomaly_interval[0])
# np.vstack replaces np.row_stack, a deprecated alias of the same function.
X_xaxis=np.vstack((xy1,xy2))
# Trailing expression so the notebook echoes the resulting shape.
X_xaxis.shape

(100, 6)

## Yaxis

In [3]:
def generate_y_axis(size,min,max,dim=6):
    """Generate points that are spread out along the second coordinate only.

    Column 1 is drawn uniformly between ``min`` and ``max``; column 0 and the
    remaining ``dim - 2`` columns are independent standard-normal noise.

    Parameters
    ----------
    size : int
        Number of points (rows) to generate.
    min, max : float
        Lower and upper bound of the uniform draw for column 1. The names
        shadow the builtins inside this function only; they are kept so that
        existing keyword callers continue to work.
    dim : int, default 6
        Total number of columns. The default reproduces the previously
        hard-coded layout of noise + uniform + 4 noise columns.

    Returns
    -------
    numpy.ndarray of shape ``(size, dim)``.

    Raises
    ------
    ValueError
        If ``dim`` is smaller than 2.
    """
    if dim < 2:
        raise ValueError("dim must be at least 2.")
    # Draw order (noise, uniform, noise) is kept so seeded runs with the
    # default dim give the same numbers as before.
    x=np.random.normal(0,1,size=size)
    y=np.random.uniform(min,max,size=x.shape[0])
    z=np.random.normal(0,1,size=(x.shape[0],dim-2))
    return np.column_stack((x,y,z))

In [22]:
# Two groups of 50 points each: one with its second coordinate in the positive
# anomaly interval, one in the mirrored negative interval.
xy1=generate_y_axis(50,anomaly_interval[0],anomaly_interval[1])
xy2=generate_y_axis(50,-anomaly_interval[1],-anomaly_interval[0])
# np.vstack replaces np.row_stack, a deprecated alias of the same function.
X_yaxis=np.vstack((xy1,xy2))
# Trailing expression so the notebook echoes the resulting shape.
X_yaxis.shape

(100, 6)

## Bisect Anomalies

By changing the input parameter `d`, it is possible to choose the dimensionality of the bisect anomaly. The values of `d` used in the paper were d = 2, 3 and 6.

In [4]:
def bisect(size,min,max,d,dim=6):
    """Generate points lying near the bisector of the first ``d`` coordinates.

    Column 0 is drawn uniformly between ``min`` and ``max``. Columns
    ``1 .. d-1`` are column 0 plus independent standard-normal noise, and
    columns ``d .. dim-1`` are pure standard-normal noise.

    Parameters
    ----------
    size : int
        Number of points (rows) to generate.
    min, max : float
        Lower and upper bound of the uniform draw for column 0. The names
        shadow the builtins inside this function only; they are kept so that
        existing keyword callers continue to work.
    d : int
        Number of leading columns that follow column 0; must be at least 1.
    dim : int, default 6
        Total number of columns.

    Returns
    -------
    numpy.ndarray of shape ``(size, dim)``.

    Raises
    ------
    ValueError
        If ``d`` is smaller than 1.
    IndexError
        If ``d`` is larger than ``dim`` (raised by the column assignment).
    """
    # With d < 1 the noise loop below would start at or before column 0 and
    # silently overwrite the uniform draw, so reject it up front.
    if d < 1:
        raise ValueError("d must be at least 1.")

    data=np.zeros(shape=(size,dim))
    data[:,0]=np.random.uniform(min,max,size=size)

    # Correlated columns: copy of column 0 perturbed by unit-variance noise.
    for i in range(1,d):
        data[:,i]=data[:,0]+np.random.normal(0,1,size=size)

    # Remaining columns carry noise only.
    for i in range(d,dim):
        data[:,i]=np.random.normal(0,1,size=size)

    return data

In [10]:
# Bisect anomalies for d = 2, 3, 4 and 6: 50 points with column 0 in [5, 10]
# stacked on 50 points with column 0 in [-10, -5].
# np.vstack replaces np.row_stack, a deprecated alias of the same function.
X_bisect=np.vstack((bisect(50,5,10,2),bisect(50,-10,-5,2)))
X_bisect_3d=np.vstack((bisect(50,5,10,3),bisect(50,-10,-5,3)))
# NOTE(review): X_bisect_4d is built here but is not among the arrays pickled
# further down in this notebook — confirm whether that is intended.
X_bisect_4d=np.vstack((bisect(50,5,10,4),bisect(50,-10,-5,4)))
X_bisect_6d=np.vstack((bisect(50,5,10,6),bisect(50,-10,-5,6)))

In [25]:
# Switch the working directory to the dataset folder; the pickle-writing cells
# in this notebook build their output paths from os.getcwd().
# NOTE(review): this is an absolute path on one specific Windows machine —
# adjust it before running the notebook anywhere else.
os.chdir('c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\data\\diffi_data')
# Trailing expression so the notebook echoes the new working directory.
os.getcwd()

'c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\data\\diffi_data'

In [11]:
# Collect the generated anomaly sets under descriptive keys and pickle them
# into the current working directory.
d={
    'X_xaxis': X_xaxis,
    'X_yaxis': X_yaxis,
    'X_bisect': X_bisect,
    'X_bisect_3d': X_bisect_3d,
    'X_bisect_6d': X_bisect_6d
}

# NOTE: despite its name, `file_to_read` is the path this cell WRITES to.
# os.path.join inserts the platform's own separator; a hard-coded '\\' only
# forms a valid directory/file split on Windows.
file_to_read = os.path.join(os.getcwd(), 'syn_test_anomaly_interval_5_10.pkl')
with open(file_to_read, 'wb') as file:
    pickle.dump(d,file)