In [0]:
import numpy as np
import pandas as pd
from sklearn import preprocessing

In [0]:
from google.colab import drive
# Mount Google Drive at /content/drive/ so files stored there can be accessed by path.
drive.mount('/content/drive/')
# Project root on the mounted drive. It ends with a slash because other cells
# build file names by plain string concatenation (path + 'data/...').
path = '/content/drive/My Drive/attack detection/project/'

# Read the data

In [0]:
# Load the three synchronized variants of the data set.
# Each variable is read from the CSV whose name matches it: the export cell at
# the end of the notebook writes data_df_over_* to the "oversampling" file and
# data_df_down_* to the "downsampling" file, so the inputs must line up the
# same way (they were previously swapped).
data_df_avg = pd.read_csv(path + 'data/sync_data_averaging.csv')
data_df_over = pd.read_csv(path + 'data/sync_data_oversampling.csv')
data_df_down = pd.read_csv(path + 'data/sync_data_downsampling.csv')

# Global definitions

In [0]:
# Row and column counts of the averaging frame.
num_of_rows, num_of_col = data_df_avg.shape

# Experiment settings.
n_to_change = 1000                # how many records are sampled for modification
num_feature_to_change = 3         # how many random values are drawn per sampled record
normalizing_method = 'standard'   # 'standard' or 'min_max'

# Normalizing the data

In [0]:
# Column labels of the (not yet normalized) averaging frame.
column_names = data_df_avg.columns

def normalize_by_scaler(scaler, df):
  """Fit `scaler` on `df` and return the scaled values as a new DataFrame.

  Args:
    scaler: object exposing `fit_transform(df)` that returns a 2-D array with
      one column per column of `df` (for example a scikit-learn scaler).
    df: DataFrame to scale.

  Returns:
    A DataFrame holding the scaled values, labelled with `df`'s own column
    names and carrying a fresh default integer index.
  """
  scaled_values = scaler.fit_transform(df)
  # Take the labels from the frame that was actually scaled instead of a
  # notebook-level global, so the result is correct for any input frame and
  # the function no longer depends on cell execution order.
  return pd.DataFrame(scaled_values, columns=df.columns)

def standard_normalization(df):
  """Scale `df` with a new `preprocessing.StandardScaler` via `normalize_by_scaler`."""
  return normalize_by_scaler(preprocessing.StandardScaler(), df)

def min_max_normalization(df):
  """Scale `df` with a new `preprocessing.MinMaxScaler` via `normalize_by_scaler`."""
  return normalize_by_scaler(preprocessing.MinMaxScaler(), df)

In [0]:
# Normalize all three frames with the method selected by `normalizing_method`.
if normalizing_method == 'min_max':
  data_df_avg_norm = min_max_normalization(data_df_avg)
  data_df_over_norm = min_max_normalization(data_df_over)
  data_df_down_norm = min_max_normalization(data_df_down)
elif normalizing_method == 'standard':
  data_df_avg_norm = standard_normalization(data_df_avg)
  data_df_over_norm = standard_normalization(data_df_over)
  data_df_down_norm = standard_normalization(data_df_down)
else:
  # Fail here with a clear message; otherwise the *_norm frames would simply
  # be left undefined (or stale from a previous run) and a later cell would
  # break in a confusing way.
  raise ValueError(
      "Unknown normalizing_method {!r}; expected 'min_max' or 'standard'".format(
          normalizing_method))

In [0]:
# Preview the first rows of the normalized averaging frame.
data_df_avg_norm.head()

In [0]:
# Preview the first rows of the normalized oversampling frame.
data_df_over_norm.head()

In [0]:
# Preview the first rows of the normalized downsampling frame.
data_df_down_norm.head()

# Create artificial anomalies

In [0]:
# Fixed seed so that Restart & Run All selects the same records, feature
# positions and replacement values every time. A dedicated RandomState is used
# so NumPy's global random state is left untouched.
random_seed = 42
anomaly_rng = np.random.RandomState(random_seed)

# Index labels of the records that will be modified.
idx = data_df_avg.sample(n=n_to_change, random_state=anomaly_rng).index

# For every selected record: the column positions to overwrite and the values
# to write there.
# NOTE(review): randint draws with replacement, so one record can receive the
# same column position more than once and end up with fewer than
# num_feature_to_change distinct features altered.
# NOTE(review): rand() yields values in [0, 1); confirm this range is what is
# wanted for both normalization methods.
rand_features_idx = anomaly_rng.randint(num_of_col, size=(n_to_change, num_feature_to_change))
rand_new_features_vals = anomaly_rng.rand(n_to_change, num_feature_to_change)

# Labels: 1.0 for a modified record, 0.0 for every other record.
y_df = pd.Series(np.zeros(num_of_rows))
y_df[idx] = 1

In [0]:
def create_artificial_anomalies(df, record_idx=None, features_idx=None,
                                new_features_vals=None, labels=None):
  """Return a copy of `df` with selected cells overwritten and a 'label' column added.

  Args:
    df: DataFrame to derive the anomalous data set from. It is not modified;
      all changes are made on a copy.
    record_idx: row positions to alter, one per record. Defaults to the
      notebook-level `idx`.
    features_idx: for each record, the column positions to overwrite.
      Defaults to the notebook-level `rand_features_idx`.
    new_features_vals: for each record, the values to write, aligned with
      `features_idx`. Defaults to the notebook-level `rand_new_features_vals`.
    labels: Series assigned as the 'label' column (aligned on the index).
      Defaults to the notebook-level `y_df`.

  Returns:
    A new DataFrame containing the altered values plus the 'label' column.
  """
  # Fall back to the notebook globals only when no explicit value is given, so
  # existing one-argument calls keep working unchanged.
  record_idx = idx if record_idx is None else record_idx
  features_idx = rand_features_idx if features_idx is None else features_idx
  new_features_vals = rand_new_features_vals if new_features_vals is None else new_features_vals
  labels = y_df if labels is None else labels

  # Work on a copy so the normalized input frame stays intact and re-running
  # the calling cell always starts from the same data.
  result = df.copy()

  # record_idx entries are used positionally (iloc); this matches index labels
  # only for a default 0..n-1 index.
  for row_pos, col_positions, values in zip(record_idx, features_idx, new_features_vals):
    for col_pos, value in zip(col_positions, values):
      result.iloc[row_pos, col_pos] = value

  # Assign the labels once, after all edits (previously this ran on every loop
  # iteration and was skipped entirely when there were no records to change).
  result['label'] = labels
  return result

In [0]:
# Run the anomaly injection on each normalized frame, in the order
# averaging, oversampling, downsampling.
# NOTE(review): the sampled records and the label Series were derived from
# data_df_avg; this presumes all three frames have the same number of rows —
# confirm.
data_df_avg_anomal, data_df_over_anomal, data_df_down_anomal = (
  create_artificial_anomalies(normalized_frame)
  for normalized_frame in (data_df_avg_norm, data_df_over_norm, data_df_down_norm)
)

In [0]:
# Preview the first rows of the averaging frame after anomaly injection.
data_df_avg_anomal.head()

In [0]:
# Preview the first rows of the oversampling frame after anomaly injection.
data_df_over_anomal.head()

In [0]:
# Preview the first rows of the downsampling frame after anomaly injection.
data_df_down_anomal.head()

In [0]:
# Write each anomalous frame to CSV without the index column. All three file
# names end with "<num_feature_to_change>_<normalizing_method>.csv".
output_suffix = str(num_feature_to_change) + '_' + normalizing_method + '.csv'

data_df_avg_anomal.to_csv(
  path + 'data/sync_data_averaging_anomal_' + output_suffix, index=False)
data_df_over_anomal.to_csv(
  path + 'data/sync_data_oversampling_anomal_' + output_suffix, index=False)
data_df_down_anomal.to_csv(
  path + 'data/sync_data_downsampling_anomal_' + output_suffix, index=False)