In [None]:
import os
import datetime
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import math
import time
from datetime import datetime, timedelta

In [None]:
def get_training_testing(type):
  """
    Loads the CSV file that corresponds to the requested dataset kind.

    The file name is built from the module-level METER_ID (and, for the attacked
    variant, ATTACK_INJECTION) and the file is read from the current directory.

    Inputs:
        type : string, either 'testing' or 'testing attack'
    Outputs:
        electricity_data : pandas DataFrame with the contents of the CSV file
    Raises:
        ValueError : if type is not one of the supported values
  """
  # NOTE: the parameter name shadows the builtin `type`; it is kept unchanged so
  # that callers passing it by keyword keep working.
  dir_path = './'

  if type == 'testing attack':
    file_name = str(METER_ID) + '_' + str(ATTACK_INJECTION) + '_61_75.csv'
  elif type == 'testing':
    file_name = str(METER_ID) + '_61_75.csv'
  else:
    # Previously an unknown value fell through and crashed later with an
    # UnboundLocalError; fail early with an explicit message instead.
    raise ValueError(
        "Unsupported dataset type " + repr(type)
        + "; expected 'testing' or 'testing attack'"
    )

  electricity_data = pd.read_csv(os.path.join(dir_path, file_name))

  return electricity_data

def calculate_date(day):
  """
    Converts a day count into a calendar date string with the format %Y-%m-%d.
    The count is applied as an offset in days from 2008-12-31, so day 1 maps to
    '2009-01-01'.
    Inputs:
        day : int
    Outputs:
        date_string : string
  """
  reference_day = datetime(2008, 12, 31)
  return f"{reference_day + timedelta(days = day):%Y-%m-%d}"

def data_preprocessing(df):
  """
    Builds a DataFrame with the columns 'Datetime', 'Usage' and 'Is_Attack' from a
    DataFrame that has (at least) the columns 'DT' and 'Usage'.

    'DT' is a numeric code: its last two digits are the half-hour slot (slot 1
    starts at 00:00) and the remaining leading digits are the day number, counted
    as an offset in days from 2008-12-31 (the same reference used by
    calculate_date). For five-digit codes this matches a 3 + 2 character split.
    'Is_Attack' is initialised to the string 'False' for every row.

    The input DataFrame is not modified; the result keeps its index. Any other
    input column (for example 'ID') is left out, since one meter is handled at a
    time.

    Inputs:
        df : pandas DataFrame
    Outputs:
        df : pandas DataFrame
  """
  # Split the code arithmetically rather than by character position, so that
  # day numbers with fewer than three digits are parsed correctly as well.
  dt_code = pd.to_numeric(df['DT']).astype('int64')
  day_number = dt_code // 100
  half_hour = dt_code % 100

  # Vectorised date arithmetic: midnight of the day plus the slot offset.
  midnight = pd.Timestamp('2008-12-31') + pd.to_timedelta(day_number, unit='D')
  slot_offset = pd.to_timedelta((half_hour - 1) * 30, unit='m')

  return pd.DataFrame(
      {
          'Datetime': midnight + slot_offset,
          'Usage': df['Usage'],
          'Is_Attack': 'False',
      },
      index=df.index,
  )

In [None]:
METER_ID = 5196
ATTACK_INJECTION = 'Swap'
ATTACK_FRACTION = 0.10  # share of test rows replaced with attacked readings
SEED = 42  # fixed seed so the generated file is reproducible across runs

# Load and preprocess the attacked and the clean versions of the test data
df_test_attack = data_preprocessing(get_training_testing('testing attack'))
df_test = data_preprocessing(get_training_testing('testing'))

# Rows are swapped by position, so every sampled position must exist in the
# attacked frame as well.
assert len(df_test_attack) >= len(df_test), "attacked data has fewer rows than the clean test data"

rows = len(df_test)
num_modifications = int(rows * ATTACK_FRACTION)

# Sample distinct row positions: drawing with replacement could pick the same
# row several times and leave fewer than the intended share modified.
rng = np.random.default_rng(SEED)
random_rows = rng.choice(rows, size=num_modifications, replace=False)

# Look the columns up by name instead of relying on hard-coded positions
usage_col = df_test.columns.get_loc('Usage')
attack_usage_col = df_test_attack.columns.get_loc('Usage')
label_col = df_test.columns.get_loc('Is_Attack')

# Overwrite the selected readings with their attacked counterpart and flag them
df_test.iloc[random_rows, usage_col] = df_test_attack.iloc[random_rows, attack_usage_col].to_numpy()
df_test.iloc[random_rows, label_col] = 'Attack'

new_filename_test = str(METER_ID) + "_Injection_" +  str(ATTACK_INJECTION) + "_Test.csv"
df_test.to_csv(new_filename_test, index=False)
print(f"El fichero {new_filename_test} ha sido creado con éxito.")

El fichero 5196_Injection_Swap_Test.csv ha sido creado con éxito.
