# Loading data captured with OZM

## Initial data analysis
First, we'll analyze metadata and data from the dataset whose structure should be based on the NILMTK-DF format.

Let's count the number of rows in each data file in the new parquet format

In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
def modulus(val):
    """
    Replace every ``[real, imag]`` pair inside a list by its modulus.

    Parameters
    ----------
    val : object
        A single cell value. Only ``list`` instances are processed.

    Returns
    -------
    list or object
        For a list: a new list of the same length in which each element that
        is itself a two-item list ``[re, im]`` is replaced by
        ``sqrt(re**2 + im**2)`` as a built-in ``float``; every other element
        is copied unchanged. Any non-list ``val`` is returned as is.
    """
    if not isinstance(val, list):
        return val

    res_val = []
    for val_item in val:
        if isinstance(val_item, list) and len(val_item) == 2:
            # Cast to a built-in float: NumPy 2 prints its scalars as
            # ``np.float64(...)``, and that text would otherwise end up in
            # the CSV files written from these lists.
            val_item = float(np.sqrt(val_item[0]**2 + val_item[1]**2))
        res_val.append(val_item)
    return res_val

def array_to_list(array):
    """Recursively turn NumPy arrays into plain (nested) Python lists.

    Lists and tuples are rebuilt element by element (keeping list/tuple as the
    container), so arrays nested inside them are converted as well. Any other
    value is returned untouched.
    """
    # Unwrap the array itself first; tolist() yields a list or a scalar.
    while isinstance(array, np.ndarray):
        array = array.tolist()

    if isinstance(array, list):
        return list(map(array_to_list, array))
    if isinstance(array, tuple):
        return tuple(map(array_to_list, array))
    return array

## Reading a data file

In [3]:
# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_fan_200MS.parquet"
print(os.path.exists(df_path))
print(os.getcwd())
df = pd.read_parquet(df_path)
# Show one raw cell of the complex voltage harmonics and its Python type.
item = df.iloc[0]['Voltage_Harmonics_Complex']
print(item)
print(type(item))

True
C:\Users\carlo\jupyter_notebooks\10 APLICATIVOS  UAL  JUNIO
[array([  32.4375, -247.038 ]) array([ 0.003406, -0.243409])
 array([1.27972, 1.02882]) array([ 0.026858, -0.118185])
 array([-0.796598,  3.21396 ]) array([-0.004229, -0.000101])
 array([2.51022, 1.04484]) array([ 0.018434, -0.037959])
 array([-1.8301,  1.0085]) array([-0.011857,  0.009189])
 array([-0.051177,  0.033542]) array([0.011324, 0.006756])
 array([-8.37000e-04,  8.72074e-01]) array([ 0.002346, -0.038296])
 array([ 0.143313, -0.159524]) array([ 0.019544, -0.007958])
 array([0.332123, 0.406834]) array([ 0.014667, -0.025447])
 array([-0.070492,  0.209501]) array([-0.011868, -0.022374])
 array([0.051322, 0.01837 ]) array([-0.004982, -0.014967])
 array([-0.145041,  0.138235]) array([ 0.009626, -0.023382])
 array([-0.167187,  0.103101]) array([-0.001768, -0.001143])
 array([-0.015455,  0.018475]) array([-0.010076, -0.003495])
 array([-0.014363,  0.029135]) array([-0.000143, -0.004501])
 array([ 0.141028, -0.080551]) a

0        [239.95425926196853, 3.3079093001169184, 2.701...
1        [242.12684891868145, 1.5281954031615197, 1.084...
2        [242.5702061057994, 0.2218705199209665, 1.8971...
3        [242.50256694082643, 0.14188803530248772, 2.03...
4        [242.53347239059602, 0.01846863841759863, 1.98...
                               ...                        
39403    [244.75933004190054, 0.32007489714908915, 1.89...
39404    [244.6773134517379, 0.05796792183613278, 2.032...
39405    [244.65726304248562, 0.02422492010306742, 2.00...
39406    [244.6480277329249, 0.015282713404366387, 1.99...
39407    [244.4975795552177, 0.2887791588272949, 2.1860...

## Convert array to list

In [4]:
# Convert every NumPy array stored in the frame into (nested) Python lists.
# Only object-dtype columns can hold arrays; array_to_list returns any other
# value unchanged, so the numeric columns are skipped. Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list)
item = df.iloc[0]['Voltage_Harmonics_Complex']
print(item)
print(type(item))

[[32.4375, -247.038], [0.003406, -0.243409], [1.27972, 1.02882], [0.026858, -0.118185], [-0.796598, 3.21396], [-0.004229, -0.000101], [2.51022, 1.04484], [0.018434, -0.037959], [-1.8301, 1.0085], [-0.011857, 0.009189], [-0.051177, 0.033542], [0.011324, 0.006756], [-0.000837, 0.872074], [0.002346, -0.038296], [0.143313, -0.159524], [0.019544, -0.007958], [0.332123, 0.406834], [0.014667, -0.025447], [-0.070492, 0.209501], [-0.011868, -0.022374], [0.051322, 0.01837], [-0.004982, -0.014967], [-0.145041, 0.138235], [0.009626, -0.023382], [-0.167187, 0.103101], [-0.001768, -0.001143], [-0.015455, 0.018475], [-0.010076, -0.003495], [-0.014363, 0.029135], [-0.000143, -0.004501], [0.141028, -0.080551], [0.015828, -0.027799], [0.020446, -0.055777], [0.029274, -0.02007], [-0.175692, 0.058992], [0.019989, 0.003976], [-0.075031, 0.118115], [0.012733, 0.001155], [0.027213, -0.009475], [-0.026719, -0.005775], [0.015123, 0.016113], [-0.010431, -0.012448], [-0.020987, 0.002164], [0.005068, 0.005591], [

## Calculate the modulus of all complex parts for the voltage field

In [5]:
# Replace each [real, imag] pair inside the list-valued cells by its modulus.
# Only object-dtype columns can hold lists; modulus returns any other value
# unchanged, so the numeric columns are skipped. Series.map is used because
# DataFrame.applymap is deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(modulus)
item = df.iloc[0]['Voltage_Harmonics_Complex']
print(item)
print(type(item))

[249.15851350144553, 0.2434328287577499, 1.641996915587846, 0.12119837618136639, 3.311209334246931, 0.004230205905154027, 2.7189878767659112, 0.042198317940410845, 2.0895784885952478, 0.015000872307969293, 0.06118946880795747, 0.013186224326925431, 0.8720744016682292, 0.0383677902934219, 0.21444468411457535, 0.021102078096718338, 0.5251852917637736, 0.029371256323146955, 0.22104250963332822, 0.025326770421828362, 0.05451059148459132, 0.01577439105005325, 0.20036418568696354, 0.025285920983820227, 0.19642125437436755, 0.002105296416184666, 0.024086980923312077, 0.01066493323935973, 0.032482980066490204, 0.004503271033371187, 0.16241108455090125, 0.03198921669875647, 0.059406335057803385, 0.03549326662903825, 0.18533141916037874, 0.020380596090399317, 0.1399314267275225, 0.01278527723594604, 0.02881532567922841, 0.02733597603891253, 0.02209827816821935, 0.01624064238261529, 0.021098271611674733, 0.00754611853869259, 0.03831676924011209, 0.004634051575025897, 0.04350997561479436, 0.005505

## Complete example

In [6]:
def modulus(val):
    """
    Replace every ``[real, imag]`` pair inside a list by its modulus.

    Parameters
    ----------
    val : object
        A single cell value. Only ``list`` instances are processed.

    Returns
    -------
    list or object
        For a list: a new list of the same length in which each element that
        is itself a two-item list ``[re, im]`` is replaced by
        ``sqrt(re**2 + im**2)`` as a built-in ``float``; every other element
        is copied unchanged. Any non-list ``val`` is returned as is.
    """
    if not isinstance(val, list):
        return val

    res_val = []
    for val_item in val:
        if isinstance(val_item, list) and len(val_item) == 2:
            # Cast to a built-in float: NumPy 2 prints its scalars as
            # ``np.float64(...)``, and that text would otherwise end up in
            # the CSV files written from these lists.
            val_item = float(np.sqrt(val_item[0]**2 + val_item[1]**2))
        res_val.append(val_item)
    return res_val

def array_to_list(array):
    """Recursively turn NumPy arrays into plain (nested) Python lists.

    Lists and tuples are rebuilt element by element (keeping list/tuple as the
    container), so arrays nested inside them are converted as well. Any other
    value is returned untouched.
    """
    # Unwrap the array itself first; tolist() yields a list or a scalar.
    while isinstance(array, np.ndarray):
        array = array.tolist()

    if isinstance(array, list):
        return list(map(array_to_list, array))
    if isinstance(array, tuple):
        return tuple(map(array_to_list, array))
    return array

In [7]:
 
    
    
    
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10

# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_fan_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Convert the array-valued cells to nested lists, then replace each
# [real, imag] pair by its modulus. Only object-dtype columns can hold
# arrays/lists; both helpers return any other value unchanged, so the numeric
# columns are skipped. Series.map is used because DataFrame.applymap is
# deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list).map(modulus)

# Rename the measurements to the short names used in the output file.
df = df.rename(columns={
    'Time': 'timestamp',
    'Active_Power': 'W',
    'Reactive_Power': 'VAR',
    'Apparent_Power': 'VA',
    'Frequency': 'f',
    'Voltage': 'VLN',
    'Power_Factor': 'PF',
    'Current': 'A',
})

# Keep only the output columns, in the required order. This also discards
# Tag, FirstTimestamp, OriginalTimestamp, Flag, Unbalance and Phi.
df = df[['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
         'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex',
         'Power_Harmonics']]

# Use the timestamp as index instead of the auto-incremental row number.
df_reset = df.set_index('timestamp')

df_reset.to_csv('fan.csv')




    


## Checking number of rows for each file

In [8]:
# Count the rows of the fan recording.
with open(r'.\data\ozm1\ozm_fan_200MS.parquet', 'rb') as f:
    df = pd.read_parquet(f, engine='pyarrow')
# The label used to say "freezer" although this cell reads the fan file.
print ('filas fichero fan=',len(df.index))

filas fichero freezer= 72443


In [9]:
# Row count of the freezer recording (file opened in binary mode, pyarrow engine).
with open(r'.\data\ozm1\ozm_freezer_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero freezer=', df.shape[0])

filas fichero freezer= 72443


In [10]:
# Row count of the fridge recording (file opened in binary mode, pyarrow engine).
with open(r'.\data\ozm1\ozm_fridge_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero fridge=', df.shape[0])


filas fichero fridge= 72443


In [12]:
# Count the rows of the bulb recording.
with open(r'.\data\ozm1\ozm_bulb_200MS.parquet', 'rb') as f:
    df = pd.read_parquet(f, engine='pyarrow')
# The label used to say "kettle" although this cell reads the bulb file.
print ('filas fichero bulb=',len(df.index))


filas fichero kettle= 72514


In [13]:
# Row count of the microwave recording; the file name on disk is spelled
# "microvawe" and is kept exactly as is.
with open(r'.\data\ozm1\ozm_microvawe_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero microwave=', df.shape[0])


filas fichero microwave= 72514


In [14]:
# Row count of the monitor recording (file opened in binary mode, pyarrow engine).
with open(r'.\data\ozm1\ozm_monitor_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero monitor=', df.shape[0])


filas fichero monitor= 72509


In [15]:
# Row count of the oil heater recording (file opened in binary mode, pyarrow engine).
with open(r'.\data\ozm1\ozm_oil_heater_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero oil_heater=', df.shape[0])

filas fichero oil_heater= 72508


In [16]:
# Row count of the oven recording (file opened in binary mode, pyarrow engine).
with open(r'.\data\ozm1\ozm_oven_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero oven=', df.shape[0])

filas fichero oven= 72514


In [17]:
# Row count of the vacuum cleaner recording (file opened in binary mode, pyarrow engine).
with open(r'.\data\ozm1\ozm_vacuum_cleaner_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero vacuum cleaner=', df.shape[0])

filas fichero vacuum cleaner= 72514


In [18]:
# Row count of the water heater recording (file opened in binary mode, pyarrow engine).
with open(r'.\data\ozm1\ozm_water_heater_200MS.parquet', 'rb') as parquet_file:
    df = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero water_heater=', df.shape[0])


filas fichero water_heater= 72443


In [19]:
# Load the aggregated recording into df_datos_agregados (used by the cells
# that follow) and report its row count.
with open(r'.\data\ozm1\ozm_aggregation_200MS.parquet', 'rb') as parquet_file:
    df_datos_agregados = pd.read_parquet(parquet_file, engine='pyarrow')
print('filas fichero agregation=', df_datos_agregados.shape[0])

filas fichero agregation= 72510


The first N rows of the DataFrame can be visualized as follows

In [20]:
# Number of rows to preview.
N = 4
# Show the first N rows of the aggregated data.
df_datos_agregados.head(n=N)


Unnamed: 0,Tag,FirstTimestamp,OriginalTimestamp,Time,Voltage,Current,Frequency,Flag,Unbalance,Power_Factor,Phi,Active_Power,Reactive_Power,Apparent_Power,Current_THD,Voltage_THD,Power_Harmonics,Voltage_Harmonics_Complex,Current_Harmonics_Complex
0,start,1686296078147,1686296108046,1686296078147,249.569,0.126192,49.9827,0,0,0.637181,13.9694,20.0671,-5.03741,20.689735,280.471,3.91128,"[20.25, -8.9e-05, 0.023729, 5.1e-05, -0.12404,...","[[20.7074, -248.65], [-0.016208, -0.056902], [...","[[0.026855, -0.079203], [0.001193, 0.001225], ..."
1,start,1686296078147,1686296108046,1686296078347,249.532,0.122507,49.9829,0,0,0.626578,13.4584,19.1541,-4.62669,19.704964,287.456,3.93832,"[19.3334, 1.3e-05, 0.025698, -1e-05, -0.125821...","[[24.1286, -248.299], [-0.032152, 0.060932], [...","[[0.025955, -0.075341], [0.000339, 0.000392], ..."
2,start,1686296078147,1686296108046,1686296078547,249.636,0.124721,49.9835,0,0,0.638109,12.2018,19.8674,-4.33612,20.335104,288.337,3.93237,"[20.0523, 9.6e-05, 0.021593, 0.000109, -0.1216...","[[21.8112, -248.62], [0.00244, -0.195232], [1....","[[0.024329, -0.07852], [0.001633, -0.000473], ..."
3,start,1686296078147,1686296108046,1686296078747,249.561,0.126381,49.9838,0,0,0.635802,12.5227,20.053,-4.49449,20.550546,295.005,3.92831,"[20.2353, -6.2e-05, 0.023654, 2.9e-05, -0.1255...","[[19.5068, -248.74], [-0.011084, -0.01596], [1...","[[0.024299, -0.079446], [0.002231, 0.002313], ..."


And the last N rows

In [21]:
# Show the last N rows of the aggregated data.
df_datos_agregados.tail(n=N)

Unnamed: 0,Tag,FirstTimestamp,OriginalTimestamp,Time,Voltage,Current,Frequency,Flag,Unbalance,Power_Factor,Phi,Active_Power,Reactive_Power,Apparent_Power,Current_THD,Voltage_THD,Power_Harmonics,Voltage_Harmonics_Complex,Current_Harmonics_Complex
72506,end,1686310549573,1686310579865,1686310579176,245.404,0.133911,50.004,0,0,0.566956,9.75458,18.6316,-3.21547,18.907005,411.473,4.4896,"[18.704, -0.0002, 0.023459, 6.6e-05, -0.052995...","[[27.4231, -243.821], [-0.07573, -0.04279], [1...","[[0.021543, -0.074289], [0.002526, 0.000194], ..."
72507,end,1686310549573,1686310579865,1686310579376,245.437,0.129352,50.0042,0,0,0.588233,8.74824,18.6751,-2.88352,18.896381,392.795,4.46056,"[18.7384, 8.5e-05, 0.024827, 0.000109, -0.0508...","[[26.9921, -243.902], [0.020353, 0.009572], [1...","[[0.020079, -0.074605], [0.004438, -0.000544],..."
72508,end,1686310549573,1686310579865,1686310579576,245.326,0.140014,50.0042,0,0,0.58969,7.3863,20.2554,-2.63702,20.426305,385.7,4.51483,"[20.342, 9.9e-05, 0.021606, 5.8e-05, -0.054049...","[[26.2666, -243.87], [-0.057687, -0.096385], [...","[[0.01957, -0.081305], [0.000742, -0.001476], ..."
72509,end,1686310549573,1686310579865,1686310579776,245.34,0.13901,50.0044,0,0,0.583052,6.9598,19.8849,-2.43653,20.033593,400.886,4.482,"[19.9597, 0.000115, 0.025704, 8.4e-05, -0.0545...","[[25.838, -243.93], [0.010238, 0.048307], [1.2...","[[0.018449, -0.079872], [0.003134, 0.001708], ..."


Filter by a specific value

In [22]:
# Keep the rows whose Unbalance equals 0 and preview the first five.
df_datos_agregados.loc[df_datos_agregados['Unbalance'] == 0].head()

Unnamed: 0,Tag,FirstTimestamp,OriginalTimestamp,Time,Voltage,Current,Frequency,Flag,Unbalance,Power_Factor,Phi,Active_Power,Reactive_Power,Apparent_Power,Current_THD,Voltage_THD,Power_Harmonics,Voltage_Harmonics_Complex,Current_Harmonics_Complex
0,start,1686296078147,1686296108046,1686296078147,249.569,0.126192,49.9827,0,0,0.637181,13.9694,20.0671,-5.03741,20.689735,280.471,3.91128,"[20.25, -8.9e-05, 0.023729, 5.1e-05, -0.12404,...","[[20.7074, -248.65], [-0.016208, -0.056902], [...","[[0.026855, -0.079203], [0.001193, 0.001225], ..."
1,start,1686296078147,1686296108046,1686296078347,249.532,0.122507,49.9829,0,0,0.626578,13.4584,19.1541,-4.62669,19.704964,287.456,3.93832,"[19.3334, 1.3e-05, 0.025698, -1e-05, -0.125821...","[[24.1286, -248.299], [-0.032152, 0.060932], [...","[[0.025955, -0.075341], [0.000339, 0.000392], ..."
2,start,1686296078147,1686296108046,1686296078547,249.636,0.124721,49.9835,0,0,0.638109,12.2018,19.8674,-4.33612,20.335104,288.337,3.93237,"[20.0523, 9.6e-05, 0.021593, 0.000109, -0.1216...","[[21.8112, -248.62], [0.00244, -0.195232], [1....","[[0.024329, -0.07852], [0.001633, -0.000473], ..."
3,start,1686296078147,1686296108046,1686296078747,249.561,0.126381,49.9838,0,0,0.635802,12.5227,20.053,-4.49449,20.550546,295.005,3.92831,"[20.2353, -6.2e-05, 0.023654, 2.9e-05, -0.1255...","[[19.5068, -248.74], [-0.011084, -0.01596], [1...","[[0.024299, -0.079446], [0.002231, 0.002313], ..."
4,start,1686296078147,1686296108046,1686296078947,249.598,0.123971,49.9838,0,0,0.627943,12.3643,19.4303,-4.29724,19.899855,298.247,3.93704,"[19.6033, -1e-06, 0.029308, 3.3e-05, -0.124549...","[[21.7027, -248.595], [-0.017383, 0.020425], [...","[[0.023987, -0.076762], [0.002385, 0.001993], ..."


Filter by a numeric value

In [23]:
# Keep the rows whose Frequency is at least 50 and preview the first five.
df_datos_agregados.loc[df_datos_agregados['Frequency'] >= 50].head()

Unnamed: 0,Tag,FirstTimestamp,OriginalTimestamp,Time,Voltage,Current,Frequency,Flag,Unbalance,Power_Factor,Phi,Active_Power,Reactive_Power,Apparent_Power,Current_THD,Voltage_THD,Power_Harmonics,Voltage_Harmonics_Complex,Current_Harmonics_Complex
48,start,1686296078147,1686296108046,1686296087744,249.747,0.123005,50.0006,0,0,0.624193,12.7392,19.1753,-4.37371,19.667729,286.225,3.89507,"[19.3459, 0.000103, 0.020642, 3.4e-05, -0.1169...","[[28.5111, -248.053], [0.015342, -0.173915], [...","[[0.02625, -0.074974], [0.000987, -0.000508], ..."
49,start,1686296078147,1686296108046,1686296087944,249.643,0.121401,50.0013,0,0,0.638196,12.2784,19.3418,-4.24749,19.802642,282.975,4.00634,"[19.5161, -5.3e-05, 0.02575, 2.4e-05, -0.12000...","[[28.1027, -247.99], [-0.013752, 0.07199], [1....","[[0.025715, -0.075783], [0.001376, -0.000479],..."
50,start,1686296078147,1686296108046,1686296088143,249.595,0.124444,50.0019,0,0,0.623012,12.0115,19.3512,-4.15615,19.79244,311.285,3.9031,"[19.5339, -1.9e-05, 0.024298, 0.000135, -0.124...","[[31.1497, -247.584], [0.019802, -0.019757], [...","[[0.026297, -0.075589], [0.000597, 0.001537], ..."
51,start,1686296078147,1686296108046,1686296088343,249.659,0.121313,50.0025,0,0,0.620279,12.1637,18.7863,-4.08689,19.225677,293.462,3.95061,"[18.9608, -3.7e-05, 0.024562, 2.9e-05, -0.1213...","[[30.3277, -247.751], [-0.034737, -0.061506], ...","[[0.025483, -0.073412], [-2.6e-05, 0.000609], ..."
52,start,1686296078147,1686296108046,1686296088543,249.596,0.117324,50.0031,0,0,0.633109,12.8476,18.5397,-4.26719,19.024438,290.509,3.90887,"[18.7102, -4.5e-05, 0.024442, 7e-05, -0.121317...","[[32.5086, -247.411], [-0.033986, -0.059571], ...","[[0.026723, -0.072113], [-2.2e-05, 0.000774], ..."


### GENERATION OF PREPROCESSED FILES

In each file, we will calculate the apparent power for each timestamp value.

Additionally, the voltage and current harmonics are stored as complex values (real and imaginary parts), so we need to obtain the magnitude (modulus) of each harmonic by applying the `modulus` function to the file data.

We also need to reorganize the columns in the agreed order in NILMTK.

In the data files, the auto-incremental value is not useful as it is not an electrical variable. Remember that this value is used as the dataset index.

For this purpose, we will use the timestamp value as an index, since this value is one-to-one.

We will reconstruct the CSV files by removing this field.


## Generation of data file from ozm_fan_200MS.parquet

In [24]:
 
    
    
    
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10

# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_fan_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Convert the array-valued cells to nested lists, then replace each
# [real, imag] pair by its modulus. Only object-dtype columns can hold
# arrays/lists; both helpers return any other value unchanged, so the numeric
# columns are skipped. Series.map is used because DataFrame.applymap is
# deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list).map(modulus)

# Rename the measurements to the short names used in the output file.
df = df.rename(columns={
    'Time': 'timestamp',
    'Active_Power': 'W',
    'Reactive_Power': 'VAR',
    'Apparent_Power': 'VA',
    'Frequency': 'f',
    'Voltage': 'VLN',
    'Power_Factor': 'PF',
    'Current': 'A',
})

# Keep only the output columns, in the required order. This also discards
# Tag, FirstTimestamp, OriginalTimestamp, Flag, Unbalance and Phi.
df = df[['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
         'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex',
         'Power_Harmonics']]

# Use the timestamp as index instead of the auto-incremental row number.
df_reset = df.set_index('timestamp')

df_reset.to_csv('fan.csv')



## Generation of data file from ozm_freezer_200MS.parquet

In [25]:
 
    
    
    
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10

# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_freezer_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Convert the array-valued cells to nested lists, then replace each
# [real, imag] pair by its modulus. Only object-dtype columns can hold
# arrays/lists; both helpers return any other value unchanged, so the numeric
# columns are skipped. Series.map is used because DataFrame.applymap is
# deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list).map(modulus)

# Rename the measurements to the short names used in the output file.
df = df.rename(columns={
    'Time': 'timestamp',
    'Active_Power': 'W',
    'Reactive_Power': 'VAR',
    'Apparent_Power': 'VA',
    'Frequency': 'f',
    'Voltage': 'VLN',
    'Power_Factor': 'PF',
    'Current': 'A',
})

# Keep only the output columns, in the required order. This also discards
# Tag, FirstTimestamp, OriginalTimestamp, Flag, Unbalance and Phi.
df = df[['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
         'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex',
         'Power_Harmonics']]

# Use the timestamp as index instead of the auto-incremental row number.
df_reset = df.set_index('timestamp')

df_reset.to_csv('freezer.csv')



## Generation of data file from ozm_monitor_200MS.parquet

In [26]:
    
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10

# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_monitor_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Convert the array-valued cells to nested lists, then replace each
# [real, imag] pair by its modulus. Only object-dtype columns can hold
# arrays/lists; both helpers return any other value unchanged, so the numeric
# columns are skipped. Series.map is used because DataFrame.applymap is
# deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list).map(modulus)

# Rename the measurements to the short names used in the output file.
df = df.rename(columns={
    'Time': 'timestamp',
    'Active_Power': 'W',
    'Reactive_Power': 'VAR',
    'Apparent_Power': 'VA',
    'Frequency': 'f',
    'Voltage': 'VLN',
    'Power_Factor': 'PF',
    'Current': 'A',
})

# Keep only the output columns, in the required order. This also discards
# Tag, FirstTimestamp, OriginalTimestamp, Flag, Unbalance and Phi.
df = df[['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
         'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex',
         'Power_Harmonics']]

# Use the timestamp as index instead of the auto-incremental row number.
df_reset = df.set_index('timestamp')

df_reset.to_csv('monitor.csv')


## Generation of data file from ozm_vacuum_cleaner_200MS.parquet

In [27]:
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10

# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_vacuum_cleaner_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Convert the array-valued cells to nested lists, then replace each
# [real, imag] pair by its modulus. Only object-dtype columns can hold
# arrays/lists; both helpers return any other value unchanged, so the numeric
# columns are skipped. Series.map is used because DataFrame.applymap is
# deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list).map(modulus)

# Rename the measurements to the short names used in the output file.
df = df.rename(columns={
    'Time': 'timestamp',
    'Active_Power': 'W',
    'Reactive_Power': 'VAR',
    'Apparent_Power': 'VA',
    'Frequency': 'f',
    'Voltage': 'VLN',
    'Power_Factor': 'PF',
    'Current': 'A',
})

# Keep only the output columns, in the required order. This also discards
# Tag, FirstTimestamp, OriginalTimestamp, Flag, Unbalance and Phi.
df = df[['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
         'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex',
         'Power_Harmonics']]

# Use the timestamp as index instead of the auto-incremental row number.
df_reset = df.set_index('timestamp')

df_reset.to_csv('vacuum_cleaner.csv')

#print (df_reset)
    
#df    

## Generation of data file from ozm_water_heater_200MS.parquet

In [28]:
    
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10

# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_water_heater_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Convert the array-valued cells to nested lists, then replace each
# [real, imag] pair by its modulus. Only object-dtype columns can hold
# arrays/lists; both helpers return any other value unchanged, so the numeric
# columns are skipped. Series.map is used because DataFrame.applymap is
# deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list).map(modulus)

# Rename the measurements to the short names used in the output file.
df = df.rename(columns={
    'Time': 'timestamp',
    'Active_Power': 'W',
    'Reactive_Power': 'VAR',
    'Apparent_Power': 'VA',
    'Frequency': 'f',
    'Voltage': 'VLN',
    'Power_Factor': 'PF',
    'Current': 'A',
})

# Keep only the output columns, in the required order. This also discards
# Tag, FirstTimestamp, OriginalTimestamp, Flag, Unbalance and Phi.
df = df[['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
         'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex',
         'Power_Harmonics']]

# Use the timestamp as index instead of the auto-incremental row number.
df_reset = df.set_index('timestamp')

df_reset.to_csv('water_heater.csv')

#print (df_reset)
    
#df    

## Generation of data file from ozm_bulb_200MS.parquet

In [29]:
  
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10

# Raw string: the Windows path contains backslash sequences such as "\d" and
# "\o", which are invalid escapes in a normal string literal.
df_path = r".\data\ozm1\ozm_bulb_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Convert the array-valued cells to nested lists, then replace each
# [real, imag] pair by its modulus. Only object-dtype columns can hold
# arrays/lists; both helpers return any other value unchanged, so the numeric
# columns are skipped. Series.map is used because DataFrame.applymap is
# deprecated in recent pandas releases.
for col in df.columns[df.dtypes == object]:
    df[col] = df[col].map(array_to_list).map(modulus)

# Rename the measurements to the short names used in the output file.
df = df.rename(columns={
    'Time': 'timestamp',
    'Active_Power': 'W',
    'Reactive_Power': 'VAR',
    'Apparent_Power': 'VA',
    'Frequency': 'f',
    'Voltage': 'VLN',
    'Power_Factor': 'PF',
    'Current': 'A',
})

# Keep only the output columns, in the required order. This also discards
# Tag, FirstTimestamp, OriginalTimestamp, Flag, Unbalance and Phi.
df = df[['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
         'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex',
         'Power_Harmonics']]

# Use the timestamp as index instead of the auto-incremental row number.
df_reset = df.set_index('timestamp')

df_reset.to_csv('bulb.csv')

## Generation of data file from ozm_fridge_200MS.parquet

In [30]:
  
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10




# Convert the OZM capture for the fridge (parquet) into fridge.csv.
#
# A raw string keeps the Windows-style backslashes literal; the previous plain
# string relied on "\d" and "\o" being *invalid* escape sequences, which newer
# Python versions warn about.
df_path = r".\data\ozm1\ozm_fridge_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Transform every cell of the DataFrame in one pass:
#   1. numpy arrays (possibly nested) become plain Python lists;
#   2. inside each list cell, every [real, imaginary] pair is replaced by its
#      modulus; other values are left untouched.
df = df.applymap(array_to_list)
df = df.applymap(modulus)

# Rename the OZM column names to the short names used in the output CSV.
df.rename(
    columns={
        'Time': 'timestamp',
        'Active_Power': 'W',
        'Reactive_Power': 'VAR',
        'Apparent_Power': 'VA',
        'Frequency': 'f',
        'Voltage': 'VLN',
        'Power_Factor': 'PF',
        'Current': 'A',
    },
    inplace=True,
)

# Keep only the columns that go into the CSV, in output order. Selecting by
# name discards every other column, so no explicit drop/insert of individual
# columns is needed beforehand.
df = df[[
    'timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
    'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex', 'Power_Harmonics',
]]

# Replace the default integer index with the timestamp column.
df_reset = df.set_index('timestamp')

df_reset.to_csv('fridge.csv')

## Generation of data file from ozm_microvawe_200MS.parquet

In [31]:
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10




# Convert the OZM capture for the microwave (parquet) into microwave.csv.
# NOTE(review): the input file name is spelled "microvawe"; it is kept as-is
# because it has to match the file on disk — confirm before renaming.
#
# A raw string keeps the Windows-style backslashes literal; the previous plain
# string relied on "\d" and "\o" being *invalid* escape sequences, which newer
# Python versions warn about.
df_path = r".\data\ozm1\ozm_microvawe_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Transform every cell of the DataFrame in one pass:
#   1. numpy arrays (possibly nested) become plain Python lists;
#   2. inside each list cell, every [real, imaginary] pair is replaced by its
#      modulus; other values are left untouched.
df = df.applymap(array_to_list)
df = df.applymap(modulus)

# Rename the OZM column names to the short names used in the output CSV.
df.rename(
    columns={
        'Time': 'timestamp',
        'Active_Power': 'W',
        'Reactive_Power': 'VAR',
        'Apparent_Power': 'VA',
        'Frequency': 'f',
        'Voltage': 'VLN',
        'Power_Factor': 'PF',
        'Current': 'A',
    },
    inplace=True,
)

# Keep only the columns that go into the CSV, in output order. Selecting by
# name discards every other column, so no explicit drop/insert of individual
# columns is needed beforehand.
df = df[[
    'timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
    'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex', 'Power_Harmonics',
]]

# Replace the default integer index with the timestamp column.
df_reset = df.set_index('timestamp')

df_reset.to_csv('microwave.csv')

## Generation of data file from ozm_oil_heater_200MS.parquet

In [32]:
  
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10




# Convert the OZM capture for the oil heater (parquet) into oil_heater.csv.
#
# A raw string keeps the Windows-style backslashes literal; the previous plain
# string relied on "\d" and "\o" being *invalid* escape sequences, which newer
# Python versions warn about.
df_path = r".\data\ozm1\ozm_oil_heater_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Transform every cell of the DataFrame in one pass:
#   1. numpy arrays (possibly nested) become plain Python lists;
#   2. inside each list cell, every [real, imaginary] pair is replaced by its
#      modulus; other values are left untouched.
df = df.applymap(array_to_list)
df = df.applymap(modulus)

# Rename the OZM column names to the short names used in the output CSV.
df.rename(
    columns={
        'Time': 'timestamp',
        'Active_Power': 'W',
        'Reactive_Power': 'VAR',
        'Apparent_Power': 'VA',
        'Frequency': 'f',
        'Voltage': 'VLN',
        'Power_Factor': 'PF',
        'Current': 'A',
    },
    inplace=True,
)

# Keep only the columns that go into the CSV, in output order. Selecting by
# name discards every other column, so no explicit drop/insert of individual
# columns is needed beforehand.
df = df[[
    'timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
    'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex', 'Power_Harmonics',
]]

# Replace the default integer index with the timestamp column.
df_reset = df.set_index('timestamp')

df_reset.to_csv('oil_heater.csv')

## Generation of data file from ozm_oven_200MS.parquet

In [33]:
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10




# Convert the OZM capture for the oven (parquet) into oven.csv.
#
# A raw string keeps the Windows-style backslashes literal; the previous plain
# string relied on "\d" and "\o" being *invalid* escape sequences, which newer
# Python versions warn about.
df_path = r".\data\ozm1\ozm_oven_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Transform every cell of the DataFrame in one pass:
#   1. numpy arrays (possibly nested) become plain Python lists;
#   2. inside each list cell, every [real, imaginary] pair is replaced by its
#      modulus; other values are left untouched.
df = df.applymap(array_to_list)
df = df.applymap(modulus)

# Rename the OZM column names to the short names used in the output CSV.
df.rename(
    columns={
        'Time': 'timestamp',
        'Active_Power': 'W',
        'Reactive_Power': 'VAR',
        'Apparent_Power': 'VA',
        'Frequency': 'f',
        'Voltage': 'VLN',
        'Power_Factor': 'PF',
        'Current': 'A',
    },
    inplace=True,
)

# Keep only the columns that go into the CSV, in output order. Selecting by
# name discards every other column, so no explicit drop/insert of individual
# columns is needed beforehand.
df = df[[
    'timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
    'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex', 'Power_Harmonics',
]]

# Replace the default integer index with the timestamp column.
df_reset = df.set_index('timestamp')

df_reset.to_csv('oven.csv')

## Generation of data file from ozm_aggregation_200MS.parquet

In [34]:
  
import pandas as pd
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import export_png
# select a palette
from bokeh.palettes import Category10




# Convert the OZM aggregated capture (parquet) into aggregation.csv.
#
# A raw string keeps the Windows-style backslashes literal; the previous plain
# string relied on "\d" and "\o" being *invalid* escape sequences, which newer
# Python versions warn about.
df_path = r".\data\ozm1\ozm_aggregation_200MS.parquet"
df = pd.read_parquet(df_path)  # load the whole file into a DataFrame

# Transform every cell of the DataFrame in one pass:
#   1. numpy arrays (possibly nested) become plain Python lists;
#   2. inside each list cell, every [real, imaginary] pair is replaced by its
#      modulus; other values are left untouched.
df = df.applymap(array_to_list)
df = df.applymap(modulus)

# Rename the OZM column names to the short names used in the output CSV.
df.rename(
    columns={
        'Time': 'timestamp',
        'Active_Power': 'W',
        'Reactive_Power': 'VAR',
        'Apparent_Power': 'VA',
        'Frequency': 'f',
        'Voltage': 'VLN',
        'Power_Factor': 'PF',
        'Current': 'A',
    },
    inplace=True,
)

# Keep only the columns that go into the CSV, in output order. Selecting by
# name discards every other column, so no explicit drop/insert of individual
# columns is needed beforehand.
df = df[[
    'timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A',
    'Voltage_Harmonics_Complex', 'Current_Harmonics_Complex', 'Power_Harmonics',
]]

# Replace the default integer index with the timestamp column.
df_reset = df.set_index('timestamp')

df_reset.to_csv('aggregation.csv')

## Date Analysis


In [35]:
import pandas as pd

# Load the aggregated capture through an explicitly opened binary handle,
# forcing the pyarrow engine, then report how many rows it contains.
with open(r'.\data\ozm1\ozm_aggregation_200MS.parquet', 'rb') as parquet_file:
    df_datos_agregados = pd.read_parquet(parquet_file, engine='pyarrow')

row_count = len(df_datos_agregados.index)
print ('filas fichero aggregation.hdf5=', row_count)

filas fichero aggregation.hdf5= 72510


In [36]:
# Number of rows to preview from the aggregated DataFrame.
N = 1
df_datos_agregados.head(n=N)

Unnamed: 0,Tag,FirstTimestamp,OriginalTimestamp,Time,Voltage,Current,Frequency,Flag,Unbalance,Power_Factor,Phi,Active_Power,Reactive_Power,Apparent_Power,Current_THD,Voltage_THD,Power_Harmonics,Voltage_Harmonics_Complex,Current_Harmonics_Complex
0,start,1686296078147,1686296108046,1686296078147,249.569,0.126192,49.9827,0,0,0.637181,13.9694,20.0671,-5.03741,20.689735,280.471,3.91128,"[20.25, -8.9e-05, 0.023729, 5.1e-05, -0.12404,...","[[20.7074, -248.65], [-0.016208, -0.056902], [...","[[0.026855, -0.079203], [0.001193, 0.001225], ..."


In [37]:
# Scalar at row position 0, column position 1 of the aggregated DataFrame.
FirstTimestamp = df_datos_agregados.iloc[0, 1]

In [38]:
# Scalar at row position 0, column position 2 of the aggregated DataFrame.
OriginalTimestamp = df_datos_agregados.iloc[0, 2]

In [39]:
# Scalar at row position 0, column position 3 of the aggregated DataFrame.
Time = df_datos_agregados.iloc[0, 3]

In [40]:
import pandas as pd
import numpy as np
import datetime
# The three timestamps taken from the first row, treated as epoch milliseconds.
i = [FirstTimestamp, OriginalTimestamp, Time]

# Build a one-column frame and render each value as a local-time string
# (fromtimestamp without tz uses the machine's local timezone).
df = pd.DataFrame({'date': pd.Series(i)})
df['date'] = df['date'].map(
    lambda millis: datetime.datetime.fromtimestamp(int(millis) / 1000).strftime('%Y-%m-%d %H:%M:%S')
)

df



Unnamed: 0,date
0,2023-06-09 09:34:38
1,2023-06-09 09:35:08
2,2023-06-09 09:34:38


In [41]:
# Show the last N rows of the aggregated DataFrame.
df_datos_agregados.tail(n=N)

Unnamed: 0,Tag,FirstTimestamp,OriginalTimestamp,Time,Voltage,Current,Frequency,Flag,Unbalance,Power_Factor,Phi,Active_Power,Reactive_Power,Apparent_Power,Current_THD,Voltage_THD,Power_Harmonics,Voltage_Harmonics_Complex,Current_Harmonics_Complex
72509,end,1686310549573,1686310579865,1686310579776,245.34,0.13901,50.0044,0,0,0.583052,6.9598,19.8849,-2.43653,20.033593,400.886,4.482,"[19.9597, 0.000115, 0.025704, 8.4e-05, -0.0545...","[[25.838, -243.93], [0.010238, 0.048307], [1.2...","[[0.018449, -0.079872], [0.003134, 0.001708], ..."


In [43]:
# FirstTimestamp of the last row. Selecting by column name and last position
# avoids hard-coding row 72509 / column 1, so it keeps working if the number
# of rows or the column order changes.
FirstTimestamp1 = df_datos_agregados['FirstTimestamp'].iloc[-1]

In [44]:
# OriginalTimestamp of the last row. Selecting by column name and last
# position avoids hard-coding row 72509 / column 2, so it keeps working if the
# number of rows or the column order changes.
OriginalTimestamp1 = df_datos_agregados['OriginalTimestamp'].iloc[-1]

In [45]:
# Time of the last row. Selecting by column name and last position avoids
# hard-coding row 72509 / column 3, so it keeps working if the number of rows
# or the column order changes.
Time1 = df_datos_agregados['Time'].iloc[-1]

In [46]:
import pandas as pd
import numpy as np
import datetime
# The three timestamps taken from the last row, treated as epoch milliseconds.
i = [FirstTimestamp1, OriginalTimestamp1, Time1]

# Build a one-column frame and render each value as a local-time string
# (fromtimestamp without tz uses the machine's local timezone).
df = pd.DataFrame({'date': pd.Series(i)})
df['date'] = df['date'].map(
    lambda millis: datetime.datetime.fromtimestamp(int(millis) / 1000).strftime('%Y-%m-%d %H:%M:%S')
)

df


Unnamed: 0,date
0,2023-06-09 13:35:49
1,2023-06-09 13:36:19
2,2023-06-09 13:36:19


## Removal of characters in CSV file and placement in conversion path

### 1- Processing aggregation.csv file

In [47]:
import re
# Copy aggregation.csv into the conversion path, expanding the header and
# removing the '"[' / ']"' delimiters around the list-valued columns so every
# harmonic ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('aggregation.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/1.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 2- Processing water_heater.csv file

In [48]:
import re
# Copy water_heater.csv into the conversion path, expanding the header and
# removing the '"[' / ']"' delimiters around the list-valued columns so every
# harmonic ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('water_heater.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/8.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 3- Processing fan.csv file

In [49]:
import re
# Copy fan.csv into the conversion path, expanding the header and removing the
# '"[' / ']"' delimiters around the list-valued columns so every harmonic ends
# up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('fan.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/9.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 4- Processing freezer.csv file

In [50]:
import re
# Copy freezer.csv into the conversion path, expanding the header and removing
# the '"[' / ']"' delimiters around the list-valued columns so every harmonic
# ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('freezer.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/11.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 5- Processing monitor.csv file

In [51]:
import re
# Copy monitor.csv into the conversion path, expanding the header and removing
# the '"[' / ']"' delimiters around the list-valued columns so every harmonic
# ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('monitor.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/4.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 6- Processing vacuum_cleaner.csv file

In [1]:
import re
# Copy vacuum_cleaner.csv into the conversion path, expanding the header and
# removing the '"[' / ']"' delimiters around the list-valued columns so every
# harmonic ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('vacuum_cleaner.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/6.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 7- Processing bulb.csv file

In [52]:
import re
# Copy bulb.csv into the conversion path, expanding the header and removing
# the '"[' / ']"' delimiters around the list-valued columns so every harmonic
# ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('bulb.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/5.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 8- Processing fridge.csv file

In [53]:
import re
# Copy fridge.csv into the conversion path, expanding the header and removing
# the '"[' / ']"' delimiters around the list-valued columns so every harmonic
# ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('fridge.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/10.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 9- Processing microwave.csv file


In [54]:
import re
# Copy microwave.csv into the conversion path, expanding the header and
# removing the '"[' / ']"' delimiters around the list-valued columns so every
# harmonic ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('microwave.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/3.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 10- Processing oil_heater.csv file

In [55]:
import re
# Copy oil_heater.csv into the conversion path, expanding the header and
# removing the '"[' / ']"' delimiters around the list-valued columns so every
# harmonic ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('oil_heater.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/7.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)

### 11- Processing oven.csv file

In [56]:
import re
# Copy oven.csv into the conversion path, expanding the header and removing
# the '"[' / ']"' delimiters around the list-valued columns so every harmonic
# ends up in its own CSV field.
OLD = 'timestamp,W,VAR,VA,f,VLN,PF,A,Voltage_Harmonics_Complex,Current_Harmonics_Complex,Power_Harmonics'
# Expanded header: the scalar columns followed by VH1..VH50, IH1..IH50 and
# PH1..PH50, which take the place of the three list-valued columns in OLD.
NUEVO = ','.join(
    ['timestamp', 'W', 'VAR', 'VA', 'f', 'VLN', 'PF', 'A']
    + [prefix + str(order) for prefix in ('VH', 'IH', 'PH') for order in range(1, 51)]
)

# The target is opened with 'w' (not 'a') so that re-running the cell rewrites
# the file instead of appending a second copy with a second header row.
with open('oven.csv', 'r', encoding='utf-8') as firstfile, \
        open('./data/ozm1/electricity/2.csv', 'w', encoding='utf-8') as secondfile:
    for line in firstfile:
        # Only the header line contains OLD; data lines pass through unchanged here.
        line2 = line.replace(OLD, NUEVO)
        line2 = line2.replace('"[', '').replace(']"', '')
        secondfile.write(line2)