In [183]:
import os

import numpy as np
import pandas as pd

def load_clean_csv(path):
    """Read a CSV file and tidy up its header.

    Columns whose name begins with ``Unnamed`` (often a stray saved index)
    are discarded and the remaining column names are lower-cased.

    Parameters
    ----------
    path : str or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded table with the cleaned-up columns.
    """
    raw = pd.read_csv(path)
    is_junk = raw.columns.str.contains('^Unnamed')
    kept = raw.loc[:, ~is_junk]
    return kept.set_axis(kept.columns.str.lower(), axis=1)

# Reusable function
def load_and_index(path):
    """Load a CSV with `load_clean_csv` and index it by 'time' when possible.

    If the cleaned table has no 'time' column it is returned unchanged.
    """
    frame = load_clean_csv(path)
    if 'time' not in frame.columns:
        return frame
    return frame.set_index('time')


In [184]:
# Show the working directory: the data paths used below are relative to it.
# os.getcwd() replaces the `!pwd` shell escape so the cell also runs where no `pwd` command exists.
os.getcwd()

/Users/alex_barajas/Model_Comparison/Data/Clean_data


In [185]:
simulink_dir = "../simulink"

# Read the four Simulink exports (each is indexed by 'time' when that column exists)
simulink_p, simulink_q, simulink_vabc, simulink_Iabc = (
    load_and_index(os.path.join(simulink_dir, csv_name))
    for csv_name in (
        'simulink_p.csv',
        'simulink_q.csv',
        'simulink_vabc.csv',
        'simulink_Iabc.csv',
    )
)

# Place the tables side by side, turn the index back into a column,
# and keep only the rows with 11 <= time <= 15 (both ends included).
simulink_all = (
    pd.concat([simulink_p, simulink_q, simulink_vabc, simulink_Iabc], axis=1)
    .reset_index()
    .loc[lambda merged: merged['time'].between(11, 15)]
)


In [173]:
# Show the merged Simulink frame; the load cell has already limited it to 11 <= time <= 15.
simulink_all

Unnamed: 0,time,p,q,v_a,v_b,v_c,i_a,i_b,i_c
110000,11.0000,10000.0,-2.900279e-09,-5.879790e-13,-0.866025,0.866025,-1.805737e-13,-0.524864,0.524864
110001,11.0001,10000.0,-5.629219e-09,3.769018e-02,-0.884255,0.846565,2.284253e-02,-0.535912,0.513070
110002,11.0002,10000.0,7.627126e-10,7.532681e-02,-0.901228,0.825902,4.565261e-02,-0.546199,0.500546
110003,11.0003,10000.0,-1.929201e-09,1.128564e-01,-0.916921,0.804064,6.839781e-02,-0.555710,0.487312
110004,11.0004,10000.0,4.465779e-09,1.502256e-01,-0.931310,0.781085,9.104581e-02,-0.564430,0.473385
...,...,...,...,...,...,...,...,...,...
149996,14.9996,10000.0,2.826517e-09,-1.502256e-01,-0.781085,0.931310,-9.104581e-02,-0.473385,0.564430
149997,14.9997,10000.0,9.728593e-11,-1.128564e-01,-0.804064,0.916921,-6.839781e-02,-0.487312,0.555710
149998,14.9998,10000.0,-2.633875e-09,-7.532681e-02,-0.825902,0.901228,-4.565261e-02,-0.500546,0.546199
149999,14.9999,10000.0,-5.340617e-09,-3.769018e-02,-0.846565,0.884255,-2.284253e-02,-0.513070,0.535912


In [174]:

# PowerDynamics results.
# This load is enabled because later cells trim, convert and save `pwr_all`;
# with the block commented out those cells raise NameError on a fresh kernel.
pwr_dyn_dir = '../PowerDynamics'

# Load each file
pwr_p = load_and_index(os.path.join(pwr_dyn_dir, 'pwrdyn_bus15_p.csv'))
pwr_q = load_and_index(os.path.join(pwr_dyn_dir, 'pwrdyn_bus15_q.csv'))
pwr_v = load_and_index(os.path.join(pwr_dyn_dir, 'pwrdyn_bus15_v.csv'))

# Place the three tables side by side and turn the index back into a column
pwr_all = pd.concat([pwr_p, pwr_q, pwr_v], axis=1).reset_index()


In [117]:


pf_dir = '../PowerFactory'

# Helper function
def load_pf_csv(filename, col_names):
    """Read one PowerFactory CSV export located under `pf_dir`.

    The first two lines of the file are skipped and no header row is read;
    the columns are then labelled with `col_names`, which must contain one
    name per column in the file.
    """
    table = pd.read_csv(os.path.join(pf_dir, filename), skiprows=2, header=None)
    return table.set_axis(col_names, axis=1)



# Reference time grid: the Simulink timestamps with 11.0 <= time <= 15.0, renumbered from 0
sim_time = simulink_all['time']
uniform_time = sim_time[sim_time.between(11.0, 15.0)].reset_index(drop=True)

# General cleaner
def bin_and_average_pf(df, uniform_time, value_columns, last_bin_width=0.00005):
    """Average the samples of `df` onto the time grid `uniform_time`.

    Each grid value is the left edge of a bin that runs up to (but excludes)
    the next grid value; the last bin is `last_bin_width` wide. Rows of `df`
    whose 'time' falls before the first edge or at/after the final edge are
    ignored. Bins that receive no rows produce no output row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'time' column and every column in `value_columns`.
        It is not modified.
    uniform_time : pandas.Series or array-like
        Increasing grid of bin start times. Values are used by position, so
        the index of a Series does not matter.
    value_columns : list of str
        Columns to average within each bin.
    last_bin_width : float, optional
        Width of the final bin (default 0.00005, the value previously
        hard-coded).

    Returns
    -------
    pandas.DataFrame
        One row per non-empty bin with the columns `value_columns` followed
        by 'time' (the bin's grid value).
    """
    grid = np.asarray(uniform_time)

    # Bin edges: every grid value plus one closing edge after the last one
    bin_edges = np.concatenate([grid, [grid[-1] + last_bin_width]])

    # digitize returns 1-based positions; shift so bin k starts at grid[k]
    bin_index = np.digitize(df['time'].to_numpy(), bins=bin_edges) - 1

    # Discard samples outside the grid
    in_range = (bin_index >= 0) & (bin_index < grid.size)

    # Work on a new frame so the caller's DataFrame never gains a 'bin' column
    binned = df.loc[in_range, value_columns].assign(bin=bin_index[in_range])

    # Group and average all value columns
    grouped = binned.groupby('bin')[value_columns].mean().reset_index()

    # Positional lookup of the grid time for each bin
    grouped['time'] = grid[grouped['bin'].to_numpy()]

    return grouped.drop(columns='bin')

# Read the four PowerFactory exports and label their columns
pf_p, pf_q, pf_vabc, pf_Iabc = (
    load_pf_csv(csv_name, columns)
    for csv_name, columns in (
        ('pf_p.csv',    ['time', 'p']),
        ('pf_q.csv',    ['time', 'q']),
        ('pf_Vabc.csv', ['time', 'v_a', 'v_b', 'v_c']),
        ('pf_Iabc.csv', ['time', 'i_a', 'i_b', 'i_c']),
    )
)

# Average every table onto the Simulink time grid
pf_clean_p, pf_clean_q, pf_clean_vabc, pf_clean_Iabc = (
    bin_and_average_pf(table, uniform_time, signals)
    for table, signals in (
        (pf_p,    ['p']),
        (pf_q,    ['q']),
        (pf_vabc, ['v_a', 'v_b', 'v_c']),
        (pf_Iabc, ['i_a', 'i_b', 'i_c']),
    )
)



# # Use the Simulink time vector as the reference (already clean)
# sim_time = simulink_all['time']
# uniform_time = sim_time[(sim_time >= 11.0) & (sim_time <= 15.0)].reset_index(drop=True)

# # Digitize PF time into bins defined by uniform time
# bin_edges = np.concatenate([uniform_time.values, [uniform_time.values[-1] + 0.00005]])  # tiny extension for last bin
# pf_p['bin'] = np.digitize(pf_p['time'], bins=bin_edges) - 1

# # Remove invalid bins
# pf_p = pf_p[(pf_p['bin'] >= 0) & (pf_p['bin'] < len(uniform_time))]

# # Group and average
# binned_avg = pf_p.groupby('bin')['p'].mean().reset_index()
# binned_avg['time'] = uniform_time[binned_avg['bin']].values
# pf_clean = binned_avg[['time', 'p']].rename(columns={'p': 'p_avg'})

# Join the four binned tables on their shared 'time' column
# (merge defaults to an inner join, so only times present in every table survive)
pf_all = (
    pf_clean_p
    .merge(pf_clean_q, on='time')
    .merge(pf_clean_Iabc, on='time')
    .merge(pf_clean_vabc, on='time')
)


In [118]:
# Show the PowerFactory frame obtained by merging the binned p, q, i_abc and v_abc tables on 'time'.
pf_all

Unnamed: 0,p,time,q,i_a,i_b,i_c,v_a,v_b,v_c
0,0.00680,11.0000,0.0046,0.0,0.0,0.0,-0.62340,-0.4371,1.06050
1,0.00795,11.0001,0.0020,0.0,0.0,0.0,-0.59495,-0.4680,1.06295
2,0.00790,11.0002,0.0024,0.0,0.0,0.0,-0.54890,-0.5164,1.06530
3,0.00780,11.0003,0.0027,0.0,0.0,0.0,-0.51350,-0.5518,1.06530
4,0.00760,11.0004,0.0030,0.0,0.0,0.0,-0.47730,-0.5864,1.06370
...,...,...,...,...,...,...,...,...,...
39996,0.00740,14.9996,0.0036,0.0,0.0,0.0,-0.90790,0.8327,0.07520
39997,0.00720,14.9997,0.0039,0.0,0.0,0.0,-0.92440,0.8098,0.11460
39998,0.00710,14.9998,0.0042,0.0,0.0,0.0,-0.93950,0.7857,0.15380
39999,0.00690,14.9999,0.0045,0.0,0.0,0.0,-0.95310,0.7603,0.19280


Unnamed: 0,time,p,q,v_a,v_b,v_c,i_a,i_b,i_c
110000,11.0000,10000.0,-2.900279e-09,-5.879790e-13,-0.866025,0.866025,-1.805737e-13,-0.524864,0.524864
110001,11.0001,10000.0,-5.629219e-09,3.769018e-02,-0.884255,0.846565,2.284253e-02,-0.535912,0.513070
110002,11.0002,10000.0,7.627126e-10,7.532681e-02,-0.901228,0.825902,4.565261e-02,-0.546199,0.500546
110003,11.0003,10000.0,-1.929201e-09,1.128564e-01,-0.916921,0.804064,6.839781e-02,-0.555710,0.487312
110004,11.0004,10000.0,4.465779e-09,1.502256e-01,-0.931310,0.781085,9.104581e-02,-0.564430,0.473385
...,...,...,...,...,...,...,...,...,...
149996,14.9996,10000.0,2.826517e-09,-1.502256e-01,-0.781085,0.931310,-9.104581e-02,-0.473385,0.564430
149997,14.9997,10000.0,9.728593e-11,-1.128564e-01,-0.804064,0.916921,-6.839781e-02,-0.487312,0.555710
149998,14.9998,10000.0,-2.633875e-09,-7.532681e-02,-0.825902,0.901228,-4.565261e-02,-0.500546,0.546199
149999,14.9999,10000.0,-5.340617e-09,-3.769018e-02,-0.846565,0.884255,-2.284253e-02,-0.513070,0.535912


In [186]:

pf_dir = '../PowerFactory'

# Helper function
def load_pf_csv(filename, col_names):
    """Read one PowerFactory CSV export located under `pf_dir`.

    The first two lines of the file are skipped and no header row is read;
    the columns are then labelled with `col_names`, which must contain one
    name per column in the file.
    """
    table = pd.read_csv(os.path.join(pf_dir, filename), skiprows=2, header=None)
    return table.set_axis(col_names, axis=1)

# Load each export, then collapse rows that share a timestamp into their mean
# (one row per distinct 'time' value remains in every table).
pf_p    = load_pf_csv('pf_p.csv',    ['time', 'p']).groupby('time', as_index=False).mean()
pf_q    = load_pf_csv('pf_q.csv',    ['time', 'q']).groupby('time', as_index=False).mean()
pf_vabc = load_pf_csv('pf_Vabc.csv', ['time', 'v_a', 'v_b', 'v_c']).groupby('time', as_index=False).mean()
pf_Iabc = load_pf_csv('pf_Iabc.csv', ['time', 'i_a', 'i_b', 'i_c']).groupby('time', as_index=False).mean()

# Merge on time.
# NOTE(review): merge defaults to an inner join on exactly equal 'time' values,
# so timestamps that are not present in all four tables are dropped - check the
# resulting row count against the individual tables.
pf_all = (
    pf_p
    .merge(pf_q, on='time')
    .merge(pf_vabc, on='time')
    .merge(pf_Iabc, on='time')
)


In [187]:
# Show the current table after averaging per timestamp; it is not restricted to the
# 11-15 window here (that trim is applied to pf_all in a later cell).
pf_Iabc

Unnamed: 0,time,i_a,i_b,i_c
0,0.000000,0.015000,-0.007500,-0.007500
1,0.000100,0.015000,-0.007500,-0.007500
2,0.000200,0.015000,-0.007500,-0.007500
3,0.000300,0.015000,-0.007500,-0.007500
4,0.000400,0.015000,-0.007500,-0.007500
...,...,...,...,...
165000,14.999618,-0.030983,0.049932,-0.018949
165001,14.999718,-0.030983,0.049932,-0.018949
165002,14.999818,-0.030983,0.049932,-0.018949
165003,14.999918,-0.030983,0.049932,-0.018949


In [188]:
# Show the q table after averaging per timestamp.
pf_q

Unnamed: 0,time,q
0,11.0000,0.0046
1,11.0001,0.0020
2,11.0002,0.0024
3,11.0003,0.0027
4,11.0004,0.0030
...,...,...
39996,14.9996,0.0036
39997,14.9997,0.0039
39998,14.9998,0.0042
39999,14.9999,0.0045


In [189]:
# Trim all dataframes to 11 <= time <= 15 (both ends included).
# .copy() gives every trimmed frame its own data, so the column assignments in
# the following cell do not write into a filtered slice (SettingWithCopyWarning).
pf_all = pf_all[(pf_all['time'] >= 11) & (pf_all['time'] <= 15)].copy()
pwr_all = pwr_all[(pwr_all['time'] >= 11) & (pwr_all['time'] <= 15)].copy()
simulink_all = simulink_all[(simulink_all['time'] >= 11) & (simulink_all['time'] <= 15)].copy()

In [190]:
# Add p_kw / q_kw columns so active (p) and reactive (q) power use the same scale in all three frames.
# NOTE(review): the q columns keep the *_kw name although reactive power would usually be labelled kvar.

# PowerDynamics per-unit base used for both p and q
S_base = 100e6  # 100 MVA

# --- active power ---
# PowerFactory: MW -> kW
pf_all['p_kw'] = pf_all['p'].mul(1000)
# PowerDynamics: pu -> kW (pu * S_base gives the base-unit value, / 1000 gives kilo-units)
pwr_all['p_kw'] = pwr_all['p'].mul(S_base).div(1000)
# Simulink: W -> kW
simulink_all['p_kw'] = simulink_all['p'].div(1000)

# --- reactive power (same factors as above) ---
pf_all['q_kw'] = pf_all['q'].mul(1000)
pwr_all['q_kw'] = pwr_all['q'].mul(S_base).div(1000)
simulink_all['q_kw'] = simulink_all['q'].div(1000)

In [191]:
# Inspect the 'time' column of simulink_all (first/last values, length and dtype).
simulink_all.time

110000    11.0000
110001    11.0001
110002    11.0002
110003    11.0003
110004    11.0004
           ...   
149996    14.9996
149997    14.9997
149998    14.9998
149999    14.9999
150000    15.0000
Name: time, Length: 40001, dtype: float64

In [192]:
# convert PowerFactory Iabc to pu

# import numpy as np


# # Assuming pf_all['i_a'], ['i_b'], ['i_c'] are in kA:
# pf_all['i_a'] = (pf_all['i_a'] * 1000) 
# pf_all['i_b'] = (pf_all['i_b'] * 1000)
# pf_all['i_c'] = (pf_all['i_c'] * 1000) 



In [193]:
# Show pf_all after the trim and with the p_kw / q_kw columns added above.
pf_all

Unnamed: 0,time,p,q,v_a,v_b,v_c,i_a,i_b,i_c,p_kw,q_kw
0,11.000,0.0068,0.0046,-0.6234,-0.4371,1.0605,-0.035394,-0.009940,0.045334,6.8,4.6
1,11.001,0.0068,0.0046,-0.2544,-0.7692,1.0237,-0.020876,-0.026663,0.047539,6.8,4.6
2,11.002,0.0068,0.0046,0.1516,-0.9895,0.8380,-0.003322,-0.039510,0.042832,6.8,4.6
3,11.003,0.0068,0.0046,0.5355,-1.0659,0.5305,0.014715,-0.046612,0.031897,6.8,4.6
4,11.004,0.0068,0.0046,0.8416,-0.9874,0.1458,0.030612,-0.046937,0.016325,6.8,4.6
...,...,...,...,...,...,...,...,...,...,...,...
3996,14.996,0.0067,0.0047,0.2968,0.6858,-0.9826,0.024889,0.025519,-0.050409,6.7,4.7
3997,14.997,0.0067,0.0047,-0.0945,0.9162,-0.8218,0.006217,0.040215,-0.046432,6.7,4.7
3998,14.998,0.0067,0.0047,-0.4713,1.0072,-0.5359,-0.013403,0.048788,-0.035385,6.7,4.7
3999,14.999,0.0067,0.0047,-0.7764,0.9447,-0.1684,-0.030983,0.049932,-0.018949,6.7,4.7


In [194]:
# Show pwr_all after the trim and with the p_kw / q_kw columns added above.
pwr_all

Unnamed: 0,time,p,q,v,p_kw,q_kw
2201,11.01,0.00010,0.287887,0.971211,10.000000,28788.738454
2202,11.01,0.00010,0.287887,0.971211,10.000000,28788.738454
2203,11.02,0.00010,0.287887,0.971211,10.000000,28788.738454
2204,11.02,0.00010,0.287887,0.971211,10.000000,28788.738454
2205,11.03,0.00010,0.287887,0.971211,10.000000,28788.738454
...,...,...,...,...,...,...
3002,14.98,-0.29611,0.274957,0.953687,-29611.005901,27495.665300
3003,14.99,-0.29361,0.274779,0.953781,-29361.028847,27477.946828
3004,14.99,-0.29361,0.274779,0.953781,-29361.028847,27477.946828
3005,15.00,-0.29113,0.274607,0.953875,-29113.048340,27460.700866


In [195]:
# Show simulink_all after the trim and with the p_kw / q_kw columns added above.
simulink_all

Unnamed: 0,time,p,q,v_a,v_b,v_c,i_a,i_b,i_c,p_kw,q_kw
110000,11.0000,10000.0,-2.900279e-09,-5.879790e-13,-0.866025,0.866025,-1.805737e-13,-0.524864,0.524864,10.0,-2.900279e-12
110001,11.0001,10000.0,-5.629219e-09,3.769018e-02,-0.884255,0.846565,2.284253e-02,-0.535912,0.513070,10.0,-5.629219e-12
110002,11.0002,10000.0,7.627126e-10,7.532681e-02,-0.901228,0.825902,4.565261e-02,-0.546199,0.500546,10.0,7.627126e-13
110003,11.0003,10000.0,-1.929201e-09,1.128564e-01,-0.916921,0.804064,6.839781e-02,-0.555710,0.487312,10.0,-1.929201e-12
110004,11.0004,10000.0,4.465779e-09,1.502256e-01,-0.931310,0.781085,9.104581e-02,-0.564430,0.473385,10.0,4.465779e-12
...,...,...,...,...,...,...,...,...,...,...,...
149996,14.9996,10000.0,2.826517e-09,-1.502256e-01,-0.781085,0.931310,-9.104581e-02,-0.473385,0.564430,10.0,2.826517e-12
149997,14.9997,10000.0,9.728593e-11,-1.128564e-01,-0.804064,0.916921,-6.839781e-02,-0.487312,0.555710,10.0,9.728593e-14
149998,14.9998,10000.0,-2.633875e-09,-7.532681e-02,-0.825902,0.901228,-4.565261e-02,-0.500546,0.546199,10.0,-2.633875e-12
149999,14.9999,10000.0,-5.340617e-09,-3.769018e-02,-0.846565,0.884255,-2.284253e-02,-0.513070,0.535912,10.0,-5.340617e-12


In [196]:
# Repeated display of pf_all; no cell since the previous pf_all display modifies it.
pf_all

Unnamed: 0,time,p,q,v_a,v_b,v_c,i_a,i_b,i_c,p_kw,q_kw
0,11.000,0.0068,0.0046,-0.6234,-0.4371,1.0605,-0.035394,-0.009940,0.045334,6.8,4.6
1,11.001,0.0068,0.0046,-0.2544,-0.7692,1.0237,-0.020876,-0.026663,0.047539,6.8,4.6
2,11.002,0.0068,0.0046,0.1516,-0.9895,0.8380,-0.003322,-0.039510,0.042832,6.8,4.6
3,11.003,0.0068,0.0046,0.5355,-1.0659,0.5305,0.014715,-0.046612,0.031897,6.8,4.6
4,11.004,0.0068,0.0046,0.8416,-0.9874,0.1458,0.030612,-0.046937,0.016325,6.8,4.6
...,...,...,...,...,...,...,...,...,...,...,...
3996,14.996,0.0067,0.0047,0.2968,0.6858,-0.9826,0.024889,0.025519,-0.050409,6.7,4.7
3997,14.997,0.0067,0.0047,-0.0945,0.9162,-0.8218,0.006217,0.040215,-0.046432,6.7,4.7
3998,14.998,0.0067,0.0047,-0.4713,1.0072,-0.5359,-0.013403,0.048788,-0.035385,6.7,4.7
3999,14.999,0.0067,0.0047,-0.7764,0.9447,-0.1684,-0.030983,0.049932,-0.018949,6.7,4.7


In [197]:
# Repeated display of simulink_all; no cell since the previous simulink_all display modifies it.
simulink_all

Unnamed: 0,time,p,q,v_a,v_b,v_c,i_a,i_b,i_c,p_kw,q_kw
110000,11.0000,10000.0,-2.900279e-09,-5.879790e-13,-0.866025,0.866025,-1.805737e-13,-0.524864,0.524864,10.0,-2.900279e-12
110001,11.0001,10000.0,-5.629219e-09,3.769018e-02,-0.884255,0.846565,2.284253e-02,-0.535912,0.513070,10.0,-5.629219e-12
110002,11.0002,10000.0,7.627126e-10,7.532681e-02,-0.901228,0.825902,4.565261e-02,-0.546199,0.500546,10.0,7.627126e-13
110003,11.0003,10000.0,-1.929201e-09,1.128564e-01,-0.916921,0.804064,6.839781e-02,-0.555710,0.487312,10.0,-1.929201e-12
110004,11.0004,10000.0,4.465779e-09,1.502256e-01,-0.931310,0.781085,9.104581e-02,-0.564430,0.473385,10.0,4.465779e-12
...,...,...,...,...,...,...,...,...,...,...,...
149996,14.9996,10000.0,2.826517e-09,-1.502256e-01,-0.781085,0.931310,-9.104581e-02,-0.473385,0.564430,10.0,2.826517e-12
149997,14.9997,10000.0,9.728593e-11,-1.128564e-01,-0.804064,0.916921,-6.839781e-02,-0.487312,0.555710,10.0,9.728593e-14
149998,14.9998,10000.0,-2.633875e-09,-7.532681e-02,-0.825902,0.901228,-4.565261e-02,-0.500546,0.546199,10.0,-2.633875e-12
149999,14.9999,10000.0,-5.340617e-09,-3.769018e-02,-0.846565,0.884255,-2.284253e-02,-0.513070,0.535912,10.0,-5.340617e-12


In [198]:
# Create an output directory if needed
# ('../' is the parent of the working directory; exist_ok=True makes this a no-op when it already exists)
output_dir = '../'
os.makedirs(output_dir, exist_ok=True)

# Save each trimmed DataFrame as CSV inside output_dir.
# index=False leaves the row index out of the files.
# All three frames (pf_all, pwr_all, simulink_all) must already exist in the kernel.
pf_all.to_csv(os.path.join(output_dir, 'pf_all.csv'), index=False)
pwr_all.to_csv(os.path.join(output_dir, 'pwr_all.csv'), index=False)
simulink_all.to_csv(os.path.join(output_dir, 'simulink_all.csv'), index=False)
