In [None]:
import pandas as pd
import glob

# Suffix of the cleaned copies this cell writes next to each source file.
MODIFIED_SUFFIX = '_modified.csv'
# First signed integer/decimal number inside a string such as "12.5 kW".
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
NUMBER_PATTERN = r'([-+]?\d*\.?\d+)'


def is_source_csv(path):
    """Return True for input CSVs and False for the ``*_modified.csv`` copies.

    Without this filter a second run of the cell would pick up its own
    outputs, re-clean them and merge them with the originals.
    """
    return not path.lower().endswith(MODIFIED_SUFFIX)


def modified_name(path):
    """Return the output path for ``path``: final extension replaced by ``_modified.csv``.

    Only the last extension is touched, so a name that contains ``.csv``
    elsewhere is not mangled and the result never equals the input path.
    """
    stem, _, _ext = path.rpartition('.')
    return (stem or path) + MODIFIED_SUFFIX


def strip_units(df, skip=('timestamp',)):
    """Convert the text columns of ``df`` to floats, in place.

    For every object/string column not listed in ``skip`` the first number
    found in each cell is kept (cells without a number become NaN).
    Columns of any other dtype are left untouched.

    Returns the same ``df`` object for convenience.
    """
    for col in df.columns:
        if col in skip:
            continue
        column = df[col]
        is_text = column.dtype == 'object' or pd.api.types.is_string_dtype(column.dtype)
        if not is_text:
            continue
        try:
            # expand=False yields a Series rather than a one-column DataFrame.
            df[col] = column.str.extract(NUMBER_PATTERN, expand=False).astype(float)
        except AttributeError:
            # Object column holding non-string values: the .str accessor is
            # unavailable, so leave the column as it is.
            continue
    return df


# Source CSV files of the current directory, sorted so the merge order
# (and therefore the column order of merged_df) is stable between runs.
csv_files = sorted(f for f in glob.glob('*.csv') if is_source_csv(f))

# Cleaned frames that contain a 'timestamp' column
dfs = []
for file in csv_files:
    # header=1: the column names are taken from the second line of the file.
    df = pd.read_csv(file, sep=',', header=1)
    display(df.head())  # preview as read from disk

    if 'timestamp' not in df.columns:
        print(f"Warning: {file} does not contain timestamp column")
        continue

    df['timestamp'] = pd.to_datetime(df['timestamp'])
    strip_units(df)

    # Save the cleaned copy next to the source and keep it for merging.
    df.to_csv(modified_name(file), index=False)
    dfs.append(df)
    display(df.head())  # preview after cleaning

if dfs:
    # Inner-join every frame on 'timestamp': only timestamps present in all
    # files survive.
    merged_df = dfs[0]
    for df in dfs[1:]:
        merged_df = pd.merge(merged_df, df, on='timestamp', how='inner')

    # Forward fill remaining NA values down each column.
    # (fillna(method='ffill') is deprecated in favour of ffill().)
    merged_df = merged_df.ffill()

    print(f"Successfully merged {len(dfs)} CSV files")
    print(f"Final dataframe shape: {merged_df.shape}")
else:
    print("No valid CSV files found for merging")


In [63]:
# Load Cogen.csv and Echangeurs.csv (semicolon-separated, column names on the first line)
cogen_df, echangeur_df = (
    pd.read_csv(csv_name, sep=";", header=0)
    for csv_name in ('Cogen.csv', 'Echangeurs.csv')
)

import plotly.express as px

# Convert timestamp to datetime and set as index
def set_timestamp(df, date_format=None, dayfirst=False):
    """Index ``df`` by a datetime built from its 'Date' and 'UTC Time' columns.

    The frame is modified in place: a 'timestamp' index replaces the current
    index and the 'Date', 'UTC Time' and 'Record' columns are dropped. Because
    those columns are removed, calling this twice on the same frame raises
    ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string columns 'Date' and 'UTC Time' and a 'Record' column.
    date_format : str, optional
        strftime pattern of the combined "<Date> <UTC Time>" string, e.g.
        ``'%d/%m/%Y %H:%M:%S'``. With the default ``None`` pandas guesses the
        format; the recorded run of this notebook shows that guess failing
        ("Could not infer format ..."), so passing the real format is
        recommended for consistent parsing.
    dayfirst : bool, default False
        Forwarded to ``pandas.to_datetime``; only relevant when
        ``date_format`` is not given.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, to allow chaining.
    """
    stamps = df['Date'] + ' ' + df['UTC Time']
    df['timestamp'] = pd.to_datetime(stamps, format=date_format, dayfirst=dayfirst)
    df.set_index('timestamp', inplace=True)
    df.drop(columns=['Date', 'UTC Time', 'Record'], inplace=True)
    return df

# Index both frames by timestamp, then preview them
for frame in (cogen_df, echangeur_df):
    set_timestamp(frame)

display(cogen_df.head(), echangeur_df.head())

# Difference between the Tc_RU and Tf_RU columns
echangeur_df['dT_RU'] = echangeur_df['Tc_RU'].sub(echangeur_df['Tf_RU'])

# Line plot of Tc_RU and Tf_RU over time
fig = px.line(
    echangeur_df,
    x=echangeur_df.index,
    y=['Tc_RU', 'Tf_RU'],
    line_shape='spline',
)

# One fixed colour per trace
for trace_name, colour in (('Tc_RU', 'orange'), ('Tf_RU', 'blue')):
    fig.update_traces(line_color=colour, selector={'name': trace_name})

# Overlay red markers on the samples where Tc_RU exceeds 74
tc_above_limit = echangeur_df.loc[echangeur_df['Tc_RU'] > 74, 'Tc_RU']
fig.add_scatter(
    x=tc_above_limit.index,
    y=tc_above_limit,
    mode='markers',
    line={'color': 'red', 'shape': 'spline'},
    name='Tc_ru > 74°C',
    showlegend=False,
)

fig.update_layout(
    **{
        'title': 'Temperature Réseau Urbain',
        'xaxis_title': 'Date',
        'yaxis_title': 'Temperature (°C)',
        'height': 600,
        'template': 'simple_white',
    }
)

fig.show()

# Second figure: the dT_RU difference on its own
fig_2 = px.line(
    echangeur_df,
    x=echangeur_df.index,
    y=['dT_RU'],
    line_shape='spline',
)
fig_2.show()

# Convert numeric columns by removing units: keep the first signed
# integer/decimal number found in each cell and cast the column to float.
# The pattern is a raw string because "\d" in a normal string literal is an
# invalid escape sequence (a warning today, an error in future Python).
unit_number_pattern = r'([-+]?\d*\.?\d+)'
for col in cogen_df.columns:
    try:
        # expand=False returns a Series instead of a one-column DataFrame.
        cogen_df[col] = (
            cogen_df[col]
            .str.extract(unit_number_pattern, expand=False)
            .astype(float)
        )
    except (AttributeError, ValueError):
        # AttributeError: the column does not hold strings (no .str accessor),
        # e.g. it is already numeric. ValueError: the cast to float failed.
        # Either way the column is left unchanged.
        continue

# Interactive overview: one line per cogen_df column against the timestamp index
overview_layout = {
    'xaxis_title': "Timestamp",
    'yaxis_title': "Values",
    'legend_title': "Variables",
    'height': 600,
}
fig = px.line(
    cogen_df,
    x=cogen_df.index,
    y=cogen_df.columns,
)
fig.update_layout(**overview_layout)
fig.show()


# Stacked area chart of the generator power columns
power_cols = [
    'JEN1_P',
    'JEN2_P',
    'LIEB_P',
]
fig = px.area(
    cogen_df,
    x=cogen_df.index,
    y=power_cols,
    title='Power Output by Generator',
    labels={'value': 'Power', 'variable': 'Generator'},
    height=600,
)

# Hide the outline of each area so only the fills are visible
fig.update_traces(line={'width': 0})
fig.update_layout(
    **{
        'xaxis_title': "Timestamp",
        'yaxis_title': "Power",
        'showlegend': True,
        'template': 'simple_white',
    }
)
fig.show()


# Total generator power: row-wise sum of the three generator columns
cogen_df['Total_Gen_P'] = (
    cogen_df['JEN1_P']
    .add(cogen_df['JEN2_P'])
    .add(cogen_df['LIEB_P'])
)
# Columns stacked in the area chart below
area_cols = [
    'HX1_P',
    'HX2_P',
    'Sech_P',
    'Dig_P',
]

# Stacked areas for the heat exchangers and the other systems
fig = px.area(
    cogen_df,
    x=cogen_df.index,
    y=area_cols,
    title='Power Output by Heat Exchangers and Other Systems',
    labels={'value': 'Power', 'variable': 'System'},
    height=600,
)
# Remove the area outlines (applied before the total line is added)
fig.update_traces(selector={'type': 'scatter'}, line={'width': 0})

# Draw the total generator power as a plain line over the stack
fig.add_scatter(
    x=cogen_df.index,
    y=cogen_df['Total_Gen_P'],
    name='Total Generator Power',
    mode='lines',
    line={'width': 2},
)

fig.update_layout(
    **{
        'xaxis_title': "Timestamp",
        'yaxis_title': "Power",
        'showlegend': True,
        'template': 'simple_white',
    }
)
fig.show()

# 100% stacked area plot
# Columns expressed as a share of generator power. The original cell used
# `heat_power_cols` without ever defining it (NameError on a fresh kernel);
# the heat-exchanger / other-system list defined above as `area_cols` is the
# one the comments and plot title refer to.
heat_power_cols = list(area_cols)

# Total generator power is the denominator for the percentages
total_gen_power = cogen_df[power_cols].sum(axis=1)
# Rows where the generators produce nothing have no meaningful percentage:
# turn a zero denominator into NaN so those rows become gaps instead of inf.
nonzero_total = total_gen_power.where(total_gen_power != 0)

# Percentage of total generator power for each heat/other system,
# computed for all columns at once (row-aligned division)
pct_df = cogen_df[heat_power_cols].div(nonzero_total, axis=0) * 100

# Loss is whatever remains up to 100%. min_count=1 keeps Loss as NaN on rows
# where every percentage is NaN, rather than reporting a spurious 100% loss.
pct_df['Loss'] = 100 - pct_df[heat_power_cols].sum(axis=1, min_count=1)

# Add loss column to plot columns
plot_cols = heat_power_cols + ['Loss']

fig = px.area(pct_df, x=pct_df.index, y=plot_cols,
              title='Heat Exchanger and Other System Power as % of Total Generator Output',
              labels={'value': '% of Generator Power', 'variable': 'System'},
              height=600)

fig.update_layout(
    xaxis_title="Timestamp",
    yaxis_title="Percent of Total Generator Power",
    showlegend=True,
    template='simple_white'
)
# Hide the outline of each area so only the fills are visible
fig.update_traces(line=dict(width=0))
fig.show()




Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



Unnamed: 0_level_0,LIEB_Tc,LIEB_Tf,LIEB_Q,LIEB_P,JEN1_Tc,JEN1_Tf,JEN1_Q,JEN1_P,JEN2_Tc,JEN2_Tf,JEN2_Q,JEN2_P,HX1_P,HX2_P,Sech_P,Dig_P
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2025-02-12 11:11:57,59,62,9,30,78,63,57,938,79,65,59,954,358,348,1082,0
2025-02-12 11:15:00,59,62,9,31,78,63,57,956,80,65,60,1039,342,364,1140,0
2025-02-12 11:30:00,59,62,9,32,77,63,57,867,81,65,60,1101,336,351,1198,0
2025-02-12 11:45:00,59,62,9,33,76,63,57,821,81,65,62,1142,355,372,1198,0
2025-02-12 12:00:00,59,62,9,30,77,63,57,877,81,64,60,1130,363,370,1252,0


Unnamed: 0_level_0,Tc_RU,Tf_RU,Hz1_RU,Hz2_RU,DP_RU,Tc_HX1_RU,Tc_HX2_RU,Tc_HX1_CIN,Tf_HX1_CIN,Q_HX1_CIN,Hz_HX1_CIN,Tc_HX2_CIN,Tf_HX2_CIN,Q_HX2_CIN,Hz_HX2_CIN
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2025-02-12 11:12:08,70,58,50,50,0,69,71,72,62,30,51,73,62,30,51
2025-02-12 11:15:00,70,58,50,50,0,68,71,71,63,30,51,74,62,31,51
2025-02-12 11:30:00,69,58,50,49,0,68,71,71,63,30,51,73,62,30,51
2025-02-12 11:45:00,70,58,50,50,0,69,71,71,63,30,51,72,62,31,51
2025-02-12 12:00:00,70,58,50,50,0,68,71,72,63,30,51,73,62,30,51


In [None]:
import plotly.express as px
# Preview the first rows of the merged frame. `merged_df` is only assigned by
# the merge cell when at least one CSV with a 'timestamp' column was found,
# so this line raises NameError otherwise.
merged_df.head()
# Disabled quick-look plot of the merged frame:
# px.line(merged_df)