In [2]:
import pandas as pd
import os

# Folder containing the Parquet files to load
folder_path = 'data/CO2'  # Adjust this to your actual folder path

# List the Parquet files in sorted order. os.listdir returns entries in an
# arbitrary, filesystem-dependent order, which would otherwise make the row
# order (and the index labels) of the concatenated frame vary between runs
# on different machines.
parquet_files = sorted(
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if f.endswith('.parquet')
)

# Fail with a clear message instead of pandas' generic
# "No objects to concatenate" when the folder holds no Parquet files.
if not parquet_files:
    raise ValueError(f"No .parquet files found in {folder_path!r}")

# Read every file and stack them into a single DataFrame
df_list = [pd.read_parquet(file) for file in parquet_files]
df = pd.concat(df_list, ignore_index=True)

# 'timestamp' is interpreted as milliseconds since the Unix epoch
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

# A stable sort keeps rows with equal timestamps in file order, so the
# result is reproducible.
df = df.sort_values(by='timestamp', kind='stable')

# Treat the naive timestamps as UTC and express them in Brussels local time
df['timestamp_local'] = df['timestamp'].dt.tz_localize('UTC').dt.tz_convert('Europe/Brussels')

# Display the first few rows of the DataFrame
print(df.head())

                    timestamp            EventProcessedUtcTime  PartitionId  \
14068 2025-01-02 17:47:47.165 2025-01-02 17:49:32.562593+00:00            0   
14069 2025-01-02 17:47:52.214 2025-01-02 17:49:32.562626+00:00            0   
14070 2025-01-02 17:47:57.265 2025-01-02 17:49:32.562661+00:00            0   
14071 2025-01-02 17:48:02.315 2025-01-02 17:49:32.562694+00:00            0   
14072 2025-01-02 17:48:07.369 2025-01-02 17:49:32.562727+00:00            0   

                  EventEnqueuedUtcTime  co2_value  temperature  humidity  \
14068 2025-01-02 17:47:47.248000+00:00        496         27.0      41.9   
14069 2025-01-02 17:47:52.295000+00:00        517         26.9      42.0   
14070 2025-01-02 17:47:57.342000+00:00        495         27.0      41.9   
14071 2025-01-02 17:48:02.404000+00:00        511         26.9      42.1   
14072 2025-01-02 17:48:07.451000+00:00        492         27.0      42.2   

                       timestamp_local  
14068 2025-01-02 18:47:47.1

In [20]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# One entry per subplot row: (column, axis label / trace name, line color, fill color)
panels = [
    ('co2_value', 'CO2 (ppm)', '#00008B', 'rgba(0, 0, 139, 0.05)'),
    ('temperature', 'Temperature (°C)', '#FF0000', 'rgba(255, 0, 0, 0.05)'),
    ('humidity', 'Humidity (%)', '#008000', 'rgba(0, 128, 0, 0.05)'),
]
plot_columns = [column for column, _, _, _ in panels]


def _padded_range(values, pad=0.1):
    """Return [low, high] widened by `pad` times each bound's magnitude.

    Padding with abs() moves the lower bound down and the upper bound up
    even for negative values; multiplying by 0.9 / 1.1 would shrink the
    range toward zero and clip the data when a bound is negative.
    Returns None when `values` holds no valid data, so the caller can leave
    the axis on autorange.
    """
    low, high = values.min(), values.max()
    if pd.isna(low) or pd.isna(high):
        return None
    return [low - abs(low) * pad, high + abs(high) * pad]


# Average the plotted measurements over 10-minute bins of local time.
# Only the plotted columns are aggregated, so no datetime or id columns get
# averaged. The resampled index is already chronological, so no extra sort
# is needed; empty bins stay in place as NaN rows and render as gaps in the
# lines instead of being bridged.
df_aggregated = df.set_index('timestamp_local')[plot_columns].resample('10min').mean()

# Create subplots with shared x-axis and separate y-axes
fig = make_subplots(rows=len(panels), cols=1, shared_xaxes=True, vertical_spacing=0.05)

for row, (column, label, line_color, fill_color) in enumerate(panels, start=1):
    # Line trace with a light fill under the curve
    fig.add_trace(
        go.Scatter(
            x=df_aggregated.index,
            y=df_aggregated[column],
            mode='lines',
            name=label,
            line=dict(color=line_color),
            fill='tozeroy',  # Fill under the curve
            fillcolor=fill_color
        ),
        row=row, col=1
    )

    # Axis title plus a y-range padded around the data; the range is only
    # set when the column actually contains data.
    axis_options = dict(title_text=label, title_standoff=10)
    axis_range = _padded_range(df_aggregated[column])
    if axis_range is not None:
        axis_options['range'] = axis_range
    fig.update_yaxes(row=row, col=1, **axis_options)

# Update layout for better visualization and responsiveness
fig.update_layout(
    autosize=True,
    height=400,  # Reduced height
    margin=dict(l=50, r=20, t=0, b=0),  # Adjusted margins
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False,  # Remove legend from the plot
    font=dict(family="Arial, sans-serif", size=12)
)

# Add grid in light grey
fig.update_xaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgrey')
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgrey')

# Configuration for responsiveness
config = {
    'responsive': True,
    'displayModeBar': False,  # Hide the mode bar
    'scrollZoom': False  # Disable scroll zoom
}

# Save the figure as an HTML fragment (plotly.js loaded from the CDN)
fig.write_html("./timeseries.html", config=config, include_plotlyjs='cdn', full_html=False)

# Show the figure (optional if running in a script)
fig.show(config=config)