In [75]:
!pip install -r ./DataEngineeringChallenge-main/DataEngineeringChallenge-main/requirements.txt
!pip install streamlit
!pip install matplotlib




[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [76]:
# Smoke test: fetch one forecast from the client for a given asset ID and version timestamp

import sys
sys.path.append('./DataEngineeringChallenge-main/DataEngineeringChallenge-main/src/vpp/')

from client import get_forecast
import pendulum

# Build the version timestamp once and reuse it for both the request and the echo.
forecast_version = pendulum.datetime(2025, 6, 8, 8, 15, tz="Europe/Berlin")

f = get_forecast(asset_id="WND-DE-001", version=forecast_version)

# Only the requested version is printed; the forecast itself stays in `f`.
print(forecast_version)

2025-06-08 08:15:00+02:00


In [77]:
# FlexPower Task 1: Asset and Portfolio Forecasting - Simple Version
# Revised version of the earlier forecasting script; uses a local mock in place of the VPP client

import sys
sys.path.append('./DataEngineeringChallenge-main/DataEngineeringChallenge-main/src/vpp/')

import os
import pandas as pd
import pendulum
import json

# Mock stand-in for the VPP client's `get_forecast`, used because the real
# client might not be available. Defining it here shadows any `get_forecast`
# imported earlier in the session, so the pipeline below runs on synthetic values.
def get_forecast(asset_id, version):
    """
    Return a synthetic forecast record for one asset and one timestamp.

    Args:
        asset_id: Identifier of the asset; copied into the record and mixed
            into the synthetic value.
        version: Timestamp object exposing ``to_iso8601_string()``; its ``str()``
            form is mixed into the synthetic value.

    Returns:
        dict with the keys ``delivery_start`` (ISO-8601 string of ``version``),
        ``value_kw`` (int in the range 1000..1499), ``asset_id`` and
        ``forecast_type`` (always ``'latest'``).
    """
    import zlib  # local import keeps the mock self-contained

    # The built-in hash() of a str is salted per interpreter process, so it
    # would yield different "forecasts" after every kernel restart. CRC32 is
    # stable across runs, which keeps the mock data and saved CSVs reproducible.
    checksum = zlib.crc32(f"{asset_id}{version}".encode("utf-8"))

    return {
        'delivery_start': version.to_iso8601_string(),
        'value_kw': 1000 + checksum % 500,  # Mock forecast value
        'asset_id': asset_id,
        'forecast_type': 'latest'
    }

def extract_asset_ids(path):
    """
    Extract unique asset IDs from the 'key.asset_id' field
    inside each JSON file in the given directory.

    Args:
        path: Directory to scan. Only entries whose name ends in '.json'
            are opened.

    Returns:
        Sorted list of the distinct, truthy asset IDs that were found.
        An empty list is returned when ``path`` does not exist.

    Files that cannot be read or parsed are reported and skipped. Records
    that are not dicts, or whose 'key' is not a dict, are skipped one by one
    so that a single malformed record does not discard the rest of its file.
    """
    asset_ids = set()

    if not os.path.exists(path):
        print(f"[ERROR] Path does not exist: {path}")
        return []

    for filename in os.listdir(path):
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(path, filename)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Handle both list and single dict formats
            records = data if isinstance(data, list) else [data]

            for entry in records:
                if not isinstance(entry, dict):
                    continue  # e.g. a bare string or number inside the list
                key = entry.get("key")
                if not isinstance(key, dict):
                    continue  # missing or null 'key'
                asset_id = key.get("asset_id")
                if asset_id:
                    asset_ids.add(asset_id)

        # Deliberately broad: the scan is best-effort, one bad file must not
        # stop the remaining files from being processed.
        except Exception as e:
            print(f"[ERROR] Failed to read {filename}: {e}")

    asset_ids = sorted(asset_ids)
    print(f"[INFO] Found {len(asset_ids)} assets: {asset_ids}")
    return asset_ids

def generate_intervals(day="2025-06-08"):
    """
    Build the list of quarter-hour interval starts for a delivery day.

    Args:
        day: Date string handed to ``pendulum.parse`` with the
            Europe/Berlin timezone.

    Returns:
        List of 96 timestamps: the parsed start plus 0, 15, ..., 1425 minutes.
    """
    # NOTE(review): the count is fixed at 96; on DST-change days the local day
    # is not 24 h long -- confirm whether such days need special handling.
    day_start = pendulum.parse(day, tz="Europe/Berlin")

    intervals = []
    for offset_minutes in range(0, 96 * 15, 15):
        intervals.append(day_start.add(minutes=offset_minutes))

    print(f"[INFO] Generated {len(intervals)} intervals for {day}")
    return intervals

def fetch_latest_forecasts(asset_ids, intervals):
    """
    Request a forecast for every (asset, interval) combination.

    Args:
        asset_ids: Iterable of asset identifiers (its length is reported).
        intervals: Iterable of timestamps; each one is passed to
            ``get_forecast`` as the ``version`` argument.

    Returns:
        List of the truthy results returned by ``get_forecast``, ordered by
        asset first and interval second. Calls that raise are reported with
        a ``[WARN]`` line and skipped.
    """
    def _try_fetch(asset, interval):
        # Returns the forecast, or None when it is empty or the call failed.
        try:
            record = get_forecast(asset_id=asset, version=interval)
            return record if record else None
        except Exception as e:
            print(f"[WARN] Missing forecast for {asset} at {interval}: {e}")
            return None

    collected = []
    print(f"[INFO] Fetching forecasts for {len(asset_ids)} assets...")

    for asset in asset_ids:
        print(f"[INFO] Processing asset: {asset}")
        attempts = (_try_fetch(asset, interval) for interval in intervals)
        collected.extend(record for record in attempts if record is not None)

    print(f"[INFO] Collected {len(collected)} forecast records")
    return collected

def create_forecast_dataframes(all_forecasts):
    """
    Turn raw forecast records into an asset-level and a portfolio-level frame.

    Args:
        all_forecasts: Sequence of records providing at least the fields
            'delivery_start', 'asset_id' and 'value_kw'.

    Returns:
        Tuple ``(df_asset, df_portfolio)``. ``df_asset`` has the columns
        delivery_start (parsed to datetime), asset_id and value_kw.
        ``df_portfolio`` has one row per delivery_start with the summed
        value_kw in 'portfolio_forecast_kw'. Two empty DataFrames are
        returned when there is nothing to process.
    """
    if not all_forecasts:
        print("[ERROR] No forecast data to process")
        return pd.DataFrame(), pd.DataFrame()

    raw = pd.DataFrame(all_forecasts)
    print(f"[INFO] Created DataFrame with {len(raw)} rows")

    # Asset level: keep the three relevant columns and parse the timestamps.
    df_asset = raw[['delivery_start', 'asset_id', 'value_kw']].assign(
        delivery_start=lambda frame: pd.to_datetime(frame['delivery_start'])
    )

    # Portfolio level: total over all assets for each delivery start.
    df_portfolio = (
        df_asset.groupby('delivery_start')['value_kw']
        .sum()
        .reset_index()
        .rename(columns={'value_kw': 'portfolio_forecast_kw'})
    )

    print(f"[INFO] Asset forecasts: {len(df_asset)} rows")
    print(f"[INFO] Portfolio forecasts: {len(df_portfolio)} rows")

    return df_asset, df_portfolio

def save_forecasts(df_asset, df_portfolio, output_dir="output", date_tag="20250608"):
    """
    Save forecast data to CSV files.

    Args:
        df_asset: Asset-level DataFrame, written to
            ``<output_dir>/asset_forecasts_<date_tag>.csv``.
        df_portfolio: Portfolio-level DataFrame, written to
            ``<output_dir>/portfolio_forecast_<date_tag>.csv``.
        output_dir: Target directory; created (with parents) if missing.
            Defaults to "output", the location used before this parameter
            existed.
        date_tag: Suffix embedded in both file names. Defaults to "20250608".

    Both files are written without the index column.
    """
    os.makedirs(output_dir, exist_ok=True)

    asset_path = os.path.join(output_dir, f"asset_forecasts_{date_tag}.csv")
    portfolio_path = os.path.join(output_dir, f"portfolio_forecast_{date_tag}.csv")

    df_asset.to_csv(asset_path, index=False)
    df_portfolio.to_csv(portfolio_path, index=False)


# Directory whose JSON files are scanned for asset IDs.
asset_dir = "./DataEngineeringChallenge-main/DataEngineeringChallenge-main/src/vpp/live_measured_infeed"

# Step 1: Collect the unique asset IDs found in the JSON files under asset_dir
asset_ids = extract_asset_ids(asset_dir)

# Step 2: Generate the quarter-hour delivery intervals for the delivery day
intervals = generate_intervals("2025-06-08")

# Step 3: Request one forecast per (asset, interval) pair
forecasts = fetch_latest_forecasts(asset_ids, intervals)

# Step 4: Build the asset-level and portfolio-level DataFrames
df_asset, df_portfolio = create_forecast_dataframes(forecasts)

# Step 5: Write both DataFrames to CSV files
save_forecasts(df_asset, df_portfolio)
    


[INFO] Found 3 assets: ['WND-DE-001', 'WND-DE-002', 'WND-DE-003']
[INFO] Generated 96 intervals for 2025-06-08
[INFO] Fetching forecasts for 3 assets...
[INFO] Processing asset: WND-DE-001
[INFO] Processing asset: WND-DE-002
[INFO] Processing asset: WND-DE-003
[INFO] Collected 288 forecast records
[INFO] Created DataFrame with 288 rows
[INFO] Asset forecasts: 288 rows
[INFO] Portfolio forecasts: 96 rows


In [80]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

# Set the page title and switch the app to the wide layout.
st.set_page_config(page_title="FlexPower Task 1 - Forecasting", layout="wide")

def load_data():
    """
    Read the asset and portfolio forecast CSVs from the ``output`` folder.

    Returns:
        Tuple ``(df_asset, df_portfolio, loaded)``. When both files are
        present, the frames have their 'delivery_start' column parsed to
        datetime and ``loaded`` is True. When either file is missing the
        result is ``(None, None, False)``.
    """
    csv_paths = (
        "output/asset_forecasts_20250608.csv",
        "output/portfolio_forecast_20250608.csv",
    )

    try:
        assets, portfolio = (pd.read_csv(csv_path) for csv_path in csv_paths)
    except FileNotFoundError:
        return None, None, False

    # Timestamps come back from CSV as plain strings; parse them.
    for frame in (assets, portfolio):
        frame['delivery_start'] = pd.to_datetime(frame['delivery_start'])

    return assets, portfolio, True

def main():
    """
    Render the Streamlit dashboard for the asset and portfolio forecasts.

    Loads both forecast frames through ``load_data``. When the CSV files are
    missing, an error and a hint are shown and nothing else is rendered.
    Otherwise the page shows:

    * sidebar metrics (asset count, portfolio peak and average in MW),
    * a line chart of the summed portfolio forecast,
    * a line chart of the assets with the highest mean forecast,
    * the first 20 rows of each frame in two tabs.

    Values are stored in kW and divided by 1000 for display in MW.
    """
    st.title("🔋 FlexPower Task 1: Asset & Portfolio Forecasting")
    st.markdown("**Delivery Day: June 8, 2025**")

    # Load data
    df_asset, df_portfolio, data_loaded = load_data()

    if not data_loaded:
        st.error("❌ CSV files not found. Please run the main forecasting script first.")
        st.info("Run your forecasting script to generate: asset_forecasts_20250608.csv and portfolio_forecast_20250608.csv")
        return

    # Sidebar with summary stats
    st.sidebar.header("📊 Summary Stats")
    total_assets = df_asset['asset_id'].nunique()
    portfolio_peak = df_portfolio['portfolio_forecast_kw'].max() / 1000
    portfolio_avg = df_portfolio['portfolio_forecast_kw'].mean() / 1000

    st.sidebar.metric("Total Assets", total_assets)
    st.sidebar.metric("Portfolio Peak", f"{portfolio_peak:.1f} MW")
    st.sidebar.metric("Portfolio Average", f"{portfolio_avg:.1f} MW")

    # Main content
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("📈 Portfolio Total Forecast")

        # Portfolio chart
        fig1, ax1 = plt.subplots(figsize=(10, 6))
        ax1.plot(df_portfolio['delivery_start'], df_portfolio['portfolio_forecast_kw'] / 1000,
                 linewidth=2, color='#1f77b4')
        ax1.set_ylabel('Power (MW)')
        ax1.set_title('Total Portfolio Forecast')
        ax1.grid(True, alpha=0.3)
        # Rotate via the Axes itself instead of pyplot's "current axes" state.
        ax1.tick_params(axis='x', labelrotation=45)
        st.pyplot(fig1)
        # The script is re-run on every widget interaction; without closing,
        # pyplot keeps every figure alive and memory grows with each rerun.
        plt.close(fig1)

    with col2:
        st.subheader("🏭 Top Assets Forecast")

        # Number of assets to show
        num_assets = st.selectbox("Select number of top assets:", [3, 5, 10], index=1)

        # Top assets chart
        top_assets = df_asset.groupby('asset_id')['value_kw'].mean().nlargest(num_assets).index
        # There may be fewer assets than requested; label the chart with the
        # number of series that are actually drawn.
        shown_assets = len(top_assets)

        fig2, ax2 = plt.subplots(figsize=(10, 6))
        for asset in top_assets:
            asset_data = df_asset[df_asset['asset_id'] == asset]
            ax2.plot(asset_data['delivery_start'], asset_data['value_kw'] / 1000,
                     label=f'Asset {asset}', linewidth=2)

        ax2.set_ylabel('Power (MW)')
        ax2.set_title(f'Top {shown_assets} Assets Forecast')
        ax2.legend()
        ax2.grid(True, alpha=0.3)
        ax2.tick_params(axis='x', labelrotation=45)
        st.pyplot(fig2)
        plt.close(fig2)

    # Data tables
    st.subheader("📋 Forecast Data")

    tab1, tab2 = st.tabs(["Portfolio Data", "Asset Data"])

    with tab1:
        st.dataframe(df_portfolio.head(20), use_container_width=True)

    with tab2:
        st.dataframe(df_asset.head(20), use_container_width=True)

# Entry point. The app is meant to be launched with `streamlit run`; executing
# it directly (as in the notebook output below) triggers Streamlit's
# "Session state does not function ..." warning.
if __name__ == "__main__":
    main()

2025-08-07 00:54:37.822 Session state does not function when running a script without `streamlit run`
