In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import sys
import pathlib

# Project root = parent of the notebook's working directory. It is put on sys.path
# presumably so the project-local packages imported below can be resolved.
PROJECT_ROOT = pathlib.Path.cwd().parent
# Guard so that re-running this cell does not keep appending duplicate entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from src.settings import Config
from src.utils import Network, Results, FineNetwork, CoarseNetwork
from src.models import SpatialAggregation, TemporalAggregation
from src.visualization import Visualizer
from Full_GTEP_Solve.modules_v2 import read_data, GTEP

import pickle
import xarray as xr
import numpy as np
from numba import njit, prange
import pandas as pd
import matplotlib.pyplot as plt
import time
import gurobipy as gp
from gurobipy import GRB

In [5]:
import numpy as np
import pandas as pd
import xarray as xr
from pathlib import Path
from typing import Any
from functools import lru_cache
from dataclasses import asdict
import hashlib
import json
from numba import njit, prange
from datetime import datetime

from sklearn.neighbors import NearestNeighbors

In [6]:
# Configuration shared by the step-by-step cells below (network building,
# spatial aggregation, temporal aggregation, results).
# NOTE(review): granularity="high_bis" is not one of the values accepted by
# StaticPreprocessor.preprocess further down ('fine' / 'coarse') — confirm which
# granularities Config actually supports.
config = Config(
    year=2013,
    cf_k_neighbors=1,
    demand_decay_alpha=0.4,
    granularity="high_bis",
    active_features=['position', 'time_series', 'duration_curves', 'ramp_duration_curves', 'intra_correlation']
)

# # Display configuration help.
# config.help()

In [7]:
# Build the fine network from the configuration. The result is indexed with
# "nodes" and "time_series" when the Network object is created below.
Fine_NTW = FineNetwork(config)
fine_ntw = Fine_NTW.build_fine_ntw()


  counties.geometry.centroid.y,

  counties.geometry.centroid.x


In [8]:
# Build the coarse network on top of the fine one.
# NOTE(review): `coarse_ntw` is not read by any later cell in this notebook —
# confirm whether it is still needed here.
Coarse_NTW = CoarseNetwork(config, fine_ntw)
coarse_ntw = Coarse_NTW.build_coarse_ntw()

In [9]:
# Wrap the fine network's nodes and time series; `ntw.features` is the input of
# both aggregation steps below.
ntw = Network(fine_ntw["nodes"], fine_ntw["time_series"], config)

In [10]:
# Spatial aggregation model built from the network features and the configuration.
spatial = SpatialAggregation(ntw.features, config)

In [11]:
# Per the log below, reading `distance_metrics` runs the distance computation
# (about one minute for 385 nodes in this recorded run, dominated by
# inter_correlation) and saves the metrics to disk.
distances = spatial.distance_metrics

Computing distance metrics for 385 nodes. This might take a while...
Computing position distance...
position distance computed in 0:00:01.903200.
Computing time_series distance...
time_series distance computed in 0:00:01.920175.
Computing ramp_duration_curves distance...
ramp_duration_curves distance computed in 0:00:01.000139.
Computing duration_curves distance...
duration_curves distance computed in 0:00:01.066451.
Computing intra_correlation distance...
intra_correlation distance computed in 0:00:00.
Computing inter_correlation distance...
inter_correlation distance computed in 0:00:52.478903.
All distance metrics computed.
Starting normalization...
Normalization completed in 0:00:00.
Total computation time: 0:00:58.369377.
Saving metrics...
Metrics saved to c:\Users\g630d\Documents\00_Cours\2024-2025_MIT\00 Thesis\dev\results\distance_metrics\v11d728cf.


In [12]:
# Run the spatial aggregation; its result is passed on to TemporalAggregation
# and Results in the cells below.
assignment_dict = spatial.aggregate()

In [13]:
# Temporal aggregation model; it takes the spatial aggregation result as input.
temp = TemporalAggregation(config, ntw.features, assignment_dict)

In [14]:
# Run the temporal aggregation; the result is handed to Results below.
rep_days = temp.aggregate()

In [38]:
# Bundle the configuration, the fine network and both aggregation outputs.
# Per the log below, constructing Results also writes them to a versioned
# results directory.
# NOTE(review): this cell's execution count (38) breaks the sequence of the
# cells above — re-run the notebook top to bottom to confirm the output is current.
results = Results(config, fine_ntw, assignment_dict, rep_days)

Results saved to c:\Users\g630d\Documents\00_Cours\2024-2025_MIT\00 Thesis\dev\results\joint_aggregation_results\v6efb3e6a


# Full pipeline

In [46]:
from pathlib import Path
import hashlib
import json
from dataclasses import asdict
from sklearn.model_selection import ParameterGrid

class StaticPreprocessor:
    """Handles network construction and feature preparation that doesn't change with hyperparameters.

    Attributes:
        config: The ``Config`` built from the constructor arguments.
        fine_data: Output of ``FineNetwork.build_fine_ntw()``; ``None`` until ``preprocess()`` runs.
        network_data: The network used downstream (fine or coarse, depending on
            the configured granularity); ``None`` until ``preprocess()`` runs.
        ntw: ``Network`` built from ``network_data``; ``None`` until ``preprocess()`` runs.
    """

    # Kept as an immutable tuple so the default can never be shared (and mutated)
    # between instances; each instance receives its own list copy.
    DEFAULT_ACTIVE_FEATURES = (
        'position',
        'time_series',
        'duration_curves',
        'ramp_duration_curves',
        'intra_correlation',
    )

    def __init__(self, granularity: str, year: int = 2013, active_features: "list | None" = None, inter_correlation: bool = True):
        """Create the configuration; no network is built until ``preprocess()`` is called.

        Args:
            granularity: Network granularity; ``preprocess()`` accepts 'fine' or 'coarse'.
            year: Year forwarded to ``Config``.
            active_features: Feature names forwarded to ``Config``. ``None`` selects
                ``DEFAULT_ACTIVE_FEATURES``.
            inter_correlation: Value stored on ``config.model_hyper.inter_correlation``.
        """
        if active_features is None:
            active_features = self.DEFAULT_ACTIVE_FEATURES

        self.config = Config(
            year=year,
            granularity=granularity,
            # Fresh list per instance: neither the default nor the caller's list is aliased.
            active_features=list(active_features),
        )
        self.config.model_hyper.inter_correlation = inter_correlation

        self.network_data = None
        self.fine_data = None
        self.ntw = None

    def preprocess(self):
        """Run all static preprocessing steps.

        Builds the fine network, derives the coarse one when the configured
        granularity is 'coarse', and wraps the selected network in a ``Network``.

        Returns:
            ``self``, so the call can be chained after the constructor.

        Raises:
            ValueError: If the configured granularity is neither 'fine' nor 'coarse'.
        """
        config = self.config
        granularity = config.data_preproc.granularity

        # Validate before doing any (potentially expensive) network construction.
        if granularity not in ("fine", "coarse"):
            raise ValueError("Unsupported granularity. Use 'fine' or 'coarse'.")

        # The fine network is needed in both cases: it is either used directly or
        # serves as the input of the coarse builder.
        self.fine_data = FineNetwork(config).build_fine_ntw()
        if granularity == "coarse":
            self.network_data = CoarseNetwork(config, self.fine_data).build_coarse_ntw()
        else:
            self.network_data = self.fine_data

        self.ntw = Network(
            self.network_data["nodes"],
            self.network_data["time_series"],
            config
        )

        return self

class DynamicProcessor:
    """Handles parameter-dependent operations that can vary during grid search.

    Each run works on its own copy of the preprocessor's configuration, so the
    shared base configuration is never modified and successive runs (e.g. in a
    grid search) cannot influence each other through it.
    """

    def __init__(self, preprocessor: "StaticPreprocessor"):
        """Keep references to the preprocessor, its configuration and its network.

        Args:
            preprocessor: A ``StaticPreprocessor``; its ``preprocess()`` must have
                been run before ``run_with_hyperparameters`` is called.
        """
        self.preprocessor = preprocessor
        self.base_config = preprocessor.config
        self.ntw = preprocessor.ntw

    def run_with_hyperparameters(self,
                               weights: dict,
                               n_representative_nodes: int,
                               k_representative_days: int) -> tuple[dict, str]:
        """Execute parameter-dependent pipeline steps.

        Runs the spatial aggregation, then the temporal aggregation, then builds
        a ``Results`` object from both.

        Args:
            weights: Stored on ``config.model_hyper.weights`` for this run.
            n_representative_nodes: Stored on ``config.model_hyper.n_representative_nodes``.
            k_representative_days: Stored on ``config.model_hyper.k_representative_days``.

        Returns:
            ``(results.results, version_hash)`` where ``version_hash`` is the
            8-character hash of the configuration used for this run.
            NOTE(review): ``Results`` saves under its own version directory (see
            the run logs) — confirm it matches the hash returned here.

        Raises:
            ValueError: If no network is available (static preprocessing not run).
        """
        import copy  # local import keeps this block self-contained within the notebook

        # Fall back to the preprocessor's current network so that a processor
        # created before `preprocess()` was called still works afterwards.
        ntw = self.ntw if self.ntw is not None else self.preprocessor.ntw
        if ntw is None:
            raise ValueError("Network data not initialized. Run static preprocessing first.")

        # Work on a private copy: assigning to `self.base_config` directly would
        # mutate the configuration shared with the preprocessor on every run.
        current_config = copy.deepcopy(self.base_config)
        current_config.model_hyper.weights = dict(weights)
        current_config.model_hyper.n_representative_nodes = n_representative_nodes
        current_config.model_hyper.k_representative_days = k_representative_days

        # Spatial aggregation
        spatial_agg = SpatialAggregation(ntw.features, current_config)
        spatial_results = spatial_agg.aggregate()

        # Temporal aggregation (consumes the spatial aggregation result)
        temporal_agg = TemporalAggregation(current_config, ntw.features, spatial_results)
        temporal_results = temporal_agg.aggregate()

        # Process and save results
        results = Results(current_config, self.preprocessor.network_data, spatial_results, temporal_results)

        return results.results, self._get_result_hash(current_config)

    def _get_result_hash(self, config: "Config") -> str:
        """Generate a short hash identifying the given configuration.

        The hash is the first 8 hex characters of the MD5 digest of the
        key-sorted JSON dump of ``config.data_preproc`` (as a dict) and the
        attributes of ``config.model_hyper``.
        """
        config_dict = {
            "data_preproc": asdict(config.data_preproc),
            "model_hyper": vars(config.model_hyper),
        }
        # `default=str` only kicks in for values json cannot encode natively
        # (e.g. paths); hashes of fully JSON-serializable configs are unchanged.
        payload = json.dumps(config_dict, sort_keys=True, default=str)
        return hashlib.md5(payload.encode()).hexdigest()[:8]

In [54]:
# Static preprocessing (run once)
# `preprocess()` returns the preprocessor itself, so the call can be chained.
static_prep = StaticPreprocessor(granularity="fine").preprocess()

# Dynamic processor (reused for multiple runs)
processor = DynamicProcessor(static_prep)

# Single run: returns the results and the 8-character configuration hash.
res, version_hash = processor.run_with_hyperparameters(
    weights={
        'position': 1.0,
        'time_series': 0.8,
        'duration_curves': 1.2,
        'ramp_duration_curves': 1.0,
        'intra_correlation': 1.0,
        'inter_correlation': 1.0
    },
    n_representative_nodes=10,
    k_representative_days=15
)

# NOTE(review): the commented-out grid below passes weights with only three keys,
# whereas the call above provides six — confirm that partial weights are supported
# before re-enabling it.
# # Define parameter grid
# param_grid = {
#     'weights': [
#         {'position': 1.0, 'time_series': 0.8, 'duration_curves': 1.2},
#         {'position': 0.8, 'time_series': 1.0, 'duration_curves': 1.0}
#     ],
#     'n_representative_nodes': [10, 15],
#     'k_representative_days': [8, 10]
# }

# # Execute grid search
# results = {}
# for params in ParameterGrid(param_grid):
#     res, version_hash = processor.run_with_hyperparameters(
#         weights=params['weights'],
#         n_representative_nodes=params['n_representative_nodes'],
#         k_representative_days=params['k_representative_days']
#     )
#     results[version_hash] = {'params': params, 'results': res}


  counties.geometry.centroid.y,

  counties.geometry.centroid.x


Computing distance metrics for 385 nodes. This might take a while...
Computing position distance...
position distance computed in 0:00:01.038343.
Computing time_series distance...
time_series distance computed in 0:00:02.813008.
Computing ramp_duration_curves distance...
ramp_duration_curves distance computed in 0:00:02.180739.
Computing duration_curves distance...
duration_curves distance computed in 0:00:02.343365.
Computing intra_correlation distance...
intra_correlation distance computed in 0:00:00.011087.
Computing inter_correlation distance...
inter_correlation distance computed in 0:00:56.406467.
All distance metrics computed.
Starting normalization...
Normalization completed in 0:00:00.017601.
Total computation time: 0:01:04.810610.
Saving metrics...
Metrics saved to c:\Users\g630d\Documents\00_Cours\2024-2025_MIT\00 Thesis\dev\results\distance_metrics\v6cdfff21.
Results saved to c:\Users\g630d\Documents\00_Cours\2024-2025_MIT\00 Thesis\dev\results\joint_aggregation_results\v85

In [48]:
# Display the configuration hash returned by `run_with_hyperparameters`.
# NOTE(review): this cell's execution count (48) is lower than that of the run
# cell above (54), so the value shown below may be stale — re-run to confirm.
version_hash

'91c16280'