In [0]:
# Characters Delta Lake refuses in column names, each mapped to "_".
_DELTA_INVALID_CHARS = str.maketrans(dict.fromkeys(" ,;{}()\n\t=", "_"))


def _delta_safe_column(col):
    """Return column label *col* as a string Delta Lake accepts as a name."""
    # MultiIndex column labels arrive as tuples; join the non-empty levels.
    if isinstance(col, tuple):
        col = "_".join(str(level) for level in col if str(level) != "")
    return str(col).translate(_DELTA_INVALID_CHARS)


def write_dataset(country_code, dataset_name, df):
    """
    Write a single dataset to a Delta table.

    The table is named ``<Config.DATABASE>.<country_code lower>__<dataset_name>``
    and is overwritten on every call. The pandas index is kept as a regular
    column (via ``reset_index``), and every column label is converted to a
    Delta-safe string.

    NOTE(review): this function is defined twice in this file with the same
    name; when the file runs top to bottom the later definition is the one in
    effect. Keep the two in sync or remove one.

    Args:
        country_code: Country code; lower-cased to build the table name.
        dataset_name: Dataset label used as the table-name suffix.
        df: pandas DataFrame (or Series) to persist. ``None`` or an empty
            object is skipped silently.

    Returns:
        None.
    """
    if df is None or len(df) == 0:
        return

    # A Series has no ``columns``; promote it to a one-column frame first.
    if isinstance(df, pd.Series):
        df = df.to_frame(name=dataset_name if df.name is None else df.name)

    # reset_index() returns a new frame, so the caller's object is never
    # renamed in place; sanitising afterwards also covers the index column.
    frame = df.reset_index()
    frame.columns = [_delta_safe_column(col) for col in frame.columns]

    table_name = f"{country_code.lower()}__{dataset_name}"
    full_name = f"{Config.DATABASE}.{table_name}"

    spark_df = spark.createDataFrame(frame)

    (spark_df.write
        .format("delta")
        .mode("overwrite")        # A single dataset → safe overwrite
        .saveAsTable(full_name))

    # len(frame) equals the written row count and avoids an extra Spark job.
    print(f"  → Saved to Delta: {full_name} ({len(frame)} rows)")

import os

# SECURITY NOTE(review): an API key is committed in source below. Prefer
# supplying it through the ENTSOE_API_KEY environment variable (or a secret
# store) and rotate the committed value. The literal is kept only as a
# fallback so existing runs keep working unchanged.
API_KEY = os.environ.get("ENTSOE_API_KEY") or '7b785108-53d7-42f8-931e-3d28c4323c68'

# Two-letter country code -> display name. Insertion order is preserved and
# determines the order in which a dict iteration visits the countries.
COUNTRIES = {
    "ES": "Spain",
    "PT": "Portugal",
    "FR": "France",
    "DE": "Germany",
    "IT": "Italy",
    "GB": "Great Britain",
    "NL": "Netherlands",
    "BE": "Belgium",
    "AT": "Austria",
    "CH": "Switzerland",
    "PL": "Poland",
    "CZ": "Czechia",
    "DK": "Denmark",
    "SE": "Sweden",
    "NO": "Norway",
    "FI": "Finland",
    "GR": "Greece",
    "IE": "Ireland",
    "RO": "Romania",
    "BG": "Bulgaria",
    "HU": "Hungary",
    "SK": "Slovakia",
    "SI": "Slovenia",
    "HR": "Croatia",
    "EE": "Estonia",
    "LT": "Lithuania",
    "LV": "Latvia",
}

# Set of (first, second) country-code pairs. Some borders appear in both
# orders as separate entries (e.g. ('NL', 'GB') and ('GB', 'NL')).
# NOTE(review): the collector iterates `Config.VALID_BORDERS` as
# (from_country, to_country) — presumably this set; confirm how Config is
# populated.
# The literal below is intentionally kept verbatim (as originally requested).
VALID_BORDERS = {
    ('ES', 'PT'), ('ES', 'FR'),
    ('FR', 'BE'), ('FR', 'CH'), ('FR', 'DE'), ('FR', 'IT'),
    ('BE', 'NL'), ('BE', 'DE'),
    ('NL', 'DE'), ('NL', 'GB'),
    ('GB', 'NL'), ('GB', 'FR'), ('GB', 'IE'),
    ('DE', 'CZ'), ('DE', 'PL'), ('DE', 'CH'), ('DE', 'DK'), ('DE', 'AT'),
    ('DK', 'DE'), ('DK', 'NO'), ('DK', 'SE'),
    ('SE', 'NO'), ('SE', 'FI'), ('SE', 'DK'),
    ('NO', 'NL'), ('NO', 'GB'), ('NO', 'SE'), ('NO', 'DK'),
    ('FI', 'EE'), ('FI', 'SE'),
    ('EE', 'LV'),
    ('LV', 'LT'),
    ('LT', 'PL'),
    ('PL', 'SK'), ('PL', 'CZ'),
    ('CZ', 'AT'), ('CZ', 'SK'),
    ('AT', 'SI'), ('AT', 'IT'), ('AT', 'CH'), ('AT', 'CZ'), ('AT', 'DE'),
    ('SI', 'HR'), ('SI', 'IT'), ('SI', 'AT'),
    ('HR', 'HU'), ('HR', 'SI'),
    ('HU', 'SK'), ('HU', 'RO'), ('HU', 'HR'), ('HU', 'AT'),
    ('SK', 'HU'), ('SK', 'CZ'), ('SK', 'PL'),
    ('RO', 'BG'), ('RO', 'HU'),
    ('BG', 'GR'), ('BG', 'RO'),
    ('GR', 'BG')
}

# Collection window as ISO date strings.
START_DATE = "2023-01-01"
END_DATE = "2025-10-31"

# Name of the database that holds the output tables.
DATABASE = "european_grid_raw"

# ============================================================================
# DELTA WRITER
# ============================================================================

# Characters Delta Lake refuses in column names, each mapped to "_".
_DELTA_INVALID_CHARS = str.maketrans(dict.fromkeys(" ,;{}()\n\t=", "_"))


def _delta_safe_column(col):
    """Return column label *col* as a string Delta Lake accepts as a name."""
    # MultiIndex column labels arrive as tuples; join the non-empty levels.
    if isinstance(col, tuple):
        col = "_".join(str(level) for level in col if str(level) != "")
    return str(col).translate(_DELTA_INVALID_CHARS)


def write_dataset(country_code, dataset_name, df):
    """
    Write a single dataset to a Delta table.

    The table is named ``<Config.DATABASE>.<country_code lower>__<dataset_name>``
    and is overwritten on every call. The pandas index is kept as a regular
    column (via ``reset_index``), and every column label is converted to a
    Delta-safe string.

    NOTE(review): a function with this same name is also defined earlier in
    this file; when the file runs top to bottom this later definition is the
    one in effect. Keep the two in sync or remove one.

    Args:
        country_code: Country code; lower-cased to build the table name.
        dataset_name: Dataset label used as the table-name suffix.
        df: pandas DataFrame (or Series) to persist. ``None`` or an empty
            object is skipped silently.

    Returns:
        None.
    """
    if df is None or len(df) == 0:
        return

    # A Series has no ``columns``; promote it to a one-column frame first.
    if isinstance(df, pd.Series):
        df = df.to_frame(name=dataset_name if df.name is None else df.name)

    # reset_index() returns a new frame, so the caller's object is never
    # renamed in place; sanitising afterwards also covers the index column.
    frame = df.reset_index()
    frame.columns = [_delta_safe_column(col) for col in frame.columns]

    table_name = f"{country_code.lower()}__{dataset_name}"
    full_name = f"{Config.DATABASE}.{table_name}"

    spark_df = spark.createDataFrame(frame)

    (spark_df.write
        .format("delta")
        .mode("overwrite")        # A single dataset → safe overwrite
        .saveAsTable(full_name))

    # len(frame) equals the written row count and avoids an extra Spark job.
    print(f"  → Saved to Delta: {full_name} ({len(frame)} rows)")

# ============================================================================
# DATA COLLECTOR
# ============================================================================

class EuropeanGridDataCollector:
    """Fetch per-country datasets and cross-border flows and store them in Delta.

    Queries go through an ``EntsoePandasClient`` over the window
    ``Config.START_DATE`` .. ``Config.END_DATE`` (interpreted as UTC).
    Per-country results are persisted with ``write_dataset``; cross-border
    flows are combined into one ``<Config.DATABASE>.crossborder_flows`` table.
    """

    # (dataset name, client method name, extra query kwargs), in fetch order.
    _COUNTRY_DATASETS = (
        ("load_actual", "query_load", {}),
        ("load_forecast", "query_load_forecast", {}),
        ("generation", "query_generation", {}),
        ("wind_forecast", "query_wind_and_solar_forecast", {"psr_type": "B19"}),
        ("solar_forecast", "query_wind_and_solar_forecast", {"psr_type": "B16"}),
        ("installed_capacity", "query_installed_generation_capacity", {}),
    )

    def __init__(self, api_key):
        """Create the API client and read countries and date window from Config.

        Args:
            api_key: Key passed to ``EntsoePandasClient``.
        """
        self.client = EntsoePandasClient(api_key=api_key)
        self.countries = Config.COUNTRIES

        self.start = pd.Timestamp(Config.START_DATE, tz="UTC")
        self.end = pd.Timestamp(Config.END_DATE, tz="UTC")

    # -------------------------------
    # SINGLE COUNTRY DATA
    # -------------------------------
    def collect_country_data(self, country_code):
        """Fetch and store every per-country dataset for *country_code*.

        Each dataset is attempted independently: a failure is printed and the
        next dataset is still tried. There is a one-second pause between
        consecutive datasets (none after the last one).

        Args:
            country_code: Key into ``self.countries``.

        Raises:
            KeyError: If *country_code* is not in ``self.countries``.
        """
        c = country_code
        print(f"\n==== Collecting for {c} ({self.countries[c]}) ====")

        last = len(self._COUNTRY_DATASETS) - 1
        for position, (dataset, method_name, extra) in enumerate(self._COUNTRY_DATASETS):
            try:
                print(f"    → {dataset}...")
                query = getattr(self.client, method_name)
                df = query(c, start=self.start, end=self.end, **extra)
                write_dataset(c, dataset, df)
            except Exception as e:
                # Best effort: report and continue with the next dataset.
                print(f"    ✗ {dataset}: {e}")
            if position < last:
                time.sleep(1)

    # -------------------------------
    # CROSS-BORDER FLOWS
    # -------------------------------
    @staticmethod
    def _flow_to_frame(flow, from_c, to_c):
        """Turn one flow result into a frame that keeps its timestamps.

        The index becomes a ``timestamp`` column (normalised to UTC when it is
        timezone-aware, so results for different borders concatenate into one
        uniform column), and the border is recorded in ``from_country`` /
        ``to_country``. The value column keeps whatever label
        ``pd.DataFrame(flow)`` gives it.
        """
        frame = pd.DataFrame(flow)
        if getattr(frame.index, "tz", None) is not None:
            frame.index = frame.index.tz_convert("UTC")
        frame = frame.rename_axis("timestamp").reset_index()
        frame["from_country"] = from_c
        frame["to_country"] = to_c
        return frame

    def collect_crossborder_flows(self):
        """Fetch flows for every pair in ``Config.VALID_BORDERS`` and store them.

        Pairs are visited in sorted order so logs are reproducible. A failed
        or empty query is reported and skipped. If at least one pair returned
        data, all results are concatenated and written (overwrite) to
        ``<Config.DATABASE>.crossborder_flows``.
        """
        print("\n=== Collecting Cross-Border Flows ===")

        flows_list = []

        for from_c, to_c in sorted(Config.VALID_BORDERS):
            print(f"  → {from_c} ↔ {to_c}...", end="")

            try:
                flow = self.client.query_crossborder_flows(
                    from_c, to_c, start=self.start, end=self.end
                )
                if flow is not None and len(flow) > 0:
                    flows_list.append(self._flow_to_frame(flow, from_c, to_c))
                    print(" ✓")
                else:
                    print(" ✗ No data")
            except Exception as e:
                # Narrower than a bare except (Ctrl-C still works), and the
                # cause is shown like in collect_country_data.
                print(f" ✗ Failed: {e}")

            time.sleep(0.5)

        if not flows_list:
            return

        # Timestamps are ordinary columns by now, so dropping the positional
        # index here loses no information.
        df = pd.concat(flows_list, ignore_index=True)

        spark_df = spark.createDataFrame(df)
        # overwriteSchema lets the overwrite succeed when an existing table
        # was written without the timestamp column.
        (spark_df.write.format("delta").mode("overwrite")
            .option("overwriteSchema", "true")
            .saveAsTable(f"{Config.DATABASE}.crossborder_flows"))

        print("  → Saved cross-border flows table")

    # -------------------------------
    # MAIN COLLECTOR
    # -------------------------------
    def collect_all(self):
        """Collect every country's datasets, then the cross-border flows."""
        for c in self.countries:
            self.collect_country_data(c)

        self.collect_crossborder_flows()

# ============================================================================
# RUN PIPELINE
# ============================================================================

# Script entry: build a collector with the module-level API_KEY, run the full
# collection (per-country datasets first, then cross-border flows), and print
# a completion marker. These statements execute as soon as the file/cell runs.
collector = EuropeanGridDataCollector(api_key=API_KEY)
collector.collect_all()
print("\nCOMPLETE.")
