In [2]:
import requests
import os
import json

# API base URL and token
BASE_URL = "https://renforce.esapro.it/api"
API_TOKEN = "2c23937283ed6125f52c4ba7399c7baebdfb3f051ac9272550847066011d7d5a83807bb0c3c807e2450fcb7fd4843d126badcf4c3f0ee52ca548b43db77e100e48becf05cb5e93529564055dd8a85d9beb089420a91eec2a022fbf8f984d9138a26afc5296e60aed14024c60ddcb95dff130b3f9fb7af13598a48dbc62348389"

# Plant code
PLANT_CODE = "P1130"
RESOLUTION=1

# Headers for authentication
HEADERS = {
    "X-API-Token": API_TOKEN
}

# Function to retrieve plant configurations
def get_plant_configurations(plant_code):
    """
    Retrieves the plant configurations from the API.

    Args:
        plant_code (str): The plant code.

    Returns:
        dict: The API response as a dictionary.
    """
    url = f"{BASE_URL}/{plant_code}/config"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Failed to retrieve configurations: {response.status_code}, {response.text}")

# Main execution
try:
    # Retrieve configurations
    configurations = get_plant_configurations(PLANT_CODE)

    # Print the response on the screen with better formatting
    print("\nAPI Response:")
    print("=" * 40)
    for config in configurations:
        for key, value in config.items():
            print(f"{key}: {value}")
        print("-" * 40)

    # Save the response to a file
    base_dir = "Data"
    os.makedirs(base_dir, exist_ok=True)
    file_path = os.path.join(base_dir, f"{PLANT_CODE}_configurations.json")
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(configurations, file, indent=4)
    print(f"\nConfigurations saved to: {file_path}")

    # Create a constant variable for the version
    VERSION = configurations[-1].get("v", "N/A")  # Assuming the version is in the first configuration
    print(f"\nVERSION constant created: {VERSION}")

except Exception as e:
    print("Error:", e)


API Response:
v: 1
t: 1980-01-01 00:00:00
----------------------------------------

Configurations saved to: Data/P1130_configurations.json

VERSION constant created: 1


In [3]:
import requests
import os
import csv
import json

def get_blocks(plant_code, config_version, block_types=None):
    """
    Retrieves a list of blocks (devices) from a specific plant configuration.

    Args:
        plant_code (str): The plant code (e.g., "P1234").
        config_version (int): Configuration identifier.
        block_types (str or list, optional): Type(s) of blocks to filter (e.g., "INVERTER" or ["COUNTER", "INVERTER"]).

    Returns:
        list: Array of block objects representing devices in the plant.
    """
    url = f"{BASE_URL}/{plant_code}/blocks/{config_version}"
    params = {}
    if block_types:
        params["type"] = block_types if isinstance(block_types, str) else ",".join(block_types)
    
    try:
        response = requests.get(url, headers=HEADERS, params=params)
        if response.status_code == 200:
            return response.json()
        else:
            print("Response Status Code:", response.status_code)
            print("Response Text:", response.text)
            raise Exception(f"Failed to retrieve blocks: {response.status_code}, {response.text}")
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while making the request: {e}")

def save_blocks_to_csv(plant_code, blocks):
    """
    Saves the blocks to a CSV file in the specified directory structure.

    Args:
        plant_code (str): The plant code.
        blocks (list): List of block objects.
    """
    # Define the directory structure
    base_dir = "Data"
    plant_dir = os.path.join(base_dir, plant_code)

    # Create directories if they don't exist
    os.makedirs(plant_dir, exist_ok=True)

    # Define the CSV file path
    csv_file_path = os.path.join(plant_dir, f"{plant_code}_blocks.csv")

    # Write blocks to the CSV file
    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        # Write the header
        writer.writerow(["ID", "Name", "Type", "Description", "OID", "Properties", "Sensors", "Visible", "Enabled"])
        # Write the data
        for block in blocks:
            writer.writerow([
                block.get("id", "N/A"),
                block.get("name", "N/A"),
                block.get("type", "N/A"),
                block.get("desc", "N/A"),
                block.get("oid", "N/A"),
                block.get("props", "N/A"),
                block.get("sens", "N/A"),
                block.get("visible", "N/A"),
                block.get("enabled", "N/A")
            ])

    print(f"Blocks saved to: {csv_file_path}")

def print_blocks(blocks):
    """
    Prints the blocks nicely on the screen.

    Args:
        blocks (list): List of block objects.
    """
    print("\nBlocks List:")
    for block in blocks:
        print(f"ID: {block.get('id', 'N/A')}")
        print(f"Name: {block.get('name', 'N/A')}")
        print(f"Type: {block.get('type', 'N/A')}")
        print(f"Description: {block.get('desc', 'N/A')}")
        print(f"OID: {block.get('oid', 'N/A')}")
        print(f"Properties: {block.get('props', 'N/A')}")
        print(f"Sensors: {block.get('sens', 'N/A')}")
        print(f"Visible: {block.get('visible', 'N/A')}")
        print(f"Enabled: {block.get('enabled', 'N/A')}")
        print("-" * 40)  # Separator for readability

# Main execution
try:
    # Retrieve blocks
    blocks = get_blocks(PLANT_CODE, VERSION)

    # Print blocks nicely
    print_blocks(blocks)

    # Save blocks to CSV
    save_blocks_to_csv(PLANT_CODE, blocks)

except Exception as e:
    print("Error:", e)


Blocks List:
ID: 1
Name: P1130
Type: PLANT
Description: P1130 Collection
OID: 1
Properties: {'alarm_check_timerange': '10:00 - 14:00', 'alarm_irradiance_threshold': '100', 'avoid_night_oos': '0', 'capacity': '4103.2', 'contractual_irr_link': '', 'delivery_period': '30', 'energy_alerts': '0', 'field_alarm_ttl': '10', 'field_data_ttl': '10', 'irr_link.0': '171', 'sunrise_o': '0', 'sunset_o': '0', 'sys_loss': '0.05', 'temp_link.0': '173', 'up_inverter_thld': '0.9', 'up_irr_thld': '600', 'up_irrsd_thld': '0.05', 'up_jbox_thld': '0.9', 'user_irr_link': '', 'user_temp_link': ''}
Sensors: ['J44', 'J30', 'J101', 'J7', 'J34', 'J33', 'J31', 'J1', 'J104', 'J4', 'J102']
Visible: True
Enabled: True
----------------------------------------
ID: 17
Name: Cabina 01
Type: AGGREGATE
Description: 
OID: 1.1
Properties: {'aggr_stop': '1', 'alarm_irradiance_filter': '0', 'capacity': '1370.8', 'context': '', 'irr_link.0': '171', 'minor_aggr': '0', 'temp_link.0': '173', 'user_irr_link': '', 'user_temp_link': 

In [4]:
import requests
import os
import csv


def get_sensors(plant_code, config_version, sensor_types=None):
    """
    Retrieves a list of sensors from a specific plant configuration.

    Args:
        plant_code (str): The plant code (e.g., "P1234").
        config_version (int): Configuration identifier.
        sensor_types (str or list, optional): Type(s) of sensors to filter (e.g., "AC_ACTIVE_ENERGY" or ["AC_ACTIVE_ENERGY", "MODULE_SOLAR_IRRADIANCE"]).

    Returns:
        list: Array of sensor objects.
    """
    url = f"{BASE_URL}/{plant_code}/sensors/{config_version}"
    params = {}
    if sensor_types:
        params["type"] = sensor_types if isinstance(sensor_types, str) else ",".join(sensor_types)
    
    try:
        response = requests.get(url, headers=HEADERS, params=params)
        if response.status_code == 200:
            return response.json()
        else:
            print("Response Status Code:", response.status_code)
            print("Response Text:", response.text)
            raise Exception(f"Failed to retrieve sensors: {response.status_code}, {response.text}")
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while making the request: {e}")

def save_sensors_to_csv(plant_code, sensors):
    """
    Saves the sensors to a CSV file in the specified directory structure.

    Args:
        plant_code (str): The plant code.
        sensors (list): List of sensor objects.
    """
    # Define the directory structure
    base_dir = "Data"
    plant_dir = os.path.join(base_dir, plant_code)

    # Create directories if they don't exist
    os.makedirs(plant_dir, exist_ok=True)

    # Define the CSV file path
    csv_file_path = os.path.join(plant_dir, f"{plant_code}_sensors.csv")

    # Write sensors to the CSV file
    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        # Write the header
        writer.writerow(["ID", "Name", "Type", "Description", "OID", "Properties", "Source", "Visible", "Enabled"])
        # Write the data
        for sensor in sensors:
            writer.writerow([
                sensor.get("id", "N/A"),
                sensor.get("name", "N/A"),
                sensor.get("type", "N/A"),
                sensor.get("desc", "N/A"),
                sensor.get("oid", "N/A"),
                sensor.get("props", "N/A"),
                sensor.get("source", "N/A"),
                sensor.get("visible", "N/A"),
                sensor.get("enabled", "N/A")
            ])

    print(f"Sensors saved to: {csv_file_path}")

def print_sensors(sensors):
    """
    Prints the sensors nicely on the screen.

    Args:
        sensors (list): List of sensor objects.
    """
    print("\nSensors List:")
    for sensor in sensors:
        print(f"ID: {sensor.get('id', 'N/A')}")
        print(f"Name: {sensor.get('name', 'N/A')}")
        print(f"Type: {sensor.get('type', 'N/A')}")
        print(f"Description: {sensor.get('desc', 'N/A')}")
        print(f"OID: {sensor.get('oid', 'N/A')}")
        print(f"Properties: {sensor.get('props', 'N/A')}")
        print(f"Source: {sensor.get('source', 'N/A')}")
        print(f"Visible: {sensor.get('visible', 'N/A')}")
        print(f"Enabled: {sensor.get('enabled', 'N/A')}")
        print("-" * 40)  # Separator for readability

# Main execution
try:
    plant_code = PLANT_CODE  # Plant code
    config_version = VERSION       # Configuration version
    sensor_types = None      # Retrieve all sensor types

    # Retrieve sensors
    sensors = get_sensors(plant_code, config_version, sensor_types)
    print(len(sensors), "sensors found")
    # Print sensors nicely
    print_sensors(sensors)

    # Save sensors to CSV
    save_sensors_to_csv(plant_code, sensors)
except Exception as e:
    print("Error:", e)

3572 sensors found

Sensors List:
ID: 1
Name: Voltage L1
Type: VOLTAGE
Description: 
OID: 1.1.14:1
Properties: {'alarm_c': '5', 'alarm_debounce': '60', 'alarm_function': '', 'alarm_max_threshold': '0', 'alarm_min_threshold': '0', 'alarm_tdc': '60', 'alarm_tdo': '60', 'alarm_type': '', 'cast': 'U32', 'endian': 'BE', 'is_raw': '1', 'limit_max': '', 'limit_min': '', 'register': '3c552', 'sampling_period': '300'}
Source: sundra
Visible: True
Enabled: True
----------------------------------------
ID: 10
Name: Temperature
Type: TEMPERATURE
Description: 
OID: 1.1.1.1:10
Properties: {'alarm_c': '3', 'alarm_debounce': '300', 'alarm_function': 'LIMITALARM', 'alarm_max_threshold': '70', 'alarm_min_threshold': '-10', 'alarm_tdc': '300', 'alarm_tdo': '300', 'alarm_type': '52', 'c_high_pass': '', 'c_low_pass': '', 'cast': 'U16', 'copy_to': '', 'data_age_lmt': '0', 'endian': 'BE', 'is_raw': '1', 'limit_max': '', 'limit_min': '', 'mask': '', 'mask_value': '1', 'register': '31064', 'sampling_period': '

In [5]:
import pandas as pd


# Path to the sensors file
SENSORS_FILE = f"Data/{PLANT_CODE}/{PLANT_CODE}_sensors.csv"

# Step 1: Load the sensors file
try:
    sensors_metadata = pd.read_csv(SENSORS_FILE)
except FileNotFoundError:
    print(f"File not found: {SENSORS_FILE}")
    exit()  # Ensure the program exits here to avoid further execution

# Step 2: Find the sensor with the name "Energia Prodotta" or "Energy AC DELTA"
if "Name" not in sensors_metadata.columns:
    print("The 'Name' column is missing in the sensors metadata.")
    exit()

sensor_row = sensors_metadata[sensors_metadata["Name"].str.contains("Energia Prodotta", case=False, na=False)]
if sensor_row.empty:
    sensor_row = sensors_metadata[sensors_metadata["Name"].str.contains("Energy AC DELTA", case=False, na=False)]
    if sensor_row.empty:
        print("Sensor with the name 'Energia Prodotta' or 'Energy AC DELTA' not found.")
        exit()

sensor_id = sensor_row.iloc[0]["ID"]
print(f"Sensor ID for the found sensor: {sensor_id}")

Sensor ID for the found sensor: 156


In [6]:
import pandas as pd
from datetime import datetime, timedelta
import requests
import time

# Path to the sensors file
SENSORS_FILE = f"Data/{PLANT_CODE}/{PLANT_CODE}_sensors.csv"

# Step 1: Load the sensors file
try:
    sensors_metadata = pd.read_csv(SENSORS_FILE)
except FileNotFoundError:
    print(f"File not found: {SENSORS_FILE}")
    exit()

# Step 2: Find the sensor with the name "Energia Prodotta" or "Energy AC DELTA"
if "Name" not in sensors_metadata.columns:
    print("The 'Name' column is missing in the sensors metadata.")
    exit()

sensor_row = sensors_metadata[sensors_metadata["Name"].str.contains("Energia Prodotta", case=False, na=False)]
if sensor_row.empty:
    sensor_row = sensors_metadata[sensors_metadata["Name"].str.contains("Energy AC DELTA", case=False, na=False)]
    if sensor_row.empty:
        print("Sensor with the name 'Energia Prodotta' or 'Energy AC DELTA' not found.")
        exit()

sensor_id = sensor_row.iloc[0]["ID"]
print(f"Sensor ID for the found sensor: {sensor_id}")

# Step 3: Retrieve data for the sensor
TREND_URL = f"{BASE_URL}/{PLANT_CODE}/data/trend/"
end_date = datetime.now().replace(minute=0, second=0, microsecond=0)  # Round down to the nearest full hour
max_interval = timedelta(days=92)  # Maximum allowed interval (92 days)
no_data_threshold = timedelta(days=365)  # Stop if no data is returned for more than 1 year

current_end = end_date
last_reading = None
no_data_duration = timedelta(0)

while no_data_duration < no_data_threshold:
    current_start = current_end - max_interval
    payload = {
        "from": current_start.strftime("%Y-%m-%d %H:%M:%S"),
        "to": current_end.strftime("%Y-%m-%d %H:%M:%S"),
        "sensors": [sensor_id],
        "resolution": RESOLUTION
    }

    # Make the POST request to the API
    response = requests.post(TREND_URL, headers=HEADERS, json=payload)

    if response.status_code == 200:
        try:
            data = response.json()
        except ValueError:
            print("Failed to parse JSON response.")
            break

        # Check if data is returned for the sensor
        if str(sensor_id) in data and data[str(sensor_id)]:
            sensor_data = data[str(sensor_id)]
            sensor_df = pd.DataFrame(sensor_data, columns=["timestamp", "value"])
            sensor_df["timestamp"] = pd.to_datetime(sensor_df["timestamp"])  # Ensure timestamp is datetime

            # Update the last reading to the minimum timestamp in this range
            last_reading = sensor_df["timestamp"].min()

            print(f"Data retrieved for range {current_start} to {current_end}.")
            no_data_duration = timedelta(0)  # Reset no data duration
            del sensor_df  # Clear memory
        else:
            print(f"No data for sensor {sensor_id} in range {current_start} to {current_end}.")
            no_data_duration += max_interval  # Increment no data duration
    else:
        print(f"Failed to fetch trend data. HTTP Status Code: {response.status_code}")
        print("Response:", response.text)
        break

    # Move to the previous time range
    current_end = current_start - timedelta(seconds=1)
    time.sleep(1)  # Avoid hitting API rate limits

# Step 4: Save the last reading going backward
if last_reading:
    last_day = last_reading.date()
    print(f"The last day of data for the sensor going backward is: {last_day}")
else:
    print("No data found for the sensor.")

Sensor ID for the found sensor: 156
Data retrieved for range 2025-03-16 15:00:00 to 2025-06-16 15:00:00.
Data retrieved for range 2024-12-14 14:59:59 to 2025-03-16 14:59:59.
Data retrieved for range 2024-09-13 14:59:58 to 2024-12-14 14:59:58.
Data retrieved for range 2024-06-13 14:59:57 to 2024-09-13 14:59:57.
Data retrieved for range 2024-03-13 14:59:56 to 2024-06-13 14:59:56.
Data retrieved for range 2023-12-12 14:59:55 to 2024-03-13 14:59:55.
Data retrieved for range 2023-09-11 14:59:54 to 2023-12-12 14:59:54.
Data retrieved for range 2023-06-11 14:59:53 to 2023-09-11 14:59:53.
Data retrieved for range 2023-03-11 14:59:52 to 2023-06-11 14:59:52.
Data retrieved for range 2022-12-09 14:59:51 to 2023-03-11 14:59:51.
Data retrieved for range 2022-09-08 14:59:50 to 2022-12-09 14:59:50.
Data retrieved for range 2022-06-08 14:59:49 to 2022-09-08 14:59:49.
Data retrieved for range 2022-03-08 14:59:48 to 2022-06-08 14:59:48.
No data for sensor 156 in range 2021-12-06 14:59:47 to 2022-03-08 1

The code under retrieves all the sensors, and specifies which sensors actually have data

In [6]:
# import requests
# import pandas as pd
# import time
# import os
# from datetime import datetime, timedelta

# # API endpoint URLs
# TREND_URL = f"{BASE_URL}/{PLANT_CODE}/data/trend/"
# SENSORS_URL = f"{BASE_URL}/{PLANT_CODE}/sensors/{config_version}"

# # Function to save sensors with data (ID and Name) to a CSV file
# def save_sensors_with_data(plant_code, sensors_with_data, sensors_metadata):
#     """
#     Saves the list of sensors with data (ID and Name) to a CSV file in the specified directory structure.

#     Args:
#         plant_code (str): The plant code.
#         sensors_with_data (list): List of sensor IDs with data.
#         sensors_metadata (pd.DataFrame): DataFrame containing sensor metadata.
#     """
#     # Define the directory structure
#     base_dir = "Data"
#     plant_dir = os.path.join(base_dir, plant_code)

#     # Create directories if they don't exist
#     os.makedirs(plant_dir, exist_ok=True)

#     # Define the CSV file path
#     csv_file_path = os.path.join(plant_dir, f"{plant_code}_sensors_with_data.csv")

#     # Filter metadata to include only sensors with data
#     matching_sensors = sensors_metadata[sensors_metadata["id"].isin(sensors_with_data)]  # Use "id" instead of "ID"

#     # Save the matching sensors (ID and Name) to the CSV file
#     matching_sensors[["id", "name"]].to_csv(csv_file_path, index=False)  # Use "id" and "name" columns

#     print(f"Sensors with data saved to: {csv_file_path}")

# # Fetch the list of sensors
# response_sensors = requests.get(SENSORS_URL, headers=HEADERS)

# if response_sensors.status_code == 200:
#     # Parse the JSON response to get the list of sensors
#     sensors_data = response_sensors.json()
#     sensors_metadata = pd.DataFrame(sensors_data)  # Convert sensor metadata to a DataFrame
#     sensor_ids = sensors_metadata["id"].tolist()  # Get all sensor IDs
#     print(f"Total sensor IDs: {len(sensor_ids)}")
    
#     # Use the last_day from the previous code as the start_date
#     try:
#         # Use the last_day from the previous code as the start_date
#         start_date = datetime.combine(last_day, datetime.min.time())  # Convert last_day (date) to datetime
#     except FileNotFoundError:
#         print("No previous last_day found. Using default start date.")
#         start_date = datetime(2025, 4, 15, 0, 0, 0)  # Default start date

#     # Define the end date
#     end_date = datetime(2025, 5, 15, 23, 59, 59)  # End date

#     # Define maximum time ranges for each resolution
#     resolution_max_intervals = {
#         0: timedelta(days=92),    # 15-minute resolution
#         1: timedelta(days=368),   # Hourly resolution
#         2: timedelta(days=7300),  # Daily resolution
#         3: timedelta(days=7300),  # Monthly resolution
#         4: timedelta(days=7300)   # Yearly resolution
#     }

#     # Get the maximum interval for the selected resolution
#     max_interval = resolution_max_intervals.get(RESOLUTION, timedelta(days=92))  # Default to 92 days if resolution is unknown

#     # Split the time range into chunks of the maximum interval
#     time_ranges = []
#     current_start = start_date
#     while current_start < end_date:
#         current_end = min(current_start + max_interval, end_date)
#         time_ranges.append((current_start, current_end))
#         current_start = current_end + timedelta(seconds=1)  # Avoid overlapping intervals

#     # Split sensor IDs into batches of 25 sensors per batch
#     batch_size = 25
#     sensor_batches = [sensor_ids[i:i + batch_size] for i in range(0, len(sensor_ids), batch_size)]

#     # Initialize an empty DataFrame
#     df = pd.DataFrame()

#     # List to track sensors with data
#     sensors_with_data = []

#     # Loop through each time range and batch of sensors
#     for time_idx, (start_time, end_time) in enumerate(time_ranges):
#         print(f"Processing time range {time_idx + 1}: {start_time} to {end_time}")
#         for batch_idx, batch in enumerate(sensor_batches):
#             print(f"Processing batch {batch_idx + 1} with sensors: {batch}")
#             payload = {
#                 "from": start_time.strftime("%Y-%m-%d %H:%M:%S"),
#                 "to": end_time.strftime("%Y-%m-%d %H:%M:%S"),
#                 "sensors": batch,
#                 "resolution": RESOLUTION  # Use the selected resolution
#             }

#             # Make the POST request to the API
#             response = requests.post(TREND_URL, headers=HEADERS, json=payload)

#             if response.status_code == 200:
#                 # Parse the JSON response
#                 data = response.json()

#                 # Track sensors with no data
#                 no_data_sensors = []

#                 # Transform the data into a DataFrame
#                 for sensor_id, sensor_data in data.items():
#                     if not sensor_data:
#                         no_data_sensors.append(sensor_id)  # Log sensors with no data
#                         continue
#                     sensors_with_data.append(sensor_id)  # Add sensor to the list of sensors with data
#                     sensor_df = pd.DataFrame(sensor_data, columns=["timestamp", sensor_id])
#                     sensor_df["timestamp"] = pd.to_datetime(sensor_df["timestamp"])  # Ensure timestamp is datetime
#                     if df.empty:
#                         df = sensor_df
#                     else:
#                         df = pd.merge(df, sensor_df, on="timestamp", how="outer")  # Merge on timestamp

#                 # Log sensors with no data for this batch
#                 if no_data_sensors:
#                     print(f"No data returned for sensors in batch {batch_idx + 1}: {', '.join(map(str, no_data_sensors))}")
#                 else:
#                     print(f"All sensors in batch {batch_idx + 1} have data.")
#             else:
#                 print(f"Failed to fetch trend data for batch {batch_idx + 1}. HTTP Status Code: {response.status_code}")
#                 print("Response:", response.text)

#             # Respect the API rate limit (no more than 5 calls per minute)
#             time.sleep(12)  # Wait 12 seconds between calls to stay under the limit

#     # Remove duplicates from the list of sensors with data
#     sensors_with_data = list(set(sensors_with_data))
#     # Display the DataFrame
#     print("DataFrame:")
#     print(df.head())
    
#     # Display the list of sensors with data
#     print("\nSensors with data:")
#     print(sensors_with_data)
#     print(f"Total sensors with data: {len(sensors_with_data)}")
    
#     # Save the list of sensors with data (ID and Name) to a CSV file
#     save_sensors_with_data(PLANT_CODE, sensors_with_data, sensors_metadata)

#     # Calculate and display the number of missing values
#     if not df.empty:
#         missing_values = df.isna().sum().sum()
#         print(f"Total missing values in the DataFrame: {missing_values}")

# else:
#     print(f"Failed to fetch sensors. HTTP Status Code: {response_sensors.status_code}")
#     print("Response:", response_sensors.text)

In [97]:
import requests
import pandas as pd
import time
import os
from datetime import datetime, timedelta

# API endpoint URLs
TREND_URL = f"{BASE_URL}/{PLANT_CODE}/data/trend/"
SENSORS_URL = f"{BASE_URL}/{PLANT_CODE}/sensors/{config_version}"

# Define the specific sensors to process
sensor_ids = ["timestamp", "D.171.ISIRR", "172", "D.156.DELTA", "156"]

# Define the file path
file_path = f"data_{PLANT_CODE}_{RESOLUTION}.csv"

# Check if the file exists and has content
if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
    print(f"File '{file_path}' exists and is not empty. Loading data into df.")
    df = pd.read_csv(file_path, parse_dates=["timestamp"], index_col="timestamp")
else:
    print(f"File '{file_path}' does not exist or is empty. Fetching data from the API.")

    # Use the last_day as the start_date
    try:
        start_date = datetime.combine(last_day, datetime.min.time())  # Convert last_day (date) to datetime
    except NameError:
        print("No previous last_day found. Using default start date.")
        start_date = datetime(2025, 4, 15, 0, 0, 0)  # Default start date

    # Define the end date
    end_date = datetime(2025, 5, 15, 23, 59, 59)  # Example end date
    max_interval = timedelta(days=92)  # Maximum interval for resolution 0

    # Split the time range into chunks of the maximum interval
    time_ranges = []
    current_start = start_date
    while current_start < end_date:
        current_end = min(current_start + max_interval, end_date)
        time_ranges.append((current_start, current_end))
        current_start = current_end + timedelta(seconds=1)  # Avoid overlapping intervals

    # Initialize a list to store all sensor data
    all_data = []

    # Loop through each time range
    for time_idx, (start_time, end_time) in enumerate(time_ranges):
        print(f"Processing time range {time_idx + 1}: {start_time} to {end_time}")
        payload = {
            "from": start_time.strftime("%Y-%m-%d %H:%M:%S"),
            "to": end_time.strftime("%Y-%m-%d %H:%M:%S"),
            "sensors": sensor_ids,
            "resolution": RESOLUTION  # Use the selected resolution
        }

        # Make the POST request to the API
        response = requests.post(TREND_URL, headers=HEADERS, json=payload)

        if response.status_code == 200:
            # Parse the JSON response
            data = response.json()

            # Transform the data into a long-format DataFrame
            for sensor_id, sensor_data in data.items():
                if not sensor_data:
                    print(f"No data returned for sensor: {sensor_id}")
                    continue
                sensor_df = pd.DataFrame(sensor_data, columns=["timestamp", "value"])
                sensor_df["timestamp"] = pd.to_datetime(sensor_df["timestamp"])  # Ensure timestamp is datetime
                sensor_df["sensor_id"] = sensor_id  # Add a column to identify the sensor
                all_data.append(sensor_df)  # Append to the list of all data
        else:
            print(f"Failed to fetch trend data. HTTP Status Code: {response.status_code}")
            print("Response:", response.text)

        # Respect the API rate limit (no more than 5 calls per minute)
        time.sleep(12)  # Wait 12 seconds between calls to stay under the limit

    if all_data:
        long_df = pd.concat(all_data, ignore_index=True)

        # Pivot the long-format DataFrame to wide format
        df = long_df.pivot(index="timestamp", columns="sensor_id", values="value")

        # Flatten the multi-level column index (if it exists)
        if isinstance(df.columns, pd.MultiIndex):  # Check if columns are multi-level
            df.columns = df.columns.get_level_values(0)  # Flatten multi-level columns

        # Ensure 'timestamp' is the index
        df.index.name = "timestamp"  # Explicitly name the index as 'timestamp'

        # Save the cleaned DataFrame back to the CSV file
        df.to_csv(file_path, index=True)  # Save with the index

        # Verify the structure of the saved CSV file
        with open(file_path, "r") as f:
            print("Saved CSV File Content:")
            print(f.read())
    else:
        # If no data was fetched, initialize an empty DataFrame
        df = pd.DataFrame()

# Reload the DataFrame to verify the structure
# Reload the DataFrame from the file
try:
    df = pd.read_csv(file_path, parse_dates=["timestamp"], index_col="timestamp")

    # Remove the name from the index
    df.index.name = None  # Remove the name of the index

    # Display the cleaned DataFrame
    print("Cleaned DataFrame:")
    print(df.head())
except ValueError as e:
    print(f"Error loading CSV: {e}")
    print("CSV content might be missing the 'timestamp' column. Please check the file.")

File 'data_P1130_1.csv' exists and is not empty. Loading data into df.
Cleaned DataFrame:
                            156    172  D.156.DELTA  D.171.ISIRR
2022-03-18 07:00:00  47487776.0  3.226        440.0      675.620
2022-03-18 08:00:00  47491120.0  4.607       3344.0      836.143
2022-03-18 09:00:00  47494692.0  5.690       3572.0      939.744
2022-03-18 10:00:00  47498144.0  6.827       3452.0      972.460
2022-03-18 11:00:00  47501184.0  7.632       3040.0      792.876


In [103]:
df.isna().sum()

156            0
172            0
D.156.DELTA    0
D.171.ISIRR    0
dtype: int64

In [102]:
import numpy as np
# Apply forward fill to the "156" column
df["156"] = df["156"].ffill()
# Replace NaN values in the "D.156.DELTA" column with 0
df["D.156.DELTA"] = df["D.156.DELTA"].fillna(0)
df["172"] = df["172"].ffill()
# Function to fill NaN values based on the condition
def fill_missing_values(series):
    for i in range(len(series)):
        if pd.isna(series[i]):  # Check if the current value is NaN
            if i + 1 < len(series) and series[i + 1] == 0:  # If the next value is 0
                series[i] = 0
            else:  # Otherwise, take the average of the surrounding values
                prev_value = series[i - 1] if i - 1 >= 0 else np.nan
                next_value = series[i + 1] if i + 1 < len(series) else np.nan
                series[i] = np.nanmean([prev_value, next_value])  # Average of surrounding values
    return series

df["D.171.ISIRR"] = fill_missing_values(df["D.171.ISIRR"].values)

# Display the first few rows to verify
print(df.head())


                            156    172  D.156.DELTA  D.171.ISIRR
2022-03-18 07:00:00  47487776.0  3.226        440.0      675.620
2022-03-18 08:00:00  47491120.0  4.607       3344.0      836.143
2022-03-18 09:00:00  47494692.0  5.690       3572.0      939.744
2022-03-18 10:00:00  47498144.0  6.827       3452.0      972.460
2022-03-18 11:00:00  47501184.0  7.632       3040.0      792.876


In [110]:
import pandas as pd

# Load the CSV file into a DataFrame
file_path = "/Users/rafaelcrismar/Desktop/auto-ml_Preprocessing/dataexport_20250612T180411.csv"
df_meteo = pd.read_csv(file_path)

# Display the first few rows of the DataFrame
print(df_meteo.head())

                             variable  20220318T0000  20220318T0100  \
0               SARAH DNI - backwards            0.0            0.0   
1   SARAH Diffuse Shortwave Radiation            0.0            0.0   
2  SARAH Diffuse Shortwave Radiation@            0.0            0.0   
3    SARAH Direct Shortwave Radiation            0.0            0.0   
4   SARAH Direct Shortwave Radiation@            0.0            0.0   

   20220318T0200  20220318T0300  20220318T0400  20220318T0500  20220318T0600  \
0            0.0            NaN            NaN            NaN            NaN   
1            0.0            NaN            NaN            NaN            NaN   
2            0.0            0.0            0.0            0.0            0.0   
3            0.0            NaN            NaN            NaN            NaN   
4            0.0            0.0            0.0            0.0            0.0   

   20220318T0700  20220318T0800  ...  20240128T2100  20240128T2200  \
0            NaN      

In [111]:
df_meteo

Unnamed: 0,variable,20220318T0000,20220318T0100,20220318T0200,20220318T0300,20220318T0400,20220318T0500,20220318T0600,20220318T0700,20220318T0800,...,20240128T2100,20240128T2200,20240128T2300,20240129T0000,20240129T0100,20240129T0200,20240129T0300,20240129T0400,20240129T0500,20240129T0600
0,SARAH DNI - backwards,0.0,0.0,0.0,,,,,,763.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,155.0
1,SARAH Diffuse Shortwave Radiation,0.0,0.0,0.0,,,,,,142.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0
2,SARAH Diffuse Shortwave Radiation@,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,358.4164,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.563559
3,SARAH Direct Shortwave Radiation,0.0,0.0,0.0,,,,,,437.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0
4,SARAH Direct Shortwave Radiation@,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,266.91025,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.709076
5,SARAH Global tilted radiation GTI,0.0,0.0,0.0,,,,,,818.3952,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,58.950546
6,SARAH Photosynthetic active radiation,0.0,0.0,0.0,,,,,,277.91998,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.599999
7,SARAH Photosynthetic photon flux density,0.0,0.0,0.0,,,,,,1270.0944,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,43.871998
8,SARAH Shortwave Radiation,0.0,0.0,0.0,,,,,,579.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0
9,METEOSAT Cloud Mask,,,,,,,,,,...,,,,,,,,,,


In [118]:
# Transpose the DataFrame to make columns the index
df_flipped = df_meteo.transpose()

# Display the flipped DataFrame
print("Flipped DataFrame:")
print(df_flipped.head())

Flipped DataFrame:
                                  0                                  1   \
variable       SARAH DNI - backwards  SARAH Diffuse Shortwave Radiation   
20220318T0000                    0.0                                0.0   
20220318T0100                    0.0                                0.0   
20220318T0200                    0.0                                0.0   
20220318T0300                    NaN                                NaN   

                                               2   \
variable       SARAH Diffuse Shortwave Radiation@   
20220318T0000                                 0.0   
20220318T0100                                 0.0   
20220318T0200                                 0.0   
20220318T0300                                 0.0   

                                             3   \
variable       SARAH Direct Shortwave Radiation   
20220318T0000                               0.0   
20220318T0100                               0.0   
2022031

In [120]:
# Make the first row the column names
df_flipped.columns = df_flipped.iloc[0]  # Set the first row as the header
df_flipped = df_flipped[1:]  # Drop the first row

# Reset the index if needed
df_flipped.reset_index(inplace=True)

# Display the updated DataFrame
print("Updated DataFrame with First Row as Columns:")
print(df_flipped.head())

Updated DataFrame with First Row as Columns:
variable          index SARAH DNI - backwards  \
0         20220318T0000                   0.0   
1         20220318T0100                   0.0   
2         20220318T0200                   0.0   
3         20220318T0300                   NaN   
4         20220318T0400                   NaN   

variable SARAH Diffuse Shortwave Radiation SARAH Diffuse Shortwave Radiation@  \
0                                      0.0                                0.0   
1                                      0.0                                0.0   
2                                      0.0                                0.0   
3                                      NaN                                0.0   
4                                      NaN                                0.0   

variable SARAH Direct Shortwave Radiation SARAH Direct Shortwave Radiation@  \
0                                     0.0                               0.0   
1              

In [None]:
# Reset the current index to drop it
df_flipped.reset_index(drop=True, inplace=True)

# Set the "index" column as the new index
df_flipped.set_index("index", inplace=True)

# Ensure the new index is in datetime format
df_flipped.index = pd.to_datetime(df_flipped.index, errors="coerce")

# Remove the name of the index
df_flipped.index.name = None
# Explicitly remove the name of the columns index
df_flipped.columns.name = None


# Display the updated DataFrame
print("Updated DataFrame:")
print(df_flipped.head())

Updated DataFrame:
variable            SARAH DNI - backwards SARAH Diffuse Shortwave Radiation  \
2022-03-18 00:00:00                   0.0                               0.0   
2022-03-18 01:00:00                   0.0                               0.0   
2022-03-18 02:00:00                   0.0                               0.0   
2022-03-18 03:00:00                   NaN                               NaN   
2022-03-18 04:00:00                   NaN                               NaN   

variable            SARAH Diffuse Shortwave Radiation@  \
2022-03-18 00:00:00                                0.0   
2022-03-18 01:00:00                                0.0   
2022-03-18 02:00:00                                0.0   
2022-03-18 03:00:00                                0.0   
2022-03-18 04:00:00                                0.0   

variable            SARAH Direct Shortwave Radiation  \
2022-03-18 00:00:00                              0.0   
2022-03-18 01:00:00                          

In [127]:
# Explicitly remove the name of the columns index
df_flipped.columns.name = None

In [128]:
df_flipped

Unnamed: 0,SARAH DNI - backwards,SARAH Diffuse Shortwave Radiation,SARAH Diffuse Shortwave Radiation@,SARAH Direct Shortwave Radiation,SARAH Direct Shortwave Radiation@,SARAH Global tilted radiation GTI,SARAH Photosynthetic active radiation,SARAH Photosynthetic photon flux density,SARAH Shortwave Radiation,METEOSAT Cloud Mask,...,METEOSAT Cloud Top Height,METEOSAT DNI - backwards,METEOSAT Diffuse Shortwave Clearsky Radiation,METEOSAT Diffuse Shortwave Radiation,METEOSAT Direct Shortwave Radiation,METEOSAT Shortwave Radiation,NEMS4 Sunshine Duration,NEMS4 Cloud Cover Total,METEOSAT Clearsky Shortwave,SARAH Clearsky Shortwave
2022-03-18 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,0.0,,0.0
2022-03-18 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,0.0,,0.0
2022-03-18 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,13.0,,0.0
2022-03-18 03:00:00,,,0.0,,0.0,,,,,,...,,,,,,,0.0,46.0,,
2022-03-18 04:00:00,,,0.0,,0.0,,,,,,...,,,,,,,0.0,25.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-29 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,62.0,,0.0
2024-01-29 03:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,87.0,,0.0
2024-01-29 04:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,76.0,,0.0
2024-01-29 05:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,46.0,,0.0


In [109]:
# Identify the columns to pivot (all columns starting from "20220318T0000" onwards)
columns_to_pivot = df_meteo.columns[df_meteo.columns.get_loc("20220318T0000"):]

# Pivot the DataFrame
df_meteo_pivoted = df_meteo.melt(
    id_vars=[col for col in df_meteo.columns if col not in columns_to_pivot],  # Keep other columns as identifiers
    value_vars=columns_to_pivot,  # Columns to pivot
    var_name="datetime",          # Name for the new column holding the pivoted column names
    value_name="value"            # Name for the new column holding the values
)

# Display the pivoted DataFrame
print("Pivoted DataFrame:")
print(df_meteo_pivoted.head())

Pivoted DataFrame:
  location       lat       lon  asl                           variable  unit  \
0      NaN  44.80000  27.50000  NaN              SARAH DNI - backwards  W/m²   
1      NaN  44.80000  27.50000  NaN  SARAH Diffuse Shortwave Radiation  W/m²   
2      NaN  44.80000  27.50000  NaN  SARAH Diffuse Shortwave Radiation  W/m²   
3      NaN  44.80000  27.50000  NaN   SARAH Direct Shortwave Radiation  W/m²   
4      NaN  44.80000  27.50000  NaN   SARAH Direct Shortwave Radiation  W/m²   

         level timeResolution aggregation       datetime value  
0          sfc         hourly        none  20220318T0000   0.0  
1          sfc         hourly        none  20220318T0000   0.0  
2  instant@sfc         hourly        none  20220318T0000   0.0  
3          sfc         hourly        none  20220318T0000   0.0  
4  instant@sfc         hourly        none  20220318T0000   0.0  


In [63]:
# Save the DataFrame to a CSV file
df.to_csv("output_data.csv", index=True)  # Save with the index

print("DataFrame saved to 'output_data.csv'")

DataFrame saved to 'output_data.csv'


In [141]:
df=df.drop(columns=["156"])

In [143]:
df_flipped.tail(50)

Unnamed: 0,SARAH DNI - backwards,SARAH Diffuse Shortwave Radiation,SARAH Diffuse Shortwave Radiation@,SARAH Direct Shortwave Radiation,SARAH Direct Shortwave Radiation@,SARAH Global tilted radiation GTI,SARAH Photosynthetic active radiation,SARAH Photosynthetic photon flux density,SARAH Shortwave Radiation,METEOSAT Cloud Mask,...,METEOSAT Cloud Top Height,METEOSAT DNI - backwards,METEOSAT Diffuse Shortwave Clearsky Radiation,METEOSAT Diffuse Shortwave Radiation,METEOSAT Direct Shortwave Radiation,METEOSAT Shortwave Radiation,NEMS4 Sunshine Duration,NEMS4 Cloud Cover Total,METEOSAT Clearsky Shortwave,SARAH Clearsky Shortwave
2024-01-27 05:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,0.0,100.0,,0.0
2024-01-27 06:00:00,91.0,10.0,14.98839,6.0,5.709768,39.825687,7.68,35.0976,16.0,,...,,,,,,,0.0,100.0,,18.0
2024-01-27 07:00:00,60.0,66.0,105.291695,11.0,43.979805,132.16699,36.96,168.9072,77.0,,...,,,,,,,0.0,100.0,,143.0
2024-01-27 08:00:00,26.0,111.0,51.2756,7.0,102.54393,139.47292,56.64,258.84482,118.0,,...,,,,,,,0.0,100.0,,280.0
2024-01-27 09:00:00,65.0,183.0,24.205858,25.0,212.15198,263.06085,99.84,456.2688,208.0,,...,,,,,,,0.0,100.0,,384.0
2024-01-27 10:00:00,6.0,156.0,115.13462,2.0,50.518333,152.59004,75.84,346.5888,158.0,,...,,,,,,,15.151516,74.0,,442.0
2024-01-27 11:00:00,12.0,159.0,121.96616,5.0,38.944176,160.96909,78.72,359.75043,164.0,,...,,,,,,,39.999996,33.0,,446.0
2024-01-27 12:00:00,30.0,126.0,108.36172,12.0,17.41034,152.31721,66.24,302.7168,138.0,,...,,,,,,,0.0,100.0,,396.0
2024-01-27 13:00:00,240.0,100.0,82.61765,72.0,57.34321,290.71066,82.56,377.2992,172.0,,...,,,,,,,0.0,100.0,,297.0
2024-01-27 14:00:00,22.0,68.0,43.09473,5.0,2.08234,80.796005,35.04,160.13281,73.0,,...,,,,,,,0.0,100.0,,163.0


In [142]:
df.loc["2023-02-10"]

Unnamed: 0,172,D.156.DELTA,D.171.ISIRR
2023-02-10 00:00:00,-9.846,0.0,1.843
2023-02-10 01:00:00,-9.846,0.0,1.874
2023-02-10 02:00:00,-9.846,0.0,1.851
2023-02-10 03:00:00,-9.846,0.0,1.833
2023-02-10 04:00:00,-9.846,0.0,1.85
2023-02-10 05:00:00,-9.846,48.0,26.368
2023-02-10 06:00:00,-8.977,724.0,204.776
2023-02-10 07:00:00,-6.101,2016.0,427.565
2023-02-10 08:00:00,-1.333,2760.0,593.535
2023-02-10 09:00:00,3.126,2740.0,808.19


In [144]:
# Merge df and df_flipped on their indices, keeping all records from df
df_merged = df.merge(df_flipped, how="left", left_index=True, right_index=True)

# Display the merged DataFrame
print("Merged DataFrame:")
print(df_merged.head())

Merged DataFrame:
                       172  D.156.DELTA  D.171.ISIRR SARAH DNI - backwards  \
2022-03-18 07:00:00  3.226        440.0      675.620                   NaN   
2022-03-18 08:00:00  4.607       3344.0      836.143                 763.0   
2022-03-18 09:00:00  5.690       3572.0      939.744                 795.0   
2022-03-18 10:00:00  6.827       3452.0      972.460                 817.0   
2022-03-18 11:00:00  7.632       3040.0      792.876                 818.0   

                    SARAH Diffuse Shortwave Radiation  \
2022-03-18 07:00:00                               NaN   
2022-03-18 08:00:00                             142.0   
2022-03-18 09:00:00                             151.0   
2022-03-18 10:00:00                             156.0   
2022-03-18 11:00:00                             157.0   

                    SARAH Diffuse Shortwave Radiation@  \
2022-03-18 07:00:00                                0.0   
2022-03-18 08:00:00                           358.4164

In [166]:
df_filtered.to_csv("df_filteredd.csv", index=True)  # Save the merged DataFrame to a CSV file

In [150]:
# Select rows up to the specified timestamp
df_filtered = df_merged.loc[:"2024-01-29 06:00:00"]

# Display the filtered DataFrame
print("Filtered DataFrame:")
print(df_filtered.head())
print(df_filtered.tail())

Filtered DataFrame:
                       172  D.156.DELTA  D.171.ISIRR SARAH DNI - backwards  \
2022-03-18 07:00:00  3.226        440.0      675.620                   NaN   
2022-03-18 08:00:00  4.607       3344.0      836.143                 763.0   
2022-03-18 09:00:00  5.690       3572.0      939.744                 795.0   
2022-03-18 10:00:00  6.827       3452.0      972.460                 817.0   
2022-03-18 11:00:00  7.632       3040.0      792.876                 818.0   

                    SARAH Diffuse Shortwave Radiation  \
2022-03-18 07:00:00                               NaN   
2022-03-18 08:00:00                             142.0   
2022-03-18 09:00:00                             151.0   
2022-03-18 10:00:00                             156.0   
2022-03-18 11:00:00                             157.0   

                    SARAH Diffuse Shortwave Radiation@  \
2022-03-18 07:00:00                                0.0   
2022-03-18 08:00:00                           358.41

In [167]:
import requests
import pandas as pd

# Define the API URL
url = "https://my.meteoblue.com/packages/multimodel-1h?apikey=BxCsIi1U6N8SoM3w&lat=47.098&lon=15&asl=925&format=json&tz=GMT&forecast_days=7&history_days=4"

# Make the GET request to the API
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    
    # Extract the 'time' and other variables from the 'data' field
    time = data["data"]["time"]  # Time column
    variables = {key: value for key, value in data["data"].items() if key != "time"}  # Other variables
    
    # Create a DataFrame
    df = pd.DataFrame(variables, index=pd.to_datetime(time))  # Use 'time' as the index
    df.index.name = None  # Remove the index name for cleaner output
    
    # Display the DataFrame
    print("DataFrame:")
    print(df.head())
else:
    print(f"Failed to fetch data. HTTP Status Code: {response.status_code}")
    print("Response:", response.text)

KeyError: 'data'

In [None]:
import requests
import pandas as pd

# Define the API URL
url = "https://my.meteoblue.com/packages/multimodel-1h?apikey=BxCsIi1U6N8SoM3w&lat=44.791225&lon=27.506639&asl=925&format=json&tz=GMT&forecast_days=7&history_days=4"

# Make the GET request to the API
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    
    # Debug: Print the entire response to inspect its structure
    print("API Response:")
    print(data)
    
    # Check if 'data' or 'metadata' exists in the response
    if "data" in data:
        time = data["data"]["time"]  # Time column
        variables = {key: value for key, value in data["data"].items() if key != "time"}  # Other variables
        
        # Create a DataFrame
        df = pd.DataFrame(variables, index=pd.to_datetime(time))  # Use 'time' as the index
        df.index.name = None  # Remove the index name for cleaner output
        
        # Display the DataFrame
        print("DataFrame:")
        print(df.head())
    elif "metadata" in data:
        # Process metadata if 'data' is missing
        metadata = data["metadata"]
        print("Metadata:")
        print(metadata)
        
        # Load metadata into a DataFrame
        metadata_df = pd.DataFrame.from_dict(metadata, orient='index', columns=['Value']).reset_index()
        metadata_df.columns = ['Key', 'Value']  # Rename columns for clarity
        
        # Display the metadata DataFrame
        print("Metadata DataFrame:")
        print(metadata_df)
    else:
        print("Neither 'data' nor 'metadata' keys are present in the response. Please check the API response structure.")
else:
    print(f"Failed to fetch data. HTTP Status Code: {response.status_code}")
    print("Response:", response.text)

API Response:
{'metadata': {'modelrun_updatetime_utc': '2025-06-16 20:46', 'name': '', 'height': 925, 'timezone_abbrevation': 'GMT', 'latitude': 47.098, 'modelrun_utc': ['2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 00:00', '2025-06-16 00:00', '2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 00:00', '2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 18:00', '2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 12:00', '2025-06-16 00:00', '2025-04-07 00:00', '2025-04-06 00:00', '2025-06-16 00:00', '2025-06-16 12:00'], 'models': ['NEMS4', 'NEMS12', 'NEMSGLOBAL', 'NEMS12_E', 'NEMSGLOBAL_E', 'NMM4', 'NMM22', 'IFS025', 'ICON', 'ICONEU', 'ICOND2', 'HARMONIE', 'GFS05', 'MFGLOBAL', 'MFEU', 'UMGLOBAL10', 'GEM15', 'COSMOM5', 'COSMOI2', 'WRFGR', 'AIFS025'], 'gridpointelevation': [945, 846, 895, 846, 895, 934, 816, 885, 1042, 927, 939, 978, 1024, 1046, 956, 884, 1069, 913, 959, 1033, 885], 'longitude': 15.0, 'utc_timeoffset': 0.0,

In [156]:
# Convert all possible columns in df_filtered to numeric
df_filtered = df_filtered.apply(pd.to_numeric, errors="coerce")

# Display the updated DataFrame
print("DataFrame with Numeric Columns:")
print(df_filtered.head())

DataFrame with Numeric Columns:
                       172  D.156.DELTA  D.171.ISIRR  SARAH DNI - backwards  \
2022-03-18 07:00:00  3.226        440.0      675.620                    NaN   
2022-03-18 08:00:00  4.607       3344.0      836.143                  763.0   
2022-03-18 09:00:00  5.690       3572.0      939.744                  795.0   
2022-03-18 10:00:00  6.827       3452.0      972.460                  817.0   
2022-03-18 11:00:00  7.632       3040.0      792.876                  818.0   

                     SARAH Diffuse Shortwave Radiation  \
2022-03-18 07:00:00                                NaN   
2022-03-18 08:00:00                              142.0   
2022-03-18 09:00:00                              151.0   
2022-03-18 10:00:00                              156.0   
2022-03-18 11:00:00                              157.0   

                     SARAH Diffuse Shortwave Radiation@  \
2022-03-18 07:00:00                             0.00000   
2022-03-18 08:00:00       

In [164]:
# Drop columns where the entire column is NaN
df_filtered = df_filtered.dropna(axis=1, how="all")

# Display the updated DataFrame
print("DataFrame after dropping columns with all NaN values:")
print(df_filtered.head())

DataFrame after dropping columns with all NaN values:
                       172  D.156.DELTA  D.171.ISIRR  SARAH DNI - backwards  \
2022-03-18 07:00:00  3.226        440.0      675.620                    NaN   
2022-03-18 08:00:00  4.607       3344.0      836.143                  763.0   
2022-03-18 09:00:00  5.690       3572.0      939.744                  795.0   
2022-03-18 10:00:00  6.827       3452.0      972.460                  817.0   
2022-03-18 11:00:00  7.632       3040.0      792.876                  818.0   

                     SARAH Diffuse Shortwave Radiation  \
2022-03-18 07:00:00                                NaN   
2022-03-18 08:00:00                              142.0   
2022-03-18 09:00:00                              151.0   
2022-03-18 10:00:00                              156.0   
2022-03-18 11:00:00                              157.0   

                     SARAH Diffuse Shortwave Radiation@  \
2022-03-18 07:00:00                             0.00000   
2022

In [165]:
df_filtered.isna().sum()

172                                           0
D.156.DELTA                                   0
D.171.ISIRR                                   0
SARAH DNI - backwards                       186
SARAH Diffuse Shortwave Radiation           186
SARAH Diffuse Shortwave Radiation@            0
SARAH Direct Shortwave Radiation            186
SARAH Direct Shortwave Radiation@             0
SARAH Global tilted radiation GTI           186
SARAH Photosynthetic active radiation       186
SARAH Photosynthetic photon flux density    186
SARAH Shortwave Radiation                   186
NEMS4 Sunshine Duration                      36
NEMS4 Cloud Cover Total                      36
SARAH Clearsky Shortwave                    186
dtype: int64

In [158]:
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 16359 entries, 2022-03-18 07:00:00 to 2024-01-29 06:00:00
Data columns (total 24 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   172                                            16359 non-null  float64
 1   D.156.DELTA                                    16359 non-null  float64
 2   D.171.ISIRR                                    16359 non-null  float64
 3   SARAH DNI - backwards                          16173 non-null  float64
 4   SARAH Diffuse Shortwave Radiation              16173 non-null  float64
 5   SARAH Diffuse Shortwave Radiation@             16359 non-null  float64
 6   SARAH Direct Shortwave Radiation               16173 non-null  float64
 7   SARAH Direct Shortwave Radiation@              16359 non-null  float64
 8   SARAH Global tilted radiation GTI              16173 non-null  float64
 9   SARAH Photosynt

In [None]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta

# API endpoint URLs
TREND_URL = f"{BASE_URL}/{PLANT_CODE}/data/trend/"
SENSORS_URL = f"{BASE_URL}/{PLANT_CODE}/sensors/{config_version}"

# Define the specific sensors to process
sensor_ids = ["timestamp", "D.171.ISIRR", "172", "D.156.DELTA"]

# Initialize an empty DataFrame
df = pd.DataFrame()

# Set both start_date and end_date to yesterday
yesterday = datetime.now() - timedelta(days=1)
start_date = yesterday.replace(hour=0, minute=0, second=0)  # Start of yesterday
end_date = yesterday.replace(hour=23, minute=59, second=59)  # End of yesterday

# Define the maximum interval for resolution 0
max_interval = timedelta(days=92)

# Split the time range into chunks of the maximum interval
time_ranges = []
current_start = start_date
while current_start < end_date:
    current_end = min(current_start + max_interval, end_date)
    time_ranges.append((current_start, current_end))
    current_start = current_end + timedelta(seconds=1)  # Avoid overlapping intervals

# Initialize a list to store all sensor data
all_data = []

# Loop through each time range
for time_idx, (start_time, end_time) in enumerate(time_ranges):
    print(f"Processing time range {time_idx + 1}: {start_time} to {end_time}")
    payload = {
        "from": start_time.strftime("%Y-%m-%d %H:%M:%S"),
        "to": end_time.strftime("%Y-%m-%d %H:%M:%S"),
        "sensors": sensor_ids,
        "resolution": RESOLUTION  # Use the selected resolution
    }

    # Make the POST request to the API
    response = requests.post(TREND_URL, headers=HEADERS, json=payload)

    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()

        # Transform the data into a long-format DataFrame
        for sensor_id, sensor_data in data.items():
            if not sensor_data:
                print(f"No data returned for sensor: {sensor_id}")
                continue
            sensor_df = pd.DataFrame(sensor_data, columns=["timestamp", "value"])
            sensor_df["timestamp"] = pd.to_datetime(sensor_df["timestamp"])  # Ensure timestamp is datetime
            sensor_df["sensor_id"] = sensor_id  # Add a column to identify the sensor
            all_data.append(sensor_df)  # Append to the list of all data
    else:
        print(f"Failed to fetch trend data. HTTP Status Code: {response.status_code}")
        print("Response:", response.text)

    # Respect the API rate limit (no more than 5 calls per minute)
    time.sleep(12)  # Wait 12 seconds between calls to stay under the limit

# Combine all data into a single DataFrame
if all_data:
    long_df = pd.concat(all_data, ignore_index=True)

    # Pivot the long-format DataFrame to wide format
    df = long_df.pivot(index="timestamp", columns="sensor_id", values="value")

# Save the DataFrame to a CSV file
df.to_csv(f"data_{PLANT_CODE}_{RESOLUTION}.csv")
print(f"Data saved to data_{PLANT_CODE}_{RESOLUTION}.csv")

In [32]:
import pandas as pd

# Load the CSV file into a DataFrame
file_path = "/Users/rafaelcrismar/Desktop/auto-ml_Preprocessing/csv_44.791225_27.506639_fixed_23_180_PT15M.csv"
df2 = pd.read_csv(file_path, header=None)

# Display the first few rows of the DataFrame
print(df2.head())

         0             1             2             3             4   \
0  air_temp  clearsky_dhi  clearsky_dni  clearsky_ghi  clearsky_gti   
1        19             0             0             0             0   
2        19             0             0             0             0   
3        18             0             0             0             0   
4        18             0             0             0             0   

              5    6    7    8    9                   10                 11  \
0  cloud_opacity  dhi  dni  ghi  gti  precipitation_rate  relative_humidity   
1           30.2    0    0    0    0                   0               82.9   
2           33.8    0    0    0    0                   0               83.3   
3           53.4    0    0    0    0                   0               83.7   
4           75.1    0    0    0    0                   0               84.1   

                          12      13  
0                 period_end  period  
1  2024-06-01T00:15:

In [31]:
import pandas as pd

# Load the CSV file into a DataFrame
file_path = "/Users/rafaelcrismar/Desktop/auto-ml_Preprocessing/data_P1130_0.csv"
df = pd.read_csv(file_path, header=None)

# Display the first few rows of the DataFrame
print(df.head())

                     0        1            2            3
0            timestamp  172.000  D.156.DELTA  D.171.ISIRR
1  2022-03-18 07:00:00    2.529          NaN      152.901
2  2022-03-18 07:15:00    3.044          NaN      162.154
3  2022-03-18 07:30:00    3.237          NaN      174.519
4  2022-03-18 07:45:00    4.031        440.0      186.045


In [56]:
import pandas as pd

# Load the CSV file into a DataFrame
file_path = "weather_data.csv"  # Path to the saved CSV file
df_weather = pd.read_csv(file_path)

# Display the first few rows of the DataFrame
print(df_weather.head())

               time  clearskyshortwave_backwards  clearskyshortwave_instant  \
0  2025-06-11 00:00                         0.00                       0.00   
1  2025-06-11 01:00                         0.00                       0.00   
2  2025-06-11 02:00                         0.00                       0.00   
3  2025-06-11 03:00                         6.57                      31.06   
4  2025-06-11 04:00                        95.94                     168.61   

   convective_precipitation  dif_backwards  dif_instant  \
0                       0.0           0.00         0.00   
1                       0.0           0.00         0.00   
2                       0.0           0.00         0.00   
3                       0.0           1.60         7.51   
4                       0.0          26.47        39.89   

   directshortwaveradiation_backwards  directshortwaveradiation_instant  \
0                                0.00                              0.00   
1                   

In [57]:
df_weather.columns

Index(['time', 'clearskyshortwave_backwards', 'clearskyshortwave_instant',
       'convective_precipitation', 'dif_backwards', 'dif_instant',
       'directshortwaveradiation_backwards',
       'directshortwaveradiation_instant', 'dni_backwards', 'dni_instant',
       'extraterrestrialradiation_backwards',
       'extraterrestrialradiation_instant', 'felttemperature',
       'fog_probability', 'ghi_backwards', 'ghi_instant', 'gni_backwards',
       'gni_instant', 'highclouds', 'isdaylight', 'lowclouds', 'midclouds',
       'pictocode', 'precipitation', 'precipitation_probability', 'rainspot',
       'relativehumidity', 'sealevelpressure', 'snowfraction', 'sunshinetime',
       'temperature', 'totalcloudcover', 'uvindex', 'visibility',
       'winddirection', 'windspeed'],
      dtype='object')

In [59]:
import requests
import pandas as pd

# Define the API endpoint and parameters
api_key = "BxCsIi1U6N8SoM3w"  # Replace with your actual API key
base_url = "https://my.meteoblue.com/history/point"
params = {
    "start": "2022-03-18",  # Start date
    "end": "2025-06-10",  # End date
    "lat": 44.791225,  # Latitude
    "lon": 27.50663,  # Longitude
    "variable": "temperature",  # Weather variable
    "domain": "ERA5T",  # Weather model domain
    "format": "csv",  # Response format
    "apikey": api_key  # API key
}

# Make the API call
response = requests.get(base_url, params=params)

# Check the response status
if response.status_code == 200:
    # Save the response content to a CSV file
    with open("historical_weather_data.csv", "wb") as file:
        file.write(response.content)
    print("Historical weather data saved to 'historical_weather_data.csv'")

    # Load the CSV file into a DataFrame
    df_historical = pd.read_csv("historical_weather_data.csv")
    print("Historical weather data loaded into DataFrame:")
    print(df_historical.head())
else:
    print(f"Failed to fetch data. HTTP Status Code: {response.status_code}")
    print("Response:", response.text)

Failed to fetch data. HTTP Status Code: 403
Response: {"error_message":"Access to the package API is not available for this user","error":true}


In [36]:
# For df: Make the first row the column headers
df.columns = df.iloc[0]  # Set the first row as the column headers
df = df[1:]  # Drop the first row as it is now the header
df.reset_index(drop=True, inplace=True)  # Reset the index

# For df2: Make the first row the column headers
df2.columns = df2.iloc[0]  # Set the first row as the column headers
df2 = df2[1:]  # Drop the first row as it is now the header
df2.reset_index(drop=True, inplace=True)  # Reset the index

# Display the modified DataFrames
print("Modified df:")
print(df.head())
print("\nModified df2:")
print(df2.head())

Modified df:
0            timestamp  172.0 D.156.DELTA D.171.ISIRR
0  2022-03-18 07:00:00  2.529         NaN     152.901
1  2022-03-18 07:15:00  3.044         NaN     162.154
2  2022-03-18 07:30:00  3.237         NaN     174.519
3  2022-03-18 07:45:00  4.031       440.0     186.045
4  2022-03-18 08:00:00  3.915       772.0     196.427

Modified df2:
0 air_temp clearsky_dhi clearsky_dni clearsky_ghi clearsky_gti cloud_opacity  \
0       19            0            0            0            0          30.2   
1       19            0            0            0            0          33.8   
2       18            0            0            0            0          53.4   
3       18            0            0            0            0          75.1   
4       18            0            0            0            0          17.7   

0 dhi dni ghi gti precipitation_rate relative_humidity  \
0   0   0   0   0                  0              82.9   
1   0   0   0   0                  0              8

In [37]:
# Convert the 'period_end' column to the desired format
df2["period_end"] = pd.to_datetime(df2["period_end"]).dt.strftime("%Y-%m-%d %H:%M:%S")

# Display the modified DataFrame
print(df2.head())

0 air_temp clearsky_dhi clearsky_dni clearsky_ghi clearsky_gti cloud_opacity  \
0       19            0            0            0            0          30.2   
1       19            0            0            0            0          33.8   
2       18            0            0            0            0          53.4   
3       18            0            0            0            0          75.1   
4       18            0            0            0            0          17.7   

0 dhi dni ghi gti precipitation_rate relative_humidity           period_end  \
0   0   0   0   0                  0              82.9  2024-06-01 00:15:00   
1   0   0   0   0                  0              83.3  2024-06-01 00:30:00   
2   0   0   0   0                  0              83.7  2024-06-01 00:45:00   
3   0   0   0   0                  0              84.1  2024-06-01 01:00:00   
4   0   0   0   0                  0              84.3  2024-06-01 01:15:00   

0 period  
0  PT15M  
1  PT15M  
2  PT15M  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["period_end"] = pd.to_datetime(df2["period_end"]).dt.strftime("%Y-%m-%d %H:%M:%S")


In [38]:
# For df2: Make 'period_end' the index and convert it to a datetime object
df2["period_end"] = pd.to_datetime(df2["period_end"])  # Convert to datetime
df2.set_index("period_end", inplace=True)  # Set 'period_end' as the index

# For df: Make 'timestamp' the index and convert it to a datetime object
df["timestamp"] = pd.to_datetime(df["timestamp"])  # Convert to datetime
df.set_index("timestamp", inplace=True)  # Set 'timestamp' as the index

# Display the modified DataFrames
print("Modified df2:")
print(df2.head())
print("\nModified df:")
print(df.head())

Modified df2:
0                   air_temp clearsky_dhi clearsky_dni clearsky_ghi  \
period_end                                                            
2024-06-01 00:15:00       19            0            0            0   
2024-06-01 00:30:00       19            0            0            0   
2024-06-01 00:45:00       18            0            0            0   
2024-06-01 01:00:00       18            0            0            0   
2024-06-01 01:15:00       18            0            0            0   

0                   clearsky_gti cloud_opacity dhi dni ghi gti  \
period_end                                                       
2024-06-01 00:15:00            0          30.2   0   0   0   0   
2024-06-01 00:30:00            0          33.8   0   0   0   0   
2024-06-01 00:45:00            0          53.4   0   0   0   0   
2024-06-01 01:00:00            0          75.1   0   0   0   0   
2024-06-01 01:15:00            0          17.7   0   0   0   0   

0                   preci

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["period_end"] = pd.to_datetime(df2["period_end"])  # Convert to datetime
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["timestamp"] = pd.to_datetime(df["timestamp"])  # Convert to datetime


In [39]:
df

Unnamed: 0_level_0,172.0,D.156.DELTA,D.171.ISIRR
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-03-18 07:00:00,2.529,,152.901
2022-03-18 07:15:00,3.044,,162.154
2022-03-18 07:30:00,3.237,,174.519
2022-03-18 07:45:00,4.031,440.0,186.045
2022-03-18 08:00:00,3.915,772.0,196.427
...,...,...,...
2025-05-15 22:45:00,12.057,0.0,0.597
2025-05-15 23:00:00,11.757,0.0,0.594
2025-05-15 23:15:00,11.642,0.0,0.593
2025-05-15 23:30:00,11.565,0.0,0.59


In [40]:
df2

Unnamed: 0_level_0,air_temp,clearsky_dhi,clearsky_dni,clearsky_ghi,clearsky_gti,cloud_opacity,dhi,dni,ghi,gti,precipitation_rate,relative_humidity,period
period_end,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-06-01 00:15:00,19,0,0,0,0,30.2,0,0,0,0,0,82.9,PT15M
2024-06-01 00:30:00,19,0,0,0,0,33.8,0,0,0,0,0,83.3,PT15M
2024-06-01 00:45:00,18,0,0,0,0,53.4,0,0,0,0,0,83.7,PT15M
2024-06-01 01:00:00,18,0,0,0,0,75.1,0,0,0,0,0,84.1,PT15M
2024-06-01 01:15:00,18,0,0,0,0,17.7,0,0,0,0,0,84.3,PT15M
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-02 23:00:00,19,0,0,0,0,24.5,0,0,0,0,0,79.5,PT15M
2025-06-02 23:15:00,19,0,0,0,0,26.6,0,0,0,0,0,79.9,PT15M
2025-06-02 23:30:00,19,0,0,0,0,27.1,0,0,0,0,0,80.4,PT15M
2025-06-02 23:45:00,19,0,0,0,0,27.7,0,0,0,0,0,80.9,PT15M


In [41]:
# Merge df2 with df on the index, keeping all rows from df2 (right join)
merged_df = pd.merge(df, df2, left_index=True, right_index=True, how="right")

# Display the merged DataFrame
print("Merged DataFrame:")
print(merged_df.head())

Merged DataFrame:
0                     172.0 D.156.DELTA D.171.ISIRR air_temp clearsky_dhi  \
period_end                                                                  
2024-06-01 00:15:00  16.338         0.0       0.605       19            0   
2024-06-01 00:30:00  17.015         0.0       0.609       19            0   
2024-06-01 00:45:00  16.936         0.0       0.607       18            0   
2024-06-01 01:00:00  16.384         0.0       0.607       18            0   
2024-06-01 01:15:00  16.143         0.0       0.605       18            0   

0                   clearsky_dni clearsky_ghi clearsky_gti cloud_opacity dhi  \
period_end                                                                     
2024-06-01 00:15:00            0            0            0          30.2   0   
2024-06-01 00:30:00            0            0            0          33.8   0   
2024-06-01 00:45:00            0            0            0          53.4   0   
2024-06-01 01:00:00            0          

In [46]:
merged_df.drop(columns=["period"], inplace=True)

In [None]:
# Define the cutoff date
cutoff_date = pd.to_datetime("2025-05-15 23:45:00")

# Filter the merged DataFrame to keep only rows before the cutoff date
filtered_df = merged_df[merged_df.index < cutoff_date]

# Display the filtered DataFrame
print("Filtered DataFrame:")
print(filtered_df.tail())

Filtered DataFrame:
0                     172.0 D.156.DELTA D.171.ISIRR air_temp clearsky_dhi  \
period_end                                                                  
2024-06-01 00:15:00  16.338         0.0       0.605       19            0   
2024-06-01 00:30:00  17.015         0.0       0.609       19            0   
2024-06-01 00:45:00  16.936         0.0       0.607       18            0   
2024-06-01 01:00:00  16.384         0.0       0.607       18            0   
2024-06-01 01:15:00  16.143         0.0       0.605       18            0   

0                   clearsky_dni clearsky_ghi clearsky_gti cloud_opacity dhi  \
period_end                                                                     
2024-06-01 00:15:00            0            0            0          30.2   0   
2024-06-01 00:30:00            0            0            0          33.8   0   
2024-06-01 00:45:00            0            0            0          53.4   0   
2024-06-01 01:00:00            0        

In [51]:
print(filtered_df.tail(50))

0                     172.0 D.156.DELTA D.171.ISIRR air_temp clearsky_dhi  \
period_end                                                                  
2025-05-15 11:15:00  27.674       836.0      253.32       23          107   
2025-05-15 11:30:00  27.442       820.0     249.522       23          103   
2025-05-15 11:45:00  27.487       804.0     242.388       23           98   
2025-05-15 12:00:00  27.959       780.0       235.0       23           93   
2025-05-15 12:15:00  28.016       756.0     227.217       23           90   
2025-05-15 12:30:00  27.975       744.0      218.79       23           89   
2025-05-15 12:45:00  28.105       704.0     210.386       23           88   
2025-05-15 13:00:00  28.302       684.0     205.246       24           86   
2025-05-15 13:15:00  28.416       584.0     169.523       24           85   
2025-05-15 13:30:00  28.216       596.0     174.768       24           83   
2025-05-15 13:45:00  27.867       548.0     159.389       24           82   

In [53]:
filtered_df.head(50)

Unnamed: 0_level_0,172.0,D.156.DELTA,D.171.ISIRR,air_temp,clearsky_dhi,clearsky_dni,clearsky_ghi,clearsky_gti,cloud_opacity,dhi,dni,ghi,gti,precipitation_rate,relative_humidity
period_end,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-06-01 00:15:00,16.338,0.0,0.605,19,0,0,0,0,30.2,0,0,0,0,0,82.9
2024-06-01 00:30:00,17.015,0.0,0.609,19,0,0,0,0,33.8,0,0,0,0,0,83.3
2024-06-01 00:45:00,16.936,0.0,0.607,18,0,0,0,0,53.4,0,0,0,0,0,83.7
2024-06-01 01:00:00,16.384,0.0,0.607,18,0,0,0,0,75.1,0,0,0,0,0,84.1
2024-06-01 01:15:00,16.143,0.0,0.605,18,0,0,0,0,17.7,0,0,0,0,0,84.3
2024-06-01 01:30:00,16.192,0.0,0.608,18,0,0,0,0,0.0,0,0,0,0,0,84.4
2024-06-01 01:45:00,16.523,0.0,0.606,18,0,0,0,0,7.2,0,0,0,0,0,84.6
2024-06-01 02:00:00,16.125,0.0,0.612,18,0,0,0,0,18.9,0,0,0,0,0,84.7
2024-06-01 02:15:00,16.636,0.0,1.12,18,0,0,0,0,17.4,0,0,0,0,0,84.7
2024-06-01 02:30:00,17.215,4.0,3.325,18,0,0,0,0,33.8,0,0,0,0,0,84.7


In [54]:
# Save the filtered DataFrame to a CSV file
filtered_df.to_csv("filtered_data.csv")

print("Filtered DataFrame saved to 'filtered_data.csv'")

Filtered DataFrame saved to 'filtered_data.csv'


In [23]:
df.set_index("timestamp", inplace=True)

In [29]:
df.to_csv(f"data_{PLANT_CODE}_{RESOLUTION}.csv")

In [None]:
import pandas as pd
df = pd.read_csv('data_P1130_2.csv')

In [5]:
df.head()

Unnamed: 0,timestamp,100,1000,1011,1017,1021,1027,1031,1037,104,...,V941,V951,V961,V978,V979,V981,V993,V994,V996,V998
0,2022-03-18,27.719,1151989,20.146,26.649,19.875,29.93,19.522,32.877,21.287,...,3.067,3.067,3.067,284.574,0.039,102.094,288.152,0.049,94.763,102.462
1,2022-03-19,26.993,1152508,15.039,26.2,11.961,29.667,14.825,31.844,16.376,...,3.685,3.685,3.685,257.708,0.25,93.908,262.022,0.247,87.523,93.872
2,2022-03-20,26.148,1153129,17.98,26.23,15.384,29.474,17.485,31.637,16.777,...,3.853,3.853,3.853,306.43,0.146,95.131,307.965,0.144,87.641,93.898
3,2022-03-21,26.015,1153783,16.536,27.287,19.134,29.426,17.604,31.816,17.683,...,3.074,3.074,3.074,328.139,0.152,95.812,330.374,0.157,88.426,92.512
4,2022-03-22,26.456,1154462,18.33,27.309,18.249,29.787,17.626,32.235,19.207,...,3.543,3.543,3.543,340.998,0.277,97.367,342.816,0.273,89.729,96.263


In [13]:
df[["D.171.ISIRR","172","D.156.DELTA"]]

Unnamed: 0,D.171.ISIRR,172,D.156.DELTA
0,5508.632,3.491,18544
1,5423.467,2.482,19544
2,6365.863,3.864,23328
3,6768.402,7.132,24456
4,6921.315,10.439,25516
...,...,...,...
1150,7235.732,15.373,23344
1151,7166.721,13.519,23888
1152,7365.096,14.688,24096
1153,6501.263,14.699,21212


In [14]:
df.tail()

Unnamed: 0,timestamp,100,1000,1011,1017,1021,1027,1031,1037,104,...,V941,V951,V961,V978,V979,V981,V993,V994,V996,V998
1150,2025-05-11,26.29,1616901,13.138,29.654,13.893,26.08,15.542,31.179,12.923,...,3.005,3.005,3.005,313.3,0.044,85.571,315.258,0.048,78.93,84.076
1151,2025-05-12,431.185,1617540,13.447,30.346,13.756,27.253,14.117,31.883,55.639,...,2.938,2.938,2.938,311.708,0.102,85.956,307.94,0.104,77.841,82.178
1152,2025-05-13,1644.815,1618195,134.836,1243.556,177.001,1644.685,136.468,1245.111,177.74,...,2.943,2.943,2.943,327.996,0.179,,331.664,0.171,81.579,83.941
1153,2025-05-14,26.755,1618766,11.887,30.632,14.683,27.16,11.936,32.436,12.842,...,2.992,2.992,2.992,291.945,0.276,88.747,292.173,0.273,,86.69
1154,2025-05-15,25.043,1619500,17.274,30.104,16.257,27.117,15.866,32.436,17.231,...,2.916,2.916,2.916,367.545,0.305,86.986,369.531,0.3,80.168,84.738
