In [1]:
import pandas as pd
import requests
from datetime import datetime
import os
import json
from pytz import timezone
from urllib3.exceptions import MaxRetryError, ConnectionError
from requests.exceptions import RequestException

try:
    # Fetch today's generation mix from the GSO chart endpoint and store it in
    # data_gso/<YYYY-MM-DD>.csv. Re-running the cell on the same day refreshes
    # that file instead of stacking duplicate rows.

    # Resolve "today" once so the request window and the output file name can
    # never disagree if the cell happens to run across midnight.
    # NOTE(review): this is the machine's local date; `pytz.timezone` is
    # imported but unused -- confirm whether a fixed timezone was intended.
    today = datetime.now()
    today_str = today.strftime('%d/%m/%Y')

    # Specify the URL and data
    url = "https://www.gso.org.my/SystemData/CurrentGen.aspx/GetChartDataSource"
    data = {
        "Fromdate": today_str,
        "Todate": today_str
    }

    # Set headers
    headers = {
        "Content-Type": "application/json; charset=utf-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Make a POST request. requests never times out on its own, so a stalled
    # connection would hang the cell forever without an explicit timeout.
    response = requests.post(url, data=json.dumps(data), headers=headers, timeout=30)

    # Check the response status
    if response.status_code == 200:
        # The response body is JSON whose "d" field is itself a JSON-encoded
        # string, hence the double decode. `or []` tolerates a null payload.
        chartobjdata_list = json.loads(json.loads(response.text)["d"]) or []
        flattened_data = [
            {
                "datetime": entry["DT"],
                "Coal": entry["Coal"],
                "Gas": entry["Gas"],
                "CoGen": entry["CoGen"],
                "Oil": entry["Oil"],
                "Hydro": entry["Hydro"],
                "Solar": entry["Solar"]
            } for entry in chartobjdata_list
        ]

        if flattened_data:
            # Convert flattened data to DataFrame
            df = pd.DataFrame(flattened_data)
            df['datetime'] = pd.to_datetime(df['datetime'])

            # Split "datetime" into "date" and "time" columns
            df['date'] = df['datetime'].dt.date
            df['time'] = df['datetime'].dt.time

            # Select specific columns
            df = df.loc[:, ['date', 'time', 'Coal', 'Gas', 'CoGen', 'Oil', 'Hydro', 'Solar']]

            # Save data to CSV
            data_dir = 'data_gso'
            os.makedirs(data_dir, exist_ok=True)

            file_date = today
            file_name = file_date.strftime('%Y-%m-%d.csv')
            file_path = os.path.join(data_dir, file_name)

            if os.path.exists(file_path):
                existing_data = pd.read_csv(file_path, header=0)
                # read_csv returns date/time as plain strings, while `df` holds
                # date/time objects. Normalise the new rows to the same string
                # form (identical to what to_csv writes) so duplicates compare
                # equal; `df` itself is left untouched for later cells.
                new_rows = df.astype({'date': str, 'time': str})
                combined_data = pd.concat([existing_data, new_rows], ignore_index=True)
                # Each request returns the whole day so far, so every timestamp
                # already on disk comes back again. Keep the freshest copy.
                combined_data = (
                    combined_data
                    .drop_duplicates(subset=['date', 'time'], keep='last')
                    .sort_values(by=['date', 'time'])
                )
                combined_data.to_csv(file_path, index=False)
                print(f'Data has been appended to {file_path}')
            else:
                df.to_csv(file_path, index=False)
                print(f'Data has been saved to {file_path}')
        else:
            # An empty payload would otherwise fail with a KeyError on the
            # missing "datetime" column.
            print(f"No chart data was returned for {today_str}.")

    else:
        print("Error: Unable to retrieve chart data. Status Code:", response.status_code)

except (RequestException, ConnectionError, MaxRetryError) as e:
    print(f"An error occurred: {e}")
    # You can add further error handling or logging here.

Data has been saved to data_gso\2025-07-13.csv


## March 2025 to present

In [None]:
import pandas as pd
import requests
from datetime import datetime, timedelta
import os
import json
from urllib3.exceptions import MaxRetryError, ConnectionError
from requests.exceptions import RequestException

# Backfill: fetch the generation mix one day at a time from `start_date` up to
# now and merge the result into a single CSV under data_gso/.

# Specify the URL
url = "https://www.gso.org.my/SystemData/CurrentGen.aspx/GetChartDataSource"

# Set headers
headers = {
    "Content-Type": "application/json; charset=utf-8",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Define the start and end dates
start_date = datetime(2025, 3, 1)
end_date = datetime.now()

# Columns written to the CSV, in order
output_columns = ['date', 'time', 'Coal', 'Gas', 'CoGen', 'Oil', 'Hydro', 'Solar']

# Initialize an empty list to store all fetched data
all_flattened_data = []

# One Session reuses the underlying connection across the per-day requests
# instead of opening a new one for every day in the range.
with requests.Session() as session:
    # Loop through each day from start_date to end_date
    current_date = start_date
    while current_date <= end_date:
        # Fetch data for one day at a time: the window starts and ends on the same date
        from_date_str = current_date.strftime('%d/%m/%Y')
        to_date_str = from_date_str

        data = {
            "Fromdate": from_date_str,
            "Todate": to_date_str
        }

        try:
            # requests never times out on its own; bound each call so one
            # stalled connection cannot hang the whole backfill.
            response = session.post(url, data=json.dumps(data), headers=headers, timeout=30)
        except (RequestException, ConnectionError, MaxRetryError) as e:
            # Stop here but fall through to the save step, so the days that
            # were already downloaded are not thrown away.
            print(f"An error occurred: {e}")
            print(f"Stopped at {from_date_str}; keeping the days fetched so far.")
            break

        # Check the response status
        if response.status_code == 200:
            # The response body is JSON whose "d" field is itself a
            # JSON-encoded string, hence the double decode. `or []` tolerates
            # a null payload.
            chartobjdata_list = json.loads(json.loads(response.text)["d"]) or []
            flattened_data = [
                {
                    "datetime": entry["DT"],
                    "Coal": entry["Coal"],
                    "Gas": entry["Gas"],
                    "CoGen": entry["CoGen"],
                    "Oil": entry["Oil"],
                    "Hydro": entry["Hydro"],
                    "Solar": entry["Solar"]
                } for entry in chartobjdata_list
            ]
            all_flattened_data.extend(flattened_data)
            print(f"Successfully fetched data for {from_date_str}")
        else:
            print(f"Error: Unable to retrieve chart data for {from_date_str}. Status Code: {response.status_code}")

        # Move to the next day
        current_date += timedelta(days=1)

if all_flattened_data:
    # Convert all flattened data to DataFrame
    df = pd.DataFrame(all_flattened_data)
    df['datetime'] = pd.to_datetime(df['datetime'])

    # Split "datetime" into "date" and "time" columns
    df['date'] = df['datetime'].dt.date
    df['time'] = df['datetime'].dt.time

    # Select specific columns
    df = df.loc[:, output_columns]

    # Sort chronologically and drop rows that are exact repeats
    df = df.sort_values(by=['date', 'time']).drop_duplicates()

    # Save data to CSV
    data_dir = 'data_gso'
    os.makedirs(data_dir, exist_ok=True)

    # All days go into one file whose name records the start of the range
    file_name = f"generation_data_from_{start_date.strftime('%Y-%m-%d')}_to_current.csv"
    file_path = os.path.join(data_dir, file_name)

    if os.path.exists(file_path):
        # Read the file as-is. Combining date+time through `parse_dates`
        # removes the original "date" and "time" columns from the frame, which
        # made every previously stored row lose its timestamp on re-save.
        # Rows that already lost their timestamp that way cannot be placed in
        # time any more, so they are dropped rather than carried forward.
        existing_data = pd.read_csv(file_path).dropna(subset=['date', 'time'])

        # read_csv yields date/time as strings; give the new rows the same
        # string form (identical to what to_csv writes) so they compare equal.
        new_rows = df.astype({'date': str, 'time': str})

        combined_data = pd.concat([existing_data, new_rows], ignore_index=True)
        # One row per timestamp; on overlap keep the freshly fetched values.
        combined_data = (
            combined_data
            .drop_duplicates(subset=['date', 'time'], keep='last')
            .sort_values(by=['date', 'time'])
            .loc[:, output_columns]
        )
        combined_data.to_csv(file_path, index=False)
        print(f'Data has been appended and de-duplicated to {file_path}')
    else:
        df.to_csv(file_path, index=False)
        print(f'Data has been saved to {file_path}')
else:
    print("No data was fetched for the specified date range.")

Successfully fetched data for 01/03/2025
Successfully fetched data for 02/03/2025
Successfully fetched data for 03/03/2025
Successfully fetched data for 04/03/2025
