In [1]:
def get_weektable_from_excel(date:str, demand:str):
    """
    Download one weekly half-hourly workbook from ema.gov.sg and return the
    columns belonging to a single demand series.

    NOTE: WORKS FOR TABLES FROM 29 SEP 2014 ONWARDS ONLY

    Parameters
    ----------
    date : str
        DD/MM/YYYY format -- first date of the week, by which the file is
        stored on ema.gov.sg.
    demand : str
        One of 'system', 'nem_actual', 'nem_forecast'.

    Returns
    -------
    pandas.DataFrame or None
        The first 48 rows of the sheet (presumably the 48 half-hourly periods
        of a day -- confirm against the workbook layout), restricted to the
        columns whose second header level matches the requested series.
        None is returned, after printing a message, when 'system' is
        requested but the workbook has no system-demand column.

    Raises
    ------
    ValueError
        If `demand` is not one of the accepted values, or `date` does not
        parse as DD/MM/YYYY.
    KeyError
        If none of the known header labels for 'nem_actual' / 'nem_forecast'
        exist in the workbook.
    Exception
        Whatever pandas raised for the last candidate URL when the workbook
        could not be read from any of them.
    """
    import pandas as pd
    import time as tm

    # Header labels to try, in order, for each accepted `demand` value.
    # The second label of each NEM series is the fallback header name.
    column_labels = {
        'system': [' System Demand (Actual)'],
        'nem_actual': [' NEM Demand (Actual)', 'NEM Demand '],
        'nem_forecast': [' NEM Demand (Forecast)', 'VSTLF'],
    }
    # Validate before touching the network so a typo fails fast instead of
    # downloading a file and then silently returning None.
    if demand not in column_labels:
        raise ValueError(
            f"demand must be one of {list(column_labels)}, got {demand!r}"
        )

    # base_url + year/ + ['20141222','29_Sep_2014'] + .xls
    base_url = "https://www.ema.gov.sg/content/dam/corporate/resources/statistics/half-hourly-data/"
    date_tm = tm.strptime(date,"%d/%m/%Y")

    candidate_urls = [
        base_url + tm.strftime("%Y/%Y%m%d.xls", date_tm),
        base_url + tm.strftime("%Y/%d_%b_%Y.xls", date_tm),
    ]
    # 02 Jan 2017 error: that week's file sits under the 2016 folder.
    # Only offer this URL for that exact date; using it as a catch-all would
    # hand back the 2017 week for any other date whose file is missing.
    if (date_tm.tm_year, date_tm.tm_mon, date_tm.tm_mday) == (2017, 1, 2):
        candidate_urls.append(base_url + "2016/20170102.xls")

    tempdf = None
    last_error = None
    for url in candidate_urls:
        try:
            # Two header rows (sheet rows 1 and 4) form a 2-level column index.
            tempdf = pd.read_excel(url, index_col=0, header=[1,4]).iloc[:48]
            break
        except Exception as err:  # not bare: let KeyboardInterrupt through
            last_error = err
    if tempdf is None:
        raise last_error

    for label in column_labels[demand]:
        try:
            return tempdf.xs(label, axis=1, level=1)
        except KeyError:
            continue

    if demand == 'system':
        # Best-effort behaviour kept from the original: report and return None.
        print('No system demand column!')
        return None
    raise KeyError(
        f"none of {column_labels[demand]} found in workbook for {date}"
    )

In [2]:
def yearly_excel(startdate:str, enddate:str, demand:str):
    """
    Download weekly demand tables and write one Excel workbook per calendar year.

    Starting at `startdate` and stepping 7 days at a time while the date is
    not after `enddate`, each week is fetched with get_weektable_from_excel
    and the weeks of a calendar year are joined side by side (axis=1).
    Each year is written to '<year>[<demand>].xlsx' in the current working
    directory, including the final, possibly partial, year.

    Parameters
    ----------
    startdate : str
        DD/MM/YYYY; first week to fetch. It is fetched even if it lies after
        `enddate`.
    enddate : str
        DD/MM/YYYY; the last week fetched is the last 7-day step that is not
        after this date.
    demand : str
        Passed through to get_weektable_from_excel and used in the file name.

    Returns
    -------
    None
    """
    import pandas as pd
    import datetime as dt

    def _save_year(year, frames):
        # Join the year's weeks in one concat rather than growing a frame
        # week by week.
        if not frames:
            return
        year_df = pd.concat(frames, axis=1)
        # assumes the column labels are datetimes (a DatetimeIndex) -- TODO confirm
        year_df.columns = year_df.columns.strftime("%d/%m/%Y")
        year_df.to_excel(str(year) + f'[{demand}]' + ".xlsx")

    current = dt.datetime.strptime(startdate, "%d/%m/%Y").date()
    last = dt.datetime.strptime(enddate, "%d/%m/%Y").date()
    step = dt.timedelta(days=7)

    frames = []
    frames_year = current.year
    while True:
        week_df = get_weektable_from_excel(date=current.strftime('%d/%m/%Y'), demand=demand)

        # A week falling in a new calendar year closes out the previous one.
        if current.year != frames_year:
            _save_year(frames_year, frames)
            frames = []
            frames_year = current.year

        # get_weektable_from_excel returns None when the requested column is
        # missing from a workbook; skip such weeks instead of failing later.
        if week_df is not None:
            frames.append(week_df)

        current += step
        if current > last:
            break

    # Flush the final year; otherwise the weeks after the last year boundary
    # are downloaded and then discarded.
    _save_year(frames_year, frames)

In [6]:
# Date range in DD/MM/YYYY, the format yearly_excel parses.
# yearly_excel steps 7 days at a time from startdate, so startdate should be
# the first date of a week, which is how the files are named on ema.gov.sg.
startdate = "05/01/2015"
enddate = "03/01/2023"

# Downloads one workbook per week over HTTPS (network access required) and
# writes '<year>[nem_forecast].xlsx' files to the current working directory.
yearly_excel(startdate,enddate,'nem_forecast')