In [0]:
import subprocess
import sys

def install(package):
    """Install ``package`` with pip into the interpreter running this notebook.

    Runs ``<sys.executable> -m pip install <package>``.

    Args:
        package: Requirement string handed to pip unchanged.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install"]
    pip_command.append(package)
    subprocess.check_call(pip_command)

def update_package(package):
    """Upgrade ``package`` with pip in the interpreter running this notebook.

    Runs ``<sys.executable> -m pip install --upgrade <package>``.

    Args:
        package: Requirement string handed to pip unchanged.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    upgrade_args = ["install", "--upgrade", package]
    subprocess.check_call([sys.executable, "-m", "pip", *upgrade_args])

In [0]:
# Bring pip itself up to date before installing anything else.
update_package("pip")

# Install the GAMS Python API together with its `transfer` extra, pinned to
# 48.6.1. One pinned requirement is sufficient: a preceding unpinned
# `install("gamsapi")` would first fetch whatever release is newest and then be
# replaced by this pin, costing time without changing the final environment.
# NOTE(review): the pin appears chosen to match the 48.6 GAMS system directory
# configured in this notebook — keep the two in step when upgrading.
install("gamsapi[transfer]==48.6.1")

In [0]:
import os
import gams.transfer as gt
import gams as g
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [0]:
# GAMS installation and vSPD working tree on the mounted volume.
sysdir = '/Volumes/datahub/marketmonitoring/marketmonitoring/Corey/Programs/gams48.6_linux_x64_64_sfx'
vSPD = '/Volumes/datahub/marketmonitoring/marketmonitoring/Corey/vSPD'

# Folders (with trailing slash) that hold the source GDX files and the rewritten ones.
InputDir = f'{vSPD}/Input/'
OutputDir = f'{vSPD}/Output/'

# Prefix prepended to each output file name.
output_prefix = "Modified_"

# Position-matched lists: rows for pnode_todup[k] are copied under the name pnode_dup[k].
pnode_todup = ['KOE1101 KSF0']
pnode_dup = ['KOE1101 KSF2']

# Create both folders if they are missing (dirname strips the trailing slash).
for folder in (InputDir, OutputDir):
    os.makedirs(os.path.dirname(folder), exist_ok=True)

In [0]:
# Downloads all gdx files linked from a specific web page

def DownloadGdx(url):
    """Copy every ``.gdx`` file linked from the page at ``url`` into ``InputDir``.

    The page is fetched with ``requests`` and parsed with BeautifulSoup. Every
    ``<a>`` whose ``href`` ends in ``.gdx`` is collected, duplicates are
    dropped, and the remaining links are copied one by one, in sorted order,
    with ``dbutils.fs.cp``.

    Args:
        url: Address of the HTML page that lists the GDX files. Link targets
            are resolved against this address, so root-relative and absolute
            ``href`` values both work.

    Raises:
        requests.exceptions.HTTPError: If the page request returns an error
            status (previously an error page was parsed silently and simply
            produced no downloads).
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # Local import keeps this cell self-contained; urljoin is standard library.
    from urllib.parse import urljoin

    # Fetch the index page; fail loudly on HTTP errors instead of parsing them.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # Parse the HTML and gather the distinct .gdx link targets.
    soup = BeautifulSoup(response.text, 'html.parser')
    gdx_hrefs = set()
    for link in soup.find_all('a'):
        href = link.get('href')
        if href is not None and href.endswith('.gdx'):
            gdx_hrefs.add(href)

    # Copy the files in a stable (sorted) order to the mount point.
    for href in sorted(gdx_hrefs):
        # urljoin resolves the href against the page address rather than
        # assuming a fixed host, so the function works for any `url`.
        dbutils.fs.cp(urljoin(url, href), InputDir)

# Page on emi.ea.govt.nz whose linked .gdx files are fetched (path ends in GDX/2025).
url = 'https://www.emi.ea.govt.nz/Wholesale/Datasets/DispatchAndPricing/GDX/2025'
DownloadGdx(url)

In [0]:
# Files to process: regular files in InputDir with a .gdx extension, in sorted
# order. Filtering keeps stray files (hidden files, partial downloads, notes)
# away from the GDX reader; sorting makes the processing order reproducible,
# since os.listdir returns entries in arbitrary order.
BaseList = sorted(
    f for f in os.listdir(InputDir)
    if f.lower().endswith('.gdx') and os.path.isfile(os.path.join(InputDir, f))
)

# Output name stems: everything after the first underscore of the file name,
# or the whole name when it contains no underscore.
DateList = [s.split('_', 1)[1] if '_' in s else s for s in BaseList]
DateList[:5]

In [0]:
# Create a GAMS workspace bound to the GAMS installation at `sysdir`.
# NOTE(review): `ws` is not referenced by the other cells visible here —
# confirm it is still needed.
ws = g.GamsWorkspace(system_directory=sysdir)

In [0]:
# Rewrites every input GDX file with the nodes/offers named in pnode_todup
# copied under the matching names in pnode_dup, scales the copies' MW
# quantities, and writes the result to OutputDir with output_prefix.

# Each entry of pnode_todup needs a replacement name at the same position in
# pnode_dup; fail before any file is touched rather than part-way through.
if len(pnode_dup) < len(pnode_todup):
    raise ValueError('pnode_dup must provide a name for every entry in pnode_todup')
pnode_pairs = list(zip(pnode_todup, pnode_dup))

# Multiplier applied to the MW quantities of the duplicated offers.
# NOTE(review): the previous comment in this cell described the added plant as
# "5x larger" while the code multiplies by 3.0 — confirm which is intended.
dup_scale = 3.0

for i in range(len(BaseList)):
    print(i+1, 'of', len(BaseList))

    # Read the GDX file
    inputFile = InputDir + BaseList[i]
    m = gt.Container(inputFile, system_directory=sysdir)

    # Symbol names in file order (s_and_p), split by kind (sets / parameters),
    # with each symbol's records (AllDFs) and description (RecordDisc).
    sets = []
    parameters = []
    s_and_p = []
    AllDFs = {}
    RecordDisc = {}
    for entry in m:
        j = entry[0]
        sym = m[j]
        is_set = sym._gams_type == 0
        s_and_p.append(j)
        RecordDisc[j] = sym._description

        if sym.records is None:
            # Empty symbol: build an empty frame whose columns follow the same
            # convention as populated symbols — plain string domain names and,
            # for non-sets, a lower-case 'value' column (the code below looks
            # up 'value', and the write step expects string column names).
            empty_cols = list(sym.domain_names)
            if not is_set:
                empty_cols.append('value')
            AllDFs[j] = pd.DataFrame(columns=empty_cols)
        elif is_set:
            # Sets keep only their domain columns.
            AllDFs[j] = sym.records[sym.domain_labels].copy(deep=True)
        else:
            AllDFs[j] = sym.records.copy(deep=True)

        if is_set:
            sets.append(j)
        elif sym._gams_type == 1:
            parameters.append(j)
        # NOTE(review): symbols that are neither sets nor parameters are read
        # but never written to the output file — confirm that is intended.

    # The i_node set is the only set where the n set is labelled as *
    # (column 'uni'), so it is duplicated separately from the n/o symbols.
    i_node = AllDFs['i_node']
    for src, dst in pnode_pairs:
        # The column is categorical: register the new name before assigning it.
        i_node['uni'] = i_node['uni'].cat.add_categories(dst)
        node_rows = i_node[i_node['uni'] == src].copy()
        node_rows.loc[node_rows['uni'] == src, 'uni'] = dst
        AllDFs['i_node'] = pd.concat([AllDFs['i_node'], node_rows]).reset_index(drop=True)

    # Duplicate the matching rows of every symbol indexed by node ('n') and/or
    # offer ('o').
    for symbol_name, df in AllDFs.items():
        # Column order of the frame gives a deterministic choice of the column
        # used to select rows (a set intersection has no defined order).
        dup_cols = [c for c in df.columns if c in ('n', 'o')]
        if not dup_cols or df.empty:
            continue

        for src, dst in pnode_pairs:
            dup_rows = df[df[dup_cols[0]] == src].copy()
            if dup_rows.empty:
                continue
            for col in dup_cols:
                # Replacing the whole column yields plain string values, so no
                # category has to be registered first.
                dup_rows[col] = dst
            df = pd.concat([df, dup_rows]).reset_index(drop=True)

        AllDFs[symbol_name] = df

    # Scale the MW quantities of the duplicated offers.
    for pn in pnode_dup:
        offers = AllDFs['i_dateTimeEnergyOffer']
        offers.loc[(offers['o'] == pn) & (offers['bidofrCmpnt'] == 'limitMW'), 'value'] *= dup_scale

        offer_pars = AllDFs['i_dateTimeOfferParameter']
        offer_pars.loc[
            (offer_pars['o'] == pn)
            & (offer_pars['offerPar'].isin(['initialMW','resrvGenMax','potentialMW'])),
            'value',
        ] *= dup_scale

    outputFile = OutputDir + output_prefix + DateList[i]

    # Rebuild the symbols in a fresh container and write it out.
    m = gt.Container(system_directory=sysdir)
    for j in s_and_p:
        # 'uni' is the records-column label of the universe domain '*'.
        colnames = ['*' if col == 'uni' else col for col in AllDFs[j].columns]
        if j in sets:
            gt.Set(m, j, colnames, records = AllDFs[j], description=RecordDisc[j])
        elif j in parameters:
            # The last column holds the values; the rest are the domain.
            # Descriptions are carried over for parameters as well as sets.
            gt.Parameter(m, j, colnames[0:-1], records = AllDFs[j], description=RecordDisc[j])

    m.write(outputFile)