# MESSAGE-ix matrix improvement tool
TODO: investigate why running more scaling steps worsens the results.

In [4]:
import numpy as np
import pandas as pd
from pyomo.environ import *
from datetime import datetime
import xarray as xr

import matplotlib.pyplot as plt

def showme(df):
    """Return the ``val`` column of ``df`` pivoted into a wide table.

    The innermost index level is moved to the columns (the default of
    ``Series.unstack``), which makes a (row, col)-indexed coefficient
    list readable as a matrix.
    """
    values = df["val"]
    wide = values.unstack()
    return wide

# Default inputs for make_scale() below.
# CSV file holding the matrix coefficients; make_scale() reads its
# 'row', 'col' and 'val' columns.
path = 'data/matrix_transport.csv'
# Threshold on the log10 magnitude of a coefficient: solv() keeps entries
# whose log value is >= bound or <= -bound.
bound = 2
# Number of scaling passes (each pass scales rows, then columns).
# NOTE(review): the calls visible in this notebook pass a literal step
# count instead of this variable.
steps = 1

def solv(df, bound):
    """
    Select the entries of a log-coefficient dataframe that fall
    outside the open interval (-bound, bound).

    ``df`` is expected to carry the log values in its ``val`` column;
    rows with ``val >= bound`` or ``val <= -bound`` are returned,
    all other rows are dropped.
    """
    too_large = df["val"] >= bound
    too_small = df["val"] <= -bound
    out_of_bounds = too_large | too_small
    return df.loc[out_of_bounds]

def make_logdf(df):
    """
    Replace every non-zero entry of ``df['val']`` by log10 of its
    absolute value and return ``df``.

    The dataframe is modified in place (callers that need the original
    values must pass a copy). Entries equal to 0 are left at 0 because
    their logarithm is undefined.

    The column is cast to float before the logs are written: writing
    float results into an integer-typed column relies on silent
    upcasting, which pandas has deprecated.
    """
    values = df["val"].astype(float)
    nonzero = values != 0
    values.loc[nonzero] = np.log10(values.loc[nonzero].abs())
    # Re-attach the column so the caller's dataframe is updated in place.
    df["val"] = values
    return df

def read_ix(df, lvl):
    """Return the labels of index level ``lvl`` of ``df``, one per row.

    ``lvl`` may be a level name or a level position.
    """
    index = df.index
    return index.get_level_values(lvl)
    
def disp_range(text,df):
    """
    Print the range of base-10 exponents found in ``df['val']``.

    Non-zero entries are converted to log10 of their absolute value;
    zeros are kept as 0. The smallest and largest resulting value are
    printed as ``<text>: [ lo , hi ]``. ``df`` itself is not modified.

    The reduction is done on the ``val`` column explicitly: reducing the
    whole DataFrame with ``np.min``/``np.max`` yields a per-column Series
    or a scalar depending on the pandas version.
    """
    log_vals = df["val"].astype(float)
    nonzero = log_vals != 0
    log_vals.loc[nonzero] = np.log10(log_vals.loc[nonzero].abs())

    # np.int32 truncates toward zero, so e.g. -8.5 is shown as -8.
    lowest = np.int32(log_vals.min())
    highest = np.int32(log_vals.max())
    print(f"{text}:","[",lowest,",",highest,"]")

def make_scale(path, bound, steps, show_range=True):
    # Load the matrix coefficient
    data       = (pd.read_csv(path)
                   .set_index(['row','col'],drop=True)[['val']])

    matrix = data
    
    if show_range == True:
        disp_range("Unscaled range     ",matrix)
    
    scalers = {"row":[], "col":[]}
    counter = 0
    while counter < steps:
        for s in scalers.keys():
            # print(matrix)
            # calculate log base 10 of the absolute value of the matrix
            log_absmatrix = matrix.copy()
            log_absmatrix = make_logdf(log_absmatrix)

            # Create matrix with small and large coefficient
            log_absmatrix_solv  = solv(log_absmatrix, bound=bound)

            # Populating row scaler
            objective_ix = "_obj" if s == "row" else "constobj"
            index_solv = [e for e in read_ix(log_absmatrix_solv,s) if e != objective_ix]  
            #print(index_solv)

            SFs = {k:[] for k in index_solv}
            for k in SFs.keys():
                index_val = (read_ix(log_absmatrix,s) == k)
                dflog_val = log_absmatrix.loc[index_val,"val"]
                lb,ub = np.int32(min(dflog_val)),np.int32(max(dflog_val))
                mid = np.int32(np.mean([lb,ub]))
                SFs[k] = 10.0**(-mid)
            
            # Create DataFrame of row scaler
            return_index = list(set(read_ix(log_absmatrix,s)))
            if counter == 0:
                multiplier = 1
            else:
                multiplier = scalers[s].reindex(return_index).fillna(1)
            step_scaler = pd.DataFrame(data=SFs, index=["val"]).transpose()
            step_scaler.index.name = s
            step_scaler = step_scaler.reindex(return_index).fillna(1)
            scalers[s] = step_scaler.mul(multiplier) # <--- this summarize multipliers from previous steps
            
            # Create new matrix with scaled rows
            matrix = matrix.mul(step_scaler) # <--- remember that this matrix is already changed, so double counting
        
        if show_range == True:
            disp_range(f"Scaled range step {counter}",matrix)
    
        # Increment the counter
        counter += 1
    # generating prescaler arguments for GAMS
    prescale_args = {}
    for key,df_scaler in scalers.items():
        df_scaler = df_scaler.loc[df_scaler["val"] != 1]
        scaler_dict = df_scaler["val"].to_dict()
        for k,v in scaler_dict.items():
            if k == '_obj':
                k_ = "_obj.scale"
            elif k == 'constobj':
                k_ = "constobj.scale"
            else:
                k_ = k.replace("(", ".scale('")
                k_ = k_.replace(")", "')")
                k_ = k_.replace(",", "','")
            prescale_args.update({k_:v})    

    return(prescale_args)

In [7]:
# Run ten scaling passes and print the exponent range after each pass.
make_scale(path,bound,10,show_range=True)

Unscaled range     : [ -8 , 0 ]
Scaled range step 0: [ -4 , 4 ]
Scaled range step 1: [ -3 , 3 ]
Scaled range step 2: [ -3 , 2 ]
Scaled range step 3: [ -3 , 2 ]
Scaled range step 4: [ -3 , 2 ]
Scaled range step 5: [ -3 , 2 ]
Scaled range step 6: [ -3 , 2 ]
Scaled range step 7: [ -3 , 2 ]
Scaled range step 8: [ -3 , 2 ]
Scaled range step 9: [ -3 , 2 ]


{"demand.scale('new-york')": 0.001,
 "supply.scale('san-diego')": 0.001,
 "demand.scale('topeka')": 0.0010000000000000002,
 "x.scale('san-diego','chicago')": 10.0,
 "x.scale('san-diego','new-york')": 100000.0,
 "x.scale('seattle','new-york')": 10.0,
 "x.scale('seattle','topeka')": 10.0,
 "x.scale('san-diego','topeka')": 100000.0}

In [6]:
# Run a single scaling pass and print the exponent range before and after.
make_scale(path,bound,1,show_range=True)

Unscaled range     : [ -8 , 0 ]
Scaled range step 0: [ -4 , 4 ]


{"x.scale('san-diego','new-york')": 10000.0,
 "x.scale('san-diego','topeka')": 1000.0}

**Prototype - Dev**

In [None]:
import numpy as np
import pandas as pd
from pyomo.environ import *
from datetime import datetime
import xarray as xr

import matplotlib.pyplot as plt

def showme(df):
    """Show the ``val`` column of ``df`` as a wide table.

    The innermost index level is turned into columns via
    ``Series.unstack``.
    """
    val_column = df["val"]
    table = val_column.unstack()
    return table

# Absolute bound on the base-10 exponent of a matrix coefficient:
# solv() flags entries whose log10 magnitude is >= bound or <= -bound.
bound = 4 

def solv(df, bound):
    """
    Keep only the out-of-bounds entries of a log-coefficient dataframe.

    A row of ``df`` is kept when its ``val`` (the log of a coefficient)
    is at least ``bound`` or at most ``-bound``.
    """
    log_values = df["val"]
    keep = (log_values >= bound) | (log_values <= -bound)
    return df.loc[keep]

def make_logdf(df):
    """
    Overwrite the non-zero entries of ``df['val']`` with log10 of their
    absolute value and return ``df``.

    ``df`` is changed in place, so pass a copy to keep the original
    coefficients. Zero entries stay 0 (their logarithm is undefined).

    The column is converted to float up front because storing float logs
    in an integer-typed column depends on silent upcasting, which pandas
    has deprecated.
    """
    values = df["val"].astype(float)
    nonzero = values != 0
    values.loc[nonzero] = np.log10(values.loc[nonzero].abs())
    # Write the column back so the caller's dataframe is updated in place.
    df["val"] = values
    return df
    

# Load the whole matrix: the CSV columns 'row' and 'col' become the index,
# only the coefficient column 'val' is kept.
matrix       = (pd.read_csv('data/matrix_transport.csv')
               .set_index(['row','col'],drop=True)[['val']])

# calculate log base 10 of the absolute value of the matrix
# (make_logdf changes its argument in place, hence the copy)
log_absmatrix = matrix.copy()
log_absmatrix = make_logdf(log_absmatrix)

# Create matrix with small and large coefficient
log_absmatrix_solv  = solv(log_absmatrix,bound=bound)

# SCALE BY ROW
# Populating row scaler: every row holding a flagged coefficient,
# except the objective row "_obj"
rows_solv = list(set(log_absmatrix_solv.index.get_level_values("row"))
                 -set(["_obj"]))

RSFs = {row:[] for row in rows_solv}
for k in RSFs.keys():
    index_row = (log_absmatrix.index.get_level_values('row') == k)
    rval = log_absmatrix.loc[index_row,"val"]
    lb,ub = min(rval),max(rval)
    # the factor moves the midpoint of the row's exponent range to 0
    mid = np.mean([lb,ub])
    RSFs[k] = 10**(-mid)

# Create DataFrame of row scaler
row_scaler = pd.DataFrame(data=RSFs, index=["val"]).transpose()
row_scaler.index.name = 'row'

# Create new matrix with scaled rows
matrix0 = matrix.copy()
index_mod = matrix0.index.get_level_values('row').isin(row_scaler.index)
matrix0.loc[index_mod] = matrix0.loc[index_mod].mul(row_scaler)

# SCALE BY COL
# Populating col scaler, based on the row-scaled matrix
log_absmatrix0 = matrix0.copy()
log_absmatrix0 = make_logdf(log_absmatrix0)
log_absmatrix0_solv = solv(log_absmatrix0,bound=bound)

# Columns still holding a flagged coefficient AFTER row scaling, except
# the objective column "constobj". (Selecting from log_absmatrix_solv here
# would use the unscaled matrix and ignore the row scaling done above.)
cols_solv = list(set(log_absmatrix0_solv.index.get_level_values("col"))
                 -set(["constobj"]))
CSFs = {col:[] for col in cols_solv}
for k in CSFs.keys():
    index_col = (log_absmatrix0.index.get_level_values('col') == k)
    cval = log_absmatrix0.loc[index_col,"val"]
    lb,ub = min(cval),max(cval)
    mid = np.mean([lb,ub])
    CSFs[k] = 10**(-mid)

# Create DataFrame of col scaler
col_scaler = pd.DataFrame(data=CSFs, index=["val"]).transpose()
col_scaler.index.name = 'col'

# Create new matrix with scaled columns
new_matrix = matrix0.copy()
index_mod = new_matrix.index.get_level_values('col').isin(col_scaler.index)
new_matrix.loc[index_mod] = new_matrix.loc[index_mod].mul(col_scaler)

# Translate the matrix labels into GAMS ".scale" attribute names,
# e.g. "x(a,b)" -> "x.scale('a','b')"
scalers = [RSFs,CSFs]
prescale_args = {}
for s in scalers:
    for k,v in s.items():
        if "(" in k:
            k_ = k.replace("(", ".scale('")
            k_ = k_.replace(")", "')")
            k_ = k_.replace(",", "','")
        else:
            # labels without an index list (e.g. "_obj", "constobj")
            # only need the attribute suffix
            k_ = f"{k}.scale"
        prescale_args.update({k_:v})

# One "name= value;" assignment per line. The text is formatted
# explicitly instead of post-processing str(prescale_args): the dict repr
# renders NumPy scalars as "np.float64(...)" on NumPy 2 and keeps quotes
# around keys that contain no single quote.
gams_args = (";\n".join(f"{k_}= {float(v)!r}"
                        for k_,v in prescale_args.items())
             +';'
            )