# MESSAGE-ix matrix improvement tool

In [1]:
import numpy as np
import pandas as pd
from pyomo.environ import *
from datetime import datetime
import xarray as xr

import matplotlib.pyplot as plt

def showme(df):
    return df["val"].unstack()

path = 'data/matrix_transport.csv'
bound = 2
steps = 1

def solv(df, bound):
    """
    this function filters matrix coefficient
    dataframe in which the log of the coefficient 
    is lower or higher than the bound
    """
    df_solv = df.loc[(df["val"] >= bound) |
                     (df["val"] <= -bound)]
    return df_solv

def make_logdf(df):
    df.loc[df['val']!=0,'val'] = np.log10(np.absolute(df.loc[df['val']!=0,'val']))
    return df

def read_ix(df, lvl):
    return df.index.get_level_values(lvl)
    
def show_range(text,df):
    log_absdf = df.copy()
    log_absdf.loc[log_absdf['val']!=0,'val']=(np.log10(
        np.absolute(
            log_absdf.loc[log_absdf['val']!=0,'val'])))
    
    print(f"{text}:","[",np.int32(np.min(log_absdf)),",",np.int32(np.max(log_absdf)),"]")

def make_scale(path, bound, steps):
    # Load the matrix coefficient
    data       = (pd.read_csv(path)
                   .set_index(['row','col'],drop=True)[['val']])

    matrix = data
    # TODO: show_range("Unscaled range",matrix)
    show_range("Unscaled range",matrix)
    
    scalers = {"row":[], "col":[]}
    counter = 0
    while counter < steps:
        for s in scalers.keys():
            # calculate log base 10 of the absolute value of the matrix
            log_absmatrix = matrix.copy()
            log_absmatrix = make_logdf(log_absmatrix)

            # Create matrix with small and large coefficient
            log_absmatrix_solv  = solv(log_absmatrix, bound=bound)

            # Populating row scaler
            objective_ix = "_obj" if s == "row" else "constobj"
            index_solv = [e for e in read_ix(log_absmatrix_solv,s) if e != objective_ix] #  
            #print(index_solv)

            SFs = {k:[] for k in index_solv}
            for k in SFs.keys():
                index_val = (read_ix(log_absmatrix,s) == k)
                dflog_val = log_absmatrix.loc[index_val,"val"]
                lb,ub = np.int32(min(dflog_val)),np.int32(max(dflog_val))
                mid = np.int32(np.mean([lb,ub]))
                SFs[k] = 10.0**(-mid)
            #print(SFs)

            # Create DataFrame of row scaler
            scalers[s] = pd.DataFrame(data=SFs, index=["val"]).transpose()
            scalers[s].index.name = s

            # Create new matrix with scaled rows
            index_mod = read_ix(matrix,s).isin(scalers[s].index)
            matrix.loc[index_mod] = matrix.loc[index_mod].mul(scalers[s])

        # generating prescaler arguments for GAMS
        prescale_args = {}
        for key,val in scalers.items():
            scaler_dict = val["val"].to_dict()
            for k,v in scaler_dict.items():
                if k == '_obj':
                    k_ = "_obj.scale"
                elif k == 'constobj':
                    k_ = "constobj.scale"
                else:
                    k_ = k.replace("(", ".scale('")
                    k_ = k_.replace(")", "')")
                    k_ = k_.replace(",", "','")
                prescale_args.update({k_:v})    

        # TODO: show_range("Scaled range  ",matrix)
        show_range("Scaled range  ",matrix)
        # Increment the counter
        counter += 1

    return(prescale_args)

In [2]:
make_scale(path,bound,3)

Unscaled range: [ -8 , 0 ]
Scaled range  : [ -4 , 4 ]
Scaled range  : [ -3 , 3 ]
Scaled range  : [ -3 , 2 ]


{"supply.scale('san-diego')": 0.1,
 "demand.scale('new-york')": 0.1,
 "demand.scale('topeka')": 0.1,
 "x.scale('seattle','new-york')": 1.0,
 "x.scale('seattle','topeka')": 10.0,
 "x.scale('san-diego','new-york')": 1.0,
 "x.scale('san-diego','chicago')": 1.0,
 "x.scale('san-diego','topeka')": 1.0}

In [5]:
scaler["row"]

NameError: name 'scaler' is not defined

In [None]:
make_scale(path,bound,steps)

In [None]:
showme(matrix)

**Prototype - Dev**

In [6]:
import numpy as np
import pandas as pd
from pyomo.environ import *
from datetime import datetime
import xarray as xr

import matplotlib.pyplot as plt

def showme(df):
    return df["val"].unstack()

# determine absolute bound of exponent in matrix coeff
bound = 4 

def solv(df, bound):
    """
    this function filters matrix coefficient
    dataframe in which the log of the coefficient 
    is lower or higher than the bound
    """
    df_solv = df.loc[(df["val"] >= bound) |
                     (df["val"] <= -bound)]
    return df_solv

def make_logdf(df):
    df.loc[df['val']!=0,'val'] = np.log10(np.absolute(df.loc[df['val']!=0,'val']))
    return df
    

# Load the whole matrix
matrix       = (pd.read_csv('data/matrix_transport.csv')
               .set_index(['row','col'],drop=True)[['val']])

# calculate log base 10 of the absolute value of the matrix
log_absmatrix = matrix.copy()
log_absmatrix = make_logdf(log_absmatrix)

# Create matrix with small and large coefficient
log_absmatrix_solv  = solv(log_absmatrix,bound=bound)

# SCALE BY ROW
# Populating row scaler
rows_solv = list(set(log_absmatrix_solv.index.get_level_values("row"))
                 -set(["_obj"]))

RSFs = {row:[] for row in rows_solv}
for k in RSFs.keys():
    index_row = (log_absmatrix.index.get_level_values('row') == k)
    rval = log_absmatrix.loc[index_row,"val"]
    lb,ub = min(rval),max(rval)
    mid = np.mean([lb,ub])
    RSFs[k] = 10**(-mid)

# Create DataFrame of row scaler
row_scaler = pd.DataFrame(data=RSFs, index=["val"]).transpose()
row_scaler.index.name = 'row'

# Create new matrix with scaled rows
matrix0 = matrix.copy()
index_mod = matrix0.index.get_level_values('row').isin(row_scaler.index)
matrix0.loc[index_mod] = matrix0.loc[index_mod].mul(row_scaler)

# SCALE BY COL
# Populating col scaler
log_absmatrix0 = matrix0.copy()
log_absmatrix0 = make_logdf(log_absmatrix0)
log_absmatrix0_solv = solv(log_absmatrix0,bound=bound)

cols_solv = list(set(log_absmatrix_solv.index.get_level_values("col"))
                 -set(["constobj"]))
CSFs = {col:[] for col in cols_solv}
for k in CSFs.keys():
    index_col = (log_absmatrix0.index.get_level_values('col') == k)
    cval = log_absmatrix0.loc[index_col,"val"]
    lb,ub = min(cval),max(cval)
    mid = np.mean([lb,ub])
    CSFs[k] = 10**(-mid)

# Create DataFrame of col scaler
col_scaler = pd.DataFrame(data=CSFs, index=["val"]).transpose()
col_scaler.index.name = 'col'

# Create new matrix with scaled rows
new_matrix = matrix0.copy()
index_mod = new_matrix.index.get_level_values('col').isin(col_scaler.index)
new_matrix.loc[index_mod] = new_matrix.loc[index_mod].mul(col_scaler)

scalers = [RSFs,CSFs]
prescale_args = {}
for s in scalers:
    for k,v in s.items():
        if k == '_obj':
            k_ = "_obj.scale"
        elif k == 'constobj':
            k_ = "constobj.scale"
        else:
            k_ = k.replace("(", ".scale('")
            k_ = k_.replace(")", "')")
            k_ = k_.replace(",", "','")
        prescale_args.update({k_:v})

gams_args = (str(prescale_args)
             .replace('{','')
             .replace('}','')
             .replace('"','')
             .replace(':','=')
             .replace(', ',';\n')
             +';'
            )

In [11]:
col_scaler

Unnamed: 0_level_0,val
col,Unnamed: 1_level_1
"x(san-diego,new-york)",20000.0
"x(san-diego,topeka)",8451.542547


In [13]:
col_scaler2 = col_scaler.loc[[("x(san-diego,new-york)")]]
col_scaler2

Unnamed: 0_level_0,val
col,Unnamed: 1_level_1
"x(san-diego,new-york)",20000.0


In [20]:
final_scaler = col_scaler.mul(col_scaler2.reindex(col_scaler.index).fillna(1))
print(final_scaler)

                                val
col                                
x(san-diego,new-york)  4.000000e+08
x(san-diego,topeka)    8.451543e+03


In [None]:
report("Original value",matrix)
report("Row scaling val",matrix0)
report("New Matrix",new_matrix)

In [None]:
import numpy as np
import pandas as pd
from pyomo.environ import *
from datetime import datetime
import xarray as xr

import matplotlib.pyplot as plt

def showme(df):
    return df["val"].unstack()

In [None]:
# determine absolute bound of exponent in matrix coeff
bound = 4 

def solv(df, bound):
    """
    this function filters matrix coefficient
    dataframe in which the log of the coefficient 
    is lower or higher than the bound
    """
    df_solv = df.loc[(df["val"] >= bound) |
                     (df["val"] <= -bound)]
    return df_solv

def make_logdf(df):
    df.loc[df['val']!=0,'val'] = np.log10(np.absolute(df.loc[df['val']!=0,'val']))
    return df
    

# Load the whole matrix
matrix       = (pd.read_csv('data/matrix.csv')
               .set_index(['row','col'],drop=True)[['val']])

# calculate log base 10 of the absolute value of the matrix
log_absmatrix = matrix.copy()
log_absmatrix = make_logdf(log_absmatrix)

# Create matrix with small and large coefficient
log_absmatrix_solv  = solv(log_absmatrix,bound=bound)


**Start Looping**

In [None]:
# SCALE BY ROW
# Populating row scaler
RSFs = {row:[] for row in set(log_absmatrix_solv.index.get_level_values(0))-set(["_obj"])}
for k in RSFs.keys():
    rval = log_absmatrix.loc[(k),"val"]
    lb,ub = min(rval),max(rval)
    mid = np.mean([lb,ub])
    RSFs[k] = 10**(-mid)

# Create DataFrame of row scaler
row_scaler = pd.DataFrame(data=RSFs, index=["val"]).transpose()
row_scaler.index.name = 'row'

# Create new matrix with scaled rows
matrix0 = matrix.copy()
index_mod = matrix0.index.get_level_values('row').isin(row_scaler.index)

matrix0.loc[index_mod] = matrix0.loc[index_mod].mul(row_scaler)

In [None]:
# SCALE BY COL
# Populating col scaler
log_absmatrix0 = matrix0.copy()
log_absmatrix0 = make_logdf(log_absmatrix0)
log_absmatrix0_solv = solv(log_absmatrix0,bound=bound)

CSFs = {col:[] for col in set(log_absmatrix0_solv.index.get_level_values(1))-set(["constobj"])}
for k in CSFs.keys():
    cval = log_absmatrix0.loc[(log_absmatrix0.index.get_level_values('col') == k),"val"]
    lb,ub = min(cval),max(cval)
    mid = np.mean([lb,ub])
    CSFs[k] = 10**(-mid)

# Create DataFrame of col scaler
col_scaler = pd.DataFrame(data=CSFs, index=["val"]).transpose()
col_scaler.index.name = 'col'

# Create new matrix with scaled rows
new_matrix = matrix0.copy()
index_mod = new_matrix.index.get_level_values('col').isin(col_scaler.index)
new_matrix.loc[index_mod] = new_matrix.loc[index_mod].mul(col_scaler)

In [None]:
def report(text,df):
    log_absdf = df.copy()
    log_absdf.loc[log_absdf['val']!=0,'val']=(np.log10(
        np.absolute(
            log_absdf.loc[log_absdf['val']!=0,'val'])))
    
    print(f"{text}:","[",np.int32(np.min(log_absdf)),",",np.int32(np.max(log_absdf)),"]")

report("Original value",matrix)
report("Row scaling val",matrix0)
report("New Matrix",new_matrix)

**Modify below later**

In [None]:
scalers = [RSFs,CSFs]
prescale_args = {}
for s in scalers:
    for k,v in s.items():
        if k == '_obj':
            k_ = "_obj.scale"
        elif k == 'constobj':
            k_ = "constobj.scale"
        else:
            k_ = k.replace("(", ".scale('")
            k_ = k_.replace(")", "')")
            k_ = k_.replace(",", "','")
        prescale_args.update({k_:v})

gams_args = (str(prescale_args)
             .replace('{','')
             .replace('}','')
             .replace('"','')
             .replace(':','=')
             .replace(', ',';\n')
             +';'
            )

In [None]:
# print(gams_args)

In [None]:
# Printing stats

matrices_dict = {'Old Matrix':matrix,
                 'New Matrix':new_matrix,
                } 

for k,v in matrices_dict.items():
    print(f'{k} stats:')
    print('>> log10 coefficient range:',
          np.int32(np.log10(np.abs(v['val']).fillna(1)).min(axis=None)),'--',
          np.int32(np.log10(np.abs(v['val']).fillna(1)).max(axis=None)))
    print('')

# TO DO:
# 1. plot histogram of log10 distribution of those matrix


In [None]:
fig, axs = plt.subplots(nrows=2, sharex=True)
axs[0].hist(make_logdf(matrix))
axs[1].hist(make_logdf(new_matrix))
axs[0].set_title("Unscaled")
axs[1].set_title("Scaled")
plt.tight_layout()
plt.show()

# Finalized Prototype