# MESSAGE-ix matrix improvement tool

This jupyter notebook is a prototype of the MESSAGE-ix matrix improvement tool.
This tool aims to automatically improve the quality (scaling) of the coefficient matrix of a MESSAGE-ix scenario, and can then be used to map the results obtained from the scaled MESSAGE-ix matrix back to the originally intended values.

This tool is derived from the tool developed by Makowski & Sosnowski, 1981 (https://pure.iiasa.ac.at/id/eprint/1766/1/CP-81-037.pdf)

According to Curtis and Reid (1972), matrix A can be described as well-scaled if:

$
\sum_{i} \sum_{j} \left(\log_{10} |a_{i,j}|\right)^2 \leq v \qquad \qquad \text{(Eq. 1)}
$

where $v$ is an acceptable threshold for the matrix quality criterion.
If $ax_{i,j} = (\log_{10} |a_{i,j}|)^2$ for every non-zero $a_{i,j}$, matrix $Ax$ is:

In [1]:
import numpy as np
import pandas as pd
from pyomo.environ import *
from datetime import datetime
import xarray as xr

import matplotlib.pyplot as plt

def showme(df):
    """Return the 'val' column of ``df`` pivoted into a wide table.

    The innermost index level of ``df`` becomes the columns of the result
    (``Series.unstack`` default); index combinations that are absent from
    ``df`` show up as NaN.
    """
    values = df["val"]
    return values.unstack()

In [2]:
def _read_coefficients(csv_path):
    """Read a coefficient CSV and return its 'val' column indexed by (row, col)."""
    table = pd.read_csv(csv_path)
    table = table.set_index(['row', 'col'], drop=True)
    return table[['val']]


# Complete coefficient matrix
matrix = _read_coefficients('data/matrix_transport.csv')

# Subsets holding the large ("upper") and small ("lower") coefficients
matrix_up = _read_coefficients('data/matrix_transport_prob_upper.csv')
matrix_lo = _read_coefficients('data/matrix_transport_prob_lower.csv')

# Stack both subsets into one frame: lower entries first, then upper ones
matrix_solv = pd.concat([matrix_lo, matrix_up])


  matrix_solv  = pd.concat([matrix_lo,matrix_up])


In [3]:
# Show the concatenated lower/upper coefficient entries
matrix_solv

Unnamed: 0_level_0,Unnamed: 1_level_0,val
row,col,Unnamed: 2_level_1
_obj,"x(san-diego,new-york)",2.5e-09
_obj,"x(san-diego,topeka)",1.4e-08


In [4]:
def _log10_abs_nonzero(frame):
    """Return a copy of ``frame`` whose non-zero 'val' entries are log10(|val|).

    Entries equal to zero are left untouched (they stay 0).
    """
    logged = frame.copy()
    is_nonzero = logged['val'] != 0
    logged.loc[is_nonzero, 'val'] = np.log10(
        np.absolute(logged.loc[is_nonzero, 'val']))
    return logged


# log10 of the absolute coefficients of the whole matrix
log_absmatrix = _log10_abs_nonzero(matrix)

# log10 of the absolute coefficients of the small/large subset
log_absmatrix_solv = _log10_abs_nonzero(matrix_solv)

## Start Looping

In [5]:
# SCALE BY ROW
# Only rows that appear in the small/large coefficient subset get a scaler.
rows_to_scale = set(log_absmatrix_solv.index.get_level_values(0))

# Row scaling factor = 10**(-m), where m is the midpoint between the smallest
# and the largest log10 value found in that row of the whole matrix.
RSFs = {}
for row_label in rows_to_scale:
    row_logs = log_absmatrix.loc[(row_label), "val"]
    midpoint = np.mean([min(row_logs), max(row_logs)])
    RSFs[row_label] = 10 ** (-midpoint)

# One-column ('val') frame of scalers; the index must be named 'row' so that
# the multiplication below aligns on the 'row' level of the matrix index.
row_scaler = pd.DataFrame(data=RSFs, index=["val"]).T
row_scaler.index.name = 'row'

# Copy the matrix and multiply every selected row by its scaler
matrix0 = matrix.copy()
selected_rows = matrix0.loc[row_scaler.index]
matrix0.loc[row_scaler.index] = selected_rows.mul(row_scaler)

In [6]:
# SCALE BY COL
# An entry whose |log10(|a_ij|)| reaches this many orders of magnitude marks
# its column as needing a scaler.
LOG_BOUND = 2

# Work on log10(|a_ij|) of the non-zero entries only: np.log10 yields NaN for
# negative values and -inf for zero, which would corrupt the min/max below
# (a zero would even produce an infinite scaling factor).
nonzero_matrix0 = matrix0.loc[matrix0["val"] != 0]
log_absmatrix0 = np.log10(nonzero_matrix0.abs())
log_absmatrix0_solv = log_absmatrix0.loc[log_absmatrix0["val"].abs() >= LOG_BOUND]

# Populating col scaler: 10**(-m), where m is the midpoint between the
# smallest and the largest log10 value found in that column.
col_labels = log_absmatrix0.index.get_level_values('col')
CSFs = {}
for col in set(log_absmatrix0_solv.index.get_level_values('col')):
    cval = log_absmatrix0.loc[col_labels == col, "val"]
    mid = (cval.min() + cval.max()) / 2
    CSFs[col] = 10 ** (-mid)

# Create DataFrame of col scaler; the index must be named 'col' so that the
# multiplication below aligns on the 'col' level of the matrix index.
col_scaler = pd.DataFrame(data=CSFs, index=["val"]).transpose()
col_scaler.index.name = 'col'

# Create new matrix with scaled columns
new_matrix = matrix0.copy()
in_scaled_col = new_matrix.index.get_level_values('col').isin(col_scaler.index)
if in_scaled_col.any():
    # Skip the assignment entirely when no column was selected for scaling.
    new_matrix.loc[in_scaled_col] = new_matrix.loc[in_scaled_col].mul(col_scaler)

In [7]:
def report(text,df):
    """Print the range of base-10 exponents of the non-zero 'val' entries of ``df``.

    Output format: ``<text>: [ <min> , <max> ]`` where both bounds are
    log10(|val|) truncated toward zero.

    Zero entries are ignored: they have no magnitude, and counting them as
    exponent 0 would distort the reported range. The reduction is done on the
    'val' column only, so the result does not depend on how ``np.min`` /
    ``np.max`` reduce a whole DataFrame (per-column on pandas before 2.0).

    Parameters
    ----------
    text : str
        Label printed in front of the range.
    df : pandas.DataFrame
        Frame with a numeric 'val' column; it is not modified.
    """
    nonzero = df.loc[df['val'] != 0, 'val']
    exponents = np.log10(nonzero.abs()).dropna()
    if exponents.empty:
        # Nothing to measure (empty frame, or only zero/NaN values).
        print(f"{text}:","[","no non-zero coefficients","]")
        return
    lowest, highest = int(exponents.min()), int(exponents.max())
    print(f"{text}:","[",lowest,",",highest,"]")

# Print the exponent range at each stage: original, row-scaled, row+col-scaled
for stage_label, stage_matrix in (
    ("Original value", matrix),
    ("Row scaling val", matrix0),
    ("New Matrix", new_matrix),
):
    report(stage_label, stage_matrix)

Original value: [ -8 , 0 ]
Row scaling val: [ -4 , 4 ]
New Matrix: [ -2 , 2 ]


In [8]:
# Wide (row x col) view of the original coefficients
showme(matrix)

col,constobj,"x(san-diego,chicago)","x(san-diego,new-york)","x(san-diego,topeka)","x(seattle,chicago)","x(seattle,new-york)","x(seattle,topeka)"
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
_obj,1.0,1.8,2.5e-09,1.4e-08,1.7,2.5,1.8
demand(chicago),,1.0,,,1.0,,
demand(new-york),,,1.0,,,1.0,
demand(topeka),,,,1.0,,,1.0
supply(san-diego),,1.0,1.0,1.0,,,
supply(seattle),,,,,1.0,1.0,1.0


In [9]:
# Wide (row x col) view after row scaling
showme(matrix0)

col,constobj,"x(san-diego,chicago)","x(san-diego,new-york)","x(san-diego,topeka)","x(seattle,chicago)","x(seattle,new-york)","x(seattle,topeka)"
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
_obj,12649.110641,22768.399153,3.2e-05,0.000177,21503.488089,31622.776602,22768.399153
demand(chicago),,1.0,,,1.0,,
demand(new-york),,,1.0,,,1.0,
demand(topeka),,,,1.0,,,1.0
supply(san-diego),,1.0,1.0,1.0,,,
supply(seattle),,,,,1.0,1.0,1.0


In [10]:
# Wide (row x col) view after row and column scaling
showme(new_matrix)

col,constobj,"x(san-diego,chicago)","x(san-diego,new-york)","x(san-diego,topeka)","x(seattle,chicago)","x(seattle,new-york)","x(seattle,topeka)"
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
_obj,1.0,150.892012,0.005623,0.013307,146.640677,177.827941,150.892012
demand(chicago),,0.006627,,,0.006819,,
demand(new-york),,,177.827941,,,0.005623,
demand(topeka),,,,75.14602,,,0.006627
supply(san-diego),,0.006627,177.827941,75.14602,,,
supply(seattle),,,,,0.006819,0.005623,0.006627


In [12]:
# Scaling factor applied to each selected row
row_scaler

Unnamed: 0_level_0,val
row,Unnamed: 1_level_1
_obj,12649.110641


In [11]:
# Scaling factor applied to each selected column
col_scaler

Unnamed: 0_level_0,val
col,Unnamed: 1_level_1
"x(san-diego,topeka)",75.14602
constobj,7.9e-05
"x(seattle,chicago)",0.006819
"x(san-diego,chicago)",0.006627
"x(san-diego,new-york)",177.827941
"x(seattle,new-york)",0.005623
"x(seattle,topeka)",0.006627


## Modify below later

# Scratch check: base-10 exponent of 524288 (= 2**19), roughly 5.72
np.log10(524288.0)