In [1]:
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install openpyxl
import numpy as np
import pandas as pd
flowdata = pd.read_excel(r'./flowdata.xlsx', sheet_name='Scenario1')
flowdata = flowdata[['KTB', 'KTC', 'SKB', 'SKC']] #ensure columns ordering is correct



**Equation**: (w)(KTB) + (x)(KTC) - (y)(SKB) - (z)(SKC) = KTB + KTC - SKB - SKC
- Each set of flow values observed for KTB, KTC, SKB, SKC can be geometrically represented as a line in 4-dimensional space mapped by w, x, y, z
- By iterating through *test_values* for w, x, y, z, we find the point (w,x,y,z) that minimises the sum of the distance between (w,x,y,z) and all the lines
- Find *num_results* number of sample points that provide the *num_results* lowest sum of distances
- Each of w, x, y, z is equal to E/1+E where E is the actual flowmeter error

In [2]:
flowdata

Unnamed: 0,KTB,KTC,SKB,SKC
0,0,390.7,0,391.7
1,0,388.2,0,388.4
2,0,408.9,0,407.2
3,0,413.1,0,407.5
4,0,408.3,0,400.8
5,0,405.5,0,399.1
6,0,394.6,0,386.4
7,0,407.0,0,399.6
8,0,362.8,0,355.7
9,0,408.5,0,404.9


In [3]:
num_results = 10
#introduce offset to ensure that every value is unique
results_array = [[0, 0, 0, 0, 999999 - row_num] for row_num in range(num_results)]
results = pd.DataFrame(results_array, columns=['E1 (%)', 'E2(%)', 'E3(%)', 'E4(%)', 'Sum of Distances'])

In [4]:
test_values = np.arange(-10,10)/1000 # -2% to +2% at intervals of 0.1%
flowdata_temp = flowdata
# change the sign of SKB and SKC columns for simpler arithmetic
flowdata_temp['SKB'] *= -1
flowdata_temp['SKC'] *= -1
flowdata_array = flowdata_temp.to_numpy()
flowdata_array

array([[   0. ,  390.7,    0. , -391.7],
       [   0. ,  388.2,    0. , -388.4],
       [   0. ,  408.9,    0. , -407.2],
       [   0. ,  413.1,    0. , -407.5],
       [   0. ,  408.3,    0. , -400.8],
       [   0. ,  405.5,    0. , -399.1],
       [   0. ,  394.6,    0. , -386.4],
       [   0. ,  407. ,    0. , -399.6],
       [   0. ,  362.8,    0. , -355.7],
       [   0. ,  408.5,    0. , -404.9],
       [   0. ,  407.1,    0. , -398.6],
       [   0. ,  407.6,    0. , -401.4],
       [   0. ,  408.5,    0. , -402.1],
       [   0. ,  408.6,    0. , -409.3]])

In [5]:
for E1 in test_values:
    w = E1/(1+E1)
    for E2 in test_values:
        x = E2/(1+E2)
        for E3 in test_values:
            y = E3/(1+E3)
            for E4 in test_values:
                z = E4/(1+E4)
                sum_of_dist = 0
                for params in flowdata_array:
                    m = sum(params)
                    dist = abs(np.dot(params, [w,x,y,z] - m))/np.linalg.norm(params)
                    sum_of_dist += dist
                if sum_of_dist < results['Sum of Distances'].max():
                    row_to_replace = results['Sum of Distances'].idxmax()
                    results.loc[row_to_replace] = [100*E1,100*E2,100*E3,100*E4,sum_of_dist]

In [6]:
results.sort_values(by='Sum of Distances')
results

Unnamed: 0,E1 (%),E2(%),E3(%),E4(%),Sum of Distances
0,-0.01,0.009,-0.01,-0.01,0.740961
1,-0.01,0.009,-0.009,-0.01,0.740961
2,-0.01,0.009,-0.008,-0.01,0.740961
3,-0.01,0.009,-0.007,-0.01,0.740961
4,-0.01,0.009,-0.006,-0.01,0.740961
5,-0.01,0.009,-0.005,-0.01,0.740961
6,-0.01,0.009,-0.004,-0.01,0.740961
7,-0.01,0.009,-0.003,-0.01,0.740961
8,-0.01,0.009,-0.002,-0.01,0.740961
9,-0.01,0.009,-0.001,-0.01,0.740961
