In [1]:
import numpy as np
import utils as ut
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score

In [2]:
# Read the predictors (X, built from x_cols=3 columns) and the target (Y)
# from the CSV via the project helper.
X, Y = ut.read_data('data/perf_monitor.csv', x_cols=3)
# Sanity check: show the first ten entries of each array to confirm the
# file was parsed as expected.
print(X[:10])
print(Y[:10])

[[  6674.43747787 596477.83606076 605987.55400577]
 [  5057.61630045 449272.85318218 454768.18284612]
 [  7350.52210752 261979.11486909 266287.50287858]
 [  6346.50347689 547749.21004677 553423.50600704]
 [  4938.26212291 277600.20799442 283022.79672739]
 [  5796.37480454 358519.43234887 362769.55882072]
 [  7603.06141845 553527.52243895 559967.64223449]
 [  7755.414491   343169.3532111  346909.18546354]
 [  6367.5293929  567974.15201452 579111.58713347]
 [  5387.89076134 248505.24848966 252373.59540343]]
[8476.3034388  6399.60092562 9339.50396982 8015.55661059 6293.66027726
 7331.61544067 9614.63753402 9800.15323156 8115.73318649 6839.97112985]


In [4]:
# Fit the coefficients of an arbitrary model function to the data and score the fit with R2.

def calculate_coeff(x_values, y_values, func):
    """Fit ``func`` to the data with ``curve_fit`` and score the fit with R2.

    Parameters
    ----------
    x_values : array-like
        Independent variables with one row per sample. The array is
        transposed before use, so ``func`` receives it feature-major and can
        address each feature as ``data[0]``, ``data[1]``, ...
    y_values : array-like
        Observed values, one per sample.
    func : callable
        Model of the form ``func(data, *params)``. It is called with the
        whole transposed array at once, both while fitting and when
        predicting, so it must operate element-wise on arrays.

    Returns
    -------
    score : float
        ``r2_score`` of the fitted model's predictions against ``y_values``.
    coeff : numpy.ndarray
        Optimal parameter values found by ``curve_fit``.
    """
    # np.asarray lets plain Python lists be passed as well as ndarrays.
    x_by_feature = np.asarray(x_values).transpose()
    targets = np.asarray(y_values)

    # curve_fit returns (optimal parameters, covariance); only the
    # parameters are needed here.
    coeff, _covariance = curve_fit(func, xdata=x_by_feature, ydata=targets)

    # Predict with a single call on the same layout curve_fit used, instead
    # of invoking the model once per sample row. Broadcasting keeps models
    # that return a scalar (e.g. a constant) usable for scoring.
    pred = np.array(np.broadcast_to(func(x_by_feature, *coeff), targets.shape))
    score = r2_score(targets, pred)
    return score, coeff

In [5]:
# Let us define function1, a model with two unknown parameters, alpha and beta, to be fitted to the data

def function1(data, alpha, beta):
    """Two-parameter model: duration scaled by (1 + loss rate), times alpha, plus beta.

    ``data[0]``, ``data[1]`` and ``data[2]`` are read as the duration, the
    transmitted byte count and the transferred byte count respectively
    (labels follow the variable names used in this notebook). The loss rate
    is ``(data[1] - data[2]) / data[1]``; there is no guard for ``data[1]``
    being zero.

    Works element-wise, so each entry of ``data`` may be a scalar or an array.
    """
    duration, xmit, xfer = data[0], data[1], data[2]
    loss_rate = (xmit - xfer) / xmit
    scaled_duration = duration * (1 + loss_rate)
    return scaled_duration * alpha + beta

# Fit function1's alpha and beta to the loaded data (X, Y), then report the
# R2 score of the fit and the fitted coefficients.
score, coeff = calculate_coeff(X, Y, function1)
print(f'R2 Score {score}')
print(f'Coefficients {coeff}')

R2 Score 0.9992231521951552
Coefficients [ 1.28270052 24.0359746 ]


In [6]:
# We can also try a second model, function2, which has four coefficients.

def function2(data, alpha, beta, gamma, delta):
    """Four-parameter linear model over the first three entries of ``data``.

    Returns ``data[0]*alpha + data[1]*beta + data[2]*gamma + delta``, where
    the three entries are read as the duration, the transmitted byte count
    and the transferred byte count (labels follow the variable names used in
    this notebook). ``delta`` is the constant offset.

    Works element-wise, so each entry of ``data`` may be a scalar or an array.
    """
    duration, xmit, xfer = data[0], data[1], data[2]
    weighted_sum = duration * alpha + xmit * beta + xfer * gamma
    return weighted_sum + delta

# Fit function2's four coefficients to the same data (X, Y) and report the
# R2 score and fitted values; this reassigns `score` and `coeff`.
score, coeff = calculate_coeff(X, Y, function2)
print(f'R2 Score {score}')
print(f'Coefficients {coeff}')

R2 Score 0.999983210436721
Coefficients [ 1.26843857 -0.01638254  0.01613461  3.87681967]
