In [4]:
# import numpy and pandas, and DataFrame / Series
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# Set some pandas options
# NOTE: 'display.height' is intentionally not set here. That option has been
# removed from pandas and setting it raises OptionError, which would abort this
# cell on a fresh kernel; 'display.max_rows' is what limits the rows displayed.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# And some items for matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import logprocessing as lp
import io
from IPython.display import display, HTML
import statsmodels.formula.api as smf

In [6]:
# Metadata object produced by lp.MetadataBuilder with load_cpu_cores(32).
meta = lp.MetadataBuilder().load_cpu_cores(32).build()

# Query handed to lp.get_dataframe_from_folder for every experiment.
# NOTE(review): "header" lists eight names and "columns" eight expressions, so
# they are presumably matched positionally -- confirm against logprocessing.
query = dict(
    header="timestamp cpu1 io_write io_read net_down net_up memory power",
    columns="cpu0.timestamp, 100 - cpu0.idle, io.writet, io.readt, net.download, net.upload, memory.used, mean( energy.power_active )",
)


def experiment(x):
    """Load one experiment folder through ``lp.get_dataframe_from_folder``.

    Parameters
    ----------
    x : str
        Sub-folder name, appended verbatim (plain string concatenation) to the
        experiments root directory.

    Returns
    -------
    Whatever ``lp.get_dataframe_from_folder`` returns for that folder when
    called with the module-level ``meta`` and ``query``.
    """
    folder = "/home/danilo/Dropbox/Workspace/Experiments0503/" + x
    return lp.get_dataframe_from_folder(folder, meta, query)
    

In [7]:
# Load the "f22/" experiment and add a combined I/O column (read + write).
df = experiment("f22/")
df["io"] = df["io_read"].add(df["io_write"])

# Ordinary least squares fit of power against cpu1 and the combined I/O column.
mod = smf.ols(data=df, formula='power ~ cpu1 + io')
res = mod.fit()

# Number of rows that were loaded.
print(len(df))

1800


In [15]:
def apply_regression(dir1, dir2, formula):
    """Fit an OLS model on the combined data of two experiment folders.

    Both folders are loaded with ``experiment``. A joint frame is then built
    with these columns, in this order:

    * ``power``     -- sum of the two frames' ``power`` columns
    * ``cpu1_f21``  -- ``cpu1`` of the first frame
    * ``cpu1_f22``  -- ``cpu1`` of the second frame
    * ``io_f21``    -- ``io_read + io_write`` of the first frame
    * ``io_f22``    -- ``io_read + io_write`` of the second frame

    The ``f21`` / ``f22`` suffixes are fixed; they do not depend on the folder
    names passed in.

    Parameters
    ----------
    dir1, dir2 : str
        Folder names forwarded to ``experiment``.
    formula : str
        Formula string given to ``smf.ols``; it may reference the columns above.

    Returns
    -------
    The result of calling ``.fit()`` on the ``smf.ols`` model.
    """
    first = experiment(dir1)
    second = experiment(dir2)
    tagged = (("f21", first), ("f22", second))

    joint = pd.DataFrame()
    joint["power"] = first["power"].add(second["power"])

    # Two passes so the column order stays: both cpu1 columns, then both io columns.
    for tag, frame in tagged:
        joint["cpu1_" + tag] = frame["cpu1"]
    for tag, frame in tagged:
        joint["io_" + tag] = frame["io_read"].add(frame["io_write"])

    return smf.ols(formula=formula, data=joint).fit()
    
# Joint regression for the "one_server" runs of both folders.
r = apply_regression(
    "f21/one_server",
    "f22/one_server",
    'power ~ cpu1_f21 + cpu1_f22 + io_f21 + io_f22',
)

# Full summary table first, then the R-squared value on its own line.
print(r.summary(), r.rsquared, sep="\n")

                            OLS Regression Results                            
Dep. Variable:                  power   R-squared:                       0.702
Model:                            OLS   Adj. R-squared:                  0.701
Method:                 Least Squares   F-statistic:                     528.3
Date:                Mon, 12 Mar 2018   Prob (F-statistic):          8.02e-234
Time:                        11:45:45   Log-Likelihood:                -2496.7
No. Observations:                 900   AIC:                             5003.
Df Residuals:                     895   BIC:                             5027.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    150.3122      0.941    159.735      0.0

In [23]:
# The same joint formula is used for all three two-folder fits.
joint_formula = 'power ~ cpu1_f21 + cpu1_f22 + io_f21 + io_f22'
r1 = apply_regression("f21/one_server", "f22/one_server", joint_formula)
r2 = apply_regression("f21/two_servers", "f22/two_servers", joint_formula)
r3 = apply_regression("f21", "f22", joint_formula)

# Reference fit: one folder on its own, power explained by cpu1 only.
df_singlemachine = experiment("f21/one_server/")
mod = smf.ols(data=df_singlemachine, formula='power ~ cpu1')
r4 = mod.fit()

# Collect the R-squared of each fit into a one-column table.
rsq_labels = ['one server', 'two servers', 'both', 'single machine']
rsq_values = [fit.rsquared for fit in (r1, r2, r3, r4)]
df = pd.DataFrame({'R squared': rsq_values}, index=rsq_labels)

df

Unnamed: 0,R squared
one server,0.702469
two servers,0.804604
both,0.699668
single machine,0.876181
