In [156]:
import pandas as pd
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Notebook-wide plot styling: larger font, ticks drawn on all four sides and pointing inwards.
plt.rcParams.update({
    'font.size': 14,
    'xtick.top': True,
    'xtick.direction': 'in',
    'ytick.right': True,
    'ytick.direction': 'in',
})

In [157]:
# Load the hot-water measurements: how long the boiler was on, the minimum
# temperature before heating and the maximum temperature after heating.
# 'duration' is stored in seconds in the file and is converted to minutes here.
df = (
    pd.read_csv("hot_water_data.csv")
    .assign(duration=lambda frame: frame['duration'] / 60.0)
)
df.head(10)

Unnamed: 0,duration,temp_before,temp_after
0,18.233333,21.0,34.75
1,13.85,24.5,34.0
2,22.866667,20.0,33.0
3,13.9,24.5,34.5
4,102.233333,30.75,39.5
5,17.866667,23.25,33.5
6,25.033333,20.0,31.25
7,16.0,30.0,34.25
8,0.516667,28.75,34.5
9,66.45,28.75,42.0


In [158]:
from lmfit import minimize, Parameters, fit_report, Model

from sklearn.metrics import mean_squared_error

In [159]:
# Select matplotlib's interactive "notebook" backend for the figures drawn below.
%matplotlib notebook

In [160]:
# Quick look at the first rows of the loaded frame (the load cell has already
# divided 'duration' by 60).
df.head()

Unnamed: 0,duration,temp_before,temp_after
0,18.233333,21.0,34.75
1,13.85,24.5,34.0
2,22.866667,20.0,33.0
3,13.9,24.5,34.5
4,102.233333,30.75,39.5


In [195]:
# multivariate linear regression. 

def modFn(x, a1, a2, a3, b1, b2, b3, offset):
    """Additive cubic polynomial model in 'duration' and 'temp_before'.

    x       -- mapping / DataFrame indexable by 'duration' and 'temp_before'
    a1..a3  -- coefficients of duration, duration**2 and duration**3
    b1..b3  -- coefficients of temp_before, temp_before**2 and temp_before**3
    offset  -- constant term added to every prediction

    Returns the sum of both polynomials plus the offset, with the same
    shape as the indexed columns.
    """
    duration = x['duration']
    temp_before = x['temp_before']

    duration_terms = a1*duration + a2*duration**2 + a3*duration**3
    temp_terms = b1*temp_before + b2*temp_before**2 + b3*temp_before**3

    return duration_terms + temp_terms + offset

# Every coefficient starts at 0. The linear/quadratic/cubic duration terms and
# the linear/quadratic temp_before terms are fitted; the cubic temp_before
# term and the constant offset are held fixed at 0.
p = Parameters()
for name in ('a1', 'a2', 'a3', 'b1', 'b2'):
    p.add(name, value=0, vary=True)
for name in ('b3', 'offset'):
    p.add(name, value=0, vary=False)

mod = Model(modFn)

# Fit temp_after against the two predictor columns, then continue with the
# fitted parameter set under the same name.
result = mod.fit(df['temp_after'], p, x=df[['duration', 'temp_before']])
p = result.params


# One 3-D axes holding both the measurements and the fitted surface
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Measured points
ax.scatter(df['duration'], df['temp_before'], df['temp_after'], s=10)

# Evaluate the fitted model on a 20 x 20 grid (duration 0..30, temp_before 15..30)
X, Y = np.meshgrid(np.linspace(0, 30, 20), np.linspace(15, 30, 20))
A = pd.DataFrame(
    np.column_stack((X.flatten(), Y.flatten())),
    columns=['duration', 'temp_before'],
)
Z = np.asarray(mod.eval(p, x=A)).reshape(np.shape(X))
ax.plot_surface(X, Y, Z, alpha=0.2)

# Axis ranges and labels, grouped per axis
ax.set_xlim(0, 30)
ax.set_xlabel('Duration')

ax.set_ylim(15, 35)
ax.set_ylabel('temp_before')

ax.set_zlim(0, 45)
ax.set_zlabel('temp_after')

plt.show()

# Mean squared error of the fitted model evaluated on the same rows it was fitted to.
mse = mean_squared_error(df['temp_after'], mod.eval(p, x=df[['duration', 'temp_before']]))

# Single-argument print call: valid on both Python 2 and Python 3 (the old
# `print "..." , value` statement is a SyntaxError on Python 3) and produces
# the same "mean squared error <value>" line.
print("mean squared error " + str(mse))

# Last expression of the cell: show the lmfit fit report.
result


<IPython.core.display.Javascript object>

mean squared error 3.7211930668636732


0,1,2
fitting method,leastsq,
# function evals,31,
# data points,460,
# variables,5,
chi-square,1711.74881,
reduced chi-square,3.76208530,
Akaike info crit.,614.460394,
Bayesian info crit.,635.116526,

name,value,standard error,relative error,initial value,min,max,vary
a1,0.86721303,0.04043885,(4.66%),0,-inf,inf,True
a2,-0.01711992,0.0015312,(8.94%),0,-inf,inf,True
a3,9.6355e-05,1.2104e-05,(12.56%),0,-inf,inf,True
b1,1.69338586,0.04415034,(2.61%),0,-inf,inf,True
b2,-0.02621674,0.00142245,(5.43%),0,-inf,inf,True
b3,0.0,0.0,,0,-inf,inf,False
offset,0.0,0.0,,0,-inf,inf,False

0,1,2
a2,a3,-0.982
b1,b2,-0.9787
a1,a2,-0.9459
a1,a3,0.8888
a1,b1,-0.6255
a1,b2,0.4844
a2,b1,0.4755
a3,b1,-0.3958
a2,b2,-0.3521
a3,b2,0.2809
