# Set Up Basic Model
- Linear model with two features: Start temperature and Room temperature
- Split training vs test data set 80-20

In [1]:
import graphlab
import numpy as np

# read files
red_data = graphlab.SFrame.read_csv('thermodynamics.red.csv')

purple_data = graphlab.SFrame.read_csv('thermodynamics.purple.csv')

purple_data2 = graphlab.SFrame.read_csv('thermodynamics.purple.2.csv')

# convenient functions
squared = lambda x: x*x

def get_residual_sum_of_squares(model, data, outcome):
    return sum(map(squared, outcome - model.predict(data)))

# split data
red_train_data, red_test_data = red_data.random_split(.8,seed=0)
purple_train_data, purple_test_data = purple_data.random_split(.8,seed=0)
purple_train_data2, purple_test_data2 = purple_data2.random_split(.8,seed=0)

# set up features
thermo_features = ['Room temperature', 'Start temperature']

# train models
red_model = graphlab.linear_regression.create(red_train_data, target = 'End temperature', features = thermo_features, 
                                                  validation_set = red_test_data)
purple_model = graphlab.linear_regression.create(purple_train_data, target = 'End temperature', features = thermo_features, 
                                                  validation_set = purple_test_data)

purple_model2 = graphlab.linear_regression.create(purple_train_data2, target = 'End temperature', features = thermo_features, 
                                                  validation_set = purple_test_data2)

This non-commercial license of GraphLab Create for academic use is assigned to chen.jenhau@gmail.com and will expire on July 15, 2019.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1548651367.log


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str,int,float,str,float,float,float,str,str,int,float,float,str,str,float,str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str,int,float,str,float,float,float,str,str,int,float,float,str,str,float,str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str,int,float,str,float,float,float,str,str,int,float,float,str,str,float,str,str,str,str,float,str,str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


# Inspect Basic Model Coefficients

In [3]:
red_weight_summary = red_model.get("coefficients")
purple_weight_summary = purple_model.get("coefficients")
purple_weight_summary2 = purple_model2.get("coefficients")
#purple_weight_summary_sub = purple_model_sub.get("coefficients")


rss_red = get_residual_sum_of_squares(red_model, red_test_data, red_test_data['End temperature'])
rss_purple = get_residual_sum_of_squares(purple_model, purple_test_data, purple_test_data['End temperature'])
rss_purple2 = get_residual_sum_of_squares(purple_model2, purple_test_data2, purple_test_data2['End temperature'])

print red_weight_summary, purple_weight_summary, purple_weight_summary2

+-------------------+-------+----------------+-----------------+
|        name       | index |     value      |      stderr     |
+-------------------+-------+----------------+-----------------+
|    (intercept)    |  None | 5.87399007861  |  3.42799076059  |
|  Room temperature |  None | 0.305581895198 |  0.163649625722 |
| Start temperature |  None | 0.66606805281  | 0.0323260449324 |
+-------------------+-------+----------------+-----------------+
[3 rows x 4 columns]
 +-------------------+-------+----------------+-----------------+
|        name       | index |     value      |      stderr     |
+-------------------+-------+----------------+-----------------+
|    (intercept)    |  None | 16.9435004253  |  6.39255614494  |
|  Room temperature |  None | 0.442315493843 |  0.25605488031  |
| Start temperature |  None | 0.474393946704 | 0.0596230848109 |
+-------------------+-------+----------------+-----------------+
[3 rows x 4 columns]
 +-------------------+-------+----------------+

# Use Basic Model Coefficients for Predictions
- Set room temperature to 20 and start temperature to 60, 70, and 75

## Red Model Predictions

In [27]:
r_w0 = red_weight_summary[0]['value']
r_w1 = red_weight_summary[1]['value']
r_w2 = red_weight_summary[2]['value']

print '%5.2f' % r_w0, '+', '%4.2f' % r_w1, 'x room_temperature', '+', '%4.2f' % r_w2, 'x starting_temperature'

print 'prediction for red @ 60 ->', '%4.2f' % (r_w0 + r_w1*20 + r_w2*60)

print 'prediction for red @ 70 ->', '%4.2f' % (r_w0 + r_w1*20 + r_w2*70)

print 'prediction for red @ 75 ->', '%4.2f' % (r_w0 + r_w1*20 + r_w2*75)

 5.87 + 0.31 x room_temperature + 0.67 x starting_temperature
prediction for red @ 60 -> 51.95
prediction for red @ 70 -> 58.61
prediction for red @ 75 -> 61.94


## Purple Model Predictions

In [29]:
p_w0 = purple_weight_summary[0]['value']
p_w1 = purple_weight_summary[1]['value']
p_w2 = purple_weight_summary[2]['value']

print '%5.2f' % p_w0, '+', '%4.2f' % p_w1, 'x room_temperature', '+', '%4.2f' % p_w2, 'x starting_temperature'

print 'prediction for purple @ 60 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*60)

print 'prediction for purple @ 70 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*70)

print 'prediction for purple @ 75 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*75)


p_w0 = purple_weight_summary_sub[0]['value']
p_w1 = purple_weight_summary_sub[1]['value']
p_w2 = purple_weight_summary_sub[2]['value']
p_w3 = purple_weight_summary_sub[3]['value']

print '%5.2f' % p_w0, '+', '%4.2f' % p_w1, 'x room_temperature', '+', '%4.2f' % p_w2, 'x starting_temperature', '+', '%4.2f' % p_w3, 'x (starting_temperature - room_temperature)',

print 'prediction for purple @ 60 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*60 + p_w3*(60-20))

print 'prediction for purple @ 70 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*70 + p_w3*(70-20))

print 'prediction for purple @ 75 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*75 + p_w3*(75-20))

16.94 + 0.44 x room_temperature + 0.47 x starting_temperature
prediction for purple @ 60 -> 54.25
prediction for purple @ 70 -> 59.00
prediction for purple @ 75 -> 61.37
13.96 + 0.64 x room_temperature + 0.22 x starting_temperature + 0.33 x (starting_temperature - room_temperature) prediction for purple @ 60 -> 53.43
prediction for purple @ 70 -> 58.98
prediction for purple @ 75 -> 61.76


In [4]:
p2_w0 = purple_weight_summary2[0]['value']
p2_w1 = purple_weight_summary2[1]['value']
p2_w2 = purple_weight_summary2[2]['value']

print '%5.2f' % p2_w0, '+', '%4.2f' % p2_w1, 'x room_temperature', '+', '%4.2f' % p2_w2, 'x starting_temperature'

print 'prediction for purple @ 60 ->', '%4.2f' % (p2_w0 + p2_w1*20 + p2_w2*60)

print 'prediction for purple @ 70 ->', '%4.2f' % (p2_w0 + p2_w1*20 + p2_w2*70)

print 'prediction for purple @ 75 ->', '%4.2f' % (p2_w0 + p2_w1*20 + p2_w2*75)


#p_w0 = purple_weight_summary_sub[0]['value']
#p_w1 = purple_weight_summary_sub[1]['value']
#p_w2 = purple_weight_summary_sub[2]['value']
#p_w3 = purple_weight_summary_sub[3]['value']

#print '%5.2f' % p_w0, '+', '%4.2f' % p_w1, 'x room_temperature', '+', '%4.2f' % p_w2, 'x starting_temperature', '+', '%4.2f' % p_w3, 'x (starting_temperature - room_temperature)',

#print 'prediction for purple @ 60 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*60 + p_w3*(60-20))

#print 'prediction for purple @ 70 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*70 + p_w3*(70-20))

#print 'prediction for purple @ 75 ->', '%4.2f' % (p_w0 + p_w1*20 + p_w2*75 + p_w3*(75-20))

12.72 + 0.50 x room_temperature + 0.52 x starting_temperature
prediction for purple @ 60 -> 53.94
prediction for purple @ 70 -> 59.14
prediction for purple @ 75 -> 61.74


# Train Three Different New Models for Red and Purple Devices

## Red Device
- model red_model_sub with features, thermo_feature_sub; add a new feature start_sub_rm = Start temperature - Room temperature
- model red_model_ln with features, thermo_feature_ln; thermo_feature_ln: add a new feature ln_start = ln(Start temperature)
- model red_model_ln_sub with features, thermo_feature_ln_sub; thermo_feature_ln_sub: add a new feature ln_start_sub_rm = ln(Start temperature - Room temperature)

In [5]:
red_train_data['start_sub_rm'] = map(lambda x, y: x-y, red_train_data['Start temperature'], red_train_data['Room temperature'])
red_train_data['ln_start_sub_rm'] = map(lambda x, y: np.log(x-y), red_train_data['Start temperature'], red_train_data['Room temperature'])
red_train_data['ln_start'] = map(lambda x: np.log(x), red_train_data['Start temperature'])


red_test_data['start_sub_rm'] = map(lambda x, y: x-y, red_test_data['Start temperature'], red_test_data['Room temperature'])
red_test_data['ln_start_sub_rm'] = map(lambda x, y: np.log(x-y), red_test_data['Start temperature'], red_test_data['Room temperature'])
red_test_data['ln_start'] = map(lambda x: np.log(x), red_test_data['Start temperature'])

thermo_features_sub = thermo_features + ['start_sub_rm']

red_model_sub = graphlab.linear_regression.create(red_train_data, target = 'End temperature', features = thermo_features_sub, 
                                                  validation_set = red_test_data)

rss_red_sub = get_residual_sum_of_squares(red_model_sub, red_test_data, red_test_data['End temperature'])

thermo_features_ln = thermo_features + ['ln_start']

red_model_ln = graphlab.linear_regression.create(red_train_data, target = 'End temperature', features = thermo_features_ln, 
                                                  validation_set = red_test_data)

rss_red_ln = get_residual_sum_of_squares(red_model_ln, red_test_data, red_test_data['End temperature'])

thermo_features_ln_sub = thermo_features + ['ln_start_sub_rm']

red_model_ln_sub = graphlab.linear_regression.create(red_train_data, target = 'End temperature', features = thermo_features_ln_sub, 
                                                  validation_set = red_test_data)

rss_red_ln_sub = get_residual_sum_of_squares(red_model_ln_sub, red_test_data, red_test_data['End temperature'])

# Compare RSS for Red Device Models

In [6]:
print '%4.2f' % rss_red, '%4.2f' % rss_red_sub, '%4.2f' % rss_red_ln, '%4.2f' % rss_red_ln_sub

0.83 0.84 0.83 0.83


## Purple Device
- model purple_model_sub with features, thermo_feature_sub; add a new feature start_sub_rm = Start temperature - Room temperature
- model purple_model_ln with features, thermo_feature_ln; thermo_feature_ln: add a new feature ln_start = ln(Start temperature)
- model purple_model_ln_sub with features, thermo_feature_ln_sub; thermo_feature_ln_sub: add a new feature ln_start_sub_rm = ln(Start temperature - Room temperature)

In [7]:
purple_train_data['start_sub_rm'] = map(lambda x, y: x-y, purple_train_data['Start temperature'], purple_train_data['Room temperature'])
purple_train_data['ln_start_sub_rm'] = map(lambda x, y: np.log(x-y), purple_train_data['Start temperature'], purple_train_data['Room temperature'])
purple_train_data['ln_start'] = map(lambda x: np.log(x), purple_train_data['Start temperature'])


purple_test_data['start_sub_rm'] = map(lambda x, y: x-y, purple_test_data['Start temperature'], purple_test_data['Room temperature'])
purple_test_data['ln_start_sub_rm'] = map(lambda x, y: np.log(x-y), purple_test_data['Start temperature'], purple_test_data['Room temperature'])
purple_test_data['ln_start'] = map(lambda x: np.log(x), purple_test_data['Start temperature'])

thermo_features_sub = thermo_features + ['start_sub_rm']

purple_model_sub = graphlab.linear_regression.create(purple_train_data, target = 'End temperature', features = thermo_features_sub, 
                                                  validation_set = purple_test_data)

rss_purple_sub = get_residual_sum_of_squares(purple_model_sub, purple_test_data, purple_test_data['End temperature'])

thermo_features_ln = thermo_features + ['ln_start']

purple_model_ln = graphlab.linear_regression.create(purple_train_data, target = 'End temperature', features = thermo_features_ln, 
                                                  validation_set = purple_test_data)

rss_purple_ln = get_residual_sum_of_squares(purple_model_ln, purple_test_data, purple_test_data['End temperature'])

thermo_features_ln_sub = thermo_features + ['ln_start_sub_rm']

purple_model_ln_sub = graphlab.linear_regression.create(purple_train_data, target = 'End temperature', features = thermo_features_ln_sub, 
                                                  validation_set = purple_test_data)

rss_purple_ln_sub = get_residual_sum_of_squares(purple_model_ln_sub, purple_test_data, purple_test_data['End temperature'])

# Compare RSS for Purple Device Models

In [8]:
print '%4.2f' % rss_purple, '%4.2f' % rss_purple_sub, '%4.2f' % rss_purple_ln, '%4.2f' % rss_purple_ln_sub

0.25 0.35 0.26 0.26


# Max-Error and RMSE for Red Device Models

In [24]:
result_basic = red_model.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic['max_error'], 'rmse: %4.2f' % result_basic['rmse']

result_basic_sub = red_model_sub.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic_sub['max_error'], 'rmse: %4.2f' % result_basic_sub['rmse']

result_basic_ln = red_model_ln.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic_ln['max_error'], 'rmse: %4.2f' % result_basic_ln['rmse']

result_basic_ln_sub = red_model_ln_sub.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic_ln_sub['max_error'], 'rmse: %4.2f' % result_basic_ln_sub['rmse']

max-error: 0.67 rmse: 0.46
max-error: 0.83 rmse: 0.46
max-error: 0.68 rmse: 0.46
max-error: 0.69 rmse: 0.45


# Max-Error and RMSE for Purple Device Models

In [25]:
result_basic = purple_model.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic['max_error'], 'rmse: %4.2f' % result_basic['rmse']

result_basic_sub = purple_model_sub.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic_sub['max_error'], 'rmse: %4.2f' % result_basic_sub['rmse']

result_basic_ln = purple_model_ln.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic_ln['max_error'], 'rmse: %4.2f' % result_basic_ln['rmse']

result_basic_ln_sub = purple_model_ln_sub.evaluate(red_test_data)
print 'max-error: %4.2f' % result_basic_ln_sub['max_error'], 'rmse: %4.2f' % result_basic_ln_sub['rmse']

max-error: 1.33 rmse: 1.01
max-error: 1.00 rmse: 0.79
max-error: 1.31 rmse: 0.99
max-error: 1.27 rmse: 0.97
