In [3]:
#Importing all necessary libraries
import h2o
import pandas as pd
import numpy as np
from h2o.estimators.gbm import H2OGradientBoostingEstimator

# Step 1 -Nutrition Level Pricing Dataset
id=range(1,1001)
calories=np.random.uniform(50,161,1000).astype('int')
protein=np.random.uniform(1,7,1000).astype('int')
fat=np.random.uniform(0,6,1000).astype('int')
vitamins=np.random.uniform(0,101,1000).astype('int')
fibre=np.random.uniform(0,15,1000).astype('int')
price=((calories*50)+(fat*20)+(vitamins*80)+(fibre*100)+(protein*60))/100
data={'id':list(id),
      'calories':list(calories),
      'protein':list(protein),
      'fat':list(fat),
      'vitamins':list(vitamins),
      'fibre':list(fibre),
      'price':list(price)}
dataset=pd.DataFrame(data,index=data['id'])
del dataset['id']
print("Dataset sample is ")
print(dataset.head())

#Step 2 - started h2o and imported dataset into h2o frame
h2o.init()
H2O_dataset=h2o.H2OFrame(dataset,destination_frame='nutrition_price')
H2O_dataset.summary()
#Step 3 - Splitting h2o dataset into train,valid and test sets
train,valid,test= H2O_dataset.split_frame(ratios=[0.79,0.1],
                                          destination_frames=['nutrition_test','nutrition_valid','nutrition_test'],
                                         seed=123)
print("Train record number : ",train.nrows)
print("Test record number : ",test.nrows)
print("Validation record number : ",valid.nrows)

#Step 4 - Train th model with the dataset and generating the mean absolute error on train,test and valid dataset 
target="price"
features=[i for i in train.names if i not in ["id","price"]]
print("\nFirst Model")
ml=H2OGradientBoostingEstimator(model_id="NutritionPricing")
ml.train(features,target,train,validation_frame=valid)
print("Train MAE : ",ml.mae(train=True))
print("Valid MAE : ",ml.mae(valid=True))
perf=ml.model_performance(test)
print("Test MAE : ",perf.mae())

#Step 5 - Changing the model to overfit the dataset
print("\nSecond Model")
ml2=H2OGradientBoostingEstimator(model_id="NutritionPricing_Overfitting",ntrees=1000,max_depth=10)
ml2.train(features,target,train,validation_frame=valid)
print("Train MAE : ",ml.mae(train=True),"--->",ml2.mae(train=True))
print("Valid MAE : ",ml.mae(valid=True),"--->",ml2.mae(valid=True))
perf2=ml2.model_performance(test)
print("Test MAE : ",perf.mae(),"--->",perf2.mae())

#Step 6 - Closing H2O 
h2o.cluster().shutdown(prompt=True)

Dataset sample is 
   calories  protein  fat  vitamins  fibre  price
1       160        4    1        52      6  130.2
2        58        6    0        30      7   63.6
3        59        1    3        41     14   77.5
4        82        4    0         4      2   48.6
5       137        2    1        78      7  139.3
Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O cluster uptime:,1 min 41 secs
H2O cluster timezone:,Asia/Kolkata
H2O data parsing timezone:,UTC
H2O cluster version:,3.22.1.1
H2O cluster version age:,1 month and 27 days
H2O cluster name:,H2O_from_python_AnandNoctis_xvn8fr
H2O cluster total nodes:,1
H2O cluster free memory:,1.301 Gb
H2O cluster total cores:,4
H2O cluster allowed cores:,4


Parse progress: |█████████████████████████████████████████████████████████| 100%


Unnamed: 0,calories,protein,fat,vitamins,fibre,price
type,int,int,int,int,int,real
mins,50.0,1.0,0.0,0.0,0.0,33.6
mean,104.29299999999992,3.489999999999999,2.507000000000002,49.50000000000001,7.047000000000003,101.38889999999988
maxs,160.0,6.0,5.0,100.0,14.0,173.3
sigma,32.02940889298512,1.681287223149789,1.738094350710562,29.099876336555173,4.245564251759218,28.934454711631595
zeros,0,0,166,13,56,0
missing,0,0,0,0,0,0
0,160.0,4.0,1.0,52.0,6.0,130.2
1,58.0,6.0,0.0,30.0,7.0,63.6
2,59.0,1.0,3.0,41.0,14.0,77.5


Train record number :  781
Test record number :  110
Validation record number :  109

First Model
gbm Model Build progress: |███████████████████████████████████████████████| 100%
Train MAE :  1.6381136460737749
Valid MAE :  4.708727214766046
Test MAE :  1.6381130091146345

Second Model
gbm Model Build progress: |███████████████████████████████████████████████| 100%
Train MAE :  1.6381136460737749 ---> 0.011898734352805397
Valid MAE :  4.708727214766046 ---> 4.073426408960892
Test MAE :  1.6381130091146345 ---> 0.011900296699354839
Are you sure you want to shutdown the H2O instance running at http://localhost:54321 (Y/N)? y
H2O session _sid_a597 closed.
