In [1]:
'''
@brief  Leg-Rest Pos Recommendataion with DecisionTree Regressor
@author Byunghun Hwang <bh.hwang@iae.re.kr>
@date   2021. 05. 21
'''

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import progressbar



'''
Presets & Hyper-parameters
'''
CONFIGURATION_FILE_PATH = "./data/train/data_config.csv"
DATASET_PATH = "./data/train/"
pd.set_option('display.width', 200) # for display width
# FEATURE_LENGTH = 30 # n-dimensional data feature only use
# NUMBER_OF_SAMPLES = 299 # number of augmented data
# FEATURE_MAX_LENGTH = 115 # Maximum feature length
# NUMBER_OF_RANDOM_SELECTION = 5
# MAX_TRAIN_ITERATION = -1 # infinity



'''
1. Load configuration file
'''
data_config = pd.read_csv(CONFIGURATION_FILE_PATH, header=0, index_col=0)


'''
2. data extraction
'''
X = data_config.loc[:, ['user_height', 'user_weight', 'user_age']]
bmr = 66.47+(13.75*X['user_weight'])+(5*X['user_height'])-(6.76*X['user_age'])
bmi = X['user_weight']/(X['user_height']/100*X['user_height']/100)
X["bmr"] = bmr
X["bmi"] = bmi
ys = data_config.loc[:, ['bestfit_angle_standard']]
yr = data_config.loc[:, ['bestfit_angle_relax']]


'''
DecisionTree Regression Model
'''
print("------ Regression Model Evaluation (@standard) ------")
X_train, X_test, y_train, y_test = train_test_split(X, np.ravel(ys), test_size=0.33, shuffle=True)
model_standard = DecisionTreeRegressor(
    criterion = "mse",
    max_depth=6, 
    min_samples_leaf=1, 
    random_state=1).fit(X_train, y_train)

print("* R2 Score with Trainset (@standard) :", model_standard.score(X_train, y_train))
print("* R2 Score with Testset (@standard) :", model_standard.score(X_test, y_test))
print("* Feature Impotances (@standard) :")
for name, value in zip(X_train.columns, model_standard.feature_importances_):
    print('  - {0}: {1:.3f}'.format(name, value))


print("------ Regression Model Evaluation (@relax) ------")
X_train, X_test, y_train, y_test = train_test_split(X, np.ravel(yr), test_size=0.33, shuffle=True)
model_relax = DecisionTreeRegressor(
    criterion = "mse", # mean square error
    max_depth=6, 
    min_samples_leaf=1, 
    random_state=1).fit(X_train, y_train)

print("* R-squared Score with Trainset (@relax) :", model_relax.score(X_train, y_train))
print("* R-squared Score with Testset (@relax) :", model_relax.score(X_test, y_test))
print("* Feature Impotances (@standard) :")
for name, value in zip(X_train.columns, model_relax.feature_importances_):
    print('  - {0}: {1:.3f}'.format(name, value))



------ Regression Model Evaluation (@standard) ------
* R2 Score with Trainset (@standard) : 0.9730498592428459
* R2 Score with Testset (@standard) : 0.04578668674475739
* Feature Impotances (@standard) :
  - user_height: 0.103
  - user_weight: 0.170
  - user_age: 0.338
  - bmr: 0.047
  - bmi: 0.342
------ Regression Model Evaluation (@relax) ------
* R-squared Score with Trainset (@relax) : 0.967586682134766
* R-squared Score with Testset (@relax) : -1.0422222247042616
* Feature Impotances (@standard) :
  - user_height: 0.251
  - user_weight: 0.304
  - user_age: 0.211
  - bmr: 0.128
  - bmi: 0.106


In [2]:
'''
Output File Generation
'''
min_age = 10
max_age = 80
ages = np.array([min_age+i for i in range(max_age-min_age+1)])

min_height = 150
max_height = 200
heights = np.array([min_height+i for i in range(max_height-min_height+1)])

min_weight = 40
max_weight = 100
weights = np.array([min_weight+i for i in range(max_weight-min_weight+1)])


In [14]:
df = pd.DataFrame(data=[ages])
print(df)

   0   1   2   3   4   5   6   7   8   9   ...  61  62  63  64  65  66  67  68  69  70
0  10  11  12  13  14  15  16  17  18  19  ...  71  72  73  74  75  76  77  78  79  80

[1 rows x 71 columns]


In [15]:
a = 40
w = 60
h = 180
bmr = 66.47+(13.75*w)+(5*h)-(6.76*a)
bmi = w/(h/100*h/100)
pvs = model_standard.predict([[a,h,w,bmr,bmi]])
print(pvs[0])

810.0


In [None]:
bar = progressbar.ProgressBar(maxval=len(ages)*len(heights)*len(weights), widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
bar.start()
output_standard = pd.DataFrame(columns=['age','height','weight','legrest'])
output_relax = pd.DataFrame(columns=['age','height','weight','legrest'])
count = 0
for a in ages:
    for h in heights:
        for w in weights:
            bmr = 66.47+(13.75*w)+(5*h)-(6.76*a)
            bmi = w/(h/100*h/100)
            pvs = model_standard.predict([[a,h,w,bmr,bmi]])
            pvr = model_relax.predict([[a,h,w,bmr,bmi]])
            output_standard = output_standard.append({'age':a, 'height':h, 'weight':w, 'legrest':pvs}, ignore_index=True)
            output_relax = output_relax.append({'age':a, 'height':h, 'weight':w, 'legrest':pvr}, ignore_index=True)
            count = count+1
            bar.update(count)
bar.finish()

output_standard.to_csv('result_standard.csv')
output_relax.to_csv('result_relax.csv')
print("saved results")