In [1]:
import random
import datetime

In [2]:
%matplotlib inline
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score

from sklearn import tree
import joblib

import scipy

In [4]:
# Default size (width, height) in inches for figures created after this cell.
plt.rcParams["figure.figsize"] = (15, 10)

## load random forest model

In [5]:
# Serialized "min" and "max" models, stored next to this notebook.
filename_min, filename_max = (
    './min_random_forest_model.sav',
    './max_random_forest_model.sav',
)

In [6]:
# Restore the two pre-trained models from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load .sav files that come from a trusted source.
rfr_min = joblib.load(filename_min)
rfr_max = joblib.load(filename_max)

## create test data

### load env_temp data

In [7]:
# Load the processed environment-temperature log (path is relative to this notebook).
env_temp_file_path = '../data/hobo/env_05_processed.csv'
df_env_temp = pd.read_csv(env_temp_file_path)

In [8]:
# Preview the temperature log: one env_temp reading per timestamp.
df_env_temp

Unnamed: 0,time,env_temp
0,2020-12-09 11:45:00,69.557
1,2020-12-09 11:46:00,69.600
2,2020-12-09 11:47:00,69.685
3,2020-12-09 11:48:00,69.728
4,2020-12-09 11:49:00,69.771
...,...,...
221,2020-12-09 15:26:00,70.803
222,2020-12-09 15:27:00,70.930
223,2020-12-09 15:28:00,71.060
224,2020-12-09 15:29:00,71.060


In [9]:
# Parse the 'time' strings into datetimes so they can be matched against timestamps.
df_env_temp['time'] = pd.to_datetime(df_env_temp['time'])

In [10]:
# Sampling grid: every 10 minutes from 13:20 to 15:20 on 2020-12-09, both ends included.
time_start = datetime.datetime.strptime('12/09/20 13:20:00', '%m/%d/%y %H:%M:%S')
time_end = datetime.datetime.strptime('12/09/20 15:20:00', '%m/%d/%y %H:%M:%S')

# pd.date_range replaces the hand-rolled while loop: it yields a uniformly typed
# sequence of Timestamps and leaves no scratch loop variable in the namespace.
t_Series = pd.Series(pd.date_range(start=time_start, end=time_end, freq='10min'))

# Plain-list view of the same grid, kept under its original name.
t_list = t_Series.tolist()

In [11]:
# Keep only the log rows whose timestamp falls exactly on the 10-minute grid in t_Series.
df_env_temp_selected = df_env_temp[df_env_temp['time'].isin(t_Series)]

In [12]:
# Lookup table: timestamp string ('YYYY-MM-DD HH:MM:SS') -> env_temp reading.
# Zipping the two columns avoids iterrows(), which builds a Series per row and
# left its loop variables (idxm, row) behind in the notebook namespace.
d1 = dict(zip(map(str, df_env_temp_selected['time']),
              df_env_temp_selected['env_temp']))

In [13]:
# Show the timestamp-string -> env_temp lookup.
d1

{'2020-12-09 13:20:00': 72.869,
 '2020-12-09 13:30:00': 76.63600000000002,
 '2020-12-09 13:40:00': 77.029,
 '2020-12-09 13:50:00': 74.33800000000002,
 '2020-12-09 14:00:00': 72.91199999999998,
 '2020-12-09 14:10:00': 72.05,
 '2020-12-09 14:20:00': 75.85300000000002,
 '2020-12-09 14:30:00': 77.464,
 '2020-12-09 14:40:00': 76.464,
 '2020-12-09 14:50:00': 73.429,
 '2020-12-09 15:00:00': 71.533,
 '2020-12-09 15:10:00': 70.459,
 '2020-12-09 15:20:00': 69.814}

### load features data

In [14]:
# Load the feature table used for evaluation (path is relative to this notebook).
pth_data = '../data/test_refine_02.csv'
df_origin = pd.read_csv(pth_data)

In [15]:
# Preview the feature table as loaded, before any column is dropped or imputed.
df_origin

Unnamed: 0,time,height,weight,gender,bmi,age,env_temp,rh,heart_rate,stress_level,skin_temp,eda,TC,TS,Clo,Act
0,2020-12-09 13:20:00,1.62,52,1,19.814053,26,72.869,36.837,97.0,58.0,31.065,8.696613,4,5,0.61,1.0
1,2020-12-09 13:30:00,1.62,52,1,19.814053,26,76.636,39.856,119.0,,31.588,14.716620,3,6,0.61,4.0
2,2020-12-09 13:40:00,1.62,52,1,19.814053,26,77.029,40.446,122.0,,31.772,13.199184,3,6,0.61,4.0
3,2020-12-09 13:50:00,1.62,52,1,19.814053,26,74.338,45.924,106.0,,31.641,7.708510,4,3,0.74,1.0
4,2020-12-09 14:00:00,1.62,52,1,19.814053,26,72.912,45.179,96.5,,31.274,5.524036,3,2,0.74,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,2020-12-09 14:40:00,1.77,90,-1,28.727377,28,76.464,15.000,121.5,,31.955,11.877829,2,7,0.74,4.0
74,2020-12-09 14:50:00,1.77,90,-1,28.727377,28,73.429,16.270,104.0,,31.693,13.723230,4,4,0.61,1.0
75,2020-12-09 15:00:00,1.77,90,-1,28.727377,28,71.533,15.000,106.0,86.0,32.474,12.256653,5,4,0.61,1.0
76,2020-12-09 15:10:00,1.77,90,-1,28.727377,28,70.459,25.480,120.0,,31.588,11.623549,5,3,1.00,4.0


In [16]:
# (rows, columns) of the loaded feature table.
df_origin.shape

(78, 16)

In [17]:
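# Disabled backfill: would overwrite each row's env_temp with the reading stored in d1
# for that row's timestamp. NOTE(review): presumably obsolete because the CSV already
# carries an env_temp column — confirm before removing. If it is ever re-enabled, prefer
# df_origin['env_temp'] = df_origin['time'].map(d1) over the chained assignment below.
# for i in range(df_origin.shape[0]):
#     t = df_origin.iloc[i, :]['time']
#     df_origin['env_temp'][i] = d1[t]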

In [18]:
# Rows with a missing env_temp; the empty result confirms there are none.
df_origin[df_origin['env_temp'].isna()]

Unnamed: 0,time,height,weight,gender,bmi,age,env_temp,rh,heart_rate,stress_level,skin_temp,eda,TC,TS,Clo,Act


In [19]:
# Drop the timestamp column; the mean imputer applied next needs numeric input.
# errors='ignore' keeps this cell re-runnable: df_origin is reassigned here, so a
# second run would otherwise raise KeyError because 'time' is already gone.
df_origin = df_origin.drop(columns=['time'], errors='ignore')

In [20]:
# Replace every missing cell with the mean of its column.
# NOTE(review): the means are computed from this evaluation file itself, not from
# the data the models were trained on — confirm this matches the training pipeline.
imp = SimpleImputer(strategy='mean')
imputed_values = imp.fit_transform(df_origin)

# fit_transform returns a bare array, so re-attach the original column labels.
df_imp = pd.DataFrame(imputed_values, columns=df_origin.columns)

In [21]:
# Preview the imputed table; cells that were NaN now hold their column mean.
df_imp

Unnamed: 0,height,weight,gender,bmi,age,env_temp,rh,heart_rate,stress_level,skin_temp,eda,TC,TS,Clo,Act
0,1.62,52.0,1.0,19.814053,26.0,72.869,36.837,97.0,58.000000,31.065,8.696613,4.0,5.0,0.61,1.0
1,1.62,52.0,1.0,19.814053,26.0,76.636,39.856,119.0,54.890244,31.588,14.716620,3.0,6.0,0.61,4.0
2,1.62,52.0,1.0,19.814053,26.0,77.029,40.446,122.0,54.890244,31.772,13.199184,3.0,6.0,0.61,4.0
3,1.62,52.0,1.0,19.814053,26.0,74.338,45.924,106.0,54.890244,31.641,7.708510,4.0,3.0,0.74,1.0
4,1.62,52.0,1.0,19.814053,26.0,72.912,45.179,96.5,54.890244,31.274,5.524036,3.0,2.0,0.74,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,1.77,90.0,-1.0,28.727377,28.0,76.464,15.000,121.5,54.890244,31.955,11.877829,2.0,7.0,0.74,4.0
74,1.77,90.0,-1.0,28.727377,28.0,73.429,16.270,104.0,54.890244,31.693,13.723230,4.0,4.0,0.61,1.0
75,1.77,90.0,-1.0,28.727377,28.0,71.533,15.000,106.0,86.000000,32.474,12.256653,5.0,4.0,0.61,1.0
76,1.77,90.0,-1.0,28.727377,28.0,70.459,25.480,120.0,54.890244,31.588,11.623549,5.0,3.0,1.00,4.0


In [22]:
# Evaluation subset for the min model: rows with TC == 4 and TS of 2 or 3.
df_min = df_imp[(df_imp['TC'] == 4) & df_imp['TS'].isin([2, 3])]
display(df_min.shape)

(15, 15)

In [23]:
# Evaluation subset for the max model: rows with TC == 4 and TS of 5 or 6.
df_max = df_imp[(df_imp['TC'] == 4) & df_imp['TS'].isin([5, 6])]
display(df_max.shape)

(5, 15)

## examine min

In [24]:
# Rows selected for the min model (TC == 4 and TS in {2, 3}).
df_min

Unnamed: 0,height,weight,gender,bmi,age,env_temp,rh,heart_rate,stress_level,skin_temp,eda,TC,TS,Clo,Act
3,1.62,52.0,1.0,19.814053,26.0,74.338,45.924,106.0,54.890244,31.641,7.70851,4.0,3.0,0.74,1.0
8,1.62,52.0,1.0,19.814053,26.0,75.764,38.53,91.0,48.0,32.111,4.639537,4.0,3.0,0.61,1.0
11,1.62,52.0,1.0,19.814053,26.0,70.459,35.009,94.0,51.0,31.902,3.313086,4.0,3.0,1.0,1.0
21,1.64,54.0,1.0,20.077335,24.0,76.464,32.449,85.0,17.0,32.63,6.399591,4.0,3.0,0.61,1.0
22,1.64,54.0,1.0,20.077335,24.0,71.661,31.757,98.22,54.0,32.267,6.153355,4.0,3.0,0.74,1.0
23,1.64,54.0,1.0,20.077335,24.0,71.533,29.219,94.0,73.0,31.85,5.418105,4.0,2.0,0.74,1.0
31,1.8,60.0,-1.0,18.518519,24.0,72.05,26.01,91.0,54.890244,32.435,6.234805,4.0,3.0,0.61,1.0
33,1.8,60.0,-1.0,18.518519,24.0,76.764,18.4,99.0,54.890244,33.199,6.666966,4.0,3.0,0.74,1.0
36,1.8,60.0,-1.0,18.518519,24.0,71.533,15.0,87.0,64.0,33.585,4.503452,4.0,2.0,1.0,1.0
42,1.68,62.0,-1.0,21.96712,29.0,74.338,33.703,74.0,6.0,30.933,2.674994,4.0,3.0,0.61,2.6


In [25]:
# List the available columns before choosing the feature subset.
df_min.columns

Index(['height', 'weight', 'gender', 'bmi', 'age', 'env_temp', 'rh',
       'heart_rate', 'stress_level', 'skin_temp', 'eda', 'TC', 'TS', 'Clo',
       'Act'],
      dtype='object')

In [26]:
# Ground truth for the min model: the recorded env_temp of each selected row.
y_min = df_min['env_temp']

In [27]:
# Model inputs: every column except the target (env_temp) and the TC/TS columns
# that were used to select the rows.
# NOTE(review): presumably this must match the feature set and order used when
# rfr_min was trained — verify against the training notebook.
X_min = df_min.loc[:, [
    'height', 'weight', 'gender', 'bmi', 'age', 'rh',
    'heart_rate', 'stress_level', 'skin_temp', 'eda',
    'Clo', 'Act',
]]

In [28]:
# Predict env_temp for the selected rows with the loaded min model.
y_pred_min = rfr_min.predict(X_min)

In [29]:
# Raw predictions, one value per row of X_min.
y_pred_min

array([74.03851512, 74.75113226, 74.06258   , 75.81583714, 75.38810429,
       74.86279714, 76.49831143, 75.95614143, 75.34564571, 76.05872286,
       76.03790429, 75.09631   , 73.84937143, 74.97411857, 74.12288286])

In [30]:
# Side-by-side comparison; the frame keeps y_min's index, i.e. the original row labels.
pd.DataFrame({'Actual': y_min, 'Predicted': y_pred_min})

Unnamed: 0,Actual,Predicted
3,74.338,74.038515
8,75.764,74.751132
11,70.459,74.06258
21,76.464,75.815837
22,71.661,75.388104
23,71.533,74.862797
31,72.05,76.498311
33,76.764,75.956141
36,71.533,75.345646
42,74.338,76.058723


In [31]:
# Mean squared error between the recorded env_temp and the min-model predictions
# (in squared env_temp units; lower is better).
mean_squared_error(y_min, y_pred_min)

9.773204191985233

In [32]:
# R² score: 1.0 is a perfect fit, 0.0 matches always predicting the mean of y_min,
# and a negative value means the model does worse than that constant baseline.
r2_score(y_min, y_pred_min)

-0.9780460462611411

## examine max

In [33]:
# Rows selected for the max model (TC == 4 and TS in {5, 6}).
df_max

Unnamed: 0,height,weight,gender,bmi,age,env_temp,rh,heart_rate,stress_level,skin_temp,eda,TC,TS,Clo,Act
0,1.62,52.0,1.0,19.814053,26.0,72.869,36.837,97.0,58.0,31.065,8.696613,4.0,5.0,0.61,1.0
7,1.62,52.0,1.0,19.814053,26.0,78.164,38.612,103.0,62.0,32.63,5.889015,4.0,5.0,0.61,1.0
15,1.64,54.0,1.0,20.077335,24.0,77.029,35.534,115.0,54.890244,31.3,3.810682,4.0,5.0,0.61,2.6
57,1.71,64.0,-1.0,21.887076,27.0,72.05,29.694,85.0,54.890244,32.993,3.396911,4.0,5.0,0.74,2.6
60,1.71,64.0,-1.0,21.887076,27.0,76.464,32.829,81.0,54.890244,32.396,3.558766,4.0,5.0,1.0,1.0


In [34]:
# List the available columns before choosing the feature subset.
df_max.columns

Index(['height', 'weight', 'gender', 'bmi', 'age', 'env_temp', 'rh',
       'heart_rate', 'stress_level', 'skin_temp', 'eda', 'TC', 'TS', 'Clo',
       'Act'],
      dtype='object')

In [35]:
# Ground truth for the max model: the recorded env_temp of each selected row.
y_max = df_max['env_temp']

In [36]:
# Model inputs: every column except the target (env_temp) and the TC/TS columns
# that were used to select the rows.
# NOTE(review): presumably this must match the feature set and order used when
# rfr_max was trained — verify against the training notebook.
X_max = df_max.loc[:, [
    'height', 'weight', 'gender', 'bmi', 'age', 'rh',
    'heart_rate', 'stress_level', 'skin_temp', 'eda',
    'Clo', 'Act',
]]

In [37]:
# Predict env_temp for the selected rows with the loaded max model.
y_pred_max = rfr_max.predict(X_max)

In [38]:
# Raw predictions, one value per row of X_max.
y_pred_max

array([79.09552   , 79.35832143, 79.38585286, 81.90947143, 81.79568714])

In [39]:
# Side-by-side comparison; the frame keeps y_max's index, i.e. the original row labels.
pd.DataFrame({'Actual': y_max, 'Predicted': y_pred_max})

Unnamed: 0,Actual,Predicted
0,72.869,79.09552
7,78.164,79.358321
15,77.029,79.385853
57,72.05,81.909471
60,76.464,81.795687


In [40]:
# Mean squared error between the recorded env_temp and the max-model predictions
# (in squared env_temp units; lower is better).
mean_squared_error(y_max, y_pred_max)

34.277355003098535

In [41]:
# R² score: 1.0 is a perfect fit, 0.0 matches always predicting the mean of y_max,
# and a negative value means the model does worse than that constant baseline.
r2_score(y_max, y_pred_max)

-4.90623531866752