In [None]:
'''
@brief  Legrest angle recommendation
@author Byunghun Hwang <bh.hwang@iae.re.kr>
'''

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Locations of the configuration table and of the data files it refers to.
CONFIGURATION_FILE_PATH = "./data/train/data_config.csv"
DATASET_PATH = "./data/train/"

# Configuration table: first row is the header, first column becomes the index.
data_config = pd.read_csv(CONFIGURATION_FILE_PATH, header=0, index_col=0)

# Loaded frames, keyed by the configuration index.
fsr_dataframe = {}
seat_dataframe = {}

for idx in data_config.index:
    # The configuration stores file names relative to DATASET_PATH.
    fsr_filepath = DATASET_PATH + data_config.loc[idx, "fsr_matrix_1d_datafile"]
    seat_filepath = DATASET_PATH + data_config.loc[idx, "seat_datafile"]
    print(idx, ") read data files : ", fsr_filepath, ",", seat_filepath)

    # FSR matrix file: keep the first 162 columns, then discard the
    # 'Measurement Time (sec)' column.
    fsr_raw = pd.read_csv(fsr_filepath, header=0, index_col=False)
    fsr_dataframe[idx] = fsr_raw.iloc[:, 0:162].drop(columns=['Measurement Time (sec)'])

    # Seat file: keep everything except the 'Measurement time' column.
    seat_raw = pd.read_csv(seat_filepath, header=0, index_col=False)
    seat_dataframe[idx] = seat_raw.drop(columns=['Measurement time'])

# Per-recording segments, keyed by the configuration index.
fsr_dataframe_standard_segment = {}
fsr_dataframe_relax_segment = {}
seat_loadcell_dataframe_standard_segment = {}
seat_loadcell_dataframe_relax_segment = {}


def _mtime_window(frame, start, end):
    """Return the rows of ``frame`` whose 'mtime' lies in the closed interval [start, end]."""
    return frame[frame['mtime'].between(start, end)]


for idx in data_config.index:
    # Start/end 'mtime' values of the standard and relax windows for this row.
    mtime = data_config.loc[idx, ['standard_s_mtime', "standard_e_mtime", "relax_s_mtime", "relax_e_mtime"]]
    standard_bounds = (mtime['standard_s_mtime'], mtime['standard_e_mtime'])
    relax_bounds = (mtime['relax_s_mtime'], mtime['relax_e_mtime'])

    # seat loadcell segmentation
    seat_frame = seat_dataframe[idx]
    seat_loadcell_dataframe_standard_segment[idx] = _mtime_window(seat_frame, *standard_bounds)
    seat_loadcell_dataframe_relax_segment[idx] = _mtime_window(seat_frame, *relax_bounds)

    # fsr matrix segmentation
    fsr_frame = fsr_dataframe[idx]
    fsr_dataframe_standard_segment[idx] = _mtime_window(fsr_frame, *standard_bounds)
    fsr_dataframe_relax_segment[idx] = _mtime_window(fsr_frame, *relax_bounds)

    # Report how many rows each window captured.
    print("FSR Segments@Standard size : ", len(fsr_dataframe_standard_segment[idx]), ", FSR Segments@Relax size : ", len(fsr_dataframe_relax_segment[idx]))
    print("Seat Segments@Standard size : ", len(seat_loadcell_dataframe_standard_segment[idx]), ", Seat Segments@Relax size : ", len(seat_loadcell_dataframe_relax_segment[idx]))

In [None]:
'''
Basic descriptive statistics
'''

# Participant age: histogram of the 'user_age' column, followed by a
# kernel-density estimate that pandas draws on its own figure.
source = data_config[['user_age']]

_, age_axes = plt.subplots()
age_axes.hist(source)
age_axes.set_title('Age')
age_axes.set_xlabel('age')
age_axes.set_ylabel('count')
age_axes.grid()

source.plot.kde()
plt.show()

In [None]:
'''
Box-and-whisker plots and one-way ANOVA of the best-fit angle against age, height, weight, gender, BMI and RMR
'''
import scipy.stats as stats
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm


# Best-fit angle (standard mode) versus participant attributes.
# For each attribute the participants are split into groups, the groups are
# drawn as a box-and-whisker plot, and the group means are compared with a
# one-way ANOVA (scipy.stats.f_oneway).


def _split_by_ranges(frame, key, ranges, value='bestfit_angle_standard'):
    """Return one Series of ``frame[value]`` per range of ``frame[key]``.

    Each entry of ``ranges`` is ``(low, high)`` and selects the rows with
    ``low <= frame[key] < high``; ``low=None`` means no lower bound.
    The result has the same order and length as ``ranges``; a range that
    matches no row yields an empty Series.
    """
    selected = []
    for low, high in ranges:
        mask = frame[key] < high
        if low is not None:
            mask = mask & (frame[key] >= low)
        selected.append(frame.loc[mask, value])
    return selected


def _range_labels(ranges):
    """Build one tick label per ``(low, high)`` range, e.g. '20-30' or '<18.5'."""
    return ['<{0}'.format(high) if low is None else '{0}-{1}'.format(low, high)
            for low, high in ranges]


def _boxplot_and_anova(title, labels, groups):
    """Draw a labelled box plot of ``groups`` and print their one-way ANOVA.

    Empty groups are still drawn (as empty boxes) but are left out of the
    ANOVA, because f_oneway cannot produce a result when an input has no
    observations. If fewer than two non-empty groups remain, the test is
    skipped and NaN is reported for both statistics.

    Returns the tuple ``(F_statistic, pVal)``.
    """
    plt.figure()
    plt.title(title)
    plt.boxplot(groups)
    # boxplot places the boxes at x = 1..n; name them so the groups can be told apart
    plt.xticks(range(1, len(groups) + 1), labels)
    plt.show()

    populated = [group for group in groups if len(group) > 0]
    if len(populated) >= 2:
        F_statistic, pVal = stats.f_oneway(*populated)
    else:
        F_statistic, pVal = float('nan'), float('nan')
    print('Altman 910 oneway ANOVA : F={0:.1f}, p={1:.5f}'.format(F_statistic, pVal))
    return F_statistic, pVal


# age bestfit standard
age_ranges = [(20, 30), (30, 40), (40, 50)]
source = data_config.loc[:, ['user_age', 'bestfit_angle_standard']]
age_20s, age_30s, age_40s = _split_by_ranges(source, 'user_age', age_ranges)
F_statistic, pVal = _boxplot_and_anova("Age-Bestfit (@standard mode)",
                                       _range_labels(age_ranges),
                                       [age_20s, age_30s, age_40s])

# height bestfit standard
height_ranges = [(160, 170), (170, 180), (180, 190)]
source = data_config.loc[:, ['user_height', 'bestfit_angle_standard']]
height_160, height_170, height_180 = _split_by_ranges(source, 'user_height', height_ranges)
F_statistic, pVal = _boxplot_and_anova("Height-Bestfit (@standard mode)",
                                       _range_labels(height_ranges),
                                       [height_160, height_170, height_180])

# weight bestfit standard
weight_ranges = [(50, 60), (60, 70), (70, 80), (80, 90)]
source = data_config.loc[:, ['user_weight', 'bestfit_angle_standard']]
weight_50, weight_60, weight_70, weight_80 = _split_by_ranges(source, 'user_weight', weight_ranges)
F_statistic, pVal = _boxplot_and_anova("Weight-Bestfit (@standard mode)",
                                       _range_labels(weight_ranges),
                                       [weight_50, weight_60, weight_70, weight_80])

# gender bestfit standard (user_gender is compared against 0 for female, 1 for male)
source = data_config.loc[:, ['user_gender', 'bestfit_angle_standard']]
female = source.loc[(source['user_gender'] == 0), 'bestfit_angle_standard']
male = source.loc[(source['user_gender'] == 1), 'bestfit_angle_standard']
F_statistic, pVal = _boxplot_and_anova("Gender-Bestfit (@standard mode)",
                                       ['male', 'female'],
                                       [male, female])

# BMI bestfit standard
bmi_ranges = [(None, 18.5), (18.5, 22.9), (22.9, 24.9), (24.9, 29.9), (29.9, 34.9)]
source = data_config.loc[:, ['user_height', 'user_weight', 'bestfit_angle_standard']]
# weight divided by the square of (height / 100)
bmi = source['user_weight']/(source['user_height']/100*source['user_height']/100)
# assign() builds a new frame, so `source` is not modified as a side effect
bmi_source = source.assign(bmi=bmi)
group1, group2, group3, group4, group5 = _split_by_ranges(bmi_source, 'bmi', bmi_ranges)
F_statistic, pVal = _boxplot_and_anova("BMI-Bestfit (@standard mode)",
                                       _range_labels(bmi_ranges),
                                       [group1, group2, group3, group4, group5])

# RMR bestfit standard
rmr_ranges = [(1200, 1300), (1300, 1400), (1400, 1500), (1500, 1600), (1600, 1700)]
source = data_config.loc[:, ['user_height', 'user_weight', 'user_age', 'user_gender', 'bestfit_angle_standard']]
# NOTE(review): the same coefficients are applied to every participant and
# 'user_gender' is selected but never used here. These look like the
# Harris-Benedict coefficients for men; confirm whether a separate formula
# should be used for female participants.
rmr = 66.47+(13.75*source['user_weight'])+(5*source['user_height'])-(6.76*source['user_age'])
rmr_source = source.assign(rmr=rmr)
# No group below 1200 is formed; the groups keep their original names group2..group6.
group2, group3, group4, group5, group6 = _split_by_ranges(rmr_source, 'rmr', rmr_ranges)
F_statistic, pVal = _boxplot_and_anova("RMR-Bestfit (@standard mode)",
                                       _range_labels(rmr_ranges),
                                       [group2, group3, group4, group5, group6])



In [None]:
'''
using statsmodels
'''
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Box plot of the best-fit angle grouped by each distinct weight value.
source = data_config[['user_height', 'user_weight', 'user_age', 'bestfit_angle_standard']]
source.boxplot(column='bestfit_angle_standard', by='user_weight', grid=False)

# Ordinary least squares fit; C(user_age) enters age as a categorical factor.
formula = 'bestfit_angle_standard ~ user_height + user_weight + C(user_age)'
lm = smf.ols(formula, data=source).fit()

# ANOVA table for the fitted model, then the full regression summary.
print(sm.stats.anova_lm(lm))
print(lm.summary())


In [None]:
'''
n-way anova
'''


In [None]:
'''
correlation analysis
'''



In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

# Synthetic regression problem: 4 features, 2 of them informative, fixed seed.
X, y = make_regression(
    n_features=4,
    n_informative=2,
    random_state=0,
    shuffle=False,
)

# Depth-limited random forest fitted on the synthetic data.
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X, y)

# Predict for a single all-zero sample.
zero_sample = [[0, 0, 0, 0]]
print(regr.predict(zero_sample))