-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
4,310 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#------------------------------------------------------ | ||
# © 2020 Francisco M. Garcia-Moreno | ||
#------------------------------------------------------ | ||
|
||
source("utils/utils.R") | ||
source("utils/predict_fun.R") | ||
source("utils/multiplot.R") | ||
source("utils/sensor_utils.R") | ||
Sys.setlocale("LC_ALL", "en_US.UTF-8") | ||
|
||
#------------------------------------------------------ | ||
# Extract features and generate dataset windowed | ||
#------------------------------------------------------ | ||
|
||
# Load questionnaire dataset (where we have the target label CLASS - frail, pre-frail, non-frail) | ||
questionnaires_frailty<-read.csv("Datasets/questionnaires/preprocessed/all_questionnaires_frailty.csv", row.names = "X") | ||
questionnaires_frailty <- questionnaires_frailty[questionnaires_frailty$Edad>=65,] | ||
subjects_excluded <- rownames(questionnaires_frailty[questionnaires_frailty$Edad<65,]) | ||
|
||
sampling_rates<-c(0.1, 0.01, 0.06, 0.08, 0.05, 0.15, 0.25, 0.5) #desired sampling rates | ||
windows_sizes_in_sec<-c(0.5, 1, 1.5, 2, 2.5, 3) #desired window sizes | ||
|
||
# Generate the wearable dataset | ||
complete_dataset_extracting_features(subjects_excluded, devices=c("smartwatch_empatica"), | ||
split_events = T, # split_events: True if we have a column "event" where we have labelled some known events | ||
windows_sizes_in_sec = c(1, 1.5, 2, 2.5, 3), | ||
sampling_rates = ) | ||
|
||
#------------------------------------------------------ | ||
# Load generated dataset (whitout event column) | ||
#------------------------------------------------------ | ||
dfs <- load_complete_dataset_with_features_extracted(questionnaires_frailty, devices=c( | ||
"smartwatch_empatica" | ||
), | ||
windows_sizes_in_sec = windows_sizes_in_sec, | ||
sampling_rates = sampling_rates, | ||
split_events = split_events, | ||
y_col_name = "fried_frailty_risk", new_cols=c(), | ||
del_cols=c( | ||
built_col_name_of_basic_stats(c("acc", "gyr", "gra", "linacc"), c("corr.xy", "corr.xz", "corr.yz")) | ||
# , built_col_name_of_basic_stats(c("pres", "lig", "noise"), c(statisticsID(), "energy")) | ||
, built_col_name_of_basic_stats(c("pres"), c(statisticsID(), "energy")) | ||
, built_col_name_of_basic_stats(c("lig"), c(statisticsID(), "energy")) | ||
, built_col_name_of_basic_stats(c("noise"), c(statisticsID(), "energy")) | ||
|
||
, built_col_name_of_basic_stats(c("linacc.x", "linacc.y", "linacc.z"), c(statisticsID(), "energy")) | ||
, built_col_name_of_basic_stats(c("gra.x", "gra.y", "gra.z"), c(statisticsID(), "energy")) | ||
# built_col_name_of_basic_stats(SMARTWATCH_TRIAXIAL_SENSORS_COL_NAMES, c(statisticsID(), "energy")), | ||
# , built_col_name_of_basic_stats(c("eda"), c(statisticsID(), "energy")) | ||
# , built_col_name_of_basic_stats(c("temp"), c(statisticsID(), "energy")) | ||
# , built_col_name_of_basic_stats(c("hr"), c(statisticsID(), "energy")) | ||
# , built_col_name_of_basic_stats(c("gyr.x", "gyr.y", "gyr.z"), c(statisticsID(), "energy")) | ||
# | ||
# , built_col_name_of_basic_stats(c("acc.x", "acc.y", "acc.z"), c(statisticsID(), "energy")) | ||
# , built_col_name_of_basic_stats(c("acc.x"), c(statisticsID(), "energy")) | ||
# , built_col_name_of_basic_stats(c("acc.y"), c(statisticsID(), "energy")) | ||
# , built_col_name_of_basic_stats(c("acc.z"), c(statisticsID(), "energy")) | ||
|
||
|
||
|
||
|
||
) | ||
) | ||
|
||
|
||
#------------------------------------------------------ | ||
# Example: Prepare only data for EMPATICA E4 device | ||
# for the different sampling-rates and window-sizes configurations | ||
#------------------------------------------------------ | ||
dfs_smart_emp <- dfs$smartwatch_empatica | ||
|
||
|
||
#Sampling rate = 1/0.1 = 10Hz | ||
df_smart_emp_sr0.1_w0.5 <- dfs_smart_emp$`0.1S`$`0.5s` | ||
df_smart_emp_sr0.1_w1 <- dfs_smart_emp$`0.1S`$`1s` | ||
df_smart_emp_sr0.1_w1.5 <- dfs_smart_emp$`0.1S`$`1.5s` | ||
df_smart_emp_sr0.1_w2 <- dfs_smart_emp$`0.1S`$`2s` | ||
df_smart_emp_sr0.1_w2.5 <- dfs_smart_emp$`0.1S`$`2.5s` | ||
df_smart_emp_sr0.1_w3 <- dfs_smart_emp$`0.1S`$`3s` | ||
|
||
#Sampling rate = 1/0.06 = ~15Hz | ||
df_smart_emp_sr0.06_w0.5 <- dfs_smart_emp$`0.06S`$`0.5s` | ||
df_smart_emp_sr0.06_w1 <- dfs_smart_emp$`0.06S`$`1s` | ||
df_smart_emp_sr0.06_w1.5 <- dfs_smart_emp$`0.06S`$`1.5s` | ||
df_smart_emp_sr0.06_w2 <- dfs_smart_emp$`0.06S`$`2s` | ||
df_smart_emp_sr0.06_w2.5 <- dfs_smart_emp$`0.06S`$`2.5s` | ||
df_smart_emp_sr0.06_w3 <- dfs_smart_emp$`0.06S`$`3s` | ||
|
||
#Sampling rate = 1/0.04 = 25Hz | ||
df_smart_emp_sr0.04_w0.5 <- dfs_smart_emp$`0.04S`$`0.5s` | ||
df_smart_emp_sr0.04_w1 <- dfs_smart_emp$`0.04S`$`1s` | ||
df_smart_emp_sr0.04_w1.5 <- dfs_smart_emp$`0.04S`$`1.5s` | ||
df_smart_emp_sr0.04_w2 <- dfs_smart_emp$`0.04S`$`2s` | ||
df_smart_emp_sr0.04_w2.5 <- dfs_smart_emp$`0.04S`$`2.5s` | ||
df_smart_emp_sr0.04_w3 <- dfs_smart_emp$`0.04S`$`3s` | ||
|
||
#Sampling rate = 1/0.02 = 50Hz | ||
df_smart_emp_sr0.02_w0.5 <- dfs_smart_emp$`0.02S`$`0.5s` | ||
df_smart_emp_sr0.02_w1 <- dfs_smart_emp$`0.02S`$`1s` | ||
df_smart_emp_sr0.02_w1.5 <- dfs_smart_emp$`0.02S`$`1.5s` | ||
df_smart_emp_sr0.02_w2 <- dfs_smart_emp$`0.02S`$`2s` | ||
df_smart_emp_sr0.02_w2.5 <- dfs_smart_emp$`0.02S`$`2.5s` | ||
df_smart_emp_sr0.02_w3 <- dfs_smart_emp$`0.02S`$`3s` | ||
|
||
#Sampling rate = 1/0.01 = 100Hz | ||
df_smart_emp_sr0.01_w0.5 <- dfs_smart_emp$`0.01S`$`0.5s` | ||
df_smart_emp_sr0.01_w1 <- dfs_smart_emp$`0.01S`$`1s` | ||
df_smart_emp_sr0.01_w1.5 <- dfs_smart_emp$`0.01S`$`1.5s` | ||
df_smart_emp_sr0.01_w2 <- dfs_smart_emp$`0.01S`$`2s` | ||
df_smart_emp_sr0.01_w2.5 <- dfs_smart_emp$`0.01S`$`2.5s` | ||
df_smart_emp_sr0.01_w3 <- dfs_smart_emp$`0.01S`$`3s` | ||
|
||
|
||
#------------------------------------------------------ | ||
# For testing the library: | ||
# We can select a single configuration: | ||
# - sampling rate = 0.04 | ||
# - window size = 0.5 | ||
#------------------------------------------------------ | ||
data <- df_smart_emp_sr0.04_w0.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#------------------------------------------------------ | ||
# © 2020 Francisco M. Garcia-Moreno | ||
#------------------------------------------------------ | ||
|
||
source("utils/utils.R") | ||
source("utils/predict_fun.R") | ||
source("utils/multiplot.R") | ||
source("utils/sensor_utils.R") | ||
Sys.setlocale("LC_ALL", "en_US.UTF-8") | ||
|
||
#------------------------------------------------------ | ||
# Hyperparameter tuning example | ||
#------------------------------------------------------ | ||
set.seed(33) | ||
if (Sys.getenv("JAVA_HOME")!="") Sys.setenv(JAVA_HOME="") | ||
|
||
|
||
#------------------------------------------------------ | ||
# For testing purposes we will use the data generated | ||
# from /examples/extract-features.R | ||
# We can select a single configuration: | ||
# - sampling rate = 0.04 | ||
# - window size = 0.5 | ||
#------------------------------------------------------ | ||
data <- df_smart_emp_sr0.04_w0.5 | ||
|
||
#we have to pass a validation data split | ||
parts <- createDataPartition(data$y, p = 0.8, list = F) | ||
validation_data <- data[-parts, ] | ||
train_data <- data[parts, ] | ||
|
||
#------------------------------------------------------ | ||
# Examples of hyperparameter tuning: | ||
#------------------------------------------------------ | ||
|
||
|
||
#++++++ | ||
#sr: 0.04; wz: 0.5 | ||
# we test 3 models: Random forest ("rf"), SVM ("svm") and k-nn ("knn") | ||
# evaluation metric = f1-score ("f1"); 10-fold (non-repeated) | ||
#++++++ | ||
f1_1_1 <- search_features_selection(train_data, method="rf", model="rf", minfeatTune=1, metric="f1", search_method="rfe", folds=10, reps=1, verboseOnlyFeatSel=T, verbose=F) | ||
f1_1_2 <- search_features_selection(train_data, method="rf", model="svm", minfeatTune=1, metric="f1", search_method="rfe", folds=10, reps=1, verboseOnlyFeatSel=T, verbose=F) | ||
f1_1_3 <- search_features_selection(train_data, method="rf", model="knn", minfeatTune=1, metric="f1", search_method="rfe", folds=10, reps=1, verboseOnlyFeatSel=T, verbose=F) | ||
|
||
#++++++ | ||
# plotting results | ||
#++++++ | ||
plot_features_vs_acc_list_features(list(f1_1_1$all, f1_1_2$all, f1_1_3$all), metric="f1", tech=c("RF", "SVM", "k-NN"), lineBest=F, interv = 8) | ||
plot_features_vs_acc_list_features(list(f1_1_3$all), metric="f1", tech=c("k-NN"), lineBest=F, interv = 8) | ||
ggsave("../../../Datasets") | ||
f1_1_3$all[f1_1_3$total_features] | ||
f1_1_3$all[25] | ||
length(f1_1_3$all) | ||
model_knn<-model.frame(f1_1_3$features,data) | ||
colnames(model_knn) | ||
|
||
|
||
#------------------------------------------------------ | ||
# Validation | ||
#------------------------------------------------------ | ||
features <- f1_1_3 | ||
|
||
res <- run_cv_prediction(features$features, train_data, validation_data, "knn", features$bestParam) | ||
validation_met <- get_tpr_tnr_auc_in_multiclass(unique(train_data$y), res$prediction, validation_data$y) #get get_metrics(res$prediction, validation_data[,ncol(validation_data)], posClass = 1) #get_tpr_tnr_auc_in_multiclass(unique(train$y), prediction, test_labels, verbose=verbose) | ||
cat(paste("\n >> VALIDATION Accuracy:", validation_met$acc, ". Precision:", validation_met$prec, ". Recall:", validation_met$rec, ". F1-Score:", validation_met$f, ". TPR:", validation_met$tpr, ". TNR:", validation_met$tnr)) | ||
|
||
f1_1_3$all[f1_1_3$total_features] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#------------------------------------------------------ | ||
# © 2020 Francisco M. Garcia-Moreno | ||
#------------------------------------------------------ | ||
|
||
source("utils/utils.R") | ||
source("utils/predict_fun.R") | ||
source("utils/multiplot.R") | ||
source("utils/sensor_utils.R") | ||
Sys.setlocale("LC_ALL", "en_US.UTF-8") | ||
|
||
#------------------------------------------------------ | ||
# Preprocessing example | ||
#------------------------------------------------------ | ||
set.seed(33) | ||
if (Sys.getenv("JAVA_HOME")!="") Sys.setenv(JAVA_HOME="") | ||
|
||
|
||
#------------------------------------------------------ | ||
# For testing purposes we will use the data generated | ||
# from /examples/extract-features.R | ||
# We can select a single configuration: | ||
# - sampling rate = 0.04 | ||
# - window size = 0.5 | ||
#------------------------------------------------------ | ||
data <- df_smart_emp_sr0.04_w0.5 | ||
|
||
#we have to pass a validation data split | ||
parts <- createDataPartition(data$y, p = 0.8, list = F) | ||
validation_data <- data[-parts, ] | ||
train_data <- data[parts, ] | ||
|
||
#------------------------------------------------------ | ||
# Examples of preprocessing pipelines: | ||
#------------------------------------------------------ | ||
|
||
|
||
# -- Feature selection: Random forest ("rf") | ||
# -- Classifier: K-Nearest Neighbors ("knn") | ||
# -- Evaluation metric: F1-Score ("f1") | ||
# Preprocessing methods in this order: | ||
# -- 1) impute missing values with 0 (na=0) | ||
# -- 2) smote for balancing data (up-60; down-300) | ||
prep_models <- preprocess_data_several_methods(train_data, validation_data, | ||
fs_method="rf", model="knn", metric = "f1", | ||
prep_methods=list(na=0, smote=c(60, 300)), | ||
folds=5, reps=1, verbose=T) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#------------------------------------------------------ | ||
# © 2020 Francisco M. Garcia-Moreno | ||
#------------------------------------------------------ | ||
#R Version 4.0.1: https://cloud.r-project.org/bin/macosx/ | ||
#Compiling R on Mac: https://mac.r-project.org/tools/ | ||
|
||
install.packages("prettyR") | ||
install.packages("parallelMap") | ||
install.packages("psych") | ||
install.packages("caret", type="source") | ||
install.packages("e1071") | ||
install.packages("ggplot2") | ||
install.packages("DMwR") | ||
install.packages("ROCR") | ||
install.packages("PRROC") | ||
install.packages("neuralnet") | ||
install.packages("naivebayes") | ||
install.packages("RWeka") | ||
install.packages("FSelector") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#------------------------------------------------------ | ||
# © 2020 Francisco M. Garcia-Moreno | ||
#------------------------------------------------------ | ||
|
||
calculate.accuracy <- function(predictions, ref.labels) { | ||
return(length(which(predictions == ref.labels)) / length(ref.labels)) | ||
} | ||
|
||
calculate.w.accuracy <- function(predictions, ref.labels, weights) { | ||
lvls <- levels(ref.labels) | ||
if (length(weights) != length(lvls)) { | ||
stop("Number of weights should agree with the number of classes.") | ||
} | ||
if (sum(weights) != 1) { | ||
stop("Weights do not sum to 1") | ||
} | ||
accs <- lapply(lvls, function(x) { | ||
idx <- which(ref.labels == x) | ||
return(calculate.accuracy(predictions[idx], ref.labels[idx])) | ||
}) | ||
acc <- mean(unlist(accs)) | ||
return(acc) | ||
} | ||
|
||
calculate_multiclass_accuracy <- function(predictions, ref.labels){ | ||
overall_acc <- calculate.accuracy(predictions, ref.labels) | ||
weights <- rep(1 / length(levels(ref.labels)), length(levels(ref.labels))) | ||
w.acc <- calculate.w.accuracy(predictions, ref.labels, weights) | ||
return(list(overall_acc=overall_acc, weighted_acc=w.acc)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# | ||
# FILENAME: multiplot.R # | ||
# | ||
|
||
# Multiple plot function. | ||
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) | ||
# - cols: Number of columns in layout | ||
# - layout: A matrix specifying the layout. If present, 'cols' is ignored. | ||
# | ||
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), | ||
# then plot 1 will go in the upper left, 2 will go in the upper right, and | ||
# 3 will go all the way across the bottom. | ||
# | ||
# Source: http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/ | ||
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { | ||
library(grid) | ||
|
||
# Make a list from the ... arguments and plotlist | ||
plots <- c(list(...), plotlist) | ||
numPlots = length(plots) | ||
|
||
# If layout is NULL, then use 'cols' to determine layout | ||
if (is.null(layout)) { | ||
# Make the panel | ||
# ncol: Number of columns of plots | ||
# nrow: Number of rows needed, calculated from # of cols | ||
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), | ||
ncol = cols, nrow = ceiling(numPlots/cols)) | ||
} | ||
if (numPlots==1) { | ||
print(plots[[1]]) | ||
} else { | ||
# Set up the page | ||
grid.newpage() | ||
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) | ||
|
||
# Make each plot, in the correct location | ||
for (i in 1:numPlots) { | ||
# Get the i,j matrix positions of the regions that contain this subplot | ||
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) | ||
|
||
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, | ||
layout.pos.col = matchidx$col)) | ||
} | ||
} | ||
} |
Oops, something went wrong.