Skip to content

Commit

Permalink
upload our package
Browse files Browse the repository at this point in the history
  • Loading branch information
frangam committed Mar 16, 2023
1 parent 44001ef commit 1ea090d
Show file tree
Hide file tree
Showing 9 changed files with 4,310 additions and 0 deletions.
120 changes: 120 additions & 0 deletions examples/extract-features.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#------------------------------------------------------
# © 2020 Francisco M. Garcia-Moreno
#------------------------------------------------------

source("utils/utils.R")
source("utils/predict_fun.R")
source("utils/multiplot.R")
source("utils/sensor_utils.R")
Sys.setlocale("LC_ALL", "en_US.UTF-8")

#------------------------------------------------------
# Extract features and generate dataset windowed
#------------------------------------------------------

# Load questionnaire dataset (where we have the target label CLASS - frail, pre-frail, non-frail)
questionnaires_frailty<-read.csv("Datasets/questionnaires/preprocessed/all_questionnaires_frailty.csv", row.names = "X")
questionnaires_frailty <- questionnaires_frailty[questionnaires_frailty$Edad>=65,]
subjects_excluded <- rownames(questionnaires_frailty[questionnaires_frailty$Edad<65,])

sampling_rates<-c(0.1, 0.01, 0.06, 0.08, 0.05, 0.15, 0.25, 0.5) #desired sampling rates
windows_sizes_in_sec<-c(0.5, 1, 1.5, 2, 2.5, 3) #desired window sizes

# Generate the wearable dataset
complete_dataset_extracting_features(subjects_excluded, devices=c("smartwatch_empatica"),
split_events = T, # split_events: True if we have a column "event" where we have labelled some known events
windows_sizes_in_sec = c(1, 1.5, 2, 2.5, 3),
sampling_rates = )

#------------------------------------------------------
# Load generated dataset (whitout event column)
#------------------------------------------------------
dfs <- load_complete_dataset_with_features_extracted(questionnaires_frailty, devices=c(
"smartwatch_empatica"
),
windows_sizes_in_sec = windows_sizes_in_sec,
sampling_rates = sampling_rates,
split_events = split_events,
y_col_name = "fried_frailty_risk", new_cols=c(),
del_cols=c(
built_col_name_of_basic_stats(c("acc", "gyr", "gra", "linacc"), c("corr.xy", "corr.xz", "corr.yz"))
# , built_col_name_of_basic_stats(c("pres", "lig", "noise"), c(statisticsID(), "energy"))
, built_col_name_of_basic_stats(c("pres"), c(statisticsID(), "energy"))
, built_col_name_of_basic_stats(c("lig"), c(statisticsID(), "energy"))
, built_col_name_of_basic_stats(c("noise"), c(statisticsID(), "energy"))

, built_col_name_of_basic_stats(c("linacc.x", "linacc.y", "linacc.z"), c(statisticsID(), "energy"))
, built_col_name_of_basic_stats(c("gra.x", "gra.y", "gra.z"), c(statisticsID(), "energy"))
# built_col_name_of_basic_stats(SMARTWATCH_TRIAXIAL_SENSORS_COL_NAMES, c(statisticsID(), "energy")),
# , built_col_name_of_basic_stats(c("eda"), c(statisticsID(), "energy"))
# , built_col_name_of_basic_stats(c("temp"), c(statisticsID(), "energy"))
# , built_col_name_of_basic_stats(c("hr"), c(statisticsID(), "energy"))
# , built_col_name_of_basic_stats(c("gyr.x", "gyr.y", "gyr.z"), c(statisticsID(), "energy"))
#
# , built_col_name_of_basic_stats(c("acc.x", "acc.y", "acc.z"), c(statisticsID(), "energy"))
# , built_col_name_of_basic_stats(c("acc.x"), c(statisticsID(), "energy"))
# , built_col_name_of_basic_stats(c("acc.y"), c(statisticsID(), "energy"))
# , built_col_name_of_basic_stats(c("acc.z"), c(statisticsID(), "energy"))




)
)


#------------------------------------------------------
# Example: Prepare only data for EMPATICA E4 device
# for the different sampling-rates and window-sizes configurations
#------------------------------------------------------
dfs_smart_emp <- dfs$smartwatch_empatica


#Sampling rate = 1/0.1 = 10Hz
df_smart_emp_sr0.1_w0.5 <- dfs_smart_emp$`0.1S`$`0.5s`
df_smart_emp_sr0.1_w1 <- dfs_smart_emp$`0.1S`$`1s`
df_smart_emp_sr0.1_w1.5 <- dfs_smart_emp$`0.1S`$`1.5s`
df_smart_emp_sr0.1_w2 <- dfs_smart_emp$`0.1S`$`2s`
df_smart_emp_sr0.1_w2.5 <- dfs_smart_emp$`0.1S`$`2.5s`
df_smart_emp_sr0.1_w3 <- dfs_smart_emp$`0.1S`$`3s`

#Sampling rate = 1/0.06 = ~15Hz
df_smart_emp_sr0.06_w0.5 <- dfs_smart_emp$`0.06S`$`0.5s`
df_smart_emp_sr0.06_w1 <- dfs_smart_emp$`0.06S`$`1s`
df_smart_emp_sr0.06_w1.5 <- dfs_smart_emp$`0.06S`$`1.5s`
df_smart_emp_sr0.06_w2 <- dfs_smart_emp$`0.06S`$`2s`
df_smart_emp_sr0.06_w2.5 <- dfs_smart_emp$`0.06S`$`2.5s`
df_smart_emp_sr0.06_w3 <- dfs_smart_emp$`0.06S`$`3s`

#Sampling rate = 1/0.04 = 25Hz
df_smart_emp_sr0.04_w0.5 <- dfs_smart_emp$`0.04S`$`0.5s`
df_smart_emp_sr0.04_w1 <- dfs_smart_emp$`0.04S`$`1s`
df_smart_emp_sr0.04_w1.5 <- dfs_smart_emp$`0.04S`$`1.5s`
df_smart_emp_sr0.04_w2 <- dfs_smart_emp$`0.04S`$`2s`
df_smart_emp_sr0.04_w2.5 <- dfs_smart_emp$`0.04S`$`2.5s`
df_smart_emp_sr0.04_w3 <- dfs_smart_emp$`0.04S`$`3s`

#Sampling rate = 1/0.02 = 50Hz
df_smart_emp_sr0.02_w0.5 <- dfs_smart_emp$`0.02S`$`0.5s`
df_smart_emp_sr0.02_w1 <- dfs_smart_emp$`0.02S`$`1s`
df_smart_emp_sr0.02_w1.5 <- dfs_smart_emp$`0.02S`$`1.5s`
df_smart_emp_sr0.02_w2 <- dfs_smart_emp$`0.02S`$`2s`
df_smart_emp_sr0.02_w2.5 <- dfs_smart_emp$`0.02S`$`2.5s`
df_smart_emp_sr0.02_w3 <- dfs_smart_emp$`0.02S`$`3s`

#Sampling rate = 1/0.01 = 100Hz
df_smart_emp_sr0.01_w0.5 <- dfs_smart_emp$`0.01S`$`0.5s`
df_smart_emp_sr0.01_w1 <- dfs_smart_emp$`0.01S`$`1s`
df_smart_emp_sr0.01_w1.5 <- dfs_smart_emp$`0.01S`$`1.5s`
df_smart_emp_sr0.01_w2 <- dfs_smart_emp$`0.01S`$`2s`
df_smart_emp_sr0.01_w2.5 <- dfs_smart_emp$`0.01S`$`2.5s`
df_smart_emp_sr0.01_w3 <- dfs_smart_emp$`0.01S`$`3s`


#------------------------------------------------------
# For testing the library:
# We can select a single configuration:
# - sampling rate = 0.04
# - window size = 0.5
#------------------------------------------------------
data <- df_smart_emp_sr0.04_w0.5
69 changes: 69 additions & 0 deletions examples/hypeparemter-tuning.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#------------------------------------------------------
# © 2020 Francisco M. Garcia-Moreno
#------------------------------------------------------

source("utils/utils.R")
source("utils/predict_fun.R")
source("utils/multiplot.R")
source("utils/sensor_utils.R")
Sys.setlocale("LC_ALL", "en_US.UTF-8")

#------------------------------------------------------
# Hyperparameter tuning example
#------------------------------------------------------
set.seed(33)
if (Sys.getenv("JAVA_HOME")!="") Sys.setenv(JAVA_HOME="")


#------------------------------------------------------
# For testing purposes we will use the data generated
# from /examples/extract-features.R
# We can select a single configuration:
# - sampling rate = 0.04
# - window size = 0.5
#------------------------------------------------------
data <- df_smart_emp_sr0.04_w0.5

#we have to pass a validation data split
parts <- createDataPartition(data$y, p = 0.8, list = F)
validation_data <- data[-parts, ]
train_data <- data[parts, ]

#------------------------------------------------------
# Examples of hyperparameter tuning:
#------------------------------------------------------


#++++++
#sr: 0.04; wz: 0.5
# we test 3 models: Random forest ("rf"), SVM ("svm") and k-nn ("knn")
# evaluation metric = f1-score ("f1"); 10-fold (non-repeated)
#++++++
f1_1_1 <- search_features_selection(train_data, method="rf", model="rf", minfeatTune=1, metric="f1", search_method="rfe", folds=10, reps=1, verboseOnlyFeatSel=T, verbose=F)
f1_1_2 <- search_features_selection(train_data, method="rf", model="svm", minfeatTune=1, metric="f1", search_method="rfe", folds=10, reps=1, verboseOnlyFeatSel=T, verbose=F)
f1_1_3 <- search_features_selection(train_data, method="rf", model="knn", minfeatTune=1, metric="f1", search_method="rfe", folds=10, reps=1, verboseOnlyFeatSel=T, verbose=F)

#++++++
# plotting results
#++++++
plot_features_vs_acc_list_features(list(f1_1_1$all, f1_1_2$all, f1_1_3$all), metric="f1", tech=c("RF", "SVM", "k-NN"), lineBest=F, interv = 8)
plot_features_vs_acc_list_features(list(f1_1_3$all), metric="f1", tech=c("k-NN"), lineBest=F, interv = 8)
ggsave("../../../Datasets")
f1_1_3$all[f1_1_3$total_features]
f1_1_3$all[25]
length(f1_1_3$all)
model_knn<-model.frame(f1_1_3$features,data)
colnames(model_knn)


#------------------------------------------------------
# Validation
#------------------------------------------------------
features <- f1_1_3

res <- run_cv_prediction(features$features, train_data, validation_data, "knn", features$bestParam)
validation_met <- get_tpr_tnr_auc_in_multiclass(unique(train_data$y), res$prediction, validation_data$y) #get get_metrics(res$prediction, validation_data[,ncol(validation_data)], posClass = 1) #get_tpr_tnr_auc_in_multiclass(unique(train$y), prediction, test_labels, verbose=verbose)
cat(paste("\n >> VALIDATION Accuracy:", validation_met$acc, ". Precision:", validation_met$prec, ". Recall:", validation_met$rec, ". F1-Score:", validation_met$f, ". TPR:", validation_met$tpr, ". TNR:", validation_met$tnr))

f1_1_3$all[f1_1_3$total_features]

46 changes: 46 additions & 0 deletions examples/preprocess-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#------------------------------------------------------
# © 2020 Francisco M. Garcia-Moreno
#------------------------------------------------------

source("utils/utils.R")
source("utils/predict_fun.R")
source("utils/multiplot.R")
source("utils/sensor_utils.R")
Sys.setlocale("LC_ALL", "en_US.UTF-8")

#------------------------------------------------------
# Preprocessing example
#------------------------------------------------------
set.seed(33)
if (Sys.getenv("JAVA_HOME")!="") Sys.setenv(JAVA_HOME="")


#------------------------------------------------------
# For testing purposes we will use the data generated
# from /examples/extract-features.R
# We can select a single configuration:
# - sampling rate = 0.04
# - window size = 0.5
#------------------------------------------------------
data <- df_smart_emp_sr0.04_w0.5

#we have to pass a validation data split
parts <- createDataPartition(data$y, p = 0.8, list = F)
validation_data <- data[-parts, ]
train_data <- data[parts, ]

#------------------------------------------------------
# Examples of preprocessing pipelines:
#------------------------------------------------------


# -- Feature selection: Random forest ("rf")
# -- Classifier: K-Nearest Neighbors ("knn")
# -- Evaluation metric: F1-Score ("f1")
# Preprocessing methods in this order:
# -- 1) impute missing values with 0 (na=0)
# -- 2) smote for balancing data (up-60; down-300)
prep_models <- preprocess_data_several_methods(train_data, validation_data,
fs_method="rf", model="knn", metric = "f1",
prep_methods=list(na=0, smote=c(60, 300)),
folds=5, reps=1, verbose=T)
19 changes: 19 additions & 0 deletions utils/install_packages.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#------------------------------------------------------
# © 2020 Francisco M. Garcia-Moreno
#------------------------------------------------------
#R Version 4.0.1: https://cloud.r-project.org/bin/macosx/
#Compiling R on Mac: https://mac.r-project.org/tools/

install.packages("prettyR")
install.packages("parallelMap")
install.packages("psych")
install.packages("caret", type="source")
install.packages("e1071")
install.packages("ggplot2")
install.packages("DMwR")
install.packages("ROCR")
install.packages("PRROC")
install.packages("neuralnet")
install.packages("naivebayes")
install.packages("RWeka")
install.packages("FSelector")
30 changes: 30 additions & 0 deletions utils/multiclass_utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#------------------------------------------------------
# © 2020 Francisco M. Garcia-Moreno
#------------------------------------------------------

calculate.accuracy <- function(predictions, ref.labels) {
return(length(which(predictions == ref.labels)) / length(ref.labels))
}

calculate.w.accuracy <- function(predictions, ref.labels, weights) {
lvls <- levels(ref.labels)
if (length(weights) != length(lvls)) {
stop("Number of weights should agree with the number of classes.")
}
if (sum(weights) != 1) {
stop("Weights do not sum to 1")
}
accs <- lapply(lvls, function(x) {
idx <- which(ref.labels == x)
return(calculate.accuracy(predictions[idx], ref.labels[idx]))
})
acc <- mean(unlist(accs))
return(acc)
}

calculate_multiclass_accuracy <- function(predictions, ref.labels){
overall_acc <- calculate.accuracy(predictions, ref.labels)
weights <- rep(1 / length(levels(ref.labels)), length(levels(ref.labels)))
w.acc <- calculate.w.accuracy(predictions, ref.labels, weights)
return(list(overall_acc=overall_acc, weighted_acc=w.acc))
}
46 changes: 46 additions & 0 deletions utils/multiplot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#
# FILENAME: multiplot.R #
#

# Multiple plot function.
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# Source: http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)

# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)

# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
Loading

0 comments on commit 1ea090d

Please sign in to comment.