Skip to content

Commit

Permalink
flowchart
Browse files Browse the repository at this point in the history
  • Loading branch information
franzbischoff committed Jun 6, 2023
1 parent 432425c commit e97479a
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 326 deletions.
31 changes: 31 additions & 0 deletions R/contrast_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,37 @@ register_contrast_model <- function() {
# )
# )

# contrasts
# coverages -> creates cov_sum and cov_idxs
# platos
# thresholds
# cov_counts # never used
# num_segments

# c_total
# c_median
# c_mean
# c_sd
# cov_con_mean
# k_mean
# cov_mean
# coverage
# cov_percent
# redundancy
# samples

# data {
# window
# k***
# plato
# contrast***
# threshold
# cov_sum (same as cov_counts) ***
# cov_idxs***
# cov_con***
# }


parsnip::set_fit(
model = "contrast_model",
eng = "contrast_profile",
Expand Down
381 changes: 128 additions & 253 deletions _contrast_profile/meta/meta

Large diffs are not rendered by default.

101 changes: 50 additions & 51 deletions flowcharts/contrast_profile.mmd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ title: Contrast Profile Classifier
"theme": "dark",
"fontFamily": "Fira Code Medium, Trebuchet MS, Verdana, Arial, Sans-Serif",
"flowchart": {
"diagramPadding": 10
"rankSpacing": 70,
"nodeSpacing": 70
}
}
}%%
Expand All @@ -23,41 +24,32 @@ classDiagram
%% -- Link (Solid) (Association without arrows)
%% .. Link (Dashed) (Association without arrows and not navigable)

Data "many * classes" <.. "1 * window_sizes" Contrast

Data "*" <.. "1" Contrast
Data "*" o-- "1" ShapeletMeta
Contrast "1" <.. "1" Shapelet
Shapelet "1" <.. "1" ShapeletMeta
Data <.. ShapeletMeta
%% Shapelet "1..window_sizes" *-- "1" PanContrast_TopK
%% Contrast "1..window_sizes" *-- "1" PanContrast_TopK

%% class PanContrast_TopK {
%% Contrast contrasts
%% Shapelet shapelets
%% }


class Data {
List~Factor~ classes
List~Numeric~ ts
List~float[]~ ts
List~int~ ids
}


%% class is the positive class
class Contrast {
List~int~ window_sizes*
Factor class*
List~Numeric~ contrast_profiles
List~int~ window_sizes*
List~float[]~ contrast_profiles
}

class Shapelet {
List~int~ window_sizes*
Factor class*
List~Numeric~ platos
List~int~ platos_indices
List~Numeric~ platos_twin
List~int~ platos_twin_indices
List~float~ plato_nary_contrasts
int num_platos*
List~int~ window_sizes*
List~int[num_platos]~ platos_indices
List~int[num_platos]~ platos_twin_indices
List~float[num_platos]~ plato_nary_contrasts
}

%% all Lists have dim m,n where m == num_of_shapelets(k) and n == length(window_sizes)
Expand All @@ -66,42 +58,49 @@ classDiagram
%% TODO: this needs to be reshaped
%% TODO: num_segments reflects the number of positive samples
class ShapeletMeta {
List~int~ window_sizes*
Factor class*
List~Numeric~ thresholds
List~Numeric~ overall_contrasts
List~bool~ coverages
List~int~ coverages_counts
int num_segments
int num_segments*
int num_platos*
List~int~ window_sizes*
List~float[num_platos]~ joint_platos
List~float[num_platos]~ thresholds
List~float[num_platos]~ contrasts
List~bool[num_platos]~ coverages
}

ShapeletMeta *-- Fitted
Fitted *-- Model
Terms *-- Model
ShapeletMeta <-- Terms : optimizes
ShapeletMeta "n" <|.. "1" Candidate : Optimize
Data "*" o-- "1" Score
Score "1" <|.. "1" Candidate : Optimize
Candidate <|-- Fitted : select

class Fitted {
Factor class*
ShapeletMeta best_shapelets
List~Numeric~ platos
List~Numeric~ thresholds
class Score {
float accuracy
float f1
float precision
float recall
}

class Terms {
float contrast_total
float contrast_median
float contrast_mean
fload contrast_std
fload cov_con_ratio_mean
float k_mean
float cov_mean
fload coverage
fload cov_percent
int redundancy
int num_shapelets
class Fitted {
Factor class*
int num_platos*
Candidate best_score_canditate*
- List~float[num_platos]~ joint_platos
- List~float[num_platos]~ thresholds
}

class Model {
Fitted fitted_values
Terms terms
class Candidate {
Factor class*
Score score*
List~ShapeletMeta~ shapelets*
float contrast_total
float contrast_median
float contrast_mean
float contrast_std
float cov_con_ratio_mean
float k_mean
float cov_mean
float coverage
float cov_percent
int redundancy
int num_shapelets
}
35 changes: 20 additions & 15 deletions scripts/_contrast_profile.R
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,13 @@ list(
# iteration = "list" # thus the objects keep their attributes
# ),
tar_target(
#### Pipeline: score_by_segment - Preparation of the data: the model's data is the shapelets with metadata ----
score_by_segment,
{
res <- list()
for (i in seq_len(var_vfolds)) {
cli::cli_alert_info("Scores by segment, fold {i}.")
# These parameters may be tuned on `recipes`
tune1 <- 0.1
tune2 <- 1 / 3
score <- score_by_segment_window(contrast_profiles[[i]]$positive,
Expand All @@ -303,8 +305,15 @@ list(
iteration = "list"
),
tar_target(
#### Pipeline: find_shapelets - This is the model fit. ----
find_shapelets,
{
# Here we can try: fitting all possible solutions and later score them and finally try
# to find which metadata is the best to filter the solutions
# Or, we can try to use some heuristics to find the best metadata for the solutions
# These parameters are tuned on `parsnip`/`tune`
# Currently the parameter `n` randomly draws 1 to `n` samples from the pan contrast profile
# We can try to use a fixed number of samples during the parameter optimization
res <- list()
for (i in seq_len(var_vfolds)) {
cli::cli_alert_info("Finding solutions, fold {i}.")
Expand Down Expand Up @@ -373,8 +382,11 @@ list(
# iteration = "list"
# ),
tar_target(
#### Pipeline: test_classifiers_self - This is the current score function. ----
test_classifiers_self,
{
# With the results of this step, plus the fitted solutions, we need to find which
# metadata is the best to filter the solutions
class(analysis_split) <- c("manual_rset", "rset", class(analysis_split))

res <- list()
Expand All @@ -384,6 +396,9 @@ list(
res[[i]] <- list()
shapelets <- find_shapelets[[i]]

# the `compute_metrics_topk` function may need testing on the `TRUE` criteria
# currently, if `ANY` shapelet matches, it is considered a positive
# as an alternative, we can try to use `ALL`, `HALF` or other criteria
res[[i]] <- compute_metrics_topk(fold, shapelets, var_future_workers, TRUE)
}

Expand Down Expand Up @@ -416,34 +431,24 @@ list(
tar_target(
test_classifiers,
{
shapelet_sizes <- var_shapelet_sizes

# Here we test the solutions we chose on the assessment split
# The final `model` we need is the shapelet
class(assessment_split) <- c("manual_rset", "rset", class(assessment_split))

res <- list()
for (i in seq_len(var_vfolds)) {
fold <- rsample::get_rsplit(assessment_split, i)
shapelets <- best_shapelets[[i]]
contrast <- contrast_profiles[[i]]
best_shapelets <- purrr::pluck(find_shapelets, i)[1, ]

res[[i]] <- compute_metrics_topk(fold, shapelets, contrast)
res[[i]] <- compute_metrics_topk(fold, best_shapelets, var_future_workers, TRUE)
}
res
overall <- compute_overall_metric(res)
list(fold = res, overall = overall)
},
pattern = map(best_shapelets, assessment_split, contrast_profiles),
pattern = map(assessment_split, find_shapelets),
iteration = "list"
)
# tar_target(
# best_shapelets,
# {
# # algorithm for selecting the best shapelet
# },
# pattern = map(contrast_profiles),
# iteration = "list"
# ),
# tar_target(
# train_classifier,
# {
# # train a classifier based on the best shapelets
Expand Down
18 changes: 11 additions & 7 deletions scripts/classification/pan_contrast.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ score_by_segment_window <- function(true_data, false_data, contrast_profiles, qu
}

# here we compute the total number of segments that each plato could classify
total_counts <- as.matrix(purrr::map_dfr(segs, function(x) apply(x, 1, sum)))
total_counts <- as.matrix(purrr::map_dfr(segs, function(x) apply(x, 1, sum))) # never used
colnames(cont) <- w_sizes # set the column names on the overall contrast matrix
colnames(thlds) <- w_sizes # set the column names on the overall contrast matrix

Expand All @@ -148,9 +148,13 @@ score_by_segment_window <- function(true_data, false_data, contrast_profiles, qu
coverage = segs, # segs == coverage of each plato (~sensitivity)
platos = shapes,
thresholds = thlds, # thlds == threshold of each plato
cov_counts = total_counts, # sum of segs == 1. Best is sum == num_segments
cov_counts = total_counts, # sum of segs == 1. Best is sum == num_segments # never used
num_segments = (length(segments) - 1)
)


# score <- score_candidates(score)
# return(score)
}

score_candidates <- function(score) {
Expand Down Expand Up @@ -686,11 +690,11 @@ compute_overall_metric <- function(all_folds) {
tp <- fp <- tn <- fn <- acc <- ff <- 0

for (fold in all_folds) {
tp <- tp + fold$tp
fp <- fp + fold$fp
tn <- tn + fold$tn
fn <- fn + fold$fn
ff <- ff + fold$f1
tp <- tp + fold[[1]]$tp
fp <- fp + fold[[1]]$fp
tn <- tn + fold[[1]]$tn
fn <- fn + fold[[1]]$fn
ff <- ff + fold[[1]]$f1
}

tm <- (2 * tp) / (2 * tp + fp + fn)
Expand Down

0 comments on commit e97479a

Please sign in to comment.