In [1]:
import os
import numpy as np
import pandas as pd
import sys
import json
import networkx as nx
import matplotlib.pyplot as plt
from venny4py.venny4py import *
import math
from glob import glob

# Add rpy2
%load_ext rpy2.ipython

In [2]:
%%R 

suppressPackageStartupMessages({
    library(broom)
    library(ComplexHeatmap)
    library(cowplot)
    library(facefuns)
    library(factoextra)
    library(FactoMineR)
    library(ggh4x)
    library(ggraph)
    library(ggpubr)
    library(ggrain)
    library(ggridges)
    library(ggseg)
    library(ggsignif)
    library(ggVennDiagram)
    library(glue)
    library(igraph)
    library(LaCroixColoR)
    library(patchwork)
    library(RColorBrewer)
    library(rlist)
    library(scales)
    library(see)
    library(splitstackshape)
    library(tidyverse)
    library(VennDiagram) 
})


# Set cowplot's theme as the default ggplot2 theme for plots created after this point
theme_set(theme_cowplot())

In [3]:
intra_individual_classification_path = "/Users/abry4213/data/Cogitate_MEG/derivatives/classification_results/within_participants"

# Build one tidy DataFrame per Logistic-classifier results file, then stack them.
intra_individual_classification_res_list = []
for classification_file in glob(f"{intra_individual_classification_path}/*Logistic*.csv"):
    # The participant ID is everything before the first underscore in the file name
    subject_id = os.path.basename(classification_file).split("_")[0]

    classification_df = (
        pd.read_csv(classification_file)
        # The 'comparison' column is not needed downstream (no-op if it is absent)
        .drop(columns=["comparison"], errors="ignore")
        # Files tagged "relevance_type" hold relevance results; all others are stimulus-type
        .assign(Classification_Type="Relevance" if "relevance_type" in classification_file else "Stimulus Type")
        # A column named after the participant (if any) becomes "subject_id" ...
        .rename(columns={subject_id: "subject_id"})
        # ... and every row is labelled with the participant ID
        .assign(subject_id=subject_id)
    )

    intra_individual_classification_res_list.append(classification_df)

# Stack the per-file results into a single table
intra_individual_classification_res = pd.concat(intra_individual_classification_res_list)

In [None]:
# Preview the first rows of the combined intra-individual classification results
intra_individual_classification_res.head()

## Intra-individual classification performance

In [None]:
# Load the pyspi SPI metadata table
pyspi_SPI_info = pd.read_csv("../feature_extraction/pyspi_SPI_info.csv")

# Define path for derivatives directory
deriv_dir = "/Users/abry4213/data/Cogitate_MEG/derivatives"

intra_individual_classification_path = f"{deriv_dir}/classification_results/within_participants"
intra_individual_classification_res_list = []

# Only read CSV result files. A bare os.listdir() also returns hidden files
# (e.g. macOS ".DS_Store") and sub-directories, which pd.read_csv cannot parse;
# glob's "*" pattern skips dot-files. Sorting makes the row order of the
# concatenated table reproducible across runs and machines.
classification_csv_paths = sorted(glob(os.path.join(intra_individual_classification_path, "*.csv")))
if not classification_csv_paths:
    # Fail early with a clear message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError(
        f"No classification result CSV files found in {intra_individual_classification_path}"
    )

for classification_csv_path in classification_csv_paths:
    # File names start with the participant ID (e.g. "sub-XXXX"), followed by an underscore
    classification_file = os.path.basename(classification_csv_path)
    subject_id = classification_file.split("_")[0]
    subject_base = subject_id.replace("sub-", "")
    classification_df = pd.read_csv(classification_csv_path)

    # Drop the 'comparison' column
    if "comparison" in classification_df.columns:
        classification_df = classification_df.drop(columns=["comparison"])

    # Files tagged "relevance_type" hold relevance results; all others are stimulus-type
    if "relevance_type" in classification_file:
        classification_df["Classification_Type"] = "Relevance"
    else:
        classification_df["Classification_Type"] = "Stimulus Type"

    # A column named after the full participant ID (if any) becomes "subject_id",
    # and every row is labelled with the participant ID
    classification_df = classification_df.rename(columns={subject_id: "subject_id"})
    classification_df["subject_id"] = subject_id

    # Drop the column named after the participant ID without its "sub-" prefix
    if subject_base in classification_df.columns:
        classification_df = classification_df.drop(columns=[subject_base])

    # Append results
    intra_individual_classification_res_list.append(classification_df)

# Concatenate all classification results, then split them by classification problem
intra_individual_classification_res = pd.concat(intra_individual_classification_res_list)
intra_individual_stimulus_classification_res = intra_individual_classification_res.query("Classification_Type=='Stimulus Type'")
intra_individual_relevance_classification_res = intra_individual_classification_res.query("Classification_Type=='Relevance'")


## What is the max classification performance by participant? 

In [None]:
# For each participant, find the highest stimulus-type accuracy, then left-join
# back onto the full results (on the shared columns) to recover the remaining
# columns of the row(s) that reached that maximum. Ties yield several rows.
max_stimulus_performance_per_participant = (
    intra_individual_stimulus_classification_res
    .groupby(['subject_id'])['accuracy']
    .max()
    .reset_index()
    .merge(intra_individual_stimulus_classification_res, how='left')
)

max_stimulus_performance_per_participant.head()

In [None]:
%%R -i max_stimulus_performance_per_participant,pyspi_SPI_info

# Lollipop chart of each participant's maximum accuracy (in %), with
# participants ordered along the x-axis by descending accuracy.
max_stimulus_performance_per_participant %>%
    left_join(pyspi_SPI_info) %>%
    mutate(subject_id = fct_reorder(subject_id, accuracy, .desc=TRUE)) %>%
    ggplot(aes(x=subject_id, y=100*accuracy, color=100*accuracy)) +
    geom_point() +
    # Stem running from each point down to y = 50
    geom_segment(aes(xend=subject_id, yend=50)) +
    scale_y_continuous(expand=c(0,0)) +
    # NOTE(review): N=94 is hard-coded in the axis label; confirm it matches the data
    labs(x="Participants (N=94)", y="Max CV Accuracy (%)") +
    theme(legend.position="none",
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank())

# No plot object is passed, so ggsave() writes the most recently created ggplot
ggsave('../plots/intra_subject_classification/Max_classification_performance_by_participant.svg', width=6, height=3, units='in', dpi=300)