notebook for decoding

In [1]:
from typing import Optional, Tuple

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

from data_structures import PatientData


In [2]:
# Load both patients in one pass; construction order (566, then 563) is unchanged.
p566, p563 = (PatientData(pid=patient_id) for patient_id in ('566', '563'))

./Data/40m_act_24_S06E01_30fps_character_frames.csv
./Data/40m_act_24_S06E01_30fps_character_frames.csv


In [None]:
class ConceptDecoder:
    """
    Handles decoding for a single concept pair.

    On construction this stores the patient data, the two concept names and
    the epoch, and sets up a classifier, a ``StandardScaler``, an empty
    metrics dict and the ``ConceptPairDataset`` for the pair.

    Parameters
    ----------
    patient_data : PatientData
        Patient object handed through to ``ConceptPairDataset``.
    c1, c2 : str
        Names of the two concepts to decode between.
    epoch : str
        Epoch identifier handed through to ``ConceptPairDataset``.
    classifier : BaseEstimator, optional
        Estimator to use for decoding. When omitted, a new ``LinearSVC()``
        is created for this decoder.

    TODO: add method for PCA visualization in 2D/3D
    """
    def __init__(self, patient_data: PatientData, c1: str, c2: str, epoch: str,
                 classifier: Optional[BaseEstimator] = None):
        self.patient_data = patient_data
        self.c1 = c1
        self.c2 = c2
        self.epoch = epoch
        # A default estimator written in the signature is built once, when the
        # function is defined, and would then be shared (fitted state included)
        # by every decoder that relies on the default. Build one per instance.
        self.classifier = LinearSVC() if classifier is None else classifier
        self.scaler = StandardScaler()
        self.metrics = {}

        # min_samples is not passed, so ConceptPairDataset's own default applies.
        self.dataset = ConceptPairDataset(
            patient_data=self.patient_data,
            concept_pair=(self.c1, self.c2),
            epoch=self.epoch,
        )

In [None]:
class ConceptPairDataset():
    """
    Turns the two concept bins of a concept pair into a labelled dataset.

    Each concept bin is an np.ndarray of shape (n_onsets, n_neurons), where
    each row is one response. Two ways of building the dataset are planned:

    - ``create_dataset_normal``: stack the raw responses and split them.
    - ``create_dataset_pseudo``: build pseudopopulations (not implemented yet).

    Parameters
    ----------
    patient_data : PatientData
        Object providing ``get_concept_data(c1=..., c2=..., epoch=...)``.
    concept_pair : Tuple[str, str]
        The two concept names; stored as ``self.c1`` and ``self.c2``.
    epoch : str
        Epoch identifier passed to ``get_concept_data``.
    min_samples : int
        Minimum number of responses each concept must have.
    """
    def __init__(self, patient_data: PatientData, concept_pair: Tuple[str, str], 
                 epoch: str, min_samples: int = 10):
        self.patient_data = patient_data
        self.c1, self.c2 = concept_pair
        self.epoch = epoch
        self.min_samples = min_samples

    def create_dataset_normal(self, test_size = 0.3, random_state: Optional[int] = None,
                              stratify: bool = False):
        """
        Create a train/test dataset without pseudopopulations.

        The two concepts can have very different numbers of responses, so the
        resulting dataset is liable to be unbalanced. Rows of concept 1 are
        labelled 0 and rows of concept 2 are labelled 1.

        Parameters
        ----------
        test_size :
            Passed to ``train_test_split`` (default 0.3).
        random_state : int, optional
            Seed passed to ``train_test_split`` for a reproducible split.
            The default ``None`` keeps the split random on every call.
        stratify : bool
            When True, the split preserves the class proportions of ``y``.
            The default False leaves the split unstratified.

        Returns
        -------
        X_train, X_test, y_train, y_test

        Raises
        ------
        ValueError
            If either concept has fewer than ``min_samples`` responses.
        """
        c1_data, c2_data = self.patient_data.get_concept_data(c1=self.c1, c2=self.c2, epoch=self.epoch)

        print(f"c1 shape: {c1_data.shape[0]}, c2 shape: {c2_data.shape[0]}")

        if len(c1_data) < self.min_samples or len(c2_data) < self.min_samples:
            # The concept names live on self.c1 / self.c2 (set in __init__).
            raise ValueError(f"Insufficient samples for {self.c1} vs {self.c2}")

        X = np.vstack([c1_data, c2_data])
        y = np.concatenate([np.zeros(len(c1_data)), np.ones(len(c2_data))])

        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=test_size,
            random_state=random_state,
            stratify=y if stratify else None,
        )

        return X_train, X_test, y_train, y_test

    def create_dataset_pseudo(self):
        """Placeholder for the pseudopopulation dataset; currently does nothing and returns None."""
        pass


# testing playground


:)

In [5]:
# Quick check: vstack treats a 1-D array as a single row when stacking it on a 2-D array.
a = np.arange(1, 4)
b = np.arange(4, 10).reshape(2, 3)
x = np.vstack((a, b))
x

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [6]:
# Same concept pair and epoch for both patients: z -> patient 566, z1 -> patient 563.
z, z1 = (
    ConceptPairDataset(patient, ('A.Amar', 'B.Buchanan'), epoch='movie')
    for patient in (p566, p563)
)

In [7]:
# Train/test split for patient 566's dataset, using the method's default arguments.
(X_train, X_test, y_train, y_test) = z.create_dataset_normal()

c1 shape: 20, c2 shape: 68


In [8]:
# One shape per line, in the order X_train, X_test, y_train, y_test.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)), sep="\n")

(61, 169)
(27, 169)
(61,)
(27,)
