In [None]:
from mylib.statistic_test import *

# Identifier of this analysis; reused below as the name of the cached
# result files (`code_id + '.pkl'` / `'.xlsx'`) and as the output sub-folder.
code_id = '0346 - GLM for all sessions'
# Folder under `figpath` where `fit_models` pickles the fitted models.
loc = os.path.join(figpath, code_id)
# NOTE(review): `mkdir`, `os` and `figpath` are not defined in this cell;
# presumably they come from the star import above -- confirm.
mkdir(loc)

from mylib.model.glms import GLM as GenearlizedLinearModel
from mylib.model import EqualRateDriftModel, TwoProbDriftModel
from mylib.model import TwoProbabilityIndependentModel
from mylib.model import JointProbabilityModel
from mylib.model import HMM
from mylib.model import ContinuousHiddenStateModel
from mylib.model import ProbabilityRNN, IntegrativeRNN

from mylib.field.tracker_v2 import Tracker2d

def _append_loss(res, label, loss, n_steps):
    """Append one model's loss curve, label and step axis to ``res``.

    The three lists stay aligned: each call adds exactly one entry to
    'Loss', 'Model Type' and 'Step'.
    """
    res['Loss'].append(loss)
    res['Model Type'].append(np.repeat(label, n_steps))
    res['Step'].append(np.arange(1, n_steps + 1))


def fit_models(field_reg, GLM: np.ndarray, qualified_idx: np.ndarray, file_name: str):
    """Fit every candidate model on a random 80/20 split and collect test losses.

    Parameters
    ----------
    field_reg
        Passed unchanged to ``Tracker2d`` and ``convert_for_glm``.
        NOTE(review): presumably a (sessions, fields) array -- confirm.
    GLM : np.ndarray
        3-D array of GLM regressors; the last axis indexes the regressor and
        must have at least 11 entries (column 3 is dropped). The array is
        standardized on a copy, so the caller's data is left untouched.
    qualified_idx : np.ndarray
        Only its length is reported; it is not used to filter ``field_reg``.
    file_name : str
        File name (inside ``loc``) for the pickled ``[Models, Models2]`` lists.

    Returns
    -------
    dict
        'Step', 'Loss' and 'Model Type' are equally long concatenated arrays
        (one segment per model); 'train_indices', 'train_size' and
        'sequences' describe the split that was used.

    Notes
    -----
    The split uses the global NumPy RNG without a seed, so each call draws a
    different train/test partition.
    """
    res = {
        "Step": [],
        "Loss": [],
        "Model Type": []
    }

    # Standardize each regressor over all non-NaN entries. Work on a copy:
    # the caller passes the same array to several consecutive calls.
    GLM = GLM.copy()
    for j in range(GLM.shape[2]):
        mean, std = np.nanmean(GLM[:, :, j]), np.nanstd(GLM[:, :, j])
        GLM[:, :, j] = (GLM[:, :, j] - mean) / std

    # Keep 10 of the regressors (index 3 is excluded).
    GLM = GLM[:, :, [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]]
    print(f"Orignial shape: {field_reg.shape}, qualified shape: {qualified_idx.shape[0]}")
    tracker = Tracker2d(field_reg=field_reg)
    sequences = tracker.convert_to_sequence()
    max_length = np.max([len(seq) for seq in sequences])
    n_steps = max_length - 1

    # Require longer sequences when the data set is long enough to afford it.
    least_length = 10 if max_length > 10 else 5
    sequences, glm_params = tracker.convert_for_glm(
        field_reg, GLM, least_length=least_length, is_seq_format=True
    )

    train_size = int(len(sequences) * 0.8)
    train_indices = np.random.choice(len(sequences), train_size, replace=False)
    test_indices = np.setdiff1d(np.arange(len(sequences)), train_indices)
    res['train_indices'] = train_indices
    res['train_size'] = 0.8
    res['sequences'] = sequences

    train_seq = [sequences[i] for i in train_indices]
    test_seq = [sequences[i] for i in test_indices]

    # ---- Sequence-only models (fitted in the same order as before) --------
    Models = []

    for label, model_cls in (
        ("Model I - 1", EqualRateDriftModel),
        ("Model I - 2", TwoProbDriftModel),
        ("Model II", TwoProbabilityIndependentModel),
        ("Model III", JointProbabilityModel),
    ):
        model = model_cls()
        model.fit(train_seq)
        _append_loss(res, label, model.calc_loss_along_seq(test_seq), n_steps)
        Models.append(model)

    for n_states in (5, 10, 20, 40):
        model = HMM.process_fit(N=n_states, sequences=train_seq, n_iterations=100)
        _append_loss(res, f"Model IV - {n_states}", model.calc_loss_along_seq(test_seq), n_steps)
        Models.append(model)

    for kernel in ('reci', 'logistic', 'poly2', 'poly3'):
        model = ContinuousHiddenStateModel(kernel)
        model.fit(train_seq)
        _append_loss(res, f"Model V - {kernel}", model.calc_loss_along_seq(test_seq), n_steps)
        Models.append(model)

    for hidden_size in (8, 16, 32):
        # The RNN receives all sequences plus the training indices.
        model = ProbabilityRNN.process_fit(
            sequences,
            train_index=train_indices,
            hidden_size=hidden_size,
            lr=0.001,
            epochs=1000,
            batch_size=2048
        )
        _append_loss(res, f"Model VI - {hidden_size}", model.calc_loss_along_seq(test_seq), n_steps)
        Models.append(model)

    # NOTE: NaN imputation of the GLM regressors is disabled; rows that
    # contain any NaN are dropped from the training set instead (see below).

    # ---- GLMs: regressors at step t predict the sequence value at t+1 -----
    X_train = np.concatenate([glm_params[i][:-1, :] for i in train_indices], axis=0)
    Y_train = np.concatenate([sequences[i][1:] for i in train_indices], axis=0)
    valid_rows = np.where(~np.isnan(np.sum(X_train, axis=1)))[0]
    X_fit, Y_fit = X_train[valid_rows, :], Y_train[valid_rows]
    X_test = [glm_params[i] for i in test_indices]
    Y_test = test_seq

    G1 = GenearlizedLinearModel()
    G1.fit(X_fit, Y_fit)
    _append_loss(res, "GLM All", G1.calc_loss_along_seq(X_test, Y_test), n_steps)

    # Knock-out: refit with each single regressor removed.
    KO = []
    for i in range(10):
        print(f"Knock out element {i}")
        GKO = GenearlizedLinearModel()
        idx = np.concatenate([np.arange(i), np.arange(i+1, 10)])
        GKO.fit(X_fit[:, idx], Y_fit)
        _append_loss(
            res, f"GLM KO {i}",
            GKO.calc_loss_along_seq([x[:, idx] for x in X_test], Y_test),
            n_steps
        )
        KO.append(GKO)

    # Retain-one: refit with each regressor on its own.
    RO = []
    for i in range(10):
        print(f"Only element {i}")
        GRO = GenearlizedLinearModel()
        GRO.fit(X_fit[:, i:i+1], Y_fit)
        _append_loss(
            res, f"GLM RO {i}",
            GRO.calc_loss_along_seq([x[:, i:i+1] for x in X_test], Y_test),
            n_steps
        )
        RO.append(GRO)

    # NOTE: the IntegrativeRNN variants ("Model VII") are currently not fitted.

    Models2 = [G1] + KO + RO

    with open(join(loc, file_name), 'wb') as f:
        pickle.dump([Models, Models2], f)

    for k in ['Step', 'Loss', 'Model Type']:
        res[k] = np.concatenate(res[k])

    return res

# Paradigms recorded in two running directions: (label, direction key).
# For these, regressors live in trace['GLM_<key>'] and the field registry in
# trace['<key>']['field_reg'].
_DIRECTIONAL_PARADIGMS = {
    'ReverseMaze': (('MAf', 'cis'), ('MAb', 'trs')),
    'HairpinMaze': (('HPf', 'cis'), ('HPb', 'trs')),
}


def _fit_repeated(field_reg, glm_entry, mouse, paradigm, n_repeats=10):
    """Run ``fit_models`` ``n_repeats`` times and average the loss curves.

    ``glm_entry`` is the ``(glmparams, is_qualified)`` pair stored in the
    trace. Each repeat pickles its models to
    ``"{mouse}_{paradigm}_iter{k}.pkl"``. Returns the result dict of the last
    repeat (used for its 'Model Type' / 'Step' axes) and the element-wise
    mean of all 'Loss' arrays.
    """
    glmparams, is_qualified = glm_entry
    res, losses = None, []
    for iteration in range(n_repeats):
        res = fit_models(
            field_reg, glmparams, is_qualified,
            file_name=f"{mouse}_{paradigm}_iter{iteration}.pkl"
        )
        losses.append(res['Loss'])
    return res, np.mean(np.vstack(losses), axis=0)


def _store_result(data, mouse, paradigm, res, mean_loss):
    """Append one (mouse, paradigm) result to the long-format ``data`` dict."""
    n_rows = len(res['Loss'])
    data['MiceID'] += [mouse] * n_rows
    data['Paradigm'] += [paradigm] * n_rows
    data['Model Type'].append(res['Model Type'])
    data['Loss'].append(mean_loss)
    data['Step'].append(res['Step'])


# Reuse the cached table when it exists; otherwise fit everything and cache it.
if os.path.exists(join(figdata, code_id+'.pkl')):
    with open(join(figdata, code_id+'.pkl'), 'rb') as handle:
        Data = pickle.load(handle)
else:
    Data = {
        'Step': [],
        'Paradigm': [],
        'MiceID': [],
        'Model Type': [],
        'Loss': []
    }

    # NOTE(review): iteration starts at row 26, so earlier rows of
    # f_CellReg_modi are never processed -- confirm this is intended.
    for i in range(26, len(f_CellReg_modi)):
        if f_CellReg_modi['Type'][i] != 'Real' or f_CellReg_modi['maze_type'][i] == 0:
            continue

        print(f_CellReg_modi['Trace File'][i])

        # pickle is only safe on trusted files; these are local analysis outputs.
        with open(f_CellReg_modi['Trace File'][i], 'rb') as handle:
            trace = pickle.load(handle)

        session_paradigm = f_CellReg_modi['paradigm'][i]
        if session_paradigm == 'CrossMaze':
            maze_type = int(f_CellReg_modi['maze_type'][i])
            # A direction key of None means the un-split 'GLM' / 'field_reg' entries.
            jobs = (('MA' if maze_type == 1 else 'MB', None),)
        else:
            # Any other paradigm name yields no jobs and is skipped.
            jobs = _DIRECTIONAL_PARADIGMS.get(session_paradigm, ())

        if jobs:
            mouse = int(f_CellReg_modi['MiceID'][i])

        for paradigm, direction in jobs:
            if direction is None:
                field_reg, glm_entry = trace['field_reg'], trace['GLM']
            else:
                field_reg = trace[direction]['field_reg']
                glm_entry = trace[f'GLM_{direction}']

            res, losses = _fit_repeated(field_reg, glm_entry, mouse, paradigm)
            _store_result(Data, mouse, paradigm, res, losses)

        print("\n\n\n\n")

    for k in ['MiceID', 'Paradigm']:
        Data[k] = np.array(Data[k])

    for k in ['Model Type', 'Loss', 'Step']:
        Data[k] = np.concatenate(Data[k])

    with open(join(figdata, code_id+'.pkl'), 'wb') as handle:
        pickle.dump(Data, handle)

    D = pd.DataFrame(Data)
    D.to_excel(join(figdata, code_id+'.xlsx'), index=False)

  from .autonotebook import tqdm as notebook_tqdm


        E:\Data\FinalResults\0346 - GLM for all sessions is already existed!
Using device: cuda
Using device: cuda
Using device: cuda
E:\Data\Hairpin_maze\10209\footprint\Cell_reg\trace_mdays_conc.pkl
Orignial shape: (7, 5114), qualified shape: 2660
Simple Drift Model:
  Loss: [0.69152222 0.69271517 0.70166227 0.707627   0.71273125 0.71573362]
  Parameters: (0.5287481015404643,).

Two Probability Drift Model:
  Loss: [0.70329872 0.64965921 0.60605816 0.54941147 0.51147465 0.53477727]
  Parameters: (0.6042154566744731, 0.22215709261430247).



100%|██████████| 768/768 [00:00<00:00, 249610.65it/s]

Retention + Recovery Model:
  Loss: [0.69211047 0.63386045 0.57822857 0.52346375 0.4807547  0.54131898]
  Retention Parameters: [2.22335803 3.89288971]
  Recovery Parameters: [0.69751982 1.38478845].




  probs = probs[1:, :, 1]/np.sum(probs[1:, :, :], axis=2)


Joint Probability Model:
  Loss: [0.69441278 0.63551942 0.56235685 0.54345293 0.5064775  0.61567684]
  Parameters: [-0.23045001  1.42185712  1.16240109  1.56895198].



100%|██████████| 100/100 [00:01<00:00, 84.75it/s]


Hidden Markov Model with 5 hidden states:
  Loss: [0.69529684 0.65285124 0.56333843 0.54143421 0.49139649 0.57439509]



100%|██████████| 100/100 [00:01<00:00, 85.35it/s]


Hidden Markov Model with 10 hidden states:
  Loss: [0.69839503 0.65752764 0.56371647 0.53776853 0.49054622 0.58120553]



100%|██████████| 100/100 [00:01<00:00, 86.55it/s]


Hidden Markov Model with 20 hidden states:
  Loss: [0.69897407 0.65896954 0.56408125 0.53716333 0.49068246 0.58263342]



100%|██████████| 100/100 [00:01<00:00, 56.86it/s]


Hidden Markov Model with 40 hidden states:
  Loss: [0.69929144 0.66005158 0.56438176 0.53717643 0.49103996 0.58351477]

Continuous Hidden State Model with reci:
  Loss: [0.69157343 0.6401577  0.5710398  0.53024032 0.48309735 0.54499902]

Continuous Hidden State Model with logistic:
  Loss: [0.69157343 0.63775477 0.5708499  0.5261523  0.48126692 0.54407659]

Continuous Hidden State Model with poly2:
  Loss: [0.69157343 0.64200095 0.57397394 0.52195231 0.48091883 0.54413259]

Continuous Hidden State Model with poly3:
  Loss: [0.69157343 0.66567792 0.58246813 0.54101076 0.47929856 0.58473438]



 48%|████▊     | 477/1000 [00:10<00:10, 48.33it/s]