In [1]:
import pandas as pd
import numpy as np
import torch
import joblib
import sys 
import os
import argparse
sys.path.append("../time2lang/models")

from tqdm import tqdm
from chronos import ChronosPipeline
from utils import save_model, dictionary_to_arrays_numpy, zscore_sample_wise, zscore, apply_sss
from resnet import ResNet1D, ResNet1Dv2
from llama import CustomLlamaForCausalLM
from projection import ProjectionModel, ProjectionModelRes
from torch import nn
from datetime import datetime
from sklearn.metrics import balanced_accuracy_score
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from transformers.models.llama import LlamaForCausalLM
from transformers import pipeline, AutoTokenizer
from itertools import chain

## (a) Load the required models

In [2]:
# The notebook was written for GPU index 2 on a multi-GPU machine. Keep that
# preference when the index exists, otherwise fall back to GPU 0, and to the
# CPU when CUDA is not available at all.
preferred_gpu = 2
if torch.cuda.is_available():
    gpu_index = preferred_gpu if torch.cuda.device_count() > preferred_gpu else 0
    device = f"cuda:{gpu_index}"
else:
    device = "cpu"

resnet = ResNet1Dv2(in_channels=768,
                 base_filters=32,
                 kernel_size=3,
                 stride=2,
                 n_block=6,
                 groups=1,
                 n_classes=999).to(device) # n_classes does not matter

chronos = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-base",
    device_map=device,
    torch_dtype=torch.float32,
)

n_classes = 6 # 6 periodicity classes

model_id = "meta-llama/Llama-3.2-1B"
llama = CustomLlamaForCausalLM.from_pretrained(model_id).to(device)
projection = ProjectionModelRes(n_classes=n_classes).to(device)

# map_location remaps the stored tensors onto `device`, so the checkpoints load
# even when they were saved from a device that does not exist on this machine.
resnet.load_state_dict(torch.load("weights/resnet.pt", map_location=device, weights_only=True))
projection.load_state_dict(torch.load("weights/proj.pt", map_location=device, weights_only=True))

<All keys matched successfully>

## (b) Extract features

There are three important functions for feature extraction:

- `hook_linear`: Creates a hook to extract features from the corresponding layer (linear_2 in our case)
- `feature_extraction`: Given a time series, we apply a forward pass and extract the features using the hook
- `extract_all_features`: Helper function to loop over a dataset for feature extraction

In [3]:
# Module-level buffer that `hook_linear` appends captured layer outputs to.
features = []


def hook_linear(module, input, output):
    """Forward hook that stores the hooked layer's output in the module-level ``features`` list.

    The signature is the one PyTorch uses for forward hooks: the module the hook
    is attached to, the inputs it was called with, and the output it produced.
    ``module`` and ``input`` are not used.
    """
    features.append(output)
    # A forward hook that returns None leaves the layer's output unchanged.
    return None

def feature_extraction(resnet, chronos, llama, projection, ts, use_residual):
    """Run one forward pass through the pipeline and return the output of ``projection.linear_2[0]``.

    A forward hook is attached to ``projection.linear_2[0]`` for the duration of
    the call and is always removed again, even if the forward pass raises. The
    hooked output is collected in a list local to this call, so no
    notebook-level variable is rebound.

    Args:
        resnet: Model applied to the permuted Chronos embeddings.
        chronos: Pipeline exposing ``embed(ts)`` (first element of the result is
            used) and a ``model`` attribute.
        llama: Model called with ``inputs_embeds=...``; must expose ``model``.
        projection: Model called as ``projection(llama_mean, residual)`` with a
            ``linear_2`` container whose first element is hooked.
        ts: Time series passed unchanged to ``chronos.embed``.
        use_residual: If true, the mean of the Chronos embeddings over the last
            dimension is passed as the residual; otherwise a zero tensor of the
            same shape is passed.

    Returns:
        The tensor produced by ``projection.linear_2[0]`` during the forward pass.

    Raises:
        RuntimeError: If the hooked layer was never executed.
    """
    resnet.eval()
    chronos.model.eval()
    llama.model.eval()
    projection.eval()

    captured = []

    def _capture(module, inputs, output):
        # Forward hook: keep the hooked layer's output for the caller.
        captured.append(output)

    hook = projection.linear_2[0].register_forward_hook(_capture)
    try:
        with torch.inference_mode():
            chronos_embeddings = chronos.embed(ts)[0].permute(0, 2, 1).to(device) # swap context and ts length dimensions
            resnet_embeddings = resnet(chronos_embeddings).permute(0, 2, 1)
            # Zero-pad the last dimension up to 2048 (presumably the LLaMA hidden size - TODO confirm).
            pad_embeddings = torch.nn.functional.pad(resnet_embeddings, (0, 2048 - resnet_embeddings.shape[-1])) # shape = (batch_size, context based on kernel, features)
            llama_embeddings = llama(inputs_embeds=pad_embeddings) # llama embedding shape = (batch_size, context based on kernel, features)
            if use_residual:
                residual = chronos_embeddings.mean(dim=-1)
            else:
                # Without the residual we simply add a 0-tensor; build it directly in
                # the reduced shape on the target device instead of reducing a
                # full-size CPU tensor and copying it over.
                residual = torch.zeros(chronos_embeddings.shape[:-1], device=device)
            # Only the hooked intermediate output is needed; the return value is discarded.
            projection(llama_embeddings.mean(dim=1), residual)
    finally:
        # Always detach the hook so a failed pass cannot leave it registered.
        hook.remove()

    if not captured:
        raise RuntimeError("projection.linear_2[0] was not executed; no features were captured")
    return captured[0]

In [4]:
def extract_all_features(X, use_residual=False):
    """Extract features for every time series in ``X``, one series at a time.

    Each series is normalised with ``zscore``, converted to a tensor, and passed
    through ``feature_extraction`` using the notebook-level ``resnet``,
    ``chronos``, ``llama`` and ``projection`` models. Progress is shown with tqdm.

    Args:
        X: Iterable of time series.
        use_residual: Forwarded to ``feature_extraction``.

    Returns:
        A list with one NumPy array of extracted features per input series.
    """
    def _features_for(series):
        normalised = torch.tensor(zscore(series))
        extracted = feature_extraction(resnet, chronos, llama, projection, normalised, use_residual)
        return extracted.cpu().numpy()

    return [_features_for(series) for series in tqdm(X)]

#### Example feature extraction
- We are given a 2D array `X` of shape `(100, 1000)`
- The extracted features are of shape `(100, 256)`
- These features can be used for classification or regression with traditional ML methods

In [5]:
# Seeded generator so the synthetic demo input is identical on every run.
rng = np.random.default_rng(0)
X = rng.random((100, 1000))  # 100 series of length 1000, uniform in [0, 1)
print(X.shape)

(100, 1000)


In [6]:
# NOTE: `features` is also the module-level name that `hook_linear` appends to;
# this assignment rebinds it to the list returned by extract_all_features
# (one feature array per row of X).
features = extract_all_features(X)

100%|██████████████████████████████████████████████████████████████████| 100/100 [00:06<00:00, 14.62it/s]


In [7]:
# Stack the per-series feature arrays row-wise and report the resulting shape.
stacked_shape = np.vstack(features).shape
print(stacked_shape)

(100, 256)


## (c) StudentLife

An example of extracting features with time2lang and using a random forest to classify the resulting conversation embeddings by flourishing status.

In [8]:
# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load data files that come from a trusted source.
studentlife_dir = "../time2lang/data/studentlife"
train = joblib.load(f"{studentlife_dir}/train.p")
test = joblib.load(f"{studentlife_dir}/test.p")

In [9]:
# Labels are taken as stored in the loaded splits.
y_train, y_test = train['labels'], test['labels']

# Extract features for each split (train first, then test) without the residual
# connection, and stack the per-series arrays into one 2-D matrix per split.
train_feature_rows = extract_all_features(train['features'], use_residual=False)
X_train = np.vstack(train_feature_rows)
test_feature_rows = extract_all_features(test['features'], use_residual=False)
X_test = np.vstack(test_feature_rows)

100%|████████████████████████████████████████████████████████████████████| 36/36 [00:01<00:00, 19.44it/s]
100%|████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 19.43it/s]


In [10]:
# Sanity check: feature matrix and label vector shapes for both splits.
train_summary = f"Training data: {X_train.shape} | {y_train.shape}"
test_summary = f"Test data: {X_test.shape} | {y_test.shape}"
print(train_summary, test_summary, sep="\n")

Training data: (36, 256) | (36,)
Test data: (10, 256) | (10,)


In [11]:
# Train on 5 different splits of the training data and evaluate each on the
# hold-out set, using a random forest ('rf') backbone. The two returned values
# are kept as AUROC and AUPRC scores.
auroc_avg, auprc_avg = apply_sss(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    backbone='rf',
)

------ Running split 1 -------
Fitting 2 folds for each of 144 candidates, totalling 288 fits
------ Running split 2 -------
Fitting 2 folds for each of 144 candidates, totalling 288 fits
------ Running split 3 -------
Fitting 2 folds for each of 144 candidates, totalling 288 fits
------ Running split 4 -------
Fitting 2 folds for each of 144 candidates, totalling 288 fits
------ Running split 5 -------
Fitting 2 folds for each of 144 candidates, totalling 288 fits


In [12]:
# Report the mean and (in parentheses) the standard deviation of the scores
# returned by apply_sss.
auroc_summary = f"AUROC: {np.mean(auroc_avg)} ({np.std(auroc_avg)})"
auprc_summary = f"AUPRC: {np.mean(auprc_avg)} ({np.std(auprc_avg)})"
print(auroc_summary, auprc_summary, sep="\n")

AUROC: 0.708 (0.09765244492586962)
AUPRC: 0.7445555555555556 (0.10847428588337786)
[CV 2/2; 27/144] START bootstrap=True, max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=50
[CV 2/2; 27/144] END bootstrap=True, max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=50;, score=0.438 total time=   0.1s
[CV 2/2; 55/144] START bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=50
[CV 2/2; 55/144] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=50;, score=0.438 total time=   0.1s
[CV 1/2; 76/144] START bootstrap=False, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=100
[CV 1/2; 76/144] END bootstrap=False, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=0.562 total time=   0.1s
[CV 1/2; 108/144] START bootstrap=False,