## LSM Patch Scaling and Feature Order Analysis
##### Colab Kernel (Brainframe GPU)
##### Dataset (Electrodes)

Grants command for Access on Demand (AoD):

https://grants.corp.google.com/#/grants?request=20h%2Fchr-ards-electrodes-deid-colab-jobs&reason=b%2F314799341

### About This Notebook:
This notebook runs downstream task analysis on the ViT-MAE LSM V1 model.
These tasks include:
* I. Classification Fewshot Experiments (Activity Recognition and Exercise Detection)
* II. (Pretrain) Data Scaling Experiments (Activity Recognition and Exercise Detection)
* III. Remedies to previously incorrect mAP classification results (for sections I and II - further discussed in section III)
* IV. Generative Task Experiments (Forecasting, Time and Sensor Imputation)

# Setup

In [None]:
# @title Imports

from google3.learning.deepmind.xmanager2.client import xmanager_api
import matplotlib as mpl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import collections
import numpy as np

from google3.pyglib import gfile
import os
import tensorflow as tf
import itertools

from typing import Sequence

In [None]:
# @title Plot Formatting

# One font size shared by every text element of a figure.
MEDIUM_SIZE = 18

# `mpl.rcParams` and `plt.rcParams` are the same object, and
# `plt.rc(group, key=value)` writes `rcParams['group.key']`, so a single update
# covers every setting.
mpl.rcParams.update({
    # Fonts.
    'font.family': 'DejaVu Sans',
    'font.size': MEDIUM_SIZE,
    'axes.titlesize': MEDIUM_SIZE,
    'axes.labelsize': MEDIUM_SIZE,
    'xtick.labelsize': MEDIUM_SIZE,
    'ytick.labelsize': MEDIUM_SIZE,
    'legend.fontsize': MEDIUM_SIZE,
    'figure.titlesize': MEDIUM_SIZE,
    # Axes frame.
    'axes.linewidth': 2,
    'axes.edgecolor': '#777777',
    'axes.facecolor': '#FFFFFF',
})

elegant_palette = sns.color_palette('muted')

In [None]:
# @title Metrics and Field Names

# The two error metrics logged under every generative `<task>_eval/` prefix.
_MASKED_ERRORS = (
    'valid_mean_absolute_error_masked',
    'valid_mean_squared_error_masked',
)
# Ratio tags embedded in the eval prefixes. Kept as strings so that tags such
# as '1.0' are reproduced exactly.
_RATIO_TAGS = ('0.034', '0.067', '0.1', '0.2', '0.4')
_SENSOR_RATIO_TAGS = ('0.2', '0.4', '0.5', '0.7', '0.9', '1.0')


def _eval_metric_names(prefixes):
  """Returns '<prefix>_eval/<error>' names, with the error varying fastest."""
  return [
      f'{prefix}_eval/{error}'
      for prefix in prefixes
      for error in _MASKED_ERRORS
  ]


# Get metric names.
# Classification and reconstruction metrics logged without a prefix.
metric_names = [
    'valid_accuracy',
    'valid_mAP',
    'valid_mean_absolute_error_masked',
    'valid_mean_squared_error_masked',
    'valid_mean_absolute_error_all',
    'valid_mean_squared_error_all',
]
# Generative Task Metrics: forecast, then imputation.
metric_names += _eval_metric_names(f'forecast_{tag}' for tag in _RATIO_TAGS)
metric_names += _eval_metric_names(f'imputation_{tag}' for tag in _RATIO_TAGS)
# Sensor Imputation Metrics: single-ratio prefixes first (0.2, 0.4, 0.5) ...
metric_names += _eval_metric_names(
    f'sensor_imputation_{tag}' for tag in _SENSOR_RATIO_TAGS[:3]
)
# ... then the two-ratio grid, grouped by the second tag.
metric_names += _eval_metric_names(
    f'sensor_imputation_{sensor_tag}_{tag}'
    for tag in _RATIO_TAGS
    for sensor_tag in _SENSOR_RATIO_TAGS
)


meta_data_name = [
    'num_trainable_params',
    'core_hours',
    'examples_seen',
    'gflops',
]

# Meta data fields come first, followed by every metric.
data_field_names = meta_data_name + metric_names

print('Data fields to fetch:\n', data_field_names)

In [None]:
# @title Helpers

def read_xm_metrics(example_xid, metric_name, unit_id, lowest=True):
  """Reads one measurement series of a work unit through `xm_client`.

  Args:
    example_xid: Experiment id passed to `xm_client.get_experiment`.
    metric_name: Label of the measurement series to read.
    unit_id: Work unit id within the experiment.
    lowest: If True, return only the smallest objective value of the series;
      otherwise return every objective value, in series order.

  Returns:
    The minimum objective value (`lowest=True`) or the list of objective
    values (`lowest=False`) of the first series whose label equals
    `metric_name`. Returns None when no series has that label, or when
    `lowest=True` and the matching series has no measurements.
  """
  experiment = xm_client.get_experiment(example_xid)
  work_unit = experiment.get_work_unit(unit_id)
  for series in work_unit.list_measurement_series():
    if series.label != metric_name:
      continue
    all_measurements = [
        measurement.objective_value for measurement in series.measurements
    ]
    if not lowest:
      return all_measurements
    # `min` raises on an empty sequence; report an empty series the same way
    # as a missing one.
    return min(all_measurements) if all_measurements else None
  return None


def add_min_columns(df):
  """Adds a `final_<metric>` column with the last value of each metric series.

  Despite the function name, no minimum is computed: for every column of `df`
  whose name is in `metric_names`, the last element of each cell's list is
  stored in a new column named `'final_' + <column name>`.

  Args:
    df: DataFrame whose metric columns hold a list of values (or None) per row.
      Modified in place.

  Returns:
    The same DataFrame, with the `final_*` columns added.
  """

  def last_of_list(lst):
    # A missing series (None) or an empty one has no final value.
    if lst is None or len(lst) == 0:
      return None
    return lst[-1]

  for col in df.columns:
    if col in metric_names:
      df['final_' + col] = df[col].apply(last_of_list)

  return df


def add_better_col_names(df):
  """Adds a readable `patch_size` column derived from the patch-size config.

  When `df` has a 'config.model.patches.size' column, each of its values is
  formatted as '<first>x<second>' from its first two elements and stored in a
  new 'patch_size' column. `df` is modified in place and returned.
  """
  source_col = 'config.model.patches.size'
  if source_col in df.columns:
    df['patch_size'] = df[source_col].apply(
        lambda size: f'{size[0]}x{size[1]}'
    )
  return df


def _read_series_values(work_unit, labels):
  """Returns {label: objective values, or None if the label has no series}."""
  series_by_label = {}
  for series in work_unit.list_measurement_series():
    # Keep the first series per label, matching `read_xm_metrics`.
    series_by_label.setdefault(series.label, series)

  values = {}
  for label in labels:
    series = series_by_label.get(label)
    if series is None:
      values[label] = None
    else:
      values[label] = [
          measurement.objective_value for measurement in series.measurements
      ]
  return values


def get_metrics_df(xm_dict):
  """Builds one DataFrame row per work unit of the given experiments.

  Args:
    xm_dict: Maps an experiment id to a dict of metadata. 'model_size' and
      'feature_order' are required. 'spc', 'train_data_size' and
      'config.init_from.checkpoint_step' are optional and copied when present.

  Returns:
    A DataFrame with the columns 'wid', 'xid', 'Model Size', 'Feature Order',
    the optional metadata columns ('spc' is stored as
    'fewshot_samples_per_class'), one column per work-unit parameter, and one
    column per entry of `data_field_names` holding the full list of logged
    values (None when not logged). `add_min_columns` and
    `add_better_col_names` are applied to the result.
  """
  # Rows are collected as dicts rather than as parallel per-column lists, so
  # that work units with different parameter sets yield missing values
  # instead of columns of unequal length.
  rows = []
  for xid, values in xm_dict.items():
    model_size = values['model_size']
    feat_order = values['feature_order']

    experiment = xm_client.get_experiment(xid)
    num_of_units = experiment.get_num_work_units()

    for wid in range(1, num_of_units + 1):
      work_unit = experiment.get_work_unit(wid)
      row = {
          'wid': wid,
          'xid': xid,
          'Model Size': model_size,
          'Feature Order': feat_order,
      }

      if 'spc' in values:
        row['fewshot_samples_per_class'] = values['spc']
      if 'train_data_size' in values:
        row['train_data_size'] = values['train_data_size']
      if 'config.init_from.checkpoint_step' in values:
        row['config.init_from.checkpoint_step'] = values[
            'config.init_from.checkpoint_step'
        ]

      for param_name in work_unit.parameters.keys():
        row[param_name] = work_unit.parameters[param_name]

      # List the series once per work unit. Calling `read_xm_metrics` per
      # metric would re-fetch the experiment, the work unit and the series
      # list for every entry of `data_field_names`.
      row.update(_read_series_values(work_unit, data_field_names))
      rows.append(row)

  df = pd.DataFrame(rows)
  df = add_min_columns(df)
  df = add_better_col_names(df)

  return df


In [None]:
# @title CM Plotting Fn

def confusion_matrix_fig(
    confusion_matrix: tf.Tensor, labels: Sequence[str], scale: float = 0.8
) -> plt.Figure:
  """Returns a matplotlib plot of the given confusion matrix.

  Forked from:
  google3/fitbit/research/sensor_algorithms/training/logging/
  confusion_matrix_logging.py

  Each cell is colored by its row-normalized value (the fraction of that true
  label) and annotated with the percentage and the raw count. Tick labels
  carry the per-class totals.

  Args:
      confusion_matrix: 2D confusion matrix with true labels along rows and
        predictions along columns. Converted with `np.asarray`, so a NumPy
        array or anything array-convertible is accepted.
      labels: List of class names, will be used as axis labels.
      scale: Scale for the image size.

  Returns:
      The created figure.
  """
  confusion_matrix = np.asarray(confusion_matrix)
  label_totals = np.sum(confusion_matrix, axis=1, keepdims=True)
  prediction_totals = np.sum(confusion_matrix, axis=0, keepdims=True)

  # Rows with no true examples divide 0 by 0. The resulting NaNs are mapped to
  # 0 below, so the floating-point warnings carry no information.
  with np.errstate(divide='ignore', invalid='ignore'):
    cm_normalized = np.nan_to_num(confusion_matrix / label_totals)

  num_labels = len(labels)
  longest_label = max(len(label) for label in labels)

  # Guesstimating an appropriate size.
  image_size = scale * (num_labels + (longest_label / 8.0))

  fig = plt.figure(
      figsize=(image_size, image_size), facecolor='w', edgecolor='k'
  )
  ax = fig.add_subplot(1, 1, 1)
  ax.imshow(cm_normalized, cmap='Blues')

  tick_marks = np.arange(num_labels)

  # X axis: predicted class, annotated with the number of predictions.
  ax.set_xlabel('Predicted')
  ax.set_xticks(tick_marks)
  x_labels = [
      f'{label} ({int(count):,})'
      for label, count in zip(labels, prediction_totals[0, :])
  ]
  ax.set_xticklabels(x_labels, rotation=-45, ha='center')
  ax.xaxis.set_label_position('bottom')
  ax.xaxis.tick_bottom()

  # Y axis: true class, annotated with the number of true examples.
  ax.set_ylabel('True Label')
  ax.set_yticks(tick_marks)
  y_labels = [
      f'{label} ({int(count):,})'
      for label, count in zip(labels, label_totals[:, 0])
  ]
  ax.set_yticklabels(y_labels, va='center')
  ax.yaxis.set_label_position('left')
  ax.yaxis.tick_left()

  for row_idx, col_idx in itertools.product(
      range(confusion_matrix.shape[0]), range(confusion_matrix.shape[1])
  ):
    # White text on the dark cells of the 'Blues' colormap.
    text_color = 'white' if cm_normalized[row_idx, col_idx] >= 0.5 else 'black'
    if confusion_matrix[row_idx, col_idx] == 0:
      text_str = '.'
    else:
      text_str = (
          f'{cm_normalized[row_idx,col_idx]:2.0%}\n'
          f'({int(confusion_matrix[row_idx, col_idx]):,})'
      )
    ax.text(
        col_idx,
        row_idx,
        text_str,
        horizontalalignment='center',
        verticalalignment='center',
        color=text_color,
    )

  fig.set_tight_layout(True)

  return fig

In [None]:
# @title Embedding Plotting Setup

# Imports
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from collections import Counter
import jax.numpy as jnp


# Constants
# Names of the 26 input features.
ALL_FEATURE_NAMES = [
    'sclValue',
    'sclSlope',
    'skinTempValue',
    'hr',
    'hrvPercentGood',
    'hrvRR80thPercentile',
    'hrvRR20thPercentile',
    'hrvRRMedian',
    'hrvRRMean',
    'hrvShannonEntropyRR',
    'hrvShannonEntropyRRDiffs',
    'hrvPNN30',
    'hrvRMSSD',
    'hrvSDNN',
    'sleepCoefficient',
    'onWrist',
    'jerkAuto',
    'stepCount',
    'logEnergy',
    'grok_covariance',
    'logEnergyRatio',
    'zeroCrossingStd',
    'zeroCrossingAvg',
    'axisMean',
    'altimStdNorm',
    'grok_kurtosis',
]

# Activity name -> integer code (presumably the dataset's activity type id;
# TODO confirm).
actDict = {
    'Yoga': 52000,
    'Pilates': 53000,
    'Bike': 90001,
    'Run': 90009,
    'Hike': 90012,
    'Walk': 90013,
    'Elliptical': 90017,
    'Treadmill': 90019,
    'Swim': 90024,
    'HIIT': 91040,
    'Weightlifting': 91043,
    'Core training': 91046,
}

# Activity name -> class index (presumably the one-hot position, going by the
# variable name; TODO confirm).
actOHEDict = {
    'Weightlifting': 0,
    'Swim': 1,
    'Elliptical': 2,
    'Walk': 3,
    'Run': 4,
    'Bike': 5,
    'HIIT': 6,
    'Strength training': 7,
}


def plot_embeddings(Xd, yd, colors, names):
  """Shows PCA, LDA and t-SNE views of labelled embeddings.

  Four figures are drawn in order: a PCA scatter, an LDA scatter of the first
  two discriminants, a per-class histogram of the first discriminant, and a
  t-SNE scatter.

  Args:
    Xd: 2D array of embeddings, one row per example.
    yd: Integer class label per example; class `i` is drawn with `colors[i]`
      and `names[i]`.
    colors: One matplotlib color per class.
    names: One display name per class.

  Raises:
    ValueError: If `names` and `colors` differ in length.
  """
  if len(names) != len(colors):
    raise ValueError(f'names ({len(names)}) and colors ({len(colors)}) must have the same length.')

  # The boolean masks `yd == i` below need an array, not a plain list.
  yd = np.asarray(yd)

  # PCA
  # The plotted components are 0-based indices. The axis labels are derived
  # from them so that the figure states which components it shows.
  pca_x, pca_y = 2, 3
  pca = PCA()
  pipe = Pipeline([('scaler', StandardScaler()), ('pca', pca)])
  plt.figure(figsize=(8,6))
  Xt = pipe.fit_transform(Xd)
  plot = plt.scatter(Xt[:, pca_x], Xt[:, pca_y], c=yd)
  plt.xlabel(f'PCA Dim {pca_x + 1}')
  plt.ylabel(f'PCA Dim {pca_y + 1}')
  plt.legend(
      handles=plot.legend_elements()[0],
      labels=names,
      loc='upper left',
      bbox_to_anchor=(1, 1)
  )
  plt.show()
  print('\n\n')

  # LDA
  lda = LDA(n_components=None, priors=None, shrinkage=None, solver='svd',store_covariance=False, tol=0.0001)
  X_r2 = lda.fit(Xd, yd).transform(Xd)

  # LDA yields at most (number of classes - 1) dimensions, so a two-class
  # problem has no second discriminant to scatter against.
  if X_r2.shape[1] >= 2:
    plt.figure(figsize=(8,6))
    for i, (name, color) in enumerate(zip(names, colors)):
      plt.scatter(X_r2[yd == i, 0], X_r2[yd == i, 1], label=name, alpha=0.3, color=color)

    plt.xlabel('LDA Dim 1')
    plt.ylabel('LDA Dim 2')
    plt.legend(
        loc='upper left',
        bbox_to_anchor=(1, 1),
        shadow=False,
        scatterpoints=1
    )
    plt.show()
    print('\n\n')

  # LDA 1D Distributions
  plt.figure(figsize=(8,6))
  for i, (name, color) in enumerate(zip(names, colors)):
    plt.hist(X_r2[yd == i, 0],20, density=True, label=name, alpha=0.5, color=color)

  plt.xlabel('LDA Dim 1')
  plt.ylabel('Frac. of Examples Per Class')
  plt.legend(
      loc='upper left',
      bbox_to_anchor=(1, 1),
      shadow=False,
      scatterpoints=1
  )
  plt.show()

  # TSNE
  tsne = TSNE(n_components=2, random_state=0)
  Xt = tsne.fit_transform(Xd)

  plt.figure(figsize=(6, 6))
  for i, (name, color) in enumerate(zip(names, colors)):
    plt.scatter(Xt[yd == i, 0], Xt[yd == i, 1], label=name, alpha=0.3, color=color)

  plt.xlabel('t-SNE Dim 1')
  plt.ylabel('t-SNE Dim 2')
  plt.legend(
      loc='upper left',
      bbox_to_anchor=(1, 1),
      shadow=False,
      scatterpoints=1
  )
  plt.show()


def reshape_time_crop_patch_embeddings(
    x,
    patch_reorder_shape,
    start=None,
    end=None,
):
  """Reshapes token embeddings into a patch grid and crops it along time.

  Args:
    x: Array of shape (n_batch, n_tokens, embedding_dim).
    patch_reorder_shape: (n_h, n_w) shape of the patch grid; the reshape
      requires n_h * n_w == n_tokens.
    start: Fractional start of the crop along the first grid axis (n_h), in
      [0, 1). Defaults to 0.
    end: Fractional end of the crop along the first grid axis, in (0, 1].
      Defaults to 1.

  Returns:
    Array of shape (n_batch, int(end * n_h) - int(start * n_h), n_w,
    embedding_dim).

  Raises:
    ValueError: If `start >= end`, `start < 0` or `end > 1`.
  """
  # Get patch and input shape.
  n_h, n_w = patch_reorder_shape
  n_batch, _, embedding_dim = x.shape

  # Get start and end crop (along time axis).
  if end is None:
    end = 1
  if start is None:
    start = 0
  if start >= end:
    raise ValueError(f'start {start} must be less than end {end}.')
  # A negative start would be read as an offset from the end by the slice
  # below and silently crop the wrong region.
  if start < 0 or end > 1:
    raise ValueError(f'start {start} and end {end} must lie within [0, 1].')

  # reorganize patches into image:
  x = jnp.reshape(x, [n_batch, n_h, n_w, embedding_dim])

  # Time Crop image based on horizon
  start_idx = int(start * n_h)
  end_idx = int(end * n_h)
  return x[:, start_idx:end_idx, :, :]

In [None]:
# Setup XM Client
# Module-level client; `read_xm_metrics` and `get_metrics_df` use it to fetch
# experiments by id.
xm_client = xmanager_api.XManagerApi(xm_deployment_env='alphabet')

In [None]:
# Pre-training dataset sizes. The data-scaling cells look these up with a
# 1-based index taken from the checkpoint directory name.
datasizes = [1_000, 10_000, 100_000, 750_000, 1_321_235]
step_scaling = list()

# Confusion Matrix Plotting

## LSM (Base) Finetune

In [None]:
XID = 126268296
WID = 1

# Experiment metadata read by `get_metrics_df`, keyed by experiment id.
lsm_ft_xm_id_dict = {
    XID: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
        meta_data='Activity finetune',
    ),
}

# Fetch every work unit of the experiment, then keep only work unit WID.
lsm_ft_df = get_metrics_df(lsm_ft_xm_id_dict).loc[
    lambda frame: frame['wid'] == WID
]
lsm_ft_df

In [None]:
# Confusion Matrix


def _load_npy(path):
  """Loads the array stored in the .npy file at `path` through gfile."""
  with gfile.Open(path, 'rb') as npy_file:
    return np.load(npy_file)


# Step whose saved confusion matrix is plotted.
step = 300
file_name = os.path.join('/cns/dz-d/home/xliucs/lsm/xm/', str(XID), str(WID))
cm_file_name = os.path.join(file_name, f'valid_confusion_matrix_{step}.npy')
cm_labels_file_name = os.path.join(file_name, f'valid_confusion_matrix_labels_{step}.npy')

print('Reading CM File:', cm_file_name)
cm = _load_npy(cm_file_name)

print('Reading CM Labels File:', cm_labels_file_name)
cm_labels = _load_npy(cm_labels_file_name)

confusion_matrix_fig(cm, cm_labels, scale=1.2);

## LSM (Base) Linear Probe

In [None]:
XID = 126388131
WID = 1

# Experiment metadata read by `get_metrics_df`, keyed by experiment id.
lsm_lp_xm_id_dict = {
    XID: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
        meta_data='Activity linear probe',
    ),
}

# Fetch every work unit of the experiment, then keep only work unit WID.
lsm_lp_df = get_metrics_df(lsm_lp_xm_id_dict).loc[
    lambda frame: frame['wid'] == WID
]
lsm_lp_df

In [None]:
# Confusion Matrix


def _load_npy(path):
  """Loads the array stored in the .npy file at `path` through gfile."""
  with gfile.Open(path, 'rb') as npy_file:
    return np.load(npy_file)


# Step whose saved confusion matrix is plotted.
step = 300
file_name = os.path.join('/cns/dz-d/home/xliucs/lsm/xm/', str(XID), str(WID))
cm_file_name = os.path.join(file_name, f'valid_confusion_matrix_{step}.npy')
cm_labels_file_name = os.path.join(file_name, f'valid_confusion_matrix_labels_{step}.npy')

print('Reading CM File:', cm_file_name)
cm = _load_npy(cm_file_name)

print('Reading CM Labels File:', cm_labels_file_name)
cm_labels = _load_npy(cm_labels_file_name)

confusion_matrix_fig(cm, cm_labels, scale=1.2);

# I. Classification Fewshot Experiments

## Activity Recognition Fewshot Results

In [None]:
# @title Linear Probe Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
fewshot_lp_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'linear probe',
        'spc': spc,
    }
    for xid, spc in (
        (126994618, 20),
        (126994444, 15),
        (126993637, 10),
        (126993590, 5),
        (126993446, 1),
    )
}

fewshot_lp_df = get_metrics_df(fewshot_lp_xm_id_dict).sort_values(
    by='fewshot_samples_per_class', ascending=True
)

# Keep only the work units trained with this base learning rate.
lr = 0.005
plt_fewshot_lp_df = fewshot_lp_df.loc[
    fewshot_lp_df['config.schedule.all.lr_configs.base_learning_rate'] == lr
]
plt_fewshot_lp_df

In [None]:
# @title Finetune Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
fewshot_ft_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'fine tune',
        'spc': spc,
    }
    for xid, spc in (
        (126950705, 20),
        (126949674, 15),
        (126949222, 10),
        (126948854, 5),
        (126947854, 1),
    )
}

fewshot_ft_df = get_metrics_df(fewshot_ft_xm_id_dict).sort_values(
    by='fewshot_samples_per_class', ascending=True
)

# Keep only the work units trained with this base learning rate.
lr = 0.0005
plt_fewshot_ft_df = fewshot_ft_df.loc[
    fewshot_ft_df['config.schedule.all.lr_configs.base_learning_rate'] == lr
]
plt_fewshot_ft_df

In [None]:
# @title Conv Probe Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
fewshot_cp_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'conv probe',
        'spc': spc,
    }
    for xid, spc in (
        (126972479, 20),
        (126993248, 15),
        (126971980, 10),
        (126971054, 5),
        (126970538, 1),
    )
}

fewshot_cp_df = get_metrics_df(fewshot_cp_xm_id_dict).sort_values(
    by='fewshot_samples_per_class', ascending=True
)

# Keep only the work units trained with this base learning rate.
lr = 0.0005
plt_fewshot_cp_df = fewshot_cp_df.loc[
    fewshot_cp_df['config.schedule.all.lr_configs.base_learning_rate'] == lr
]
plt_fewshot_cp_df

In [None]:
# @title Supervised Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
fewshot_supervised_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'supervised',
        'spc': spc,
    }
    for xid, spc in (
        (127069108, 20),
        (127068936, 15),
        (127068506, 10),
        (127063814, 5),
        (127068122, 1),
    )
}

fewshot_supervised_df = get_metrics_df(
    fewshot_supervised_xm_id_dict
).sort_values(by='fewshot_samples_per_class', ascending=True)

# Keep only the work units trained with this base learning rate.
lr = 0.005
plt_fewshot_supervised_df = fewshot_supervised_df.loc[
    fewshot_supervised_df['config.schedule.all.lr_configs.base_learning_rate'] == lr
]
plt_fewshot_supervised_df

In [None]:
# @title Plotting of AR Fewshotting


def _fewshot_curves(results_df):
  """Returns (samples per class, accuracy in %, mAP in %) of `results_df`."""
  return (
      results_df['fewshot_samples_per_class'],
      results_df['final_valid_accuracy'] * 100,
      results_df['final_valid_mAP'] * 100,
  )


def _plot_fewshot_curves(curves, ylabel):
  """Draws each (x, y, color, label) curve on a new figure; returns the legend."""
  plt.figure()
  for spc, values, color, label in curves:
    plt.plot(spc, values, '--o', color=color, label=label)
  plt.xlabel('Fewshot Samples Per Class')
  plt.ylabel(ylabel)
  return plt.legend()


lp_spc, lp_acc, lp_map = _fewshot_curves(plt_fewshot_lp_df)  # Linear Probe
cp_spc, cp_acc, cp_map = _fewshot_curves(plt_fewshot_cp_df)  # Convolutional Probe
ft_spc, ft_acc, ft_map = _fewshot_curves(plt_fewshot_ft_df)  # Finetune
sup_spc, sup_acc, sup_map = _fewshot_curves(plt_fewshot_supervised_df)  # Supervised

_plot_fewshot_curves(
    [
        (ft_spc, ft_acc, 'r', 'fine-tune'),
        (cp_spc, cp_acc, 'cornflowerblue', 'convolutional probe'),
        (lp_spc, lp_acc, 'gold', 'linear probe'),
        (sup_spc, sup_acc, 'black', 'supervised'),
    ],
    'Accuracy',
)

_plot_fewshot_curves(
    [
        (ft_spc, ft_map, 'r', 'fine-tune'),
        (cp_spc, cp_map, 'cornflowerblue', 'convolutional probe'),
        (lp_spc, lp_map, 'gold', 'linear probe'),
        (sup_spc, sup_map, 'black', 'supervised'),
    ],
    'mAP',
)

## Exercise Detection (Activity vs Mood Events) Fewshot Results

In [None]:
# @title Linear Probe Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
fewshot_lp_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'linear probe',
        'spc': spc,
    }
    for xid, spc in (
        (128157854, 20),
        (128162567, 15),
        (128162668, 10),
        (128163759, 5),
        (128162732, 1),
    )
}

fewshot_lp_df = get_metrics_df(fewshot_lp_xm_id_dict).sort_values(
    by='fewshot_samples_per_class', ascending=True
)

# Keep only the work units trained with this base learning rate.
lr = 0.005
plt_fewshot_lp_df = fewshot_lp_df.loc[
    fewshot_lp_df['config.schedule.all.lr_configs.base_learning_rate'] == lr
]
plt_fewshot_lp_df

In [None]:
# @title Finetune Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
fewshot_ft_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'finetune',
        'spc': spc,
    }
    for xid, spc in (
        (128423107, 20),
        (128424721, 15),
        (128425248, 10),
        (128426616, 5),
    )
}

fewshot_ft_df = get_metrics_df(fewshot_ft_xm_id_dict).sort_values(
    by='fewshot_samples_per_class', ascending=True
)

# Keep only the work units trained with this base learning rate.
lr = 0.0005
plt_fewshot_ft_df = fewshot_ft_df.loc[
    fewshot_ft_df['config.schedule.all.lr_configs.base_learning_rate'] == lr
]
plt_fewshot_ft_df

In [None]:
# @title Conv Probe Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
# 'type' is labelled 'conv probe', as in the Activity Recognition conv probe
# cell; it previously read 'finetune', copied from the finetune cell.
fewshot_cp_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'conv probe',
        'spc': spc,
    }
    for xid, spc in (
        (128427223, 20),
        (128427386, 15),
        (128427409, 10),
        (128427562, 5),
    )
}

fewshot_cp_df = get_metrics_df(fewshot_cp_xm_id_dict)
fewshot_cp_df = fewshot_cp_df.sort_values(by='fewshot_samples_per_class', ascending=True)

# Keep only the work units trained with this base learning rate.
lr = 0.005
plt_fewshot_cp_df = fewshot_cp_df[fewshot_cp_df['config.schedule.all.lr_configs.base_learning_rate'] == lr]
plt_fewshot_cp_df

In [None]:
# @title Supervised VIT Fewshot Results

# One experiment per few-shot budget: (experiment id, samples per class).
# 'type' is labelled 'supervised', as in the Activity Recognition supervised
# cell; it previously read 'finetune', copied from the finetune cell.
fewshot_sup_xm_id_dict = {
    xid: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'supervised',
        'spc': spc,
    }
    for xid, spc in (
        (128448867, 20),
        (128448952, 15),
        (128449985, 10),
        (128451858, 5),
    )
}

fewshot_sup_df = get_metrics_df(fewshot_sup_xm_id_dict)
fewshot_sup_df = fewshot_sup_df.sort_values(by='fewshot_samples_per_class', ascending=True)

# Keep only the work units trained with this base learning rate.
lr = 0.00005
plt_fewshot_sup_df = fewshot_sup_df[fewshot_sup_df['config.schedule.all.lr_configs.base_learning_rate'] == lr]
plt_fewshot_sup_df

In [None]:
# @title Plotting of ED Fewshotting


def _fewshot_curves(results_df):
  """Returns (samples per class, accuracy in %, mAP in %) of `results_df`."""
  return (
      results_df['fewshot_samples_per_class'],
      results_df['final_valid_accuracy'] * 100,
      results_df['final_valid_mAP'] * 100,
  )


def _plot_fewshot_curves(curves, ylabel):
  """Draws each (x, y, color, label) curve on a new figure; returns the legend."""
  plt.figure()
  for spc, values, color, label in curves:
    plt.plot(spc, values, '--o', color=color, label=label)
  plt.xlabel('Fewshot Samples Per Class')
  plt.ylabel(ylabel)
  return plt.legend()


lp_spc, lp_acc, lp_map = _fewshot_curves(plt_fewshot_lp_df)  # Linear Probe
cp_spc, cp_acc, cp_map = _fewshot_curves(plt_fewshot_cp_df)  # Convolutional Probe
ft_spc, ft_acc, ft_map = _fewshot_curves(plt_fewshot_ft_df)  # Finetune
# Supervised: read the Exercise Detection frame (`plt_fewshot_sup_df`).
# `plt_fewshot_supervised_df` holds the Activity Recognition results.
sup_spc, sup_acc, sup_map = _fewshot_curves(plt_fewshot_sup_df)

_plot_fewshot_curves(
    [
        (ft_spc, ft_acc, 'r', 'fine-tune'),
        (cp_spc, cp_acc, 'cornflowerblue', 'convolutional probe'),
        (lp_spc, lp_acc, 'gold', 'linear probe'),
        (sup_spc, sup_acc, 'black', 'supervised'),
    ],
    'Accuracy',
)

_plot_fewshot_curves(
    [
        (ft_spc, ft_map, 'r', 'fine-tune'),
        (cp_spc, cp_map, 'cornflowerblue', 'convolutional probe'),
        (lp_spc, lp_map, 'gold', 'linear probe'),
        (sup_spc, sup_map, 'black', 'supervised'),
    ],
    'mAP',
)

In [None]:
# Checkpoint directory -> few-shot samples per class. The directories are
# work unit 3 of the Exercise Detection linear-probe few-shot experiments.
meta_data = [
    {
        'config.init_from.checkpoint_dir': '/cns/dz-d/home/xliucs/lsm/xm/128157854/3',
        'spc': 20,
    },
    {
        'config.init_from.checkpoint_dir': '/cns/dz-d/home/xliucs/lsm/xm/128162567/3',
        'spc': 15,
    },
    {
        'config.init_from.checkpoint_dir': '/cns/dz-d/home/xliucs/lsm/xm/128162668/3',
        'spc': 10,
    },
    {
        'config.init_from.checkpoint_dir': '/cns/dz-d/home/xliucs/lsm/xm/128163759/3',
        'spc': 5,
    },
]
meta_df = pd.DataFrame(meta_data)

lsm_lp_xm_id_dict = {
    128411339: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
    ),
}
lsm_lp_df = get_metrics_df(lsm_lp_xm_id_dict)

# Attach `spc` to each work unit through the checkpoint it was initialised
# from; work units with other checkpoints are dropped by the inner join.
merged_df = lsm_lp_df.merge(
    meta_df, on='config.init_from.checkpoint_dir', how='inner'
)
merged_df.loc[merged_df['spc'].notna()]

# II. Classification (Pre-train) Data Scaling Experiments

## Activity Recognition

In [None]:
# @title Finetune results

act_xm_id_dict = {
    127126620: dict(
        model_size='Base',
        feature_order='Ordered',
        type='finetune',
    ),
}
act_sweep_df = get_metrics_df(act_xm_id_dict)

# Add train sizes: the trailing number of the checkpoint directory is used as
# a 1-based index into `datasizes`.
act_sweep_df['train_data_size'] = (
    act_sweep_df['config.init_from.checkpoint_dir']
    .str.extract(r'/(\d+)$', expand=False)
    .astype(int)
    .apply(lambda size_idx: datasizes[size_idx - 1])
)

# Add table results (for best model), run in a different XM job
table_results_act_xm_id_dict = {
    125999449: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'finetune',
        'train_data_size': datasizes[-1],
        'config.init_from.checkpoint_step': 50000,
    },
}
table_act_sweep_df = get_metrics_df(table_results_act_xm_id_dict).loc[
    lambda frame: frame['config.linear_dropout_rate'] == 0.3
]

# Merge tables: for a given (checkpoint step, train size) pair the table row,
# concatenated last, replaces the sweep row.
merge_cols = ['config.init_from.checkpoint_step', 'train_data_size']
act_sweep_df = pd.concat([act_sweep_df, table_act_sweep_df], ignore_index=True)
act_finetune_sweep_df = act_sweep_df.drop_duplicates(subset=merge_cols, keep='last')

# Keep only the runs initialised from checkpoint step 50000.
ft_sub_df = act_finetune_sweep_df.loc[
    lambda frame: frame['config.init_from.checkpoint_step'] == 50000
]
ft_train_size = ft_sub_df['train_data_size'].astype(int)
ft_acc = ft_sub_df['final_valid_accuracy'] * 100
ft_map = ft_sub_df['final_valid_mAP'] * 100
ft_sub_df

In [None]:
# @title Linear Probe Results

# Linear-probe sweep for activity recognition.
act_xm_id_dict = {
    127224896: dict(model_size='Base', feature_order='Ordered', type='linear probe'),
}
act_sweep_df = get_metrics_df(act_xm_id_dict)

# Add train sizes: the trailing number of the checkpoint dir is used as a
# 1-based index into `datasizes`.
ckpt_suffix = act_sweep_df['config.init_from.checkpoint_dir'].str.extract(
    r'/(\d+)$', expand=False
)
act_sweep_df['train_data_size'] = ckpt_suffix.astype(int).map(
    lambda idx: datasizes[idx - 1]
)

# Table results (for best model) were run in a different XM job. They are
# tagged with the last entry of `datasizes` and checkpoint step 50000
# (presumably attached as columns by get_metrics_df - TODO confirm).
table_results_act_xm_id_dict = {
    126009342: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'linear probe',
        'train_data_size': datasizes[-1],
        'config.init_from.checkpoint_step': 50000,
    }
}
table_act_sweep_df = get_metrics_df(table_results_act_xm_id_dict)
dropout_is_03 = table_act_sweep_df['config.linear_dropout_rate'] == 0.3
table_act_sweep_df = table_act_sweep_df.loc[dropout_is_03]
table_act_sweep_df

# Merge tables. Table rows are appended last, so keep='last' lets them replace
# sweep rows with the same (checkpoint step, train data size).
merge_cols = ['config.init_from.checkpoint_step', 'train_data_size']
act_sweep_df = pd.concat([act_sweep_df, table_act_sweep_df], ignore_index=True)
act_linearprobe_sweep_df = act_sweep_df.drop_duplicates(subset=merge_cols, keep='last')
act_linearprobe_sweep_df

# Series consumed by the plotting cell: step-50000 checkpoints only, metrics
# scaled by 100.
at_50k_steps = act_linearprobe_sweep_df['config.init_from.checkpoint_step'] == 50000
lp_sub_df = act_linearprobe_sweep_df.loc[at_50k_steps]
lp_train_size = lp_sub_df['train_data_size'].astype(int)
lp_acc = lp_sub_df['final_valid_accuracy'].mul(100)
lp_map = lp_sub_df['final_valid_mAP'].mul(100)
lp_sub_df

In [None]:
# @title Conv Probe Results

# Conv-probe sweep for activity recognition.
act_xm_id_dict = {
    127225258: dict(model_size='Base', feature_order='Ordered', type='conv probe'),
}
act_sweep_df = get_metrics_df(act_xm_id_dict)

# Add train sizes: the trailing number of the checkpoint dir is used as a
# 1-based index into `datasizes`.
ckpt_suffix = act_sweep_df['config.init_from.checkpoint_dir'].str.extract(
    r'/(\d+)$', expand=False
)
act_sweep_df['train_data_size'] = ckpt_suffix.astype(int).map(
    lambda idx: datasizes[idx - 1]
)

# Table results (for best model) were run in a different XM job. They are
# tagged with the last entry of `datasizes` and checkpoint step 50000
# (presumably attached as columns by get_metrics_df - TODO confirm).
table_results_act_xm_id_dict = {
    126030364: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'conv probe',
        'train_data_size': datasizes[-1],
        'config.init_from.checkpoint_step': 50000,
    }
}
table_act_sweep_df = get_metrics_df(table_results_act_xm_id_dict)
dropout_is_03 = table_act_sweep_df['config.linear_dropout_rate'] == 0.3
table_act_sweep_df = table_act_sweep_df.loc[dropout_is_03]
table_act_sweep_df

# Merge tables. Table rows are appended last, so keep='last' lets them replace
# sweep rows with the same (checkpoint step, train data size).
merge_cols = ['config.init_from.checkpoint_step', 'train_data_size']
act_sweep_df = pd.concat([act_sweep_df, table_act_sweep_df], ignore_index=True)
act_convprobe_sweep_df = act_sweep_df.drop_duplicates(subset=merge_cols, keep='last')
act_convprobe_sweep_df

# Series consumed by the plotting cell: step-50000 checkpoints only, metrics
# scaled by 100.
at_50k_steps = act_convprobe_sweep_df['config.init_from.checkpoint_step'] == 50000
cp_sub_df = act_convprobe_sweep_df.loc[at_50k_steps]
cp_train_size = cp_sub_df['train_data_size'].astype(int)
cp_acc = cp_sub_df['final_valid_accuracy'].mul(100)
cp_map = cp_sub_df['final_valid_mAP'].mul(100)
cp_sub_df

In [None]:
# Curves for the accuracy figure, in draw/legend order.
acc_curves = (
    dict(x=ft_train_size, y=ft_acc, color='r', label='fine tune'),
    dict(x=lp_train_size, y=lp_acc, color='cornflowerblue', label='linear probe'),
    dict(x=cp_train_size, y=cp_acc, color='gold', label='conv probe'),
)
# Curves for the mAP figure, in draw/legend order.
map_curves = (
    dict(x=ft_train_size, y=ft_map, color='r', label='fine-tune'),
    dict(x=lp_train_size, y=lp_map, color='cornflowerblue', label='linear probe'),
    dict(x=cp_train_size, y=cp_map, color='gold', label='conv probe'),
)

# Accuracy vs. train data size; only the x-axis is logarithmic.
plt.figure()
plt.xscale('log')
for curve in acc_curves:
  plt.plot(curve['x'], curve['y'], '--o',
           color=curve['color'], label=curve['label'])
plt.xlabel('Train Data Size')
plt.ylabel('Acc.')
plt.legend()

# mAP vs. train data size; only the x-axis is logarithmic.
plt.figure()
plt.xscale('log')
for curve in map_curves:
  plt.plot(curve['x'], curve['y'], '--o',
           color=curve['color'], label=curve['label'])
plt.xlabel('Train Data Size')
plt.ylabel('mAP')
plt.legend()

## Exercise Detection (Activity vs Mood Classification)

In [None]:
# @title Finetune results

# Fine-tune sweep for exercise detection.
mood_act_xm_id_dict = {
    127138981: dict(model_size='Base', feature_order='Ordered', type='finetune'),
}
mood_act_sweep_df = get_metrics_df(mood_act_xm_id_dict)

# Add train sizes: the trailing number of the checkpoint dir is used as a
# 1-based index into `datasizes`.
ckpt_suffix = mood_act_sweep_df['config.init_from.checkpoint_dir'].str.extract(
    r'/(\d+)$', expand=False
)
mood_act_sweep_df['train_data_size'] = ckpt_suffix.astype(int).map(
    lambda idx: datasizes[idx - 1]
)

# Alias (same object, no copy) under a protocol-specific name.
mood_act_finetune_sweep_df = mood_act_sweep_df
mood_act_finetune_sweep_df

# Series consumed by the plotting cell: step-50000 checkpoints only, metrics
# scaled by 100.
at_50k_steps = mood_act_finetune_sweep_df['config.init_from.checkpoint_step'] == 50000
ft_sub_df = mood_act_finetune_sweep_df.loc[at_50k_steps]
ft_train_size = ft_sub_df['train_data_size'].astype(int)
ft_acc = ft_sub_df['final_valid_accuracy'].mul(100)
ft_map = ft_sub_df['final_valid_mAP'].mul(100)

In [None]:
# @title Linear Probe results

# Linear-probe sweep for exercise detection.
mood_act_xm_id_dict = {
    127198831: dict(model_size='Base', feature_order='Ordered', type='linear probe'),
}
mood_act_sweep_df = get_metrics_df(mood_act_xm_id_dict)

# Add train sizes: the trailing number of the checkpoint dir is used as a
# 1-based index into `datasizes`.
ckpt_suffix = mood_act_sweep_df['config.init_from.checkpoint_dir'].str.extract(
    r'/(\d+)$', expand=False
)
mood_act_sweep_df['train_data_size'] = ckpt_suffix.astype(int).map(
    lambda idx: datasizes[idx - 1]
)

# Alias (same object, no copy) under a protocol-specific name.
mood_act_linearprobe_sweep_df = mood_act_sweep_df
mood_act_linearprobe_sweep_df

# Series consumed by the plotting cell: step-50000 checkpoints only, metrics
# scaled by 100.
at_50k_steps = mood_act_linearprobe_sweep_df['config.init_from.checkpoint_step'] == 50000
lp_sub_df = mood_act_linearprobe_sweep_df.loc[at_50k_steps]
lp_train_size = lp_sub_df['train_data_size'].astype(int)
lp_acc = lp_sub_df['final_valid_accuracy'].mul(100)
lp_map = lp_sub_df['final_valid_mAP'].mul(100)

In [None]:
# @title Conv Probe results

# Conv-probe sweep for exercise detection.
mood_act_xm_id_dict = {
    127225558: dict(model_size='Base', feature_order='Ordered', type='conv probe'),
}
mood_act_sweep_df = get_metrics_df(mood_act_xm_id_dict)

# Add train sizes: the trailing number of the checkpoint dir is used as a
# 1-based index into `datasizes`.
ckpt_suffix = mood_act_sweep_df['config.init_from.checkpoint_dir'].str.extract(
    r'/(\d+)$', expand=False
)
mood_act_sweep_df['train_data_size'] = ckpt_suffix.astype(int).map(
    lambda idx: datasizes[idx - 1]
)

# Alias (same object, no copy) under a protocol-specific name.
mood_act_convprobe_sweep_df = mood_act_sweep_df
mood_act_convprobe_sweep_df

# Series consumed by the plotting cell: step-50000 checkpoints only, metrics
# scaled by 100.
at_50k_steps = mood_act_convprobe_sweep_df['config.init_from.checkpoint_step'] == 50000
cp_sub_df = mood_act_convprobe_sweep_df.loc[at_50k_steps]
cp_train_size = cp_sub_df['train_data_size'].astype(int)
cp_acc = cp_sub_df['final_valid_accuracy'].mul(100)
cp_map = cp_sub_df['final_valid_mAP'].mul(100)

In [None]:
# Curves for the accuracy figure, in draw/legend order.
acc_curves = (
    dict(x=ft_train_size, y=ft_acc, color='r', label='fine tune'),
    dict(x=lp_train_size, y=lp_acc, color='cornflowerblue', label='linear probe'),
    dict(x=cp_train_size, y=cp_acc, color='gold', label='conv probe'),
)
# Curves for the mAP figure, in draw/legend order.
map_curves = (
    dict(x=ft_train_size, y=ft_map, color='r', label='fine-tune'),
    dict(x=lp_train_size, y=lp_map, color='cornflowerblue', label='linear probe'),
    dict(x=cp_train_size, y=cp_map, color='gold', label='conv probe'),
)

# Accuracy vs. train data size; only the x-axis is logarithmic.
plt.figure()
plt.xscale('log')
for curve in acc_curves:
  plt.plot(curve['x'], curve['y'], '--o',
           color=curve['color'], label=curve['label'])
plt.xlabel('Train Data Size')
plt.ylabel('Acc.')
plt.legend()

# mAP vs. train data size; only the x-axis is logarithmic.
plt.figure()
plt.xscale('log')
for curve in map_curves:
  plt.plot(curve['x'], curve['y'], '--o',
           color=curve['color'], label=curve['label'])
plt.xlabel('Train Data Size')
plt.ylabel('mAP')
plt.legend()

# III. Fixed Classification mAP Results

The original implementation of mAP was incorrect (it was computed from predictions rather than logits). To remedy this, eval was re-run for each AR / ED classification task, for both the (Pre-train) Data Scaling and Fewshot experiments.

NOTE:
Fewshot experiment results need only be remedied for AR. ED fewshot results were run AFTER the bug was caught and fixed.

The remedied results are presented below.

## Activity Recognition (Fewshot and Data Scaling Experiments)

### Linear Probe

In [None]:
# Column shared by the metrics table and the hand-written metadata below.
ckpt_dir_col = 'config.init_from.checkpoint_dir'

# Data-scaling checkpoints carry `data_size`; few-shot checkpoints carry `spc`.
# Whichever key a row lacks becomes NaN in the resulting frame.
meta_data = [
    {ckpt_dir_col: ckpt_dir, 'data_size': n_train}
    for ckpt_dir, n_train in (
        ('/cns/dz-d/home/xliucs/lsm/xm/126009342/2', 1321235),
        ('/cns/dz-d/home/xliucs/lsm/xm/127224896/24', 750000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127224896/18', 100000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127224896/12', 10000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127224896/6', 1000),
    )
] + [
    {ckpt_dir_col: ckpt_dir, 'spc': shots}
    for ckpt_dir, shots in (
        ('/cns/dz-d/home/xliucs/lsm/xm/126994618/2', 20),
        ('/cns/dz-d/home/xliucs/lsm/xm/126994444/2', 15),
        ('/cns/dz-d/home/xliucs/lsm/xm/126993637/2', 10),
        ('/cns/dz-d/home/xliucs/lsm/xm/126993590/2', 5),
    )
]
meta_df = pd.DataFrame(meta_data)

# XM experiment holding the re-run eval metrics for the linear probe.
lsm_lp_xm_id_dict = {
    128247616: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
    ),
}
lsm_lp_df = get_metrics_df(lsm_lp_xm_id_dict)
lsm_lp_df

# Inner join: keep only metric rows whose checkpoint dir is listed above.
merged_df = lsm_lp_df.merge(meta_df, on=ckpt_dir_col, how='inner')


In [None]:
# Fewshot results: rows of the merged table that have an `spc` value.
merged_df.dropna(subset=['spc'])

In [None]:
# Data scaling results: rows of the merged table that have a `data_size` value.
merged_df.dropna(subset=['data_size'])

### Finetune

In [None]:
# Column shared by the metrics table and the hand-written metadata below.
ckpt_dir_col = 'config.init_from.checkpoint_dir'

# Data-scaling checkpoints carry `data_size`; few-shot checkpoints carry `spc`.
# Whichever key a row lacks becomes NaN in the resulting frame.
meta_data = [
    {ckpt_dir_col: ckpt_dir, 'data_size': n_train}
    for ckpt_dir, n_train in (
        ('/cns/dz-d/home/xliucs/lsm/xm/125999449/2', 1321235),
        ('/cns/dz-d/home/xliucs/lsm/xm/127126620/24', 750000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127126620/18', 100000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127126620/12', 10000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127126620/6', 1000),
    )
] + [
    {ckpt_dir_col: ckpt_dir, 'spc': shots}
    for ckpt_dir, shots in (
        ('/cns/dz-d/home/xliucs/lsm/xm/126950705/1', 20),
        ('/cns/dz-d/home/xliucs/lsm/xm/126949674/1', 15),
        ('/cns/dz-d/home/xliucs/lsm/xm/126949222/1', 10),
        ('/cns/dz-d/home/xliucs/lsm/xm/126948854/1', 5),
    )
]
meta_df = pd.DataFrame(meta_data)

# XM experiment holding the re-run eval metrics for fine-tuning.
lsm_ft_xm_id_dict = {
    128258737: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
    ),
}
lsm_ft_df = get_metrics_df(lsm_ft_xm_id_dict)
lsm_ft_df

# Inner join: keep only metric rows whose checkpoint dir is listed above.
merged_df = lsm_ft_df.merge(meta_df, on=ckpt_dir_col, how='inner')


In [None]:
# Fewshot results: rows of the merged table that have an `spc` value.
merged_df.dropna(subset=['spc'])

In [None]:
# Data scaling results: rows of the merged table that have a `data_size` value.
merged_df.dropna(subset=['data_size'])

### Conv Probe

In [None]:
# Column shared by the metrics table and the hand-written metadata below.
ckpt_dir_col = 'config.init_from.checkpoint_dir'

# Data-scaling checkpoints carry `data_size`; few-shot checkpoints carry `spc`.
# Whichever key a row lacks becomes NaN in the resulting frame.
meta_data = [
    {ckpt_dir_col: ckpt_dir, 'data_size': n_train}
    for ckpt_dir, n_train in (
        ('/cns/dz-d/home/xliucs/lsm/xm/126030364/6', 1321235),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225258/24', 750000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225258/18', 100000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225258/12', 10000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225258/6', 1000),
    )
] + [
    {ckpt_dir_col: ckpt_dir, 'spc': shots}
    for ckpt_dir, shots in (
        ('/cns/dz-d/home/xliucs/lsm/xm/126972479/1', 20),
        ('/cns/dz-d/home/xliucs/lsm/xm/126993248/1', 15),
        ('/cns/dz-d/home/xliucs/lsm/xm/126971980/1', 10),
        ('/cns/dz-d/home/xliucs/lsm/xm/126971054/1', 5),
    )
]
meta_df = pd.DataFrame(meta_data)

# Two XM experiments hold the re-run conv-probe eval metrics; both get the
# same labels (each entry is its own dict).
lsm_cp_xm_id_dict = {
    xm_id: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
    )
    for xm_id in (128261369, 128368341)
}
lsm_cp_df = get_metrics_df(lsm_cp_xm_id_dict)
lsm_cp_df

# Inner join: keep only metric rows whose checkpoint dir is listed above, then
# show the few-shot rows.
merged_df = lsm_cp_df.merge(meta_df, on=ckpt_dir_col, how='inner')
merged_df.dropna(subset=['spc'])

In [None]:
# Fewshot results: rows of the merged table that have an `spc` value.
merged_df.dropna(subset=['spc'])

In [None]:
# Data scaling results: rows of the merged table that have a `data_size` value.
merged_df.dropna(subset=['data_size'])

## Exercise Detection (Data Scaling Experiments)

NOTE: ED Fewshot Results are Correct

### Linear Probe

In [None]:
# Column shared by the metrics table and the hand-written metadata below.
ckpt_dir_col = 'config.init_from.checkpoint_dir'

# Data size associated with each data-scaling checkpoint directory.
meta_data = [
    {ckpt_dir_col: ckpt_dir, 'data_size': n_train}
    for ckpt_dir, n_train in (
        ('/cns/dz-d/home/xliucs/lsm/xm/127198831/30', 1321235),
        ('/cns/dz-d/home/xliucs/lsm/xm/127198831/24', 750000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127198831/18', 100000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127198831/12', 10000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127198831/6', 1000),
    )
]
meta_df = pd.DataFrame(meta_data)

# XM experiment holding the re-run eval metrics for the linear probe.
lsm_lp_xm_id_dict = {
    128376553: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
    ),
}
lsm_lp_df = get_metrics_df(lsm_lp_xm_id_dict)
lsm_lp_df

# Inner join: keep only metric rows whose checkpoint dir is listed above, then
# show the rows that carry a data size.
merged_df = lsm_lp_df.merge(meta_df, on=ckpt_dir_col, how='inner')
merged_df.dropna(subset=['data_size'])

### Finetune

In [None]:
# Column shared by the metrics table and the hand-written metadata below.
ckpt_dir_col = 'config.init_from.checkpoint_dir'

# Data size associated with each data-scaling checkpoint directory.
meta_data = [
    {ckpt_dir_col: ckpt_dir, 'data_size': n_train}
    for ckpt_dir, n_train in (
        ('/cns/dz-d/home/xliucs/lsm/xm/127138981/30', 1321235),
        ('/cns/dz-d/home/xliucs/lsm/xm/127138981/24', 750000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127138981/18', 100000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127138981/12', 10000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127138981/6', 1000),
    )
]
meta_df = pd.DataFrame(meta_data)

# XM experiment holding the re-run eval metrics for fine-tuning.
lsm_ft_xm_id_dict = {
    128377260: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
    ),
}
lsm_ft_df = get_metrics_df(lsm_ft_xm_id_dict)
lsm_ft_df

# Inner join: keep only metric rows whose checkpoint dir is listed above, then
# show the rows that carry a data size.
merged_df = lsm_ft_df.merge(meta_df, on=ckpt_dir_col, how='inner')
merged_df.dropna(subset=['data_size'])

### Conv Probe

In [None]:
# Column shared by the metrics table and the hand-written metadata below.
ckpt_dir_col = 'config.init_from.checkpoint_dir'

# Data size associated with each data-scaling checkpoint directory.
meta_data = [
    {ckpt_dir_col: ckpt_dir, 'data_size': n_train}
    for ckpt_dir, n_train in (
        ('/cns/dz-d/home/xliucs/lsm/xm/127225558/30', 1321235),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225558/24', 750000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225558/18', 100000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225558/12', 10000),
        ('/cns/dz-d/home/xliucs/lsm/xm/127225558/6', 1000),
    )
]
meta_df = pd.DataFrame(meta_data)

# XM experiment holding the re-run eval metrics for the conv probe.
lsm_cp_xm_id_dict = {
    128377364: dict(
        model_size='Base',
        feature_order='Ordered',
        loss_only_masked_patches=True,
    ),
}
lsm_cp_df = get_metrics_df(lsm_cp_xm_id_dict)
lsm_cp_df

# Inner join: keep only metric rows whose checkpoint dir is listed above, then
# show the rows that carry a data size.
merged_df = lsm_cp_df.merge(meta_df, on=ckpt_dir_col, how='inner')
merged_df.dropna(subset=['data_size'])

# IV. Generative Eval

Below is a supplemental analysis for generative eval.
It focuses on adding 0.034 and 0.067 mask-percentage time-imputation/forecast tasks.

It additionally adds a large sweep on the sensor imputation task.

## Supplemental Generative Eval

In [None]:
# XM experiment with the supplemental generative-eval metrics.
gen_eval_xm_id_dict = {
    127392302: dict(model_size='Base', feature_order='Ordered', type='generative eval'),
}
gen_eval_df = get_metrics_df(gen_eval_xm_id_dict)

# Add train sizes: the trailing number of the checkpoint dir is used as a
# 1-based index into `datasizes`.
ckpt_suffix = gen_eval_df['config.init_from.checkpoint_dir'].str.extract(
    r'/(\d+)$', expand=False
)
gen_eval_df['train_data_size'] = ckpt_suffix.astype(int).map(
    lambda idx: datasizes[idx - 1]
)

# Cut down to 50K step pretraining on the last entry of `datasizes`
# (the original note calls this the max pretrain size, 1.65M - TODO confirm).
at_last_size = gen_eval_df['train_data_size'] == datasizes[-1]
at_50k_steps = gen_eval_df['config.init_from.checkpoint_step'] == 50000
sub_gen_eval_df = gen_eval_df[at_last_size & at_50k_steps]
sub_gen_eval_df

In [None]:
def _metric_keys(task, mask_ratios):
  """Builds the MAE then MSE metric column names for `task` at each ratio."""
  return [
      f'final_{task}_{ratio}_eval/valid_mean_{err}_error_masked'
      for ratio in mask_ratios
      for err in ('absolute', 'squared')
  ]


def _table_cells(metric_names):
  """Joins the first-row value of every metric as '<value> & <value> & '."""
  return ''.join(
      f'{list(sub_gen_eval_df[name])[0]} & ' for name in metric_names
  )


# Metric columns to report, grouped by generative task.
sen_imp = _metric_keys('sensor_imputation', ('0.2', '0.4', '0.5'))
imp_10_20 = _metric_keys('imputation', ('0.034', '0.067'))
for_10_20 = _metric_keys('forecast', ('0.034', '0.067'))

# Sensor imputation row (printed without a header line).
s = _table_cells(sen_imp)
print(s)
print('\n\n')

print('IMP 10 20 mins')
s = _table_cells(imp_10_20)
print(s)
print('\n\n')


print('FOR 10 20 mins')
s = _table_cells(for_10_20)
print(s)

## SENSOR IMPUTATION

In [None]:
# @title Get results

# XM experiment with the sensor-imputation sweep.
gen_eval_xm_id_dict = {
    128010871: {
        'model_size': 'Base',
        'feature_order': 'Ordered',
        'type': 'generative eval',
    }
}

# Get data from XM
gen_eval_df = get_metrics_df(gen_eval_xm_id_dict)
# The trailing number of the checkpoint dir is used as a 1-based index into
# `datasizes`.
gen_eval_df['train_data_size'] = (
    gen_eval_df['config.init_from.checkpoint_dir']
    .str.extract(r'/(\d+)$', expand=False)
    .astype(int)
    .apply(lambda x: datasizes[x - 1])
)
gen_eval_df

# Generate a long-format DF from the first row of the (single-row) metrics DF:
# one output row per final validation error metric whose column name splits
# into exactly 10 '_'-separated parts (parts 3 and 4 are parsed as the sensor
# and time horizons).
df_rows = []
for c in gen_eval_df.columns:
  if not ('valid' in c and 'final' in c and 'error' in c):
    continue

  name_parts = c.split('_')
  if len(name_parts) != 10:
    continue

  # Positional access: does not depend on the index containing the label 0.
  value = gen_eval_df[c].iloc[0]
  # Missing metrics show up as NaN (or None in object columns); an
  # `is not None` test lets NaN through, so check with pd.isna instead.
  if pd.api.types.is_scalar(value) and pd.isna(value):
    continue

  df_rows.append({
      'task': name_parts[2],
      'metric': c.split('/')[1],
      'values': value,
      'sensor_horizon': float(name_parts[3]),
      'time_horizon': float(name_parts[4]),
  })

df = pd.DataFrame(df_rows)
df

In [None]:
# @title Plot sensor imputation trends

time_horizons = [0.034, 0.067, 0.1, 0.2, 0.4]
sensor_horizons = [0.2, 0.4, 0.5, 0.7, 0.9, 1.0]
metric = 'valid_mean_squared_error_masked'

# Only one metric is plotted, so select its rows once instead of per curve.
metric_df = df[df['metric'] == metric]

# Changing time horizon: one curve per time horizon, error vs. imputed sensors.
# The palette holds twice as many shades as curves; only the odd entries are
# used.
cp = sns.light_palette("#69d", len(time_horizons) * 2, reverse=False).as_hex()
plt.figure()
for i, t in enumerate(time_horizons):
  sub_df = metric_df[metric_df['time_horizon'] == t]
  plt.plot(
      # NOTE(review): this x value (ratio * 30) is not truncated, while the
      # labels of the second figure use 5*int(6*s); the two disagree for
      # ratios such as 0.2 (6 vs 5) - confirm which sensor count is intended.
      sub_df['sensor_horizon'] * 6 * 5,
      sub_df['values'],
      '-o',
      label=f'{int(t*300)}mins',
      color=cp[i*2 + 1]
  )

plt.xlabel('Imputed Sensors')
plt.ylabel('MSE')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))


# Changing sensor horizon: one curve per sensor ratio, error vs. time horizon.
cp = sns.light_palette("#69d", len(sensor_horizons) * 2, reverse=False).as_hex()
plt.figure()
for i, s in enumerate(sensor_horizons):
  sub_df = metric_df[metric_df['sensor_horizon'] == s]
  plt.plot(
      sub_df['time_horizon'] * 300,
      sub_df['values'],
      '-o',
      label=f'{5*int(6*s)} sensors',
      color=cp[i*2 + 1]
  )

plt.xlabel('Imputation Time Horizon (mins)')
plt.ylabel('MSE')
plt.legend(title='Imputed Sensors', loc='upper left', bbox_to_anchor=(1, 1))