# Apply FA to HELM-lite leaderboard
Note: we ran a version of the HELM-lite config with 100 samples per category.
However, coverage of the HELM-lite features was poor, and the features we did obtain were noisy,
so take these results with a grain of salt.

In [None]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Local files
from lsoc.factor import factor, selection, vis, data 

In [None]:
# Read the HELM-lite reference table and key it by the 'Model/adapter' column.
helm_csv = data.default_path + "/evals/helm_lite_1.9.csv"
df = pd.read_csv(helm_csv).set_index('Model/adapter')

# Quick look at the first rows plus the total row count.
display(df.head())
print(f"...({df.shape[0]} rows)\n")

# One-shot flag: the missing-data cell checks it and clears it after running.
fresh = True

In [None]:
# Missing analysis
# Guarded by `fresh`, which is cleared before anything else so that re-running
# this cell does not attempt the row drop a second time.
if fresh:
    fresh = False

    # Count NaNs per model (rows) and per benchmark column.
    na_mask = df.isna()
    missing_per_row = na_mask.sum(axis=1)
    missing_per_col = na_mask.sum(axis=0)
    display(missing_per_row)

    # Alternative threshold filter kept from an earlier version (disabled):
    #   df = df[missing_per_row <= 10].copy()
    df.drop(index="Phi-3 (7B)", inplace=True)  # only phi-3 is missing any

In [None]:
# Holdout model selection
from sklearn.preprocessing import StandardScaler

# Standardise the score table; the result is a numpy array, not a DataFrame.
scaler = StandardScaler()
X_scaled = scaler.fit(df).transform(df)

# Author's note: rotation doesn't matter for this step, so the scikit-learn
# based PCA wrapper is used as the model.
model = factor.PCA()

# Cross-validated error for up to 6 factors (10 folds, 3 repeats), then plot.
cv_settings = dict(max_factors=6, n_folds=10, repeats=3)
errs = selection.cross_validate(X_scaled, model, **cv_settings)
fig = vis.crossval(*errs, method_name=model.name)
fig.show()

# Author's observations from the plot:
#   - answer - 4 or 5 dimensions
#   - factor analysis says 3 factors - consistent with burnell paper
# NOTE(review): the two notes give different factor counts; presumably they
# refer to different models (PCA vs factor analysis) - confirm.

In [None]:
# TODO: get offset working
from factor_analyzer import FactorAnalyzer

# Fit the final factor-analysis model with an oblimin rotation.
n_components = 3
fa_final = FactorAnalyzer(rotation='oblimin', n_factors=n_components)
fa_final.fit(X_scaled)

# H is the transposed loadings matrix taken from the fitted model.
# An earlier variant (disabled) obtained it via `fa.fit_transform(imputed)`
# and `fa.components_` instead.
H = fa_final.loadings_.T

# One column label per factor: PC1, PC2, ...
component_names = [f"PC{k}" for k in range(1, n_components + 1)]

# Task-by-factor loadings table, indexed by the benchmark column names.
loading_df = pd.DataFrame(
    fa_final.loadings_,
    index=df.columns,
    columns=component_names,
)

# Order tasks by their loading on the first factor, largest first.
loading_df = loading_df.sort_values(by='PC1', ascending=False)

fig = vis.heatmap(
    loading_df,
    title="Task Loadings",
    width=6,
    height=12,
    reversescale=True,
)
fig.show()

Copyright (c) Gradient Institute and Timaeus. All rights reserved.

Licensed under the Apache 2.0 License.
