In [1]:
import torch
import torch.nn as nn
import torchvision 
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import math
import cvxpy as cvx
import mosek

from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import pairwise_kernels
from random import sample


%load_ext autoreload
%autoreload 2

In [2]:
from group import compute_group_coverages, compute_split_coverages 
from group import compute_group_qr_coverages, compute_cqr_coverages
from Synthetic_data_generation import get_groups, generate_group_synthetic_data, generate_cqr_data
from rkhs import compute_shifted_coverage, compute_qr_coverages, compute_adaptive_threshold
from sklearn.metrics.pairwise import pairwise_kernels

In [3]:
from wilds import get_dataset
from wilds.common.data_loaders import get_train_loader
import torchvision.transforms as transforms

In [4]:
def strip_prefix(state_dict):
    return_dict = {}
    for key in state_dict:
        #new_key = key.removeprefix('model.')
        new_key = key[6:]
        return_dict[new_key] = state_dict[key]

    return return_dict
def my_load(module, path, device=None):
    if device is not None:
        state = torch.load(path, map_location=device)
    else:
        state = torch.load(path)
    state = strip_prefix(state['algorithm'])
    
    module.load_state_dict(state)
    return 


In [5]:
d_out = 1139

constructor = torchvision.models.resnet50
model = constructor()
d_features = model.fc.in_features
#last_layer = nn.Identity(d_features)
#model.d_out = d_features
last_layer = nn.Linear(d_features,d_out)
model.d_out = d_features

model.fc = last_layer

#featurizer = model
#classifier = nn.Linear(featurizer.d_out, d_out)
#rx1Model = (featurizer, classifier)
#rx1Model = nn.Sequential(*rx1model)

rx1Model = model
my_load(rx1Model,
            '/Users/isaacgibbs/Documents/ConformalGans/Code/rxrx1_seed_0_epoch_best_model.pth',
            device=torch.device("cpu"))
featurizer = rx1Model
classifier = rx1Model.fc
featurizer.fc = nn.Identity()

rx1Model = nn.Sequential(*(featurizer,classifier))

rx1Model.eval()
featurizer.eval()
classifier.eval()

Linear(in_features=2048, out_features=1139, bias=True)

In [6]:
rx1Data = get_dataset(dataset="rxrx1", download=True)


In [7]:
def initialize_rxrx1_transform(is_training):
    def standardize(x: torch.Tensor) -> torch.Tensor:
        mean = x.mean(dim=(1, 2))
        std = x.std(dim=(1, 2))
        std[std == 0.] = 1.
        return TF.normalize(x, mean, std)
    t_standardize = transforms.Lambda(lambda x: standardize(x))

    angles = [0, 90, 180, 270]
    def random_rotation(x: torch.Tensor) -> torch.Tensor:
        angle = angles[torch.randint(low=0, high=len(angles), size=(1,))]
        if angle > 0:
            x = TF.rotate(x, angle)
        return x
    t_random_rotation = transforms.Lambda(lambda x: random_rotation(x))

    if is_training:
        transforms_ls = [
            t_random_rotation,
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            t_standardize,
        ]
    else:
        transforms_ls = [
            transforms.ToTensor(),
            t_standardize,
        ]
    transform = transforms.Compose(transforms_ls)
    return transform
my_transform_train = initialize_rxrx1_transform(True)
my_transform_eval = initialize_rxrx1_transform(False)

torch.manual_seed(0)
np.random.seed(0)

test_data_iid = rx1Data.get_subset(
    "id_test",
    transform = my_transform_train
)
test_data_ood = rx1Data.get_subset(
    "test",
    transform = my_transform_eval
)
#meta_data = pd.read_csv('data/rxrx1_v1.0/metadata.csv')
#meta_test_iid = meta_data[meta_data['dataset'] == 'id_test']
#meta_test_ood = meta_data[meta_data['dataset'] == 'test']

In [8]:
from wilds.common.grouper import CombinatorialGrouper
from wilds.common.data_loaders import get_train_loader, get_eval_loader
import time

n_iid = 40612
n_ood = 34432

train_grouper = CombinatorialGrouper(
            dataset=rx1Data,
            groupby_fields=['experiment']
        )
myloaderTestIID = get_eval_loader(
                loader='standard',
                dataset=test_data_iid,
                grouper=train_grouper,
                batch_size=n_iid)

myloaderTestOOD = get_eval_loader(
                loader='standard',
                dataset=test_data_ood,
                grouper=train_grouper,
                batch_size=n_ood)

t0 = time.time()
final_iid_test_data = next(iter(myloaderTestIID))
t1 = time.time()
print(t1-t0)
print(final_iid_test_data[0].shape)

t0 = time.time()
final_ood_test_data = next(iter(myloaderTestOOD))
t1 = time.time()
print(t1-t0)
print(final_ood_test_data[0].shape)


376.54104804992676
torch.Size([40612, 3, 256, 256])
323.8342959880829
torch.Size([34432, 3, 256, 256])


In [None]:
# def move_to(obj, device):
#     if isinstance(obj, dict):
#         return {k: move_to(v, device) for k, v in obj.items()}
#     elif isinstance(obj, list):
#         return [move_to(v, device) for v in obj]
#     elif isinstance(obj, float) or isinstance(obj, int):
#         return obj
#     else:
#         # Assume obj is a Tensor or other type
#         # (like Batch, for MolPCBA) that supports .to(device)
#         return obj.to(device)
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def predictNN2(model,x):
    #x = move_to(x, torch.device("cpu"))
    #nnOutput = model(x.reshape(1,3,64,64))[0].detach().numpy()
#     x = move_to(x, torch.device("cpu"))
    nnOutput = model(x).detach().numpy()
    return np.apply_along_axis(softmax,1,nnOutput)

from random import sample
train_points = sample(range(0,n_iid),100)
test_points = sample(range(0,n_ood),100)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_x = final_iid_test_data[0][train_points,:,:,:].to(device)
test_x = final_ood_test_data[0][test_points,:,:,:].to(device)
featurizer.to(device)
classifier.to(device)
rx1Model.to(device)

t0 = time.time()
allFeatures_iid = featurizer(train_x)
t1 = time.time()
print(t1-t0)
t0 = time.time()
allProbs_iid = np.apply_along_axis(softmax,1,classifier(allFeatures_iid).detach().numpy())
t1 = time.time()
print(t1-t0)

t0 = time.time()
allFeatures_ood = featurizer(test_x)
t1 = time.time()
print(t1-t0)
t0 = time.time()
allProbs_ood = np.apply_along_axis(softmax,1,classifier(allFeatures_ood).detach().numpy())
t1 = time.time()
print(t1-t0)


In [None]:
def classConfScore(probs,y):
    return sum(probs[probs > probs[y]])

scores_train = np.zeros(len(train_points))
for i in range(len(train_points)):
    scores_train[i] = classConfScore(allProbs_iid[i,:],final_iid_test_data[1][train_points[i]])

scores_test = np.zeros(len(test_points))
for i in range(len(test_points)):
    scores_test[i] = classConfScore(allProbs_ood[i,:],final_ood_test_data[1][test_points[i]])\
    
q = np.quantile(scores_train,0.9)
print(q)
print(sum(scores_test > q)/len(scores_test))

print(scores_train)

In [11]:
a = np.array([[1,2,3],[3,4,6]])
print(a[0,1])
print(a[1,0])
print(np.apply_along_axis(sum,0,a))
print(np.apply_along_axis(sum,1,a))

2
3
[4 6 9]
[ 6 13]


In [12]:
npAllFeatures_iid = allFeatures_iid.detach().numpy()
meanMat = np.ones(allFeatures.shape[0]).reshape(-1,1) @ npAllFeatures_iid @ np.ones(allFeatures.shape[1])
cented_featured_iid = npAllFeatures_iid - meanMat

NameError: name 'allFeatures' is not defined