In [1]:
import os
import time

import jax.numpy as jnp
import matplotlib
import matplotlib.pyplot as plt
import numpy as onp
import pandas as pd
import seaborn as sns
from matplotlib.ticker import ScalarFormatter
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression

# Global matplotlib styling: 20pt font and LaTeX text rendering for all figures.
font = dict(size=20)
matplotlib.rc("font", **font)
matplotlib.rc("text", usetex=True)

import sys
sys.path.append("../../models")
from linear_cr import LinearCR
from linear_cr_new import LinearContrastiveRegression

In [2]:
# Sample-size sweep: for each repetition and each sample size (n = m), simulate
# data, fit LinearContrastiveRegression, and record (a) the wall-clock fit time
# and (b) the R^2 between the true response R and the in-sample predictions.
#
# A larger grid that was previously tried:
# [20, 100, 250, 500, 750, 1000, 2000, 3000, 4000, 5000]
sample_size_list = [20, 100, 200, 500, 1_000]
p = 2  # number of observed features (columns of X and Y)
d = 1  # latent dimension, also passed to model.fit
n_repititions = 50
results = onp.zeros((n_repititions, len(sample_size_list)))
results_time = onp.zeros((n_repititions, len(sample_size_list)))

# Noise levels are loop-invariant, so they are set once here.
# NOTE(review): these are passed as `scale` (a standard deviation) to the normal
# sampler below, although the names suggest variances. The numeric behaviour is
# left unchanged; confirm whether sqrt(sigma2) / sqrt(tau2) was intended.
sigma2 = 1e-2
tau2 = 1e-2

# Seeded generator so the simulated data is reproducible on Restart & Run All
# (previously the unseeded global numpy random state was used).
rng = onp.random.default_rng(0)

for ii in range(n_repititions):
    for jj, sample_size in enumerate(sample_size_list):

        n, m = sample_size, sample_size

        # Latent variables and loadings are redrawn for every (repetition, size).
        zx = rng.normal(size=(n, d))
        zy = rng.normal(size=(m, d))
        t = rng.normal(size=(n, d))
        W = rng.normal(size=(d, p))
        S = rng.normal(size=(d, p))
        beta = rng.normal(size=(d, 1))

        # X and Y share the loadings S; X additionally contains t @ W, and the
        # response R is a noisy linear function of t only.
        X = zx @ S + t @ W + rng.normal(scale=sigma2, size=(n, p))
        Y = zy @ S + rng.normal(scale=sigma2, size=(m, p))
        R = t @ beta + rng.normal(scale=tau2, size=(n, 1))

        model = LinearContrastiveRegression()
        # perf_counter is monotonic and high-resolution, so it is the right
        # clock for measuring elapsed intervals.
        start = time.perf_counter()
        model.fit(X, Y, R, d, seed = ii, verbose = False)
        end = time.perf_counter()
        results_time[ii, jj] = end - start

        # Predictions are evaluated on the same X the model was fitted on.
        preds = model.predict(X)
        # linregress reports the Pearson correlation r (`rvalue`), not R^2,
        # so it must be squared to obtain the coefficient of determination.
        r2 = linregress(R.squeeze(), preds.squeeze()).rvalue ** 2
        results[ii, jj] = r2

# Long format: one row per (sample size, repetition) with columns
# `variable` (the sample size) and `value`.
results_df_sample_size = pd.melt(pd.DataFrame(results, columns=sample_size_list))
results_df_time = pd.melt(pd.DataFrame(results_time, columns=sample_size_list))

In [3]:
# Feature-dimension sweep: with n = m = 200 fixed, vary the number of observed
# features p. For each repetition and each p, simulate data, fit
# LinearContrastiveRegression, and record the wall-clock fit time and the R^2
# between the true response R and the in-sample predictions.
#
# Larger values previously considered: 500, 1_000
p_list = [20, 50, 75, 125, 175, 200]
n = 200
m = 200
d = 1  # latent dimension, also passed to model.fit
n_repititions = 50
# NOTE: `results` / `results_time` are re-bound here to arrays for this sweep.
results = onp.zeros((n_repititions, len(p_list)))
results_time = onp.zeros((n_repititions, len(p_list)))

# Noise levels are loop-invariant, so they are set once here.
# NOTE(review): these are passed as `scale` (a standard deviation) to the normal
# sampler below, although the names suggest variances. The numeric behaviour is
# left unchanged; confirm whether sqrt(sigma2) / sqrt(tau2) was intended.
sigma2 = 1e-2
tau2 = 1e-2

# Seeded generator so the simulated data is reproducible on Restart & Run All
# (previously the unseeded global numpy random state was used).
rng = onp.random.default_rng(1)

for ii in range(n_repititions):
    for jj, p in enumerate(p_list):

        # Latent variables and loadings are redrawn for every (repetition, p).
        zx = rng.normal(size=(n, d))
        zy = rng.normal(size=(m, d))
        t = rng.normal(size=(n, d))
        W = rng.normal(size=(d, p))
        S = rng.normal(size=(d, p))
        beta = rng.normal(size=(d, 1))

        # X and Y share the loadings S; X additionally contains t @ W, and the
        # response R is a noisy linear function of t only.
        X = zx @ S + t @ W + rng.normal(scale=sigma2, size=(n, p))
        Y = zy @ S + rng.normal(scale=sigma2, size=(m, p))
        R = t @ beta + rng.normal(scale=tau2, size=(n, 1))

        model = LinearContrastiveRegression()
        # perf_counter is monotonic and high-resolution, so it is the right
        # clock for measuring elapsed intervals.
        start = time.perf_counter()
        model.fit(X, Y, R, d, seed = ii, verbose = False)
        end = time.perf_counter()
        results_time[ii, jj] = end - start

        # Predictions are evaluated on the same X the model was fitted on.
        preds = model.predict(X)
        # linregress reports the Pearson correlation r (`rvalue`), not R^2,
        # so it must be squared to obtain the coefficient of determination.
        r2 = linregress(R.squeeze(), preds.squeeze()).rvalue ** 2
        results[ii, jj] = r2

# Long format: one row per (p, repetition) with columns `variable` (the value
# of p) and `value`.
results_df_p = pd.melt(pd.DataFrame(results, columns=p_list))
results_df_p_time = pd.melt(pd.DataFrame(results_time, columns=p_list))

In [4]:
# Persist the sample-size sweep (scores and fit times) as CSV files.
# The output directory is created first so that a fresh checkout does not fail
# in to_csv because "./out" is missing.
out_dir = "./out"
os.makedirs(out_dir, exist_ok=True)

results_df_sample_size.to_csv(os.path.join(out_dir, "simulation_sample_size_n.csv"), index = False)
results_df_time.to_csv(os.path.join(out_dir, "simulation_sample_size_n_time.csv"), index = False)

# The feature-dimension sweep is intentionally not written here; enabling these
# lines would overwrite any existing files of the same name.
# results_df_p.to_csv("./out/simulation_sample_size_p.csv", index = False)
# results_df_p_time.to_csv("./out/simulation_sample_size_p_time.csv", index = False)