In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("toyset.csv")
df = df.drop(columns=["ID"])

X_vars = ["VIDEO", "PUZZLE", "FEMALE"]
X = df[X_vars].values
Y = df["ICE_CREAM"].values

np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
pd.set_option('display.precision', 6)

SEED = 0

In [2]:
from sklearn.linear_model import LogisticRegression
from IPython.display import HTML, display


def coefficient_cmp(multinomial_clf, binary1_clf, binary2_clf):
    mnl, bnl = [], []
        
    m_beta1 = np.append(multinomial_clf.coef_[0], multinomial_clf.intercept_[0])
    m_beta2 = np.append(multinomial_clf.coef_[1], multinomial_clf.intercept_[1])
    m_beta3 = np.append(multinomial_clf.coef_[2], multinomial_clf.intercept_[2])
    b_beta1 = np.append(binary1_clf.coef_[0], binary1_clf.intercept_[0])
    b_beta2 = np.append(binary2_clf.coef_[0], binary2_clf.intercept_[0])
    
    m_beta1 = m_beta1 - m_beta3
    m_beta2 = m_beta2 - m_beta3
    
    columns = ["Beta1", "", "", "", "Beta2", "", "", ""]
    index = ["MNL", "BNL", "% Diff"]
    mnl = np.concatenate((m_beta1, m_beta2))
    bnl = np.concatenate((b_beta1, b_beta2))
    dif = np.abs(2*(mnl - bnl) / (mnl + bnl))
    tab = np.stack((mnl, bnl, dif))
    
    df = pd.DataFrame(tab, columns=columns, index=index)
    display(HTML(df.to_html()))
#     print("Absolute Coef Diff Sum:", np.sum(np.abs(mnl-bnl)))
    
    
def aggregate_stats(multinomial_clf, binary1_clf, binary2_clf, X, Y):
    P = multinomial_clf.predict_proba(X)
    m = P.mean(axis=0)
    M1 = P[:,0] / (P[:,0] + P[:,2])
    M2 = P[:,1] / (P[:,1] + P[:,2])
    B1 = binary1_clf.predict_proba(X)[:,1]
    B2 = binary2_clf.predict_proba(X)[:,1]
    CP1D = np.abs(M1 - B1)
    CP2D = np.abs(M2 - B2)
    
    tab = []
    col_headers = ["X1","X2","X3","Y","Multinom CP1","Multinom CP2,","Binom CP1","Binom CP2","Abs CP1 Diff","Abs CP2 Diff"]
    row_headers = ["Mean", "Stdev", "Mean(Y=1)", "Stdev(Y=1)", "Mean(Y=2)", "Stdev(Y=2)", "Mean(Y=3)", "Stdev(Y=3)"]
    tab.append([*X.mean(axis=0), Y.mean(axis=0), M1.mean(), M2.mean(), B1.mean(), B2.mean(), CP1D.mean(), CP2D.mean()])
    tab.append([*X.std(axis=0), Y.std(axis=0), M1.std(), M2.std(), B1.std(), B2.std(), CP1D.std(), CP2D.std()])
    for i in range(1, 4):
        idx = np.where(Y == i)
        tab.append([*X[idx].mean(axis=0), Y[idx].mean(axis=0), M1[idx].mean(), M2[idx].mean(), B1[idx].mean(), B2[idx].mean(), CP1D[idx].mean(), CP2D[idx].mean()])
        tab.append([*X[idx].std(axis=0), Y[idx].std(axis=0), M1[idx].std(), M2[idx].std(), B1[idx].std(), B2[idx].std(), CP1D[idx].std(), CP2D[idx].std()])
    
    df = pd.DataFrame(tab, columns=col_headers, index=row_headers)
    display(HTML(df.to_html()))
    
    
def fit_models(X, Y):
    # Multinomial
    multinomial_clf = LogisticRegression(solver="lbfgs", multi_class='multinomial', max_iter=400)
    multinomial_clf.fit(X, Y)

    # Binary
    # 1 vs. 3 (exclude 2)
    idx = np.where(Y != 2)
    X1, Y1 = X[idx], Y[idx]
    Y1[Y1 == 3] = 0
    binary1_clf = LogisticRegression(solver="lbfgs", multi_class='ovr', max_iter=400)
    binary1_clf.fit(X1, Y1)

    # 2 vs. 3 (exclude 1)
    idx = np.where(Y != 1)
    X2, Y2 = X[idx], Y[idx]
    Y2[Y2 == 3] = 0
    binary2_clf = LogisticRegression(solver="lbfgs", multi_class='ovr', max_iter=400)
    binary2_clf.fit(X2, Y2)
        
    # Return observation-level probabilities
    P = multinomial_clf.predict_proba(X)
    return P, multinomial_clf, binary1_clf, binary2_clf

### Compare MNL vs. BNL on original data

In [3]:
P, multinomial_clf, binary1_clf, binary2_clf = fit_models(X, Y)
coefficient_cmp(multinomial_clf, binary1_clf, binary2_clf)
aggregate_stats(multinomial_clf, binary1_clf, binary2_clf, X, Y)

Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.046961,-0.081139,0.769087,6.004344,-0.023112,-0.042894,0.019524,4.068166
BNL,-0.041046,-0.072079,0.71493,5.266007,-0.024158,-0.042844,0.010742,4.124978
% Diff,0.134422,0.118258,0.072986,0.131023,0.044264,0.001164,0.580272,0.013868


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.85,52.405,0.545,2.055,0.448556,0.641349,0.449335,0.641181,0.01801,0.001719
Stdev,9.876108,10.70892,0.497971,0.722478,0.243522,0.128241,0.224533,0.12959,0.008505,0.001205
Mean(Y=1),47.702128,47.319149,0.680851,1.0,0.584612,0.706855,0.576092,0.707155,0.020016,0.001714
Stdev(Y=1),10.301652,10.72104,0.466147,0.0,0.232111,0.115095,0.213277,0.116348,0.00805,0.001053
Mean(Y=2),51.705263,52.031579,0.505263,2.0,0.451178,0.647643,0.451232,0.647631,0.016297,0.001521
Stdev(Y=2),8.879158,9.920158,0.499972,0.0,0.223353,0.11666,0.205535,0.117833,0.009006,0.001074
Mean(Y=3),55.448276,57.137931,0.5,3.0,0.33401,0.577955,0.343511,0.577156,0.019189,0.002048
Stdev(Y=3),9.709882,9.875864,0.5,0.0,0.225814,0.126974,0.208726,0.128462,0.007389,0.001432


In [4]:
def simulate(X, P, choices, N):
    """ Takes an existing set of input data X and associated predicted probabilities (generated by some estimated model).
    Copies X independent variable and re-simulates N examples (spread as evenly as possible among the observations in X). """
    np.random.seed(SEED)
    X_new, Y_new = [], []
    for i in range(N):
        X_new.append(X[i % len(P)])
        Y_new.append(np.random.choice(choices, p=P[i % len(P)]))
    return np.array(X_new), np.array(Y_new)

#### Experiment 0: Use the multinomial model to simulate more data.

#### As simulations increase, the coefficients converge.

In [5]:
models = []
sizes = [200, 2000, 20000, 200000]
for N in sizes:
    print(N)
    X_new, Y_new = simulate(X, P, [1, 2, 3], N)
    _, MNL, BNL1, BNL2 = fit_models(X_new, Y_new)
    aggregate_stats(MNL, BNL1, BNL2, X_new, Y_new)
    models.append([MNL, BNL1, BNL2])

200


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.85,52.405,0.545,2.045,0.473198,0.663034,0.477118,0.662245,0.017015,0.006279
Stdev,9.876108,10.70892,0.497971,0.709207,0.246996,0.159574,0.25774,0.164423,0.012793,0.005259
Mean(Y=1),48.021739,47.869565,0.673913,1.0,0.599033,0.72825,0.609632,0.72715,0.018012,0.007288
Stdev(Y=1),10.224627,10.765502,0.46878,0.0,0.232154,0.145398,0.241024,0.15144,0.014409,0.006431
Mean(Y=2),50.676768,52.666667,0.555556,2.0,0.495614,0.683463,0.499234,0.68403,0.01726,0.005271
Stdev(Y=2),9.282141,10.63632,0.496904,0.0,0.241661,0.15102,0.253183,0.154637,0.012614,0.003965
Mean(Y=3),57.163636,55.727273,0.418182,3.0,0.327603,0.571718,0.326479,0.568747,0.015742,0.00725
Stdev(Y=3),8.318137,9.385588,0.49326,0.0,0.190532,0.145654,0.198331,0.151543,0.011514,0.005838


2000


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.85,52.405,0.545,2.0675,0.436681,0.619717,0.437148,0.61956,0.008769,0.001487
Stdev,9.876108,10.70892,0.497971,0.734809,0.244894,0.12597,0.249524,0.126256,0.006536,0.000426
Mean(Y=1),47.293501,46.872117,0.643606,1.0,0.584822,0.688229,0.588132,0.687885,0.00894,0.00137
Stdev(Y=1),9.529267,10.375063,0.478934,0.0,0.228539,0.112406,0.232167,0.112888,0.006257,0.000435
Mean(Y=2),51.580681,52.441273,0.511526,2.0,0.435691,0.623101,0.436109,0.623062,0.009026,0.001484
Stdev(Y=2),9.884393,10.202484,0.499867,0.0,0.236091,0.122319,0.240965,0.122602,0.006652,0.000424
Mean(Y=3),55.802288,56.663399,0.517974,3.0,0.322691,0.561282,0.321014,0.561094,0.008253,0.001582
Stdev(Y=3),8.404119,9.692044,0.499677,0.0,0.205453,0.112462,0.209215,0.112742,0.006545,0.000398


20000


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.85,52.405,0.545,2.0478,0.454372,0.64102,0.454289,0.64098,0.001255,0.000892
Stdev,9.876108,10.70892,0.497971,0.725614,0.242191,0.125459,0.242096,0.125701,0.000658,0.000605
Mean(Y=1),47.727859,47.35738,0.665904,1.0,0.592181,0.706023,0.592153,0.705944,0.001188,0.000818
Stdev(Y=1),9.646763,10.476731,0.471673,0.0,0.22449,0.111613,0.224319,0.111917,0.000705,0.000588
Mean(Y=2),51.816638,52.091787,0.510293,2.0,0.453908,0.644677,0.453712,0.644698,0.001315,0.000906
Stdev(Y=2),9.728755,10.396958,0.499894,0.0,0.232892,0.120197,0.232796,0.120446,0.000659,0.000611
Mean(Y=3),55.343219,57.127645,0.500867,3.0,0.340172,0.580818,0.340227,0.580709,0.001212,0.00093
Stdev(Y=3),8.922721,9.264338,0.499999,0.0,0.20948,0.115843,0.209475,0.116068,0.000604,0.000603


200000


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.85,52.405,0.545,2.056755,0.446465,0.639195,0.446482,0.639163,0.000567,0.000236
Stdev,9.876108,10.70892,0.497971,0.723128,0.241763,0.126238,0.242034,0.126166,0.000392,0.000135
Mean(Y=1),47.728791,47.353551,0.672547,1.0,0.585433,0.704439,0.585641,0.704324,0.000588,0.000228
Stdev(Y=1),9.61218,10.473129,0.469284,0.0,0.224908,0.11189,0.225144,0.111857,0.000402,0.000135
Mean(Y=2),51.724637,52.035907,0.507602,2.0,0.446965,0.643847,0.446943,0.643827,0.000582,0.000235
Stdev(Y=2),9.690684,10.4211,0.499942,0.0,0.232947,0.121294,0.233212,0.12123,0.000398,0.000135
Mean(Y=3),55.372489,57.072861,0.503097,3.0,0.333745,0.579092,0.333674,0.579108,0.000525,0.000246
Stdev(Y=3),9.025967,9.275381,0.49999,0.0,0.208158,0.116628,0.208401,0.116556,0.000368,0.000134


In [6]:
for i in range(len(sizes)):
    print(sizes[i])
    coefficient_cmp(*models[i])

200


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.081258,-0.046518,0.931824,6.000387,-0.073853,-0.002964,0.370338,4.554908
BNL,-0.081965,-0.057365,0.909113,6.638242,-0.078704,2.4e-05,0.349441,4.664013
% Diff,0.008662,0.208838,0.024673,0.100937,0.063596,2.032147,0.058064,0.02367


2000


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.062259,-0.073347,0.534873,6.454311,-0.037467,-0.028017,-0.087531,3.987327
BNL,-0.067441,-0.071826,0.600525,6.60514,-0.037671,-0.027976,-0.10171,4.003111
% Diff,0.079898,0.020955,0.115646,0.023099,0.00544,0.00143,0.149851,0.003951


20000


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.044898,-0.081379,0.770689,5.941015,-0.019929,-0.043599,0.05377,3.917441
BNL,-0.044828,-0.081172,0.783578,5.918808,-0.020316,-0.043479,0.044932,3.936031
% Diff,0.001573,0.002548,0.016586,0.003745,0.019204,0.002762,0.17907,0.004734


200000


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.045674,-0.08053,0.790407,5.88355,-0.021927,-0.042504,0.027958,3.969607
BNL,-0.045508,-0.080772,0.797095,5.883832,-0.021989,-0.042425,0.025661,3.969728
% Diff,0.003655,0.003004,0.008425,4.8e-05,0.002817,0.001856,0.085666,3.1e-05


### How can we modify the data so that the coeefficients CANNOT converge to each other?

#### Experiment 1: Add noise to the simulated data; take a fixed % of class 1 (here we use 40%) and "rotate" labels (1>2>3). 

#### This produces a noticeable difference in the MNL vs. BNL coefficients.

In [7]:
def rotate(Y, ratio):
    np.random.seed(SEED)
    idx1 = np.where(Y == 1)[0]
    idx2 = np.where(Y == 2)[0]
    idx3 = np.where(Y == 3)[0]
    M = int(ratio*len(idx1))
    Y_new = np.copy(Y)
    Y_new[np.random.choice(idx1, M, replace=False)] = 2
    Y_new[np.random.choice(idx2, M, replace=False)] = 3
    Y_new[np.random.choice(idx3, M, replace=False)] = 1
    return Y_new

### How does adjusting the prevalence of class 1/2/3 affect the MNL vs. BNL coefficient similarity?

#### Experiment 2: Duplicate occurrences of class 3. 

In [8]:
def dup3(X, Y, N):
    X_new, Y_new = np.copy(X), np.copy(Y)
    idx3 = np.where(Y == 3)[0]
    while len(X_new) < N:
        X_new = np.append(X_new, X[idx3], axis=0)
        Y_new = np.append(Y_new, Y[idx3], axis=0)
    return X_new[:N], Y_new[:N]

#### Experiment 3: Try going the other way around: simulate a lot of examples, then downsample class 3.

In [9]:
def downsample(X, Y, ratio, k):
    """ k: target class to downsample """
    np.random.seed(SEED)
    idx_k = np.where(Y == k)[0]
    del_inds = np.random.choice(idx_k,  int((1-ratio)*len(idx_k)), replace=False)
    return np.delete(X, del_inds, axis=0), np.delete(Y, del_inds, axis=0)

### Maybe the originally simulated data (at N=1,000,000) is too "clean" to be affected by downsampling.

#### Experiment 4: Rotate, then downsample class 1.
#### Experiment 5: Rotate, then downsample class 3.
#### Experiment 6: Rotate, downsample class 3, then use 2 as the multinomial base - is there a difference from experiment 5?

#### This is done by changing all labels where Y=2 to Y=3, and vice versa. Values for CP2 are now low, because the downsampled observations have been labeled as Y=2. The results show that coefficients are closer vs. the previous experiment.

In [10]:
def switch23(Y):
    Y_new = np.copy(Y)
    inds2 = np.where(Y_new == 2)[0]
    inds3 = np.where(Y_new == 3)[0]
    Y_new[inds2] = 3
    Y_new[inds3] = 2
    return Y_new

#### Simulation sample size N = 1,000,000

#### Coefficients / generated data statistics listed together for easy comparison.

In [11]:
# sizes = [200, 2000, 20000, 200000, 2000000]
N = 100000
print("Original")
X_base, Y_base = simulate(X, P, [1, 2, 3], N)
model_base = fit_models(X_base, Y_base)[1:]
coefficient_cmp(*model_base)
print("----")

print("Downsample 10% of class 1")
X_ds1, Y_ds1 = downsample(X_base, Y_base, 0.10, 1)
model_ds1 = fit_models(X_ds1, Y_ds1)[1:]
coefficient_cmp(*model_ds1)
print("----")

print("Downsample 10% of class 3")
X_ds3, Y_ds3 = downsample(X_base, Y_base, 0.10, 3)
model_ds3 = fit_models(X_ds3, Y_ds3)[1:]
coefficient_cmp(*model_ds3)
print("----")

print("Rotate 40% class 1")
Y_rot = rotate(Y_base, 0.4)
model_rot = fit_models(X_base, Y_rot)[1:]
coefficient_cmp(*model_rot)
print("----")

print("Rotate -> downsample 10% of class 1")
X_rot_ds1, Y_rot_ds1 = downsample(X_base, Y_rot, 0.10, 1)
model_rot_ds1 = fit_models(X_rot_ds1, Y_rot_ds1)[1:]
coefficient_cmp(*model_rot_ds1)
print("----")

print("Rotate -> downsample 10% of class 3")
X_rot_ds3, Y_rot_ds3 = downsample(X_base, Y_rot, 0.10, 3)
model_rot_ds3 = fit_models(X_rot_ds3, Y_rot_ds3)[1:]
coefficient_cmp(*model_rot_ds3)
print("----")

print("Rotate -> downsample -> Use 2 as base")
Y_rot_ds3_b2 = switch23(Y_rot_ds3)
model_rot_ds3_b2 = fit_models(X_rot_ds3, Y_rot_ds3_b2)[1:]
coefficient_cmp(*model_rot_ds3_b2)
print("----")

print("Aggregate Stats")

print("Original")
aggregate_stats(*model_base, X_base, Y_base)
print("Downsample 10% of class 1")
aggregate_stats(*model_ds1, X_ds1, Y_ds1)
print("Downsample 10% of class 3")
aggregate_stats(*model_ds3, X_ds3, Y_ds3)
print("Rotate only: 40% class 1")
aggregate_stats(*model_rot, X_base, Y_rot)
print("Rotate -> downsample 10% of class 1")
aggregate_stats(*model_rot_ds1, X_rot_ds1, Y_rot_ds1)
print("Rotate -> downsample 10% of class 3")
aggregate_stats(*model_rot_ds3, X_rot_ds3, Y_rot_ds3)
print("Rotate -> downsample -> Use 2 as base")
aggregate_stats(*model_rot_ds3_b2, X_rot_ds3, Y_rot_ds3_b2)

Original


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.045143,-0.081232,0.810301,5.89465,-0.02191,-0.042615,0.033181,3.976855
BNL,-0.045169,-0.081372,0.814399,5.900686,-0.022012,-0.04241,0.03021,3.972599
% Diff,0.00057,0.00172,0.005044,0.001023,0.004639,0.004825,0.09373,0.001071


----
Downsample 10% of class 1


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.043239,-0.080743,0.868721,3.436474,-0.022008,-0.04242,0.031051,3.972459
BNL,-0.043555,-0.080386,0.869708,3.432663,-0.022012,-0.04241,0.03021,3.972599
% Diff,0.007291,0.004432,0.001135,0.00111,0.000187,0.00023,0.027479,3.5e-05


----
Downsample 10% of class 3


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.044071,-0.084193,0.841567,8.290004,-0.020889,-0.045445,0.063021,6.36908
BNL,-0.043715,-0.085448,0.848408,8.334382,-0.021043,-0.045285,0.058637,6.371418
% Diff,0.008115,0.014795,0.008096,0.005339,0.007304,0.003519,0.072069,0.000367


----
Rotate 40% class 1


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.015958,-0.032627,0.42173,2.141869,-0.017292,-0.033098,0.135923,3.092023
BNL,-0.01518,-0.031428,0.414871,2.041043,-0.018044,-0.034106,0.147938,3.178943
% Diff,0.049958,0.037422,0.016397,0.048208,0.042542,0.029978,0.084656,0.027721


----
Rotate -> downsample 10% of class 1


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.014973,-0.039693,0.463895,0.129239,-0.017882,-0.033929,0.14744,3.161426
BNL,-0.014146,-0.038343,0.43144,0.034553,-0.018044,-0.034106,0.147938,3.178943
% Diff,0.056762,0.034598,0.072498,1.156177,0.008993,0.005179,0.003374,0.005526


----
Rotate -> downsample 10% of class 3


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,-0.014325,-0.036059,0.426529,4.545303,-0.015678,-0.036625,0.140306,5.501503
BNL,-0.01254,-0.033498,0.400443,4.328873,-0.016862,-0.038003,0.155134,5.629681
% Diff,0.132894,0.073617,0.063089,0.048778,0.072821,0.036949,0.100375,0.023031


----
Rotate -> downsample -> Use 2 as base


Unnamed: 0,Beta1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Beta2,Unnamed: 6,Unnamed: 7,Unnamed: 8
MNL,0.001364,0.000571,0.286246,-0.957053,0.015695,0.036616,-0.13989,-5.502113
BNL,0.001369,0.000578,0.28597,-0.957571,0.016862,0.038003,-0.155134,-5.629681
% Diff,0.004115,0.012629,0.000967,0.000542,0.071687,0.03718,0.103338,0.02292


----
Aggregate Stats
Original


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.85,52.405,0.545,2.05365,0.448978,0.640264,0.448943,0.640224,0.000347,0.000387
Stdev,9.876108,10.70892,0.497971,0.723679,0.242673,0.12629,0.24295,0.12603,0.000195,0.000261
Mean(Y=1),47.757686,47.347274,0.674589,1.0,0.588779,0.705663,0.588922,0.705435,0.000356,0.000386
Stdev(Y=1),9.595413,10.424861,0.468528,0.0,0.224517,0.111492,0.224766,0.111308,0.00019,0.00026
Mean(Y=2),51.729537,52.057118,0.506939,2.0,0.448874,0.644651,0.448814,0.644618,0.000353,0.000381
Stdev(Y=2),9.711413,10.421255,0.499952,0.0,0.233651,0.121322,0.233922,0.121079,0.000196,0.00026
Mean(Y=3),55.382118,57.095099,0.501482,3.0,0.3352,0.579801,0.335059,0.579902,0.000331,0.000397
Stdev(Y=3),9.005926,9.297744,0.499998,0.0,0.209622,0.116838,0.209861,0.116584,0.000197,0.000263


Downsample 10% of class 1


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,52.961699,53.776354,0.510379,2.338512,0.096039,0.622531,0.095918,0.622537,0.000256,9e-05
Stdev,9.651726,10.36916,0.499892,0.532925,0.101957,0.123959,0.10176,0.123943,0.00038,2.5e-05
Mean(Y=1),47.934461,47.47907,0.688372,1.0,0.177456,0.703557,0.177204,0.703524,0.000436,8e-05
Stdev(Y=1),9.502617,10.506783,0.463159,0.0,0.139345,0.111826,0.139104,0.111826,0.000494,2.2e-05
Mean(Y=2),51.729537,52.057118,0.506939,2.0,0.109687,0.644615,0.109534,0.644618,0.000296,8.7e-05
Stdev(Y=2),9.711413,10.421255,0.499952,0.0,0.108078,0.121093,0.107862,0.121079,0.000411,2.4e-05
Mean(Y=3),55.382118,57.095099,0.501482,3.0,0.067132,0.57989,0.067073,0.579902,0.000176,9.4e-05
Stdev(Y=3),9.005926,9.297744,0.499998,0.0,0.076694,0.116603,0.076558,0.116584,0.000291,2.4e-05


Downsample 10% of class 3


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,50.603857,50.755515,0.560143,1.719245,0.856685,0.947044,0.856226,0.947071,0.000962,0.000102
Stdev,9.870001,10.687481,0.49637,0.529604,0.138207,0.029562,0.139392,0.029514,0.001125,9.3e-05
Mean(Y=1),47.757686,47.347274,0.674589,1.0,0.907054,0.95734,0.906974,0.957335,0.000638,7.7e-05
Stdev(Y=1),9.595413,10.424861,0.468528,0.0,0.099216,0.024394,0.100005,0.024374,0.000779,7e-05
Mean(Y=2),51.729537,52.057118,0.506939,2.0,0.837494,0.943191,0.836898,0.943233,0.001084,0.000112
Stdev(Y=2),9.711413,10.421255,0.499952,0.0,0.14455,0.030107,0.145819,0.030054,0.001201,9.8e-05
Mean(Y=3),55.43246,57.29428,0.49552,3.0,0.75932,0.925984,0.758012,0.926057,0.001621,0.000145
Stdev(Y=3),9.035123,9.275266,0.49998,0.0,0.170619,0.032661,0.172136,0.032592,0.001483,0.000118


Rotate only: 40% class 1


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.85,52.405,0.545,2.05365,0.460912,0.624294,0.460444,0.624252,0.003384,0.002937
Stdev,9.876108,10.70892,0.497971,0.723679,0.116941,0.104104,0.113095,0.107506,0.002195,0.001984
Mean(Y=1),50.907303,51.314247,0.60828,1.0,0.479376,0.63744,0.478369,0.637896,0.003589,0.003024
Stdev(Y=1),10.046407,11.046884,0.488135,0.0,0.120998,0.106739,0.117011,0.110169,0.002295,0.001948
Mean(Y=2),50.90222,51.08274,0.538983,2.0,0.474081,0.637907,0.473108,0.638293,0.003376,0.002834
Stdev(Y=2),9.836481,10.616288,0.498478,0.0,0.113991,0.100671,0.110224,0.103908,0.00224,0.001891
Mean(Y=3),54.164932,55.451675,0.50324,3.0,0.424376,0.591366,0.425169,0.590219,0.003231,0.003035
Stdev(Y=3),9.407323,9.940144,0.49999,0.0,0.110021,0.10002,0.106463,0.103415,0.002017,0.002146


Rotate -> downsample 10% of class 1


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,52.102543,52.683287,0.527745,2.338512,0.085266,0.62074,0.084659,0.620683,0.0016,0.000526
Stdev,9.818231,10.607751,0.49923,0.532925,0.043724,0.105947,0.04159,0.106557,0.001701,0.000336
Mean(Y=1),50.830021,50.761522,0.603383,1.0,0.096943,0.641844,0.095695,0.641895,0.002028,0.000547
Stdev(Y=1),10.173,11.322105,0.489195,0.0,0.050672,0.111474,0.048056,0.112101,0.002217,0.000333
Mean(Y=2),50.90222,51.08274,0.538983,2.0,0.091713,0.638248,0.090831,0.638293,0.001745,0.000509
Stdev(Y=2),9.836481,10.616288,0.498478,0.0,0.045137,0.103321,0.04287,0.103908,0.001858,0.000319
Mean(Y=3),54.164932,55.451675,0.50324,3.0,0.073796,0.590451,0.073688,0.590219,0.001328,0.000552
Stdev(Y=3),9.407323,9.940144,0.49999,0.0,0.037986,0.102808,0.036244,0.103415,0.001301,0.00036


Rotate -> downsample 10% of class 3


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.033482,51.340114,0.55971,1.719245,0.891066,0.942528,0.891678,0.942285,0.003092,0.001025
Stdev,9.906848,10.762625,0.496422,0.529604,0.049393,0.025857,0.045306,0.027203,0.00292,0.001028
Mean(Y=1),50.907303,51.314247,0.60828,1.0,0.892498,0.942616,0.893079,0.942377,0.003187,0.001064
Stdev(Y=1),10.046407,11.046884,0.488135,0.0,0.050871,0.026761,0.046667,0.028154,0.002983,0.001056
Mean(Y=2),50.90222,51.08274,0.538983,2.0,0.891716,0.943206,0.89223,0.942998,0.002998,0.000988
Stdev(Y=2),9.836481,10.616288,0.498478,0.0,0.048113,0.025144,0.044152,0.026441,0.00282,0.000989
Mean(Y=3),54.202963,55.749483,0.502068,3.0,0.868793,0.930741,0.871246,0.929885,0.003862,0.001314
Stdev(Y=3),9.362805,9.789002,0.499996,0.0,0.052414,0.026983,0.047716,0.028563,0.003757,0.001329


Rotate -> downsample -> Use 2 as base


Unnamed: 0,X1,X2,X3,Y,Multinom CP1,"Multinom CP2,",Binom CP1,Binom CP2,Abs CP1 Diff,Abs CP2 Diff
Mean,51.033482,51.340114,0.55971,2.320666,0.333031,0.057478,0.333027,0.057715,3.3e-05,0.001024
Stdev,9.906848,10.762625,0.496422,0.926228,0.031282,0.025859,0.031255,0.027203,2.2e-05,0.001027
Mean(Y=1),50.907303,51.314247,0.60828,1.0,0.336048,0.057391,0.336041,0.057623,3.4e-05,0.001063
Stdev(Y=1),10.046407,11.046884,0.488135,0.0,0.030674,0.026763,0.030647,0.028154,2.3e-05,0.001055
Mean(Y=2),54.202963,55.749483,0.502068,2.0,0.330893,0.069265,0.330904,0.070115,3.3e-05,0.001311
Stdev(Y=2),9.362805,9.789002,0.499996,0.0,0.031413,0.026984,0.031384,0.028563,2.2e-05,0.001328
Mean(Y=3),50.90222,51.08274,0.538983,3.0,0.331655,0.056798,0.331651,0.057002,3.3e-05,0.000986
Stdev(Y=3),9.836481,10.616288,0.498478,0.0,0.031467,0.025146,0.03144,0.026441,2.1e-05,0.000988


In [12]:
#### Misc: Copy N% of examples from Y=2 and relabel them as Y=1. This produces some "overlap" between Y=1 and Y=2?.

# def copy_2to1(X, Y, ratio):
#     idx2 = np.where(Y == 2)[0]
#     X_new = np.concatenate((X, X[idx2[:int(ratio*len(idx2))]]))
#     Y_new = np.concatenate((Y, np.ones(int(ratio*len(idx2)))))
#     return X_new, Y_new