# Code to Generate Figures

#### This notebook contains code for the paper "Stochastic Expectation-Maximization for Shuffled Linear Regression" by Abid _et al._, which appeared at the 2018 Allerton Conference.

In [None]:
import sys, os;
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from utils import *
from algorithms import *
from scipy.io import savemat

%load_ext autoreload
%matplotlib inline
%autoreload 2

# Figure 3e


In [None]:
# ---- Experiment configuration ------------------------------------------
MC    = 3      # Monte Carlo repetitions per mismatch level
iters = 1      # not read in this cell; kept in case another cell uses it
n     = 1000   # number of samples passed to generate_distribution
d     = 100    # feature dimension passed to generate_distribution
k_    = [200,250,300,350,400,500,550,600,650,700,750,800] # number of mismatches
SNR   = 100    # forwarded to generate_distribution (single source of truth)

# One row per mismatch level in k_, one column per Monte Carlo repetition.
error_proposed   = np.zeros((len(k_), MC))
error_hard_scale = np.zeros((len(k_), MC))
error_soft_scale = np.zeros((len(k_), MC))

for i, k in enumerate(k_):
    for s in range(MC):
        x, y_, w0_ = generate_distribution(n=n, dim=d, dist='normal', bias=False, SNR=SNR)

        # Shuffle the responses of k randomly chosen rows among themselves.
        # Work on a copy so the clean responses in y_ are left untouched
        # (assumes y_ is a 2-D NumPy array, as the [idx, 0] indexing implies).
        y    = y_.copy()
        idx1 = np.random.permutation(n)[:k]
        idx2 = np.random.permutation(idx1)  # shuffled copy of idx1 (not in-place)
        y[idx1, 0] = y_[idx2, 0]
#-------------------------------------------------------------
        # em_mcmc result -> error_soft_scale
        weights = em_mcmc(x, y, steps=50, return_all_weights=True)
        error = calc_error(w0_, weights[-1])  # error of the last returned weights
        error_soft_scale[i, s] = error
        print(error)
#-------------------------------------------------------------
        # sls result -> error_hard_scale
        weights = sls(x, y, steps=50, return_all_weights=True, n_starts=100)
        error = calc_error(w0_, weights[-1])
        error_hard_scale[i, s] = error
        print(error)
#-------------------------------------------------------------------------
        # sls_init result -> error_proposed
        # NOTE(review): this uses a raw Euclidean norm while the two methods
        # above use calc_error -- confirm the metrics are comparable.
        weights = sls_init(x, y, steps=50, r_local=0, r=0)
        error = np.linalg.norm(weights - w0_)
        error_proposed[i, s] = error
        print(error)
#-------------------------------------------------------------
    

####  Save the error variables in a dictionary and export to MATLAB for plotting.

In [None]:
# Collapse the Monte Carlo axis (axis 1) so each method keeps one averaged
# error per entry of k_.
hard = error_hard_scale.mean(axis=1)
proposed = error_proposed.mean(axis=1)
stochastic = error_soft_scale.mean(axis=1)

# Bundle the averaged curves together with the mismatch counts and write
# them to a .mat file for plotting in MATLAB.
mdict = dict(naive_altMIn=hard, proposed=proposed, stochastic=stochastic, k_=k_)
savemat("dataAltMin.mat", mdict)

# Echo the averaged errors, one array per line group: hard, stochastic, proposed.
print(hard, stochastic, proposed, sep="\n")


In [None]:
def get_permutation(n, num_assigned):
    """Build an n-by-n permutation matrix with a random subset of rows shuffled.

    ``n - num_assigned`` row indices are drawn at random; their diagonal
    entries are cleared and each of those rows is re-paired with a column
    taken from a random permutation of the same index set. All remaining
    rows keep their identity (diagonal) entry.

    Because a random permutation can contain fixed points, the result has
    *at least* ``num_assigned`` ones on the diagonal, not exactly that many.

    Parameters
    ----------
    n : int
        Size of the square matrix.
    num_assigned : int
        Number of rows guaranteed to stay on the diagonal; must satisfy
        ``0 <= num_assigned <= n``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, n)`` with exactly one 1 in every row and
        every column.

    Raises
    ------
    ValueError
        If ``num_assigned`` lies outside ``[0, n]``. Without this check a
        value larger than ``n`` would make the slice bound negative and
        silently shuffle the wrong number of rows.
    """
    if not 0 <= num_assigned <= n:
        raise ValueError(
            "num_assigned must satisfy 0 <= num_assigned <= n "
            "(got num_assigned=%r, n=%r)" % (num_assigned, n)
        )

    P = np.eye(n)
    # Rows (and matching columns) that will be shuffled among themselves.
    idx = np.random.permutation(n)[:n - num_assigned]
    P[idx, idx] = 0
    # Pair each selected row with a column from a shuffled copy of idx.
    P[idx, np.random.permutation(idx)] = 1
    return P