In [11]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from ipyparallel import Client

# Global seaborn theme applied to any figures drawn in this notebook.
sns.set_style("whitegrid")

In [12]:
def generate_data(N):
    """Draw N random 2-D points and prepend a constant bias column.

    Returns an array of shape (N, 3): column 0 is all ones, columns 1 and 2
    are sampled uniformly from [-1, 1) using NumPy's global random state.
    """
    bias_column = np.ones((N, 1))
    coordinates = np.random.uniform(low=-1, high=1.0, size=(N, 2))
    return np.hstack((bias_column, coordinates))


def define_noise_targets(x):
    """Label points by sign(x1^2 + x2^2 - 0.6), then flip 10% of the labels.

    `x` is indexed as a 2-D array whose columns 1 and 2 hold the point
    coordinates. The number of flipped labels is int(0.1 * number_of_rows),
    and the rows to flip are chosen without replacement from NumPy's global
    random state. `x` itself is not modified.
    """
    n_points = x.shape[0]

    # Noise-free target: which side of the circle x1^2 + x2^2 = 0.6.
    radius_sq = np.square(x[:, 1]) + np.square(x[:, 2])
    targets = np.sign(radius_sq - 0.6)

    # Pick distinct rows and invert their labels.
    n_flips = int(0.1 * n_points)
    flip_at = np.random.choice(n_points, size=n_flips, replace=False)
    targets[flip_at] = -targets[flip_at]

    return targets


def create_dataframe(x, y):
    """Build the feature table for the non-linear transform.

    `x` supplies the three base columns ('bias', 'x1', 'x2'); the derived
    columns 'x1*x2', 'x1^2' and 'x2^2' are computed from them, and `y` is
    stored as the final column 'y'.
    """
    frame = pd.DataFrame(x, columns=['bias', 'x1', 'x2'])

    first = frame['x1'].values
    second = frame['x2'].values

    # Appended in this order so the column layout stays
    # bias, x1, x2, x1*x2, x1^2, x2^2, y.
    derived = (
        ('x1*x2', first * second),
        ('x1^2', np.square(first)),
        ('x2^2', np.square(second)),
        ('y', y),
    )
    for column, values in derived:
        frame[column] = values

    return frame

In [13]:
# Sample 1000 points (bias column plus two uniform coordinates).
x = generate_data(1000)

In [14]:
# Sanity check: one row per point, three columns (bias, x1, x2).
x.shape

(1000, 3)

In [15]:
# Targets for the sampled points, with 10% of the labels flipped.
y = define_noise_targets(x)

In [16]:
# Sanity check: one target per sampled point.
y.shape

(1000,)

In [17]:
# Combine base features, derived second-order features and targets in one table.
df = create_dataframe(x, y)

In [18]:
# Preview the first rows of the feature table.
df.head()

Unnamed: 0,bias,x1,x2,x1*x2,x1^2,x2^2,y
0,1.0,-0.700132,0.377773,-0.264491,0.490185,0.142712,1.0
1,1.0,0.199539,0.504844,0.100736,0.039816,0.254867,-1.0
2,1.0,-0.757058,0.890424,-0.674102,0.573137,0.792854,1.0
3,1.0,0.991547,-0.56874,-0.563933,0.983166,0.323466,-1.0
4,1.0,0.724711,-0.968432,-0.701833,0.525206,0.93786,1.0


In [38]:
def run_experiment(run_id, N):
    """
    Run one trial of linear regression on non-linearly transformed features.

    Draws ``N`` noisy training points, fits least-squares weights on the
    features (1, x1, x2, x1*x2, x1^2, x2^2), and estimates the out-of-sample
    classification error on 1000 freshly drawn noisy points.

    The imports and helper functions are defined inside the body so that the
    function is self-contained; elsewhere in this notebook it is dispatched
    through ``map_async``, where notebook-level names may not be available.

    Parameters
    ----------
    run_id : int
        Index of the trial. It is not used in the computation; it only gives
        the parallel map one element per run.
    N : int
        Number of training points to generate.

    Returns
    -------
    float
        Fraction of the 1000 test points whose predicted sign differs from
        their (noisy) target.
    """
    import numpy as np
    import pandas as pd

    # Column order of the design matrix; shared by the train and test frames.
    feature_columns = ['bias', 'x1', 'x2', 'x1*x2', 'x1^2', 'x2^2']
    n_test = 1000

    # ---- helpers (copies of the notebook-level definitions) ----
    def generate_data(N):
        """Return an (N, 3) array: a ones column plus two uniform [-1, 1) columns."""
        return np.concatenate(
            (np.ones(N).reshape(N, 1),
             np.random.uniform(low=-1, size=(N, 2))),
            axis=1)

    def define_noise_targets(x):
        """Return sign(x1^2 + x2^2 - 0.6) with 10% of the labels flipped."""
        y = np.sign(x[:, 1]**2 + x[:, 2]**2 - 0.6)

        noise_amount = int(0.1 * x.shape[0])
        noise_idxs = np.random.choice(x.shape[0], noise_amount, replace=False)

        y[noise_idxs] *= -1

        return y

    def create_dataframe(x, y):
        """Return a frame with base features, second-order features and 'y'."""
        df = pd.DataFrame(data=x, columns=['bias', 'x1', 'x2'])
        df['x1*x2'] = df['x1'].values * df['x2'].values
        df['x1^2'] = np.square(df['x1'].values)
        df['x2^2'] = np.square(df['x2'].values)
        df['y'] = y

        return df
    # ---- end helpers ----

    # Training set.
    x = generate_data(N)
    y = define_noise_targets(x)
    df = create_dataframe(x, y)

    X = df[feature_columns].values
    y = df['y'].values

    # Least-squares weights. np.linalg.pinv (SVD based) replaces the explicit
    # inv(X.T @ X) @ X.T, which is less stable numerically and breaks when
    # X.T @ X is singular (e.g. fewer training points than features).
    w = np.linalg.pinv(X) @ y

    # Independent test set. The frame must be built with the test targets:
    # passing the training targets here raised a length-mismatch error
    # whenever N differed from the test-set size.
    test_points = generate_data(n_test)
    y_true = define_noise_targets(test_points)
    df_test = create_dataframe(test_points, y_true)

    X_test = df_test[feature_columns].values
    y_pred = np.sign(X_test @ w)

    E_out = np.mean(y_true != y_pred)

    return E_out
#     y_pred = np.sign(X @ w)
    
#     w_a = np.array([-1, -0.05, 0.08, 0.13, 1.5, 1.5])
#     w_b = np.array([-1, -0.05, 0.08, 0.13, 1.5, 15])
#     w_c = np.array([-1, -0.05, 0.08, 0.13, 15, 1.5])
#     w_d = np.array([-1, -1.5, 0.08, 0.13, 0.05, 0.05])
#     w_e = np.array([-1, -0.05, 0.08, 1.5, 0.15, 0.15])
    
#     ws = [w_a, w_b, w_c, w_d, w_e]
    
#     disagreements = []
    
#     for _w in ws:
#         y = np.sign(X @ _w)
#         disagreements.append(np.mean(y_pred != y))
        
#     return disagreements

In [39]:
# Connect to the ipyparallel cluster (presumably one must already be running)
# and show the ids of the available engines.
rc = Client()
print(rc.ids)

[0, 1, 2, 3]


In [40]:
# Slice the client to get a view over all engines; used below for map_async.
dv = rc[:]

In [41]:
# Number of independent trials.
runs = 1000

# One call per trial: run ids 0..runs-1, each paired with N=1000 training points.
res = dv.map_async(run_experiment, np.arange(runs), [1000]*runs)
# Collect the per-run E_out values.
# NOTE(review): this rebinds `x`, which earlier in the notebook held the
# generated data array — re-running the earlier cells after this one will
# see a different object.
x = res.get()

In [42]:
# Tabulate the per-run errors in a single 'E_out' column.
# NOTE(review): this rebinds `df`, which earlier held the feature table.
df = pd.DataFrame(data=x, columns=['E_out'])
# Average out-of-sample error across all runs.
df.mean(axis=0)

E_out    0.125615
dtype: float64