In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from ipyparallel import Client

# Select seaborn's "whitegrid" style for the notebook session.
sns.set_style("whitegrid")

In [31]:
def generate_data(N):
    """Build an (N, 3) array of sample points.

    Column 0 is a constant bias term of 1; columns 1 and 2 are drawn from
    ``np.random.uniform`` with ``low=-1`` and the default upper bound.
    """
    bias_column = np.ones((N, 1))
    coordinates = np.random.uniform(low=-1, size=(N, 2))
    return np.hstack((bias_column, coordinates))


def define_noise_targets(x):
    """Label points with sign(x1^2 + x2^2 - 0.6), then flip 10% of the labels.

    ``x`` is indexed as rows of ``[bias, x1, x2]``. The rows whose labels are
    negated are chosen without replacement via ``np.random.choice``; their
    count is ``int(0.1 * number_of_rows)``.
    """
    n_points = x.shape[0]
    squared_radius = x[:, 1]**2 + x[:, 2]**2
    targets = np.sign(squared_radius - 0.6)

    flipped = np.random.choice(n_points, int(0.1 * n_points), replace=False)
    targets[flipped] = -targets[flipped]

    return targets


def create_dataframe(x, y):
    """Wrap the sample points and their labels in one DataFrame.

    The columns of ``x`` become ``bias``, ``x1`` and ``x2``; ``y`` is appended
    as a fourth column named ``y``.
    """
    frame = pd.DataFrame(x, columns=['bias', 'x1', 'x2'])
    # Append the labels as the last column.
    frame.insert(len(frame.columns), 'y', y)
    return frame

In [32]:
# Draw 1000 sample points (bias column plus two random coordinates).
x = generate_data(1000)

In [33]:
# Sanity check: one row per point, three columns (bias, x1, x2).
x.shape

(1000, 3)

In [34]:
# Label the points with the circular target; 10% of the labels are flipped as noise.
y = define_noise_targets(x)

In [35]:
# Sanity check: one label per sample point.
y.shape

(1000,)

In [37]:
# Combine points and labels into a single table with columns bias, x1, x2, y.
df = create_dataframe(x, y)

In [41]:
# Preview the first rows of the generated data set.
df.head()

Unnamed: 0,bias,x1,x2,y
0,1.0,-0.981365,-0.684393,1.0
1,1.0,0.542275,-0.216316,1.0
2,1.0,-0.58027,-0.821939,1.0
3,1.0,-0.740913,0.78701,1.0
4,1.0,0.058982,0.900738,1.0


In [42]:
def run_experiment(run_id, N, seed=None):
    """
    Run one linear-regression experiment on a noisy circular target.

    Generates ``N`` points ``[1, x1, x2]`` with ``x1, x2`` drawn uniformly
    (``low=-1``, default upper bound), labels them with
    ``sign(x1^2 + x2^2 - 0.6)``, flips ``int(0.1 * N)`` randomly chosen labels,
    fits a linear model with the pseudo-inverse and classifies the same points
    with ``sign(X @ w)``.

    Parameters
    ----------
    run_id : any
        Identifier of the run. It is accepted so the function can be mapped
        over a sequence of run ids; it does not influence the computation.
    N : int
        Number of sample points to generate.
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` is used so the
        run is reproducible. When ``None`` (the default) the global numpy
        random state is used, as in the two-argument form.

    Returns
    -------
    numpy.float64
        In-sample error ``E_in``: the fraction of the ``N`` points whose
        predicted label differs from the (noisy) target label.
    """
    # Imported inside the function so it stays self-contained when it is
    # shipped to other processes (it is dispatched through `dv.map_async`).
    import numpy as np

    # `np.random` and `RandomState` expose the same `uniform` / `choice` API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # The helpers are nested (not taken from the notebook namespace) for the
    # same self-containment reason as the import above.
    def generate_data(N):
        """Return an (N, 3) array: bias column of ones plus two coordinates."""
        return np.concatenate(
            (np.ones((N, 1)),
             rng.uniform(low=-1, size=(N, 2))),
            axis=1)

    def define_noise_targets(x):
        """Return circular-target labels with 10% of them negated."""
        y = np.sign(x[:, 1]**2 + x[:, 2]**2 - 0.6)

        noise_amount = int(0.1 * x.shape[0])
        noise_idxs = rng.choice(x.shape[0], noise_amount, replace=False)

        y[noise_idxs] *= -1

        return y

    X = generate_data(N)
    y = define_noise_targets(X)

    # SVD-based pseudo-inverse: matches inv(X.T @ X) @ X.T when X has full
    # column rank, and stays well defined when X.T @ X is singular
    # (e.g. fewer than three points), where an explicit inverse fails.
    w = np.linalg.pinv(X) @ y
    y_pred = np.sign(X @ w)

    E_in = np.mean(y != y_pred)

    return E_in

In [43]:
# Connect to the ipyparallel cluster using the default connection settings
# and show the ids of the engines that are available.
rc = Client()
print(rc.ids)

[0, 1, 2, 3]


In [44]:
# Slice the client to get a view over all connected engines.
dv = rc[:]

In [45]:
# Number of independent experiments to average over.
runs = 1000

# Map run_experiment over the run ids through the engine view; every run
# uses N = 1000 sample points.
res = dv.map_async(run_experiment, np.arange(runs), [1000]*runs)
# NOTE(review): this rebinds `x`, which earlier held the generated sample
# points; from here on it is the list of per-run E_in values.
x = res.get()

In [46]:
# NOTE(review): this rebinds `df`, which earlier held the sample-point table;
# it now holds one E_in value per run.
df = pd.DataFrame(data=x, columns=['E_in'])
# Average in-sample error over all runs.
df.mean(axis=0)

E_in    0.506541
dtype: float64