In [2]:
import numpy as np
import random

In [3]:
def coin_flip():
    res = []
    for i in range(1000):
        temp = []
        for i in range(10):
            coin = random.randint(0,1)
            temp.append(coin)
        res.append(temp)
    return res

In [69]:
def coin_experiment(n: int):
    c_1, c_rand, c_min = [], [], []
    for i in range(n):
        res = coin_flip()
        c_1.append(sum(res[0]) / 10)
        c_rand.append(sum(res[random.randint(0, 999)]) / 10)
        c_min.append(sum(min(res, key=sum)) / 10)
    return np.mean(c_1), np.mean(c_rand), np.mean(c_min)

In [70]:
coin_experiment(1000)

(0.5, 0.495, 0.036899999999999995)

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score

In [7]:
def generate_sample(N: int):
        x1, y1, x2, y2 = [random.uniform(-1, 1) for i in range(4)]
        w0 = 1
        w1 = (y1 - y2) / (x1 * y2 - x2 * y1)
        w2 = (x2 - x1) / (x1 * y2 - x2 * y1)
        W = np.array([w0, w1, w2])
        X = []
        for i in range(N):
            X.append([1, random.uniform(-1, 1), random.uniform(-1, 1)])
        X = np.array(X)
        
        y = np.sign(W.dot(X.T))
        return X, y, W

In [213]:
def linreg_experiment(N: int, n: int):
    in_sample = []
    out_sample = []
    for i in range(n):
        X, y, W = generate_sample(N)
        reg = LinearRegression()
        reg.fit(X[:, 1:], y)
        res = np.sign(reg.predict(X[:, 1:]))
        acc = accuracy_score(y, res)
        in_sample.append(1 - acc)
        
        X, y, W = generate_sample(N * 10)
        res = np.sign(reg.predict(X[:, 1:]))
        acc = accuracy_score(y, res)
        out_sample.append(1 - acc)
        
    return np.mean(in_sample), np.mean(out_sample)

In [214]:
linreg_experiment(100, 1000)

(0.03979000000000002, 0.400303)

In [64]:
def pla(N: int, W: np.array, X: np.array, y: list, w: np.array):
        def misclassified_pts(N: int, w: np.array, X: np.array, y: list):
            pts = []
            labels = np.sign(w.dot(X.T))
            for i in range(N):
                if labels[i] != y[i]:
                    pts.append(i)
            return pts
        
        i = 0
        mis_pts = misclassified_pts(N, w, X, y)
        
        while len(mis_pts):
            rand = random.randint(0, len(mis_pts) - 1)
            rand_x = X[mis_pts[rand]]
            w += rand_x * y[mis_pts[rand]]
            i += 1
            mis_pts = misclassified_pts(N, w, X, y)
        
        return i
    
    
def linreg_pla_experiment(N: int, n: int):
    iterations = 0
    for i in range(n):
        X, y, W = generate_sample(N)
        reg = LinearRegression()
        reg.fit(X[:, 1:], y)
        w = [reg.intercept_]
        w.extend(reg.coef_)
        iterations += pla(N, np.array(W), np.array(X), y, np.array(w))
    return iterations / n

In [68]:
linreg_pla_experiment(10, 1000)

4.643

In [193]:
def generate_sample_with_noise(N: int):
    X = [[random.uniform(-1, 1), random.uniform(-1, 1)] for i in range(N)]
    y = np.sign(np.array(X)[:, 0] ** 2 + np.array(X)[:, 1] ** 2 - 0.6)
    noise = random.sample(range(0, N - 1), N // 10)
    y[noise] *= -1
    return X, y

def no_transformation_experiment(N: int, n: int):
    in_sample = []
    for i in range(n):
        X, y = generate_sample_with_noise(N)
        reg = LinearRegression()
        reg.fit(X, y)
        res = np.sign(reg.predict(X))
        acc = accuracy_score(y, res)
        in_sample.append(1 - acc)
    return np.mean(in_sample)

In [194]:
no_transformation_experiment(1000, 1000)

0.504987

In [195]:
def agreement_rate(w_res: list, res: list, N: int):
            a = 0
            for i in range(N):
                if w_res[i] == res[i]:
                    a += 1
            return a / N
            

def transform_X(X: list):
    X = np.concatenate((np.array(X), np.array([np.array(X)[:, 0] * np.array(X)[:, 1]]).T), axis=1)
    X = np.concatenate((X, np.array([np.array(X)[:, 0] ** 2]).T), axis=1)
    X = np.concatenate((X, np.array([np.array(X)[:, 1] ** 2]).T), axis=1)
    return X


def transformation_experiment(N: int, n: int, W: list):
    agreement = [0 for i in range(len(W))]
    out_sample = []
    for i in range(n):
        X, y = generate_sample_with_noise(N)
        X = transform_X(X)
        reg = LinearRegression()
        reg.fit(X, y)
        res = np.sign(reg.predict(X))
        X = np.concatenate((np.array([[1 for i in range(N)]]).T, X), axis=1)
        for i in range(len(W)):
            w_res = np.sign(np.array(W[i]).dot(X.T))
            agreement[i] += agreement_rate(w_res, res, N)
            
                
        X, y = generate_sample_with_noise(N)
        X = transform_X(X)
        res = np.sign(reg.predict(X))
        acc = accuracy_score(y, res)
        out_sample.append(1 - acc)
            
    return np.array(agreement) / n, np.mean(out_sample)

In [197]:
transformation_experiment(10, 1000, [[-1.0, -0.05, 0.08, 0.13, 1.5, 1.5],
                                    [-1.0, -0.05, 0.08, 0.13, 1.5, 15],
                                    [-1.0, -0.05, 0.08, 0.13, 15, 1.5],
                                    [-1.0, -1.5, 0.08, 0.13, 0.05, 0.05],
                                    [-1.0, -0.05, 0.08, 1.5, 0.15, 0.15]])

(array([0.873 , 0.6548, 0.6531, 0.6083, 0.5444]), 0.3055)