# Quiz processing

## Q4 - Calculate the MSE

In [1]:
import numpy as np

X = np.array([[1, 2, 1, 2]])   # a single training example, stored as one row
w = np.array([2, -1, -2, 4])   # weights of the linear regression model
y = [4.0]                      # target value for that example

#####
m = 1                          # number of training examples
y_pred = X @ w                 # model prediction, shape (1,)
residual = y_pred - y          # prediction error per example
# Mean squared error: sum of squared residuals divided by the example count.
MSE = residual @ residual / float(m)

#####
print(f"MSE is : {MSE:.3}")

MSE is : 4.0


## Q6 - Gradient descent step

In [5]:
X = np.array([[1, 2, 1, 2],
              [1, 1, 2, 2]])     # two training examples, one per row
w = np.array([4, -2, -4, 8])     # current weights
Y = np.array([4, 3])             # targets
eta = 0.05                       # learning rate

# One gradient-descent step using the un-normalised gradient (Xw - Y) X.
residuals = X @ w - Y
gradient = residuals @ X
w = w - eta * gradient
print(w)

[ 3.25 -3.15 -5.1   6.5 ]


In [6]:
# Scratch arithmetic cell: true division, evaluates to 0.8.
4/5

0.8

## Q10 - MSE at scale

In [2]:
import numpy as np
import pandas as pd
from pyspark.sql import SparkSession

# Reuse the Spark session if one already exists in this kernel; referencing
# the bare name raises NameError when it does not, and only then is a new
# session built.
try:
    spark
except NameError:
    print("starting Spark")
    app_name = "Quiz4_notebook"
    master = "local[*]"
    spark = (
        SparkSession.builder
        .appName(app_name)
        .master(master)
        .getOrCreate()
    )
# The SparkContext is the entry point for the RDD API used below.
sc = spark.sparkContext

def generate_data(num_inputs = 5, size=100, seed = 2022):
    """Build a noisy synthetic data set from a random linear model.

    Seeds NumPy's *global* random generator with ``seed``, draws
    ``num_inputs + 1`` coefficients (bias first) uniformly from [-2, 2),
    draws ``size`` feature rows uniformly from [-4, 4), and computes
    ``y = x . coeffs[1:] + coeffs[0] + noise`` with Gaussian noise of
    standard deviation 2. The coefficients and the first five rows are
    shown with ``display``.

    Returns
    -------
    numpy.ndarray of shape ``(size, num_inputs + 1)``: the feature columns
    followed by the target column.
    """
    # Seeding the global generator also makes any np.random draw performed
    # by the caller right after this function reproducible.
    np.random.seed(seed)
    coeffs = np.random.uniform(-2, 2, num_inputs + 1)
    bias, weights = coeffs[0], coeffs[1:]
    feature_names = [f"x{k}" for k in range(1, num_inputs + 1)]

    print("The TRUE linear model is:")
    display(pd.DataFrame(coeffs.reshape(1, -1), columns=["b"] + feature_names))

    features = np.random.uniform(-4, 4, (size, num_inputs))
    noise = np.random.normal(0, 2, size)
    targets = features @ weights + bias + noise
    X_y = np.column_stack((features, targets))
    display(pd.DataFrame(X_y[:5], columns=feature_names + ["y"]))
    return X_y
num_inputs = 5
rows = generate_data(num_inputs, 100)
# Distribute the rows, keep only those whose entries (features plus target)
# sum to more than 1.0, and cache the filtered RDD for reuse.
X_y_rdd = (
    sc.parallelize(rows)
    .filter(lambda row: sum(row) > 1.0)
    .cache()
)

# Given a fictitious multiple linear regression model,
# make it available in memory as read-only to the executors.
candidate_model = np.random.uniform(-2, 2, num_inputs + 1)
wBroadcast = sc.broadcast(candidate_model)
print("A possible linear model is: ")
model_columns = ["b"] + [f"x{k}" for k in range(1, len(wBroadcast.value))]
display(pd.DataFrame(wBroadcast.value.reshape(1, -1), columns=model_columns))


The TRUE linear model is:


Unnamed: 0,b,x1,x2,x3,x4,x5
0,-1.962566,-0.003769,-1.546465,-1.800104,0.74163,-0.052048


Unnamed: 0,x1,x2,x3,x4,x5,y
0,3.181258,1.179617,3.175705,1.769079,2.650827,-7.701905
1,2.620545,2.668637,3.656355,-1.055645,-0.041299,-14.104847
2,-1.283924,0.955435,3.820237,-3.228535,1.95365,-14.120576
3,-1.660004,-1.610597,2.019788,-3.85069,0.189899,-4.496001
4,2.915487,-0.889257,-2.302465,-0.198554,0.517379,3.012831


A possible linear model is: 


Unnamed: 0,b,x1,x2,x3,x4,x5
0,0.182321,-0.823293,-0.756158,-1.389402,-0.209324,0.064855


In [9]:
# Squared error of one record d = [x1..xn, y]: (w . [1, x] - y)**2.
# The mean over all records in the RDD is the MSE.
squared_errors = X_y_rdd.map(
    lambda d: (np.dot(np.append(1, d[:-1]), wBroadcast.value) - d[-1]) ** 2
)
MSE = squared_errors.mean()
print(f"The MSE is:{MSE:0.3f}")

[Stage 0:>                                                          (0 + 2) / 2]

The MSE is:15.809


                                                                                

## Q11 - Ridge regression loss

In [21]:
import numpy as np
import pandas as pd
from pyspark.sql import SparkSession


def generate_data(num_inputs = 5, size=100, seed = 2022):
    """Build a noisy synthetic data set from a random linear model.

    Seeds NumPy's *global* random generator with ``seed``, draws
    ``num_inputs + 1`` coefficients (bias first) uniformly from [-2, 2),
    draws ``size`` feature rows uniformly from [-4, 4), and computes
    ``y = x . coeffs[1:] + coeffs[0] + noise`` with Gaussian noise of
    standard deviation 2. The coefficients and the first five rows are
    shown with ``display``.

    Returns
    -------
    numpy.ndarray of shape ``(size, num_inputs + 1)``: the feature columns
    followed by the target column.
    """
    # Seeding the global generator also makes any np.random draw performed
    # by the caller right after this function reproducible.
    np.random.seed(seed)
    coeffs = np.random.uniform(-2, 2, num_inputs + 1)
    bias, weights = coeffs[0], coeffs[1:]
    feature_names = [f"x{k}" for k in range(1, num_inputs + 1)]

    print("The TRUE linear model is:")
    display(pd.DataFrame(coeffs.reshape(1, -1), columns=["b"] + feature_names))

    features = np.random.uniform(-4, 4, (size, num_inputs))
    noise = np.random.normal(0, 2, size)
    targets = features @ weights + bias + noise
    X_y = np.column_stack((features, targets))
    display(pd.DataFrame(X_y[:5], columns=feature_names + ["y"]))
    return X_y
num_inputs = 5
rows = generate_data(num_inputs, 100)
# Distribute the rows, keep only those whose entries (features plus target)
# sum to more than 1.0, and cache the filtered RDD for reuse.
X_y_rdd = (
    sc.parallelize(rows)
    .filter(lambda row: sum(row) > 1.0)
    .cache()
)

# Given a fictitious multiple linear regression model,
# make it available in memory as read-only to the executors.
candidate_model = np.random.uniform(-2, 2, num_inputs + 1)
wBroadcast = sc.broadcast(candidate_model)
W = wBroadcast.value[1:]   # weights without the leading bias term
print("A possible linear model is: ")
model_columns = ["b"] + [f"x{k}" for k in range(1, len(wBroadcast.value))]
display(pd.DataFrame(wBroadcast.value.reshape(1, -1), columns=model_columns))

# Ridge loss = MSE over the RDD + 1 * (sum of squared non-bias weights).
mse_term = X_y_rdd.map(
    lambda d: (np.dot(np.append(1, d[:-1]), wBroadcast.value) - d[-1]) ** 2
).mean()
ridge_MSE = mse_term + 1 * np.sum(W ** 2)
print(f"The ridge regression loss is: {ridge_MSE:0.3f}")

The TRUE linear model is:


Unnamed: 0,b,x1,x2,x3,x4,x5
0,-1.962566,-0.003769,-1.546465,-1.800104,0.74163,-0.052048


Unnamed: 0,x1,x2,x3,x4,x5,y
0,3.181258,1.179617,3.175705,1.769079,2.650827,-7.701905
1,2.620545,2.668637,3.656355,-1.055645,-0.041299,-14.104847
2,-1.283924,0.955435,3.820237,-3.228535,1.95365,-14.120576
3,-1.660004,-1.610597,2.019788,-3.85069,0.189899,-4.496001
4,2.915487,-0.889257,-2.302465,-0.198554,0.517379,3.012831


A possible linear model is: 


Unnamed: 0,b,x1,x2,x3,x4,x5
0,0.182321,-0.823293,-0.756158,-1.389402,-0.209324,0.064855


The ridge regression loss is: 19.037


## Q12 - Perpendicular distance

In [4]:
w1 = [-3, 2, 1]     # augmented model (bias first)
X = [1, 2, 2]       # augmented input (leading 1 pairs with the bias)
#modify here
# Raw linear score of the input under w1: the inner product w1 . X.
d1 = np.asarray(w1) @ np.asarray(X)
#######
print(f"score for w1 is : {d1:7.5f}")

score for w1 is : 3.00000


## Q13 - Classification

In [7]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise by NumPy."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

W = np.array([-2., 1., 1])
X = np.array([1, 2., 3.])   # augmented input
# Linear score of the input; the sigmoid maps it to P(class 1).
pdists = X @ W
print("Perpendicular distances are: ", pdists)
p_1 = sigmoid(pdists)
p_0 = 1 - p_1
print("probability of class 1 :", p_1)
print("probability of class 0 :", p_0)

Perpendicular distances are:  3.0
probability of class 1 : 0.9525741268224334
probability of class 0 : 0.047425873177566635


**It's class 1**

## Q14 - Log loss

In [10]:
# Binary log loss over two examples: (y=0, p=0.5) and (y=1, p=0.3).
labels = np.array([0, 1])
probs = np.array([0.5, 0.3])
# Per-example log-likelihood: y*log(p) + (1 - y)*log(1 - p).
per_example = labels * np.log(probs) + (1 - labels) * np.log(1 - probs)
pt1, pt2 = per_example
# Negative mean of the two log-likelihood terms.
loss = -(pt1 + pt2) / 2
print(np.round(loss,3))

0.949


## Q15 - Log loss pt 2

In [11]:
X = np.array([[1, 4, 0],
              [1, 3, 2],
              [1, 4, 3],
              [1, 0, 1]])      # four augmented inputs, one per row
w = np.array([1, 1, -1])
y = np.array([0, 1, 1, 0])

# Predicted probability of class 1 for each row: sigmoid of the linear score.
scores = X @ w
p_hat = 1 / (1 + np.exp(-scores))

# Cross-entropy: negative mean of y*log(p) + (1 - y)*log(1 - p).
positive_term = y * np.log(p_hat)
negative_term = (1 - y) * np.log(1 - p_hat)
CXE_loss = -np.mean(positive_term + negative_term)
print(f"{CXE_loss:5.2f}")

 1.49


## Q16 - only B

## Q18 - Logistic regression gradient descent step

In [12]:
X = np.array([[1, 1], [1, 2]])  # augmented inputs, one example per row
w = np.array([1, 1])            # augmented model
y = np.array([0, 1])            # target values
#==================================================#
# Your code starts here #
#==================================================#
# Linear score of each example, squashed by the sigmoid into P(class 1).
perpDist = np.dot(X, w)
p = 1 / (1 + np.exp(-perpDist))  # sigmoid
# Gradient of the mean log loss with respect to w: (1/m) * X^T (p - y).
gradient = 1/len(y)*np.dot(X.T, (p - y))
print(f'predictions: {p}')
print(f'Gradient: {gradient}')
print(f'w before: {w}')
lr = 0.1
print(f'lr * Gradient: {lr *gradient}')
w = w - (lr * gradient)
# This line shows the weights *after* the step; it was previously
# mislabelled "w before", duplicating the label printed above.
print(f'w after: {np.round(w,3)}')

predictions: [0.88079708 0.95257413]
Gradient: [0.4166856  0.39297267]
w before: [1 1]
lr * Gradient: [0.04166856 0.03929727]
w before: [0.958 0.961]


## Q19 - ridge CXE loss at scale

In [25]:
def generate_data(num_inputs = 5, size=100, seed = 2022):
    """Build a synthetic binary-classification data set from a random linear model.

    Seeds NumPy's *global* random generator with ``seed``, draws
    ``num_inputs + 1`` coefficients (bias first) uniformly from [-2, 2),
    draws ``size`` feature rows uniformly from [-4, 4), and computes the
    noisy score ``x . coeffs[1:] + coeffs[0] + noise`` (Gaussian noise,
    standard deviation 2). The label is 1 where the score is positive and
    0 otherwise. The coefficients and a preview of the rows are shown with
    ``display``.

    Returns
    -------
    numpy.ndarray of shape ``(size, num_inputs + 1)``: the feature columns
    followed by the 0/1 label column.
    """
    # Seeding the global generator also makes any np.random draw performed
    # by the caller right after this function reproducible.
    np.random.seed(seed)
    coeffs = np.random.uniform(-2, 2, num_inputs + 1)
    bias, weights = coeffs[0], coeffs[1:]
    feature_names = [f"x{k}" for k in range(1, num_inputs + 1)]

    print("The TRUE linear model is:")
    display(pd.DataFrame(coeffs.reshape(1, -1), columns=["b"] + feature_names))

    features = np.random.uniform(-4, 4, (size, num_inputs))
    noise = np.random.normal(0, 2, size)
    scores = features @ weights + bias + noise
    labels = (scores > 0).astype(int)
    X_y = np.column_stack((features, labels))
    # The preview shows the first `num_inputs` rows (not a fixed five).
    display(pd.DataFrame(X_y[:num_inputs], columns=feature_names + ["y"]))
    return X_y
num_inputs = 10
rows = generate_data(num_inputs, 100)
# Distribute the rows, keep only those whose entries (features plus label)
# sum to more than 1.0, and cache the filtered RDD for reuse.
X_y_rdd = (
    sc.parallelize(rows)
    .filter(lambda row: sum(row) > 1.0)
    .cache()
)

# Given a fictitious linear classification model,
# make it available in memory as read-only to the executors.
candidate_model = np.random.uniform(-2, 2, num_inputs + 1)
wBroadcast = sc.broadcast(candidate_model)
print("A possible linear model is: ")
model_columns = ["b"] + [f"x{k}" for k in range(1, num_inputs + 1)]
display(pd.DataFrame(wBroadcast.value.reshape(1, -1), columns=model_columns))
def class_prob(X, model):
    """Class-1 probability for a binomial logistic regression model.

    Parameters
    ----------
    X : array-like
        Feature vector without the bias input; a leading 1 is prepended
        here so that it lines up with the bias weight.
    model : array-like
        Augmented weight vector, bias first, one entry longer than ``X``.

    Returns
    -------
    The sigmoid of the linear score ``[1, X] . model``.
    """
    # Use the weights that were passed in. The previous version ignored
    # `model` and always read the global broadcast variable, so calling it
    # with any other weight vector silently gave the wrong probability.
    score = np.dot(np.append(1, X), model)
    return 1 / (1 + np.exp(-score))
                            


The TRUE linear model is:


Unnamed: 0,b,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,-1.962566,-0.003769,-1.546465,-1.800104,0.74163,-0.052048,1.590629,0.589808,1.587852,0.88454,1.325414


Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,y
0,2.620545,2.668637,3.656355,-1.055645,-0.041299,-1.283924,0.955435,3.820237,-3.228535,1.95365,0.0
1,-1.660004,-1.610597,2.019788,-3.85069,0.189899,2.915487,-0.889257,-2.302465,-0.198554,0.517379,0.0
2,-1.204566,3.807269,-3.69744,2.354157,-1.136939,1.983712,3.316074,-1.018701,3.719068,-3.348914,1.0
3,-3.660392,-1.625632,-1.090371,-0.077959,1.34815,1.387317,0.576805,-3.355262,3.18665,-3.692892,0.0
4,2.257555,-3.706749,-1.862529,-2.358209,-1.928845,3.460922,-3.935001,-0.772215,3.152818,-2.366328,1.0
5,-3.825792,1.577337,-2.471813,0.371464,0.825802,3.910351,-3.260432,-3.485701,3.903616,-0.383136,1.0
6,2.83129,-0.788439,-0.894348,3.07526,1.215783,2.214392,-3.229363,3.729296,-1.421888,-1.654653,1.0
7,1.613038,-1.133779,-2.734632,-3.934155,2.63996,1.011561,2.971506,1.007259,-2.954898,-0.380559,1.0
8,-2.445002,3.558906,-0.656156,-3.368282,2.931076,1.491987,-3.841907,-3.404309,-3.797306,-0.408729,0.0
9,0.076737,2.196001,-1.72149,-3.37316,3.324417,1.143847,2.537446,1.093146,2.170285,-2.358284,0.0


A possible linear model is: 


Unnamed: 0,b,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,0.622831,1.474318,1.311174,1.417228,1.53173,1.917852,-1.51131,1.204692,-0.878298,1.029086,-0.362585


In [26]:
W = wBroadcast.value[1:]   # weights without the leading bias term


def _record_log_likelihood(X_y):
    """Bernoulli log-likelihood of one [x1..xn, y] record under the broadcast model."""
    features, label = X_y[:-1], X_y[-1]
    prob = class_prob(features, wBroadcast.value)
    return label * np.log(prob) + (1 - label) * np.log(1 - prob)


# Binary cross-entropy: negative mean log-likelihood over the RDD.
BXE = -X_y_rdd.map(_record_log_likelihood).mean()
llambda = 1.0
# Ridge penalty: llambda times the sum of squared non-bias weights.
ridge_BXE = BXE + llambda * np.sum(W ** 2)
print(f"The BXE is:{BXE:0.3f}")
print(f"The ridge BXE loss is:{ridge_BXE:0.3f}")

The BXE is:5.736
The ridge BXE loss is:23.359


---

In [16]:
import numpy as np
import pandas as pd
from pyspark.sql import SparkSession

# Reuse the Spark session if one already exists in this kernel; referencing
# the bare name raises NameError when it does not, and only then is a new
# session built.
try:
    spark
except NameError:
    print('starting Spark')
    app_name = 'Quiz4_notebook'
    master = "local[*]"
    spark = (
        SparkSession.builder
        .appName(app_name)
        .master(master)
        .getOrCreate()
    )
# The SparkContext is the entry point for the RDD API used below.
sc = spark.sparkContext

def generate_data(num_inputs = 5, size=100, seed = 2022):
    """Build a noisy synthetic data set from a random linear model.

    Seeds NumPy's *global* random generator with ``seed``, draws
    ``num_inputs + 1`` coefficients (bias first) uniformly from [-2, 2),
    draws ``size`` feature rows uniformly from [-4, 4), and computes
    ``y = x . coeffs[1:] + coeffs[0] + noise`` with Gaussian noise of
    standard deviation 2. The coefficients and the first five rows are
    shown with ``display``.

    Returns
    -------
    numpy.ndarray of shape ``(size, num_inputs + 1)``: the feature columns
    followed by the target column.
    """
    # Seeding the global generator also makes any np.random draw performed
    # by the caller right after this function reproducible.
    np.random.seed(seed)
    coeffs = np.random.uniform(-2, 2, num_inputs + 1)
    bias, weights = coeffs[0], coeffs[1:]
    feature_names = [f"x{k}" for k in range(1, num_inputs + 1)]

    print("The TRUE linear model is:")
    display(pd.DataFrame(coeffs.reshape(1, -1), columns=["b"] + feature_names))

    features = np.random.uniform(-4, 4, (size, num_inputs))
    noise = np.random.normal(0, 2, size)
    targets = features @ weights + bias + noise
    X_y = np.column_stack((features, targets))
    display(pd.DataFrame(X_y[:5], columns=feature_names + ["y"]))
    return X_y
num_inputs = 5
rows = generate_data(num_inputs, 100)
# Distribute the rows, keep only those whose entries (features plus target)
# sum to more than 1.0, and cache the filtered RDD for reuse.
X_y_rdd = (
    sc.parallelize(rows)
    .filter(lambda row: sum(row) > 1.0)
    .cache()
)

# Given a fictitious multiple linear regression model,
# make it available in memory as read-only to the executors.
candidate_model = np.random.uniform(-2, 2, num_inputs + 1)
wBroadcast = sc.broadcast(candidate_model)
print("A possible linear model is: ")
model_columns = ["b"] + [f"x{k}" for k in range(1, len(wBroadcast.value))]
display(pd.DataFrame(wBroadcast.value.reshape(1, -1), columns=model_columns))

# Squared error of one record X_y = [x1..xn, y]: (w . [1, x] - y)**2.
# The mean over all records in the RDD is the MSE.
squared_errors = X_y_rdd.map(
    lambda X_y: (np.dot(np.append(1, X_y[:-1]), wBroadcast.value) - X_y[-1]) ** 2
)
MSE = squared_errors.mean()
print(f"The MSE is:{MSE:0.3f}")

The TRUE linear model is:


Unnamed: 0,b,x1,x2,x3,x4,x5
0,-1.962566,-0.003769,-1.546465,-1.800104,0.74163,-0.052048


Unnamed: 0,x1,x2,x3,x4,x5,y
0,3.181258,1.179617,3.175705,1.769079,2.650827,-7.701905
1,2.620545,2.668637,3.656355,-1.055645,-0.041299,-14.104847
2,-1.283924,0.955435,3.820237,-3.228535,1.95365,-14.120576
3,-1.660004,-1.610597,2.019788,-3.85069,0.189899,-4.496001
4,2.915487,-0.889257,-2.302465,-0.198554,0.517379,3.012831


A possible linear model is: 


Unnamed: 0,b,x1,x2,x3,x4,x5
0,0.182321,-0.823293,-0.756158,-1.389402,-0.209324,0.064855


The MSE is:15.809


In [17]:
import numpy as np

w1 = [-3, 2, 1]   # augmented model (bias first)
X = [1, 2, 2]     # augmented input (leading 1 pairs with the bias)
# Raw linear score of the input under w1: the inner product w1 . X.
d1 = np.asarray(w1) @ np.asarray(X)
print(f"score for w1 is : {d1:7.5f}")

score for w1 is : 3.00000
