In [11]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

In [4]:
#here we'll construct the neccessary matrices for the above example network
#node edge incidence matrix scaled by admittance
#these are just arbitrary admittance values for this assignment
F = np.array([[5.0,0.0,0.0,-5.0,0.0,0.0,0.0,0.0],
              [0.0,3.0,0.0,0.0,-3.0,0.0,0.0,0.0],
              [0.0,3.0,0.0,0.0,0.0,-3.0,0.0,0.0],
              [0.0,0.0,13.0,-13.0,0.0,0.0,0.0,0.0],
              [0.0,0.0,0.0,10.0,-10.0,0.0,0.0,0.0],
              [0.0,0.0,0.0,0.0,5.0,-5.0,0.0,0.0],
              [0.0,0.0,0.0,0.0,3.2,0.0,-3.2,0.0],
              [0.0,0.0,0.0,0.0,2.5,0.0,0.0,-2.5]])

self_admittance = np.sum(np.abs(F), axis=0)

off_diag = np.array([[0, 0, 0, -5, 0, 0, 0, 0],
                     [0, 0, 0, 0, -3, -3, 0, 0],
                     [0, 0, 0, -13, 0, 0, 0, 0],
                     [-5, 0, -13, 0, -10, 0, 0, 0],
                     [0, -3, 0, -10, 0, -5, -3.2, -2.5],
                     [0, -3, 0, 0, -5, 0, 0, 0],
                     [0, 0, 0, 0, -3.2, 0, 0, 0],
                     [0, 0, 0, 0, -2.5, 0, 0, 0]])

#admittance matrix
B = np.diag(self_admittance) + off_diag

#stacked
H = np.vstack((F, B))


In [5]:
#true flows and power injections for reference

f = 0.01*np.array([-1, -2, -1, 20, 17, 1, 10, 3])
p = 0.01*np.array([-1, -3, 20, -2, 0, -1, -10, -3])

z = np.expand_dims(np.append(f, p), axis=1)

### Problem 1

In [9]:
#here are the samples, they are not formatted correctly, you'll need to format them correctly 
#in order to use linear_model.LinearRegression().fit()
samples_1 = []

for i in range(100):
    samples_1.append(z + np.random.normal(0,0.02,size=z.shape)) #the true value + some noise
    
samples_1 = np.asarray(samples_1)[:,:,0]
print(samples_1.shape)

(100, 16)


In [19]:
model = linear_model.LinearRegression().fit(H, samples_1.T) #find the best x for each sample
x_s = model.coef_ #get the x's
print("size of solving for all x's at once:", x_s.shape, "\n")  #100 x's of size 8
x_hat = np.mean(x_s, axis=0)   #since the noise is Gaussian, the mean of all the best x's will minimize the loss for any z sample in sample_1 list
print("best x:", x_hat, "\n")
loss = np.linalg.norm(H.dot(x_hat) - z, 2)
print("Error:", loss, "\n")

size of solving for all x's at once: (100, 8) 

best x: [ 0.01433467 -0.00783658  0.03098395  0.01574861 -0.0013483  -0.00438868
 -0.03322899 -0.01426469] 

Error: 1.384233447099722 



### Problem 2

In [35]:
samples_2 = []

for i in range(100):
    injection_noise = np.random.normal(0,0.01,size=z[0:8].shape)  #variance equal to 0.01
    line_noise = np.random.normal(0,0.03,size=z[0:8].shape)       #variance equal to 0.03
    samples_2.append(z + np.expand_dims(np.append(line_noise,injection_noise),axis=1)) #the true value + some noise
    
samples_2 = np.asarray(samples_2)[:,:,0].T

In [121]:
weights_one = np.ones(z.shape)[:,0]  #the weights object must be 1-dimensional
model = linear_model.LinearRegression().fit(H, samples_2, sample_weight=weights_one)  
x_hat_one = np.mean(model.coef_, axis=0)

weights_f = 0.98*np.ones(f.shape)
weights_p = 1.02*np.ones(p.shape)
weights = np.concatenate((weights_f, weights_p))
model = linear_model.LinearRegression().fit(H, samples_2, sample_weight=weights)  #this sets *all* samples to this weight scheme
x_hat_bias = np.mean(model.coef_, axis=0)

In [122]:
loss = np.linalg.norm(H.dot(x_hat_one) - z, 2)
print("Error:", loss, "\n")

loss = np.linalg.norm(H.dot(x_hat_bias) - z, 2)
print("Error:", loss, "\n")

Error: 1.3807782306961334 

Error: 1.3807781091262918 



### Problem 3

In [103]:
#16 x 100 array of z samples
samples_3_array = np.loadtxt("homework_3_data.txt")

samples_3_array = samples_3_array.T               #data ended up getting saved transposed
sample_3_mean = np.mean(samples_3_array, axis=0)  #compute the sample mean
sample_3_var = np.var(samples_3_array, axis=0)    #compute the sample variance

#apply a Chi-squared test statistic to each feature column of z
test_stats = []
for row in range(samples_3_array.shape[0]):
    test_stat = np.sum(np.power(np.power(sample_3_var, -1)*(samples_3_array[row,:] - sample_3_mean),2))
    test_stats.append(test_stat)

In [116]:
outliers = []

for i in range(len(test_stats)):
    if test_stats[i] > 100:  #inspect the test stat values and find that there are small values and 4 very large ones, this threshold is arbitrary
        outliers.append(i)
        print("outlier at index: ", i)
        
outliers = np.array(outliers) #conver to array object to use np delete function

samples_3_clean = np.delete(samples_3_array, outliers, axis=0)
print("\n")
print("size with outliers: ", samples_3_clean.shape)
print("size without outliers: ", samples_3_array.shape)

outlier at index:  5
outlier at index:  10
outlier at index:  25
outlier at index:  89


size with outliers:  (96, 16)
size without outliers:  (100, 16)


In [120]:
model = linear_model.LinearRegression().fit(H, samples_3_array.T)
x_hat_outliers = np.mean(model.coef_, axis=0)
loss_outliers = np.linalg.norm(H.dot(x_hat_outliers) - z, 2)

model = linear_model.LinearRegression().fit(H, samples_3_clean.T)
x_hat_clean = np.mean(model.coef_, axis=0)
loss_clean = np.linalg.norm(H.dot(x_hat_clean) - z, 2)

print("loss with outliers: ", loss_outliers)
print("loss without outliers: ", loss_clean)

loss with outliers:  1.4200645723896477
loss without outliers:  1.3856004602679295
