Skip to content

Commit

Permalink
Added statistic measures and new regression wrapper
Browse files Browse the repository at this point in the history
Matrix:
- defined iterability for the Matrix class
- added method get_absolute_matrix() for returning the matrix where elements are absolute values of the original elements

utils:
- added methods for calculating covariance, variance, standard deviation and Pearson linear correlation coefficient

regression:
- added a method for parameter estimation using iteratively reweighted least squares method
  • Loading branch information
jerela committed Apr 4, 2024
1 parent ec07272 commit 6b19fe0
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 13 deletions.
49 changes: 40 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,20 +122,19 @@


# TEST GAUSS-NEWTON ITERATION
h = Matrix([lambda a,x: pow(a,x)])
independents = Matrix([1, 2, 3]).get_transpose()
y = Matrix([2, 4, 8]).get_transpose()
# let J be the Jacobian of h(x)
J = Matrix([lambda a,x: x*pow(a,x-1)])

theta = regression.fit_nonlinear(independents, y, h, J, initial=Matrix([0.5]))
print(theta)
#h = Matrix([lambda a,x: pow(a,x)])
#independents = Matrix([1, 2, 3]).get_transpose()
#y = Matrix([2, 4, 8]).get_transpose()
## let J be the Jacobian of h(x)
#J = Matrix([lambda a,x: x*pow(a,x-1)])

#theta = regression.fit_nonlinear(independents, y, h, J, initial=Matrix([0.5]))
#print(theta)

# TEST K-MEANS CLUSTERING
initial_centers = Matrix([[0,0],[20,0]])
symmetric_points = Matrix([[-1,0],[-2,2],[1,1],[20,2],[18,0],[22,-1],[23,-1]])
centers = clustering.find_k_means(data=symmetric_points,num_centers=2,initial_centers=initial_centers)
centers = clustering.find_k_means(data=symmetric_points,num_centers=2,initial_centers=initial_centers)[0]


print(centers)
Expand All @@ -146,3 +145,35 @@
density_points = Matrix([[1,2], [0.5,1.5], [0,1], [0,0.5], [0,0], [0,-0.5], [0,-1], [0.5,-1.5], [1,-2], [2,0], [2.5,-0.5], [3,-1], [3,-1.5], [3,-2], [3,-2.5], [3,-3], [2.5,-3.5], [2,-4]])
mountaintops = clustering.find_density_clusters(data=density_points, num_centers=2, beta = 0.25, sigma = 0.25)
print(mountaintops)



x = [1, 2, 4, 7]
c = utils.cov(x)
sd = utils.std(x)
vari = utils.var(x)
print(x)
print(c)
print(sd)
print(vari)
X = Matrix([[1, 2], [4, 7]])
C = utils.cov(X)
Cm = utils.covmat(X)
print(X)
print(C)
print(Cm)

x = Matrix([1, 2, 3, 4]).get_transpose()
y = Matrix([2, 4, 6, 8]).get_transpose()
R = utils.pearson(x,y)
print(R)


# define the observation matrix
H = Matrix([[2,1],[4,1],[6,1]])
# define the measurements
y = Matrix([[0],[1],[2]])

theta = regression.irls(H, y, threshold = 1e-12)

print(theta)
28 changes: 26 additions & 2 deletions mola/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def __init__(self, *args):
elif len(args) == 3:
self.__construct_by_dimensions(args[0], args[1], args[2])


# construct a matrix with r rows, c columns, and some initial value (default 0)
def __construct_by_dimensions(self,r,c,value=0):
"""
Expand Down Expand Up @@ -76,6 +75,18 @@ def __construct_from_lists(self,lists):
self.n_cols = len(lists)
self.data = [lists]

# define iterability for the matrix
def __iter__(self):
"""
Return an iterator of the matrix.
For a single-column matrix, the iterator loops through the elements of the column.
For other matrices, the iterator loops through the columns as lists.
"""
if self.n_cols == 1:
return iter(self.get_column(0,as_list=True))
else:
return iter(self.data)

def __abs__(self):
"""
Return the absolute value of a 1x1 matrix (i.e., a matrix with just one element).
Expand Down Expand Up @@ -987,7 +998,20 @@ def get_dominant_eigenvector(self):
err = (eigenvector-eigenvector_prev).norm_Euclidean()

return eigenvector


def get_absolute_matrix(self):
"""
Return a matrix where the elements are the absolute values of the original matrix.
"""

n = self.get_height()
m = self.get_width()
mat = Matrix(n,m,0)

for i in range(n):
for j in range(m):
mat[i,j] = abs(self[i,j])
return mat


class LabeledMatrix(Matrix):
Expand Down
34 changes: 33 additions & 1 deletion mola/regression.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from mola.matrix import Matrix
from mola.utils import identity, ones, zeros, randoms
from mola.utils import identity, ones, zeros, randoms, covmat, diag
from copy import deepcopy

def linear_least_squares(H: Matrix, z: Matrix, W=None):
Expand All @@ -23,6 +23,38 @@ def linear_least_squares(H: Matrix, z: Matrix, W=None):
th_tuple = (th.get(0,0), th.get(1,0))
return th_tuple

def irls(H: Matrix, z: Matrix, threshold = 1e-6):
"""
Return the iteratively reweighted least squares (IRLS) estimate of the parameters of a model defined by observation matrix H and dependent values z.
Arguments:
H -- Matrix: the observation matrix of the linear system of equations
z -- Matrix: the observed or dependent values depicting the right side of the linear system of equations
"""

# estimate the parameters using ordinary least squares
th = ((H.get_transpose())*H).get_inverse() * H.get_transpose() * z


difference = float('inf')

while difference > threshold:

# calculate absolute values of residuals
residuals = (H*th-z).get_absolute_matrix()

# estimate sample variance-covariance matrix V using the OLS residuals
#V = covmat(residuals)
W = diag(residuals)
print("W: ", W)

# re-estimate the parameters using generalized least squares
th = ((H.get_transpose())*W*H).get_inverse() * H.get_transpose() * W * z

difference = residuals.norm_Euclidean()

th_tuple = tuple(th.get_column(0))
return th_tuple

def fit_univariate_polynomial(independent_values: Matrix, dependent_values: Matrix, degrees=[1], intercept=True, weights = None, regularization_coefficient = None):
"""
Expand Down
112 changes: 111 additions & 1 deletion mola/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import random
import statistics
from mola.matrix import Matrix


Expand Down Expand Up @@ -226,4 +227,113 @@ def column(data: list) -> Matrix:
if isinstance(list[0],list):
raise Exception("exception in utils.column(): list is multidimensional")

return Matrix(data).get_transpose()
return Matrix(data).get_transpose()

# calculate the variance of a vector of values
def var(X):
"""
Return the variance of the input.
"""
return statistics.variance(X)

# calculate the covariance of two vectors
def cov(X,Y = None) -> float:
"""
Return the covariance of a random variable or between two random variables.
Note that the result is the "sample covariance" that is normalized by N-1 rather than N, where N is the number of samples.
Use covmat() if you want to calculate the covariance matrix.
Arguments:
X -- list or Matrix: the values representing samples from the first random variable or column-organized samples from several random variables
Y -- list or Matrix: the values representing samples from the second random variable (optional)
"""

# if X is a list and Y is not defined, calculate the covariance of X with itself, i.e., variance
if isinstance(X,list) and Y is None:
return var(X)

# if X is a matrix and Y is not defined, try to calculate the covariance between the columns of X
elif isinstance(X,Matrix) and Y is None:
n_cols = X.get_width()
# if X has more than 2 columns, calculate the covariance matrix
if n_cols > 2:
return covmat(X)
# if X has 2 columns, calculate the covariance between them
if n_cols == 2:
return cov(X[:,0],X[:,1])
# if X has 1 column, calculate its covariance with itself (variance)
if X.get_width() == 1:
return var(X.get_column(0,as_list=True))

# if X and Y are both lists, calculate their sample covariance
elif isinstance(X,list) and isinstance(Y,list):
n = len(X)
mX = statistics.mean(X)
mY = statistics.mean(Y)
c = 0
for x,y in zip(X,Y):
c += (x-mX) * (y-mY)
c = c/(n-1)
return c

# if X and Y are both matrices but have 1 column each, calculate their covariance
elif isinstance(X,Matrix) and isinstance(Y,Matrix):
if X.get_width() == 1 and Y.get_width() == 1:
return cov(X.get_column(0,as_list=True),Y.get_column(0,as_list=True))
else:
raise Exception("Invalid arguments for cov()")





def std(X):
"""
Return the standard deviation of the input.
"""
return statistics.stdev(X)

def covmat(X: Matrix) -> Matrix:
"""
Return the covariance matrix for the input matrix.
The diagonal of the covariance matrix contains variances.
Note that the result is the "sample covariance" that is normalized by N-1 rather than N, where N is the number of samples.
Arguments:
X -- Matrix: a matrix where the vectors are organized into columns
"""
if isinstance(X,Matrix):

n = X.get_height()
m = X.get_width()

c = zeros(m,m)

for i in range(m):
for j in range(m):
c[i,j] = cov(X.get_column(i,as_list=True),X.get_column(j,as_list=True))

return c
else:
raise Exception("Input to covmat() must be a Matrix object.")


def pearson(X,Y):
return cov(X,Y)/(std(X)*std(Y))

def diag(x):
if isinstance(x,Matrix):
if x.get_height() == 1:
v = x.get_row(0,as_list = True)
elif x.get_width() == 1:
v = x.get_column(0,as_list = True)
else:
raise Exception("Input argument to diag() is invalid")
n = len(v)
mat = identity(n)
for i in range(n):
mat[i,i] = v[i]
return mat

# ITERATIVELY REWEIGHTED GENERALIZED LEAST SQUARES

0 comments on commit 6b19fe0

Please sign in to comment.