In [1]:
import numpy as np
from scipy.linalg import svd

def leverage_scores_sampling(A, k):
    """Return the row-sampling distribution given by the leverage scores of ``A``.

    The leverage score of row ``i`` is the squared Euclidean norm of row ``i``
    of the left singular vectors spanning the column space of ``A``. The
    scores are divided by their sum so the result is a probability vector
    over the rows of ``A``.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Data matrix, one sample per row.
    k : int
        Currently unused; kept so existing calls keep working.
        NOTE(review): presumably a sample count or target rank -- confirm.

    Returns
    -------
    p : ndarray, shape (m,)
        Non-negative weights summing to 1. If ``A`` has numerical rank 0 the
        leverage scores are all zero and a uniform distribution is returned.
    """
    # Thin SVD: U is (m, min(m, n)) with orthonormal columns.
    U, s, Vt = svd(A, full_matrices=False)
    n_rows, n_cols = U.shape[0], Vt.shape[1]

    # Keep only singular directions above the usual numerical-rank tolerance;
    # columns of U paired with (near-)zero singular values are arbitrary and
    # do not belong to the column space of A.
    tol = s.max(initial=0.0) * max(n_rows, n_cols) * np.finfo(s.dtype).eps
    rank = int(np.count_nonzero(s > tol))
    if rank == 0:
        return np.full(n_rows, 1.0 / n_rows)

    # Sum across each ROW (axis=1). Summing down the columns (axis=0) gives 1
    # for every column of an orthonormal U, i.e. a uniform vector of the
    # wrong length.
    row_scores = np.sum(U[:, :rank] ** 2, axis=1)
    return row_scores / np.sum(row_scores)

In [2]:
def leverage_scores(A):
    """Return the leverage score of every row of ``A``.

    The scores are the diagonal of the hat matrix ``H = A (A^T A)^+ A^T``,
    where ``^+`` is the Moore-Penrose pseudoinverse.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Data matrix, one sample per row.

    Returns
    -------
    scores : ndarray, shape (m,)
        ``scores[i] = a_i^T (A^T A)^+ a_i`` for row ``a_i`` of ``A``.
    """
    A = np.asarray(A)
    # Pseudoinverse of the (n, n) Gram matrix.
    gram_pinv = np.linalg.pinv(A.T @ A)
    # diag(A G A^T)[i] = sum_j (A G)[i, j] * A[i, j]. Computing it this way
    # avoids materialising the (m, m) hat matrix just to read its diagonal.
    return np.einsum("ij,ij->i", A @ gram_pinv, A)

In [6]:
# Fixed seed so that Restart & Run All regenerates the same matrix.
SEED = 0
rng = np.random.default_rng(SEED)
# 100 rows x 10 columns of i.i.d. standard-normal entries.
A = rng.standard_normal((100, 10))
# Thin SVD of A: U is (100, 10), s holds 10 singular values, Vt is (10, 10).
U, s, Vt = svd(A, full_matrices=False)

In [14]:
# Leverage score of row i = squared norm of row i of U (thin SVD of A).
# Only the first 10 rows are shown. A distinct name is used so that the
# `leverage_scores` function defined earlier is not overwritten by a list.
svd_scores_head = np.sum(U[:10] ** 2, axis=1).tolist()
svd_scores_head

[0.11454040931473884,
 0.03521105469357307,
 0.11730614653093195,
 0.03473617786087899,
 0.11281481495929219,
 0.1044929313420797,
 0.07883556696367161,
 0.07299001407642833,
 0.08043665525660648,
 0.10845672888893837]

In [17]:
# Pseudoinverse of the Gram matrix A^T A.
A_tA_inv = np.linalg.pinv(A.T @ A)
# Hat matrix A (A^T A)^+ A^T; its diagonal holds the row leverage scores.
H = A @ A_tA_inv @ A.T
# Display the shape so the cell actually produces its recorded output.
H.shape

(100, 100)

In [20]:
# Vectorized per-row quadratic form a_i^T (A^T A)^+ a_i for the first 10 rows:
# sum_j (A G)[i, j] * A[i, j], with G = A_tA_inv, replaces the Python loop.
# A distinct name is used so that the `leverage_scores` function defined
# earlier is not overwritten by a list.
A_head = A[:10]
hat_scores_head = np.einsum("ij,ij->i", A_head @ A_tA_inv, A_head).tolist()
hat_scores_head

[0.11454040931473893,
 0.03521105469357309,
 0.11730614653093191,
 0.034736177860878924,
 0.11281481495929216,
 0.10449293134207979,
 0.07883556696367168,
 0.07299001407642834,
 0.08043665525660655,
 0.10845672888893841]

## Finding Geometry

### manifold structure

Suppose I have a dataset that represents a manifold-type structure (e.g., an $n$-dimensional ball in $\mathbb R^n$). If we sample $m$ points from this ball, the dataset is represented by a matrix in $\mathbb R^{m\times n}$, with one point per row.

- Is there a systematic way to remove some points from this dataset while still preserving the manifold structure?

As I understand it, forming the matrix destroys the explicit manifold representation, and the intrinsic structure is left hidden somewhere in the matrix. How can we recover that structure using as few data points as possible?

### Constructing canonical manifold

Can we construct a manifold from our dataset?

- What is the set on which the topology needs to be defined?
- What will be the notions of neighborhood and compactness?
- What will be the atlas?

$$\textrm{vector space}\stackrel{?}{\rightarrow}\textrm{manifold}$$