In [377]:
import numpy as np 
import math
from numpy.linalg import inv 
from numpy.linalg import pinv
from scipy.linalg import norm
from numpy import linalg as LA
import sympy

### Position of Vectors

In [379]:
A = np.array([[0],[4],[-3]])   # initial point
B = np.array([[5],[-6],[-2]])  # terminal point

position = B - A
position

array([[  5],
       [-10],
       [  1]])

### Vector Operations ( Addition and multipliction)

In [381]:
u = np.array([[5],[-9],[2]])
print("shape of U ",u.shape)

v = np.array([[8],[-5],[-6]])
k = np.array([[9],[1],[7]])

print(5*u+4*v+3*k)

shape of U  (3, 1)
[[ 84]
 [-62]
 [  7]]


### Matrix multiplication

In [383]:
A = np.array([[-5,3,-4],[-5,2,-1],[5,-1,3]])
B = np.array([[3],[2],[-1]])
print('A=\n' ,A, '\n' , 'B=\n',B) 
print('A=\n' ,A.shape, '\n' , 'B=\n',B.shape) 

print("Matrix Multiplication",A@B) # matrix multiplication 


A=
 [[-5  3 -4]
 [-5  2 -1]
 [ 5 -1  3]] 
 B=
 [[ 3]
 [ 2]
 [-1]]
A=
 (3, 3) 
 B=
 (3, 1)
Matrix Multiplication [[ -5]
 [-10]
 [ 10]]


### Dot product

In [385]:
u = np.array([5,-5,6,7,7])
v = np.array([-2,-7,-1])
print(np.dot(v,v))

from numpy.linalg import norm
norm(v)

54


7.3484692283495345

### Length of Vector or Magnitude of vector or L2 Norm

In [387]:
u = [8,-6]
print(math.sqrt(np.dot(u,u)))
from numpy.linalg import norm
norm(u)

10.0


10.0

### Unit Vector

In [389]:
a = np.array([1,1,1])
unitA = a/norm(a)
print(unitA)
norm(a)

[0.57735027 0.57735027 0.57735027]


1.7320508075688772

### Scaling back to original vector

In [391]:
import numpy as np

def scale_unit_vector(unit_vector, original_magnitude):
  """Scales a unit vector back to its original magnitude.

  Args:
    unit_vector: A NumPy array representing the unit vector.
    original_magnitude: The original magnitude of the vector.

  Returns:
    A NumPy array representing the original vector.
  """

  return unit_vector * original_magnitude

v = np.array([3, 4])
magnitude = np.linalg.norm(v)
unit_v = v / magnitude

scaled_v = scale_unit_vector(unit_v, magnitude)
print(scaled_v)

[3. 4.]


### Finding the Angle between the vectors  or geometricla relationships 

In [393]:
u = np.array([-2,1]) ##enter the first matrix
v = np.array([0,3]) ## enter the second matrix
dot= np.dot(u,v)             ## if the dot product of u and v are 0 then its orthogonal 
mag_u = norm(u)
mag_v = norm(v)

costheta = dot / (mag_u * mag_v)

rad = math.acos(costheta)
deg = math.degrees(rad)

print(deg)

63.43494882292201


### Scalar and vector Projections

In [466]:
u = np.array([1,-2,1]) ##enter the first matrix
v = np.array([1,0,-1]) ## enter the second matrix
dot= np.dot(u,v)  
mag_v = norm(v)

projvu = dot / mag_v
projvu


TypeError: unsupported operand type(s) for /: 'int' and 'rv_continuous_frozen'

### Inverse of matrix 

In [397]:
A = np.array([[1,2],[-3,-5]])
Ainv = inv(A)
print(A)
print(inv(A))

[[ 1  2]
 [-3 -5]]
[[-5. -2.]
 [ 3.  1.]]


### X = A^-1b

In [399]:
A = np.array([[6,7],[-1,-1]])
b = np.array([[-3],[1]])
Ainv =inv(A)
print(Ainv)
x = Ainv@b
print(x)

[[-1. -7.]
 [ 1.  6.]]
[[-4.]
 [ 3.]]


In [400]:
x = pinv(A)@b
print(x)

err = norm(A@x - b)
print(err)

[[-4.]
 [ 3.]]
1.4715376939894124e-14


### least-squares solution solve for X^

In [402]:
A = np.array([[1,1,-2]])
b = np.array([[2,1,-6]])

print('Matrix A\n',A)
print('Matrix B\n',b)

ATA = A.T @ A
ATb = A.T @ b 

print('ATA =\n',ATA)
print('ATb = \n',ATb)

#ATA = np.array([[4,48],[48,774]])
#ATb = np.array([[491.5],[6898.0]])

# Create an augmented matrix
augmented_matrix = np.hstack((ATA, ATb))

print("Augmented Matrix: \n",augmented_matrix)

# Find the reduced row echelon form
rref = sympy.Matrix(augmented_matrix).rref()[0]

# Extract the solution from the reduced row echelon form
solution = rref[:, -1]

print("Solution:\n", solution)

Matrix A
 [[ 1  1 -2]]
Matrix B
 [[ 2  1 -6]]
ATA =
 [[ 1  1 -2]
 [ 1  1 -2]
 [-2 -2  4]]
ATb = 
 [[ 2  1 -6]
 [ 2  1 -6]
 [-4 -2 12]]
Augmented Matrix: 
 [[ 1  1 -2  2  1 -6]
 [ 1  1 -2  2  1 -6]
 [-2 -2  4 -4 -2 12]]
Solution:
 Matrix([[-6], [0], [0]])


### To find eigen values or to find the spectrum of matrix 

In [404]:
A = np.array([[4,-3],[2,-1]])
print(A)

l, ev = LA.eig(A)
print('Eigen values are:', l )
print('eigen vectors are:', ev[:,0],'and',ev[:,1])

[[ 4 -3]
 [ 2 -1]]
Eigen values are: [2. 1.]
eigen vectors are: [0.83205029 0.5547002 ] and [0.70710678 0.70710678]


### similarity B =M^-1 AM

In [406]:
A = np.array([[-3,-2,0,0],[-3,4,0,0],[0,0,-5,-4],[0,0,-2,2]])
M = np.array([[0,0,1,2],[0,0,3,1],[4,-1,0,0],[1,2,0,0]])
M_inv = LA.inv(M)
B = abs(M_inv@A@M)
B

array([[6.00000000e+00, 2.22044605e-16, 0.00000000e+00, 0.00000000e+00],
       [2.22044605e-16, 3.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 5.40000000e+00, 8.00000000e-01],
       [0.00000000e+00, 0.00000000e+00, 7.20000000e+00, 4.40000000e+00]])

### The Diagonalization Theorem

In [408]:
import numpy as np 
from numpy import linalg
A = np.array([[1,0],[6,-1]])
print(A)
l , ev  = linalg.eig(A)
print(l)
print(ev)
import sympy as sp 
rref_M, pivot_cols = sp.Matrix(ev).rref()
rref_M
pivot_cols
D = np.diag(l)
D
P = abs(ev)
P
ev = np.array([[3,2],[1,1]])
P_inv = linalg.inv(ev)
print(ev)
P_inv
P@D@P_inv

[[ 1  0]
 [ 6 -1]]
[-1.  1.]
[[0.         0.31622777]
 [1.         0.9486833 ]]
[[3 2]
 [1 1]]


array([[-0.31622777,  0.9486833 ],
       [-1.9486833 ,  4.84604989]])

### singular matrix

In [410]:
def is_singular(matrix):
    # Calculate the determinant
    determinant = np.linalg.det(matrix)
    
    if np.isclose(determinant, 0):  # Check if determinant is close to zero
        return True, determinant
    else:
        return False, determinant

# Example matrix
A = np.array([[1, 2], [2, 4]])  # Singular matrix (rows are linearly dependent)

# Check if the matrix is singular
singular, det = is_singular(A)

if singular:
    print(f"The matrix is singular. Determinant = {det}")
else:
    print(f"The matrix is not singular. Determinant = {det}")

The matrix is singular. Determinant = 0.0


### PCA python implementation 

In [412]:
import numpy as np

# Example dataset
X = np.array([[2.5, 2.4],
              [0.5, 0.7],
              [2.2, 2.9],
              [1.9, 2.2],
              [3.1, 3.0]])

# Step 1: Standardize the data
X_mean = np.mean(X, axis=0)
X_centered = X - X_mean

# Step 2: Compute the covariance matrix
cov_matrix = np.cov(X_centered, rowvar=False)

# Step 3: Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Step 4: Sort eigenvalues and eigenvectors
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

# Step 5: Transform data to the first principal component
PC1 = eigenvectors[:, 0]  # First principal component
X_reduced = np.dot(X_centered, PC1)

print("Eigenvalues:", eigenvalues)
print("Eigenvectors:", eigenvectors)
print("Reduced Data:", X_reduced)

Eigenvalues: [1.73707382 0.05392618]
Eigenvectors: [[ 0.72474155 -0.68902082]
 [ 0.68902082  0.72474155]]
Reduced Data: [ 0.44362444 -2.17719404  0.57071239 -0.12902465  1.29188186]


### PCA Scikit Learn 

In [414]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Example dataset
X = np.array([[2.5, 2.4],
              [0.5, 0.7],
              [2.2, 2.9],
              [1.9, 2.2],
              [3.1, 3.0]])

# Step 1: Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Apply PCA
pca = PCA(n_components=1)  # Reduce to 1 principal component
X_reduced = pca.fit_transform(X_scaled)

print("Explained Variance Ratio:", pca.explained_variance_ratio_)
print("Principal Components:", pca.components_)
print("Reduced Data:", X_reduced.shape)

Explained Variance Ratio: [0.96982031]
Principal Components: [[-0.70710678 -0.70710678]]
Reduced Data: (5, 1)


### SVD


In [416]:
import numpy as np

# Example data matrix
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

# Perform SVD
U, Sigma, Vt = np.linalg.svd(A)

# Display the components
print("Original Matrix (A):")
print(A)

print("\nLeft Singular Vectors (U):")
print(U)

print("\nSingular Values (Sigma):")
print(Sigma)

print("\nRight Singular Vectors (V^T):")
print(Vt)

Original Matrix (A):
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Left Singular Vectors (U):
[[-0.21483724  0.88723069  0.40824829]
 [-0.52058739  0.24964395 -0.81649658]
 [-0.82633754 -0.38794278  0.40824829]]

Singular Values (Sigma):
[1.68481034e+01 1.06836951e+00 3.33475287e-16]

Right Singular Vectors (V^T):
[[-0.47967118 -0.57236779 -0.66506441]
 [-0.77669099 -0.07568647  0.62531805]
 [-0.40824829  0.81649658 -0.40824829]]


In [417]:
# Reconstruct the original matrix
Sigma_diag = np.zeros((A.shape[0], A.shape[1]))
np.fill_diagonal(Sigma_diag, Sigma)
A_reconstructed = np.dot(U, np.dot(Sigma_diag, Vt))

print("\nReconstructed Matrix (A):")
print(A_reconstructed)


Reconstructed Matrix (A):
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]


### hypothesis testing

1. One-Sample Z-Test
$$
Z = \frac{\bar{X} - \mu}{\frac{\sigma}{\sqrt{n}}}
$$


Where:
	•	\bar{X}: Sample mean.
 
	•	\mu: Population mean.
 
	•	\sigma: Population standard deviation.
 
	•	n: Sample size.

#### which P value to use
 Formulas:
	•	Two-tailed Test:

p = 2 x (1 - CDF(|Z|))

	•	One-tailed Test (Right-tailed):

p = 1 - CDF(Z)

	•	One-tailed Test (Left-tailed):

p = CDF(Z)


In [422]:
#A school claims that the average height of students is 5.5 feet. 
#A sample of 50 students has a mean height of 5.7 feet with a population standard deviation of 0.5 feet. Is the claim valid?

from scipy.stats import norm
import numpy as np

# Given data
sample_mean = 5.7
population_mean = 5.5
population_std = 0.5
sample_size = 50

# Calculate Z-Statistic
z_stat = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size))

# Calculate p-value
p_value = 2 * (1 - norm.cdf(abs(z_stat)))  # Two-tailed test

print("Z-Statistic:", z_stat)
print("P-Value:", p_value)

# Decision
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

Z-Statistic: 2.8284271247461925
P-Value: 0.004677734981047177
Reject the null hypothesis.


### Two sample z test

$$
Z = \frac{(\bar{X}_1 - \bar{X}_2)}{\sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}}}
$$

Where:
	•	\bar{X}_1, \bar{X}_2: Sample means.
 
	•	\sigma_1, \sigma_2: Population standard deviations.
 
	•	n_1, n_2: Sample sizes.

In [426]:
#The average weight of males is 70 kg, and for females, it is 65 kg. 
#Both groups have a standard deviation of 10 kg, with sample sizes of 100 each. Test if males weigh significantly more than females.
# Given data
mean1, mean2 = 70, 65
std1, std2 = 10, 10
n1, n2 = 100, 100

# Calculate Z-Statistic
z_stat = (mean1 - mean2) / np.sqrt((std1**2 / n1) + (std2**2 / n2))

# Calculate p-value for a right-tailed test
p_value = 1 - norm.cdf(z_stat)

print("Z-Statistic:", z_stat)
print("P-Value:", p_value)

# Decision
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

Z-Statistic: 3.5355339059327373
P-Value: 0.00020347600872250293
Reject the null hypothesis.


| **Scenario**                               | **Test**                              | **Example**                                                                 |
|--------------------------------------------|---------------------------------------|-----------------------------------------------------------------------------|
| Numerical Data: 1 sample, mean comparison  | One-Sample Z-Test/T-Test             | Is the average height of students 5.5 feet?                                |
| Numerical Data: 2 independent samples, mean comparison | Two-Sample T-Test/Z-Test            | Is the average salary of males different from females?                     |
| Numerical Data: 2 related samples (paired) | Paired T-Test                        | Did a weight-loss program reduce weight (before vs. after)?                |
| Numerical Data: 3+ groups, mean comparison | ANOVA                                | Does the test score differ among three teaching methods?                   |
| Categorical Data: 1 variable, proportions  | Chi-Square Goodness-of-Fit Test      | Does a die roll follow a uniform distribution?                             |
| Categorical Data: 2 variables, association | Chi-Square Test for Independence     | Is gender associated with voting preference?                               |
| Numerical vs. Numerical: Correlation       | Pearson/Spearman Correlation         | Is there a relationship between income and happiness scores?               |
| Numerical Data: Predicting one variable from another | Linear Regression                  | Does the number of study hours predict exam scores?                        |
| Categorical Data: Predicting a category    | Logistic Regression                  | Can we predict whether a person will buy a product based on their income?  |

### chi square test 

In [429]:
from scipy.stats import chi2_contingency
import numpy as np

# Example Contingency Table
data = np.array([[30, 20], [10, 40]])

# Perform Chi-Square Test
chi2, p_value, dof, expected = chi2_contingency(data)

print("Chi-Square Statistic:", chi2)
print("P-Value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

# Decision
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: Variables are dependent.")
else:
    print("Fail to reject the null hypothesis: Variables are independent.")

Chi-Square Statistic: 15.041666666666668
P-Value: 0.00010516355403363098
Degrees of Freedom: 1
Expected Frequencies:
 [[20. 30.]
 [20. 30.]]
Reject the null hypothesis: Variables are dependent.


### Mann-Whitney U Test


In [431]:
from scipy.stats import mannwhitneyu

# Sample Data (Independent Groups)
group1 = [12, 15, 14, 10, 13]
group2 = [22, 25, 19, 30, 27]

# Perform Mann-Whitney U Test
u_stat, p_value = mannwhitneyu(group1, group2, alternative='two-sided')

print("Mann-Whitney U Statistic:", u_stat)
print("P-Value:", p_value)

# Decision
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: Groups differ.")
else:
    print("Fail to reject the null hypothesis: Groups do not differ.")

Mann-Whitney U Statistic: 0.0
P-Value: 0.007936507936507936
Reject the null hypothesis: Groups differ.


### Wilcoxxxon test 

In [433]:
from scipy.stats import wilcoxon

# Sample Data (Paired Groups)
before = [85, 90, 88, 92, 87]
after = [80, 88, 85, 90, 83]

# Perform Wilcoxon Signed-Rank Test
stat, p_value = wilcoxon(before, after)

print("Wilcoxon Statistic:", stat)
print("P-Value:", p_value)

# Decision
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: Groups differ.")
else:
    print("Fail to reject the null hypothesis: Groups do not differ.")

Wilcoxon Statistic: 0.0
P-Value: 0.0625
Fail to reject the null hypothesis: Groups do not differ.


### ANOVA

In [435]:
from scipy.stats import f_oneway

# Sample Data (Independent Groups)
group1 = [10, 12, 14, 11, 13]
group2 = [20, 21, 23, 19, 22]
group3 = [30, 29, 31, 28, 32]

# Perform One-Way ANOVA
f_stat, p_value = f_oneway(group1, group2, group3)

print("F-Statistic:", f_stat)
print("P-Value:", p_value)

# Decision
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: At least one group mean is different.")
else:
    print("Fail to reject the null hypothesis: All group means are the same.")

F-Statistic: 162.0
P-Value: 2.0751610723423055e-09
Reject the null hypothesis: At least one group mean is different.
