<a href="https://colab.research.google.com/github/hgarg97/ML-Code-Challenges/blob/main/MLCode_MachineLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning

## https://www.deep-ml.com/problems

## 14. Linear Regression Using Normal Equation


In [None]:
import numpy as np
def linear_regression_normal_equation(X: list[list[float]], y: list[float]) -> np.ndarray:
    """Fit linear-regression coefficients with the normal equation.

    Solves ``theta = (X^T X)^-1 X^T y``. The solution is obtained through the
    Moore-Penrose pseudo-inverse of ``X``, which equals the normal-equation
    result whenever ``X`` has full column rank and still yields the
    minimum-norm least-squares solution when ``X^T X`` is singular (e.g.
    collinear features), where a plain matrix inverse would raise.

    Args:
        X: Design matrix, one row per sample. No intercept column is added
            here; include a column of ones in ``X`` if one is wanted.
        y: Target values, one per row of ``X``.

    Returns:
        1-D array of coefficients, rounded to 4 decimal places.
    """
    design = np.asarray(X, dtype=float)
    # Column vector so the matrix product has an unambiguous shape.
    targets = np.asarray(y, dtype=float).reshape(-1, 1)

    theta = np.linalg.pinv(design) @ targets

    return np.round(theta, 4).flatten()

## 15. Linear Regression Using Gradient Descent


In [None]:
import numpy as np
def linear_regression_gradient_descent(X: np.ndarray, y: np.ndarray, alpha: float, iterations: int) -> np.ndarray:
    """Fit linear-regression coefficients with batch gradient descent.

    Starts from an all-zero coefficient vector and repeatedly steps against
    the gradient of the mean squared error computed over every sample.

    Args:
        X: 2-D design matrix, one row per sample.
        y: Target values, one per row of ``X``.
        alpha: Learning rate applied to each gradient step.
        iterations: Number of gradient steps to perform.

    Returns:
        1-D array of coefficients, rounded to 4 decimal places.
    """
    sample_count, feature_count = X.shape
    weights = np.zeros((feature_count, 1))

    for _ in range(iterations):
        # Residuals as a column vector: prediction minus target.
        residuals = X @ weights - y.reshape(-1, 1)
        gradient = X.T @ residuals / sample_count
        weights -= alpha * gradient

    return np.round(weights, 4).ravel()

## 16. Feature Scaling Implementation

In [None]:
import numpy as np

def feature_scaling(data: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Scale each feature (column) by standardization and by min-max normalization.

    Standardization maps every column to zero mean and unit (population)
    standard deviation; min-max normalization maps every column onto
    ``[0, 1]``. A column whose values are all identical has no spread, so it
    is mapped to 0 in both outputs instead of producing NaN from a 0/0
    division.

    Args:
        data: Array with samples along axis 0 and features along axis 1.

    Returns:
        ``(standardized, normalized)``, both rounded to 4 decimal places.
    """
    data = np.asarray(data, dtype=float)

    min_val = np.min(data, axis=0)
    value_range = np.max(data, axis=0) - min_val
    # max == min holds exactly when every value in the column is identical.
    constant = value_range == 0

    # Standardization
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # Substitute a harmless divisor for constant columns, then force them to 0.
    standardized_data = np.where(
        constant, 0.0, (data - mean) / np.where(constant, 1.0, std)
    )

    # Normalization
    normalized_data = np.where(
        constant, 0.0, (data - min_val) / np.where(constant, 1.0, value_range)
    )

    return np.round(standardized_data, 4), np.round(normalized_data, 4)

## 17. K-Means Clustering

In [None]:
# Works for 2-D Points

import numpy as np
from collections import defaultdict

def euclidean_dist(a, b):
    """Return the Euclidean distance between two 2-D points.

    Only the first two coordinates of ``a`` and ``b`` are read.
    """
    return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)


def k_means_clustering(points: list[tuple[float, float]], k: int, initial_centroids: list[tuple[float, float]], max_iterations: int) -> list[tuple[float, float]]:
    """Cluster 2-D points with Lloyd's k-means algorithm.

    Each iteration assigns every point to its nearest centroid and then moves
    each centroid to the mean of its assigned points. Iteration stops as soon
    as an update leaves the centroids unchanged, or after ``max_iterations``
    updates.

    Args:
        points: The 2-D points to cluster.
        k: Number of clusters; expected to equal ``len(initial_centroids)``.
        initial_centroids: Starting centroid positions.
        max_iterations: Upper bound on the number of update steps.

    Returns:
        The final centroids as a list of ``(x, y)`` tuples. With
        ``max_iterations == 0`` the initial centroids are returned.
    """
    # Tuples so the convergence check compares like with like.
    centroids = [tuple(centroid) for centroid in initial_centroids]

    for _ in range(max_iterations):
        # Assignment step: group points by index of the nearest centroid.
        clusters = defaultdict(list)
        for point in points:
            distances = [euclidean_dist(point, centroid) for centroid in centroids]
            clusters[distances.index(min(distances))].append(point)

        # Update step: move each centroid to the mean of its members.
        new_centroids = []
        for i in range(k):
            members = clusters[i]
            if members:
                count = len(members)
                new_centroids.append((
                    sum(p[0] for p in members) / count,
                    sum(p[1] for p in members) / count,
                ))
            else:
                # An empty cluster keeps its previous centroid.
                new_centroids.append(centroids[i])

        if new_centroids == centroids:
            break
        # Carry the update into the next iteration.
        centroids = new_centroids

    return centroids

In [None]:
# Works for points of any dimensionality (generalizes the 2-D-only version above)

import numpy as np
from collections import defaultdict

def euclidean_dist(a, b):
    """Return the Euclidean distance between two points of equal dimension."""
    return np.linalg.norm(np.asarray(a) - np.asarray(b))


def k_means_clustering(points: list[tuple[float, ...]], k: int, initial_centroids: list[tuple[float, ...]], max_iterations: int) -> list[tuple[float, ...]]:
    """Cluster points of any dimensionality with Lloyd's k-means algorithm.

    Each iteration assigns every point to its nearest centroid and then moves
    each centroid to the coordinate-wise mean of its assigned points.
    Iteration stops as soon as an update leaves the centroids unchanged, or
    after ``max_iterations`` updates.

    Args:
        points: The points to cluster, all of the same dimension.
        k: Number of clusters; expected to equal ``len(initial_centroids)``.
        initial_centroids: Starting centroid positions.
        max_iterations: Upper bound on the number of update steps.

    Returns:
        The final centroids as a list of coordinate tuples. With
        ``max_iterations == 0`` the initial centroids are returned.
    """
    # Tuples so the convergence check compares like with like.
    centroids = [tuple(centroid) for centroid in initial_centroids]

    for _ in range(max_iterations):
        # Assignment step: group points by index of the nearest centroid.
        clusters = defaultdict(list)
        for point in points:
            distances = [euclidean_dist(point, centroid) for centroid in centroids]
            clusters[distances.index(min(distances))].append(point)

        # Update step: move each centroid to the mean of its members.
        new_centroids = []
        for i in range(k):
            members = clusters[i]
            if members:
                count = len(members)
                # zip(*members) yields one tuple of values per coordinate axis.
                new_centroids.append(
                    tuple(sum(axis_values) / count for axis_values in zip(*members))
                )
            else:
                # An empty cluster keeps its previous centroid.
                new_centroids.append(centroids[i])

        if new_centroids == centroids:
            break
        # Carry the update into the next iteration.
        centroids = new_centroids

    return centroids

## 19. Principal Component Analysis (PCA)

In [None]:
import numpy as np
def pca(data: np.ndarray, k: int) -> np.ndarray:
    """Return the top ``k`` principal components of ``data``.

    Steps:
        1. Standardize every feature (column) to zero mean, unit variance.
        2. Compute the covariance matrix of the standardized features.
        3. Eigen-decompose the covariance matrix.
        4. Order the eigenvectors by decreasing eigenvalue.
        5. Keep the first ``k`` eigenvectors.

    Args:
        data: Array with samples along axis 0 and features along axis 1.
        k: Number of principal components to keep.

    Returns:
        Array of shape ``(n_features, k)`` whose columns are the principal
        directions, rounded to 4 decimal places. Each column's sign is chosen
        so that its largest-magnitude entry is positive.
    """
    data = np.asarray(data, dtype=float)

    # Step 1. A feature whose values are all identical has zero variance;
    # leave it at 0 rather than dividing 0 by 0 and poisoning the covariance
    # matrix with NaN.
    constant = (np.max(data, axis=0) - np.min(data, axis=0)) == 0
    scale = np.where(constant, 1.0, np.std(data, axis=0))
    data_standardized = np.where(
        constant, 0.0, (data - np.mean(data, axis=0)) / scale
    )

    # Step 2. rowvar=False: variables are columns. atleast_2d covers the
    # single-feature case, where np.cov returns a 0-d array.
    cov_mat = np.atleast_2d(np.cov(data_standardized, rowvar=False))

    # Step 3. The covariance matrix is symmetric, so eigh applies; it returns
    # real eigenvalues and orthonormal eigenvectors.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_mat)

    # Steps 4 and 5.
    order = np.argsort(eigenvalues)[::-1]
    principal_components = eigenvectors[:, order[:k]]

    # An eigenvector is only defined up to sign; fix the sign so the result
    # is deterministic.
    pivot_rows = np.argmax(np.abs(principal_components), axis=0)
    signs = np.sign(
        principal_components[pivot_rows, np.arange(principal_components.shape[1])]
    )
    signs[signs == 0] = 1.0

    return np.round(principal_components * signs, 4)