In [None]:
# Vectors are points in some finite-dimensional space

In [None]:
from typing import List
# A vector is modelled as a plain list of numbers.
Vector = List[float]

# Example vector: a four-element list of grades.
grades = [95, 80, 75, 62]

In [None]:
from typing import Union

# Redefine the alias so that a vector's components may be ints as well as floats.
Vector = List[Union[int, float]]

In [5]:
def add(v: Vector, w: Vector) -> Vector:
    """Return the element-wise sum of two vectors of equal length.

    Raises AssertionError when the two vectors differ in length.
    """
    assert len(v) == len(w), 'vectors must be the same length'
    total = []
    for left, right in zip(v, w):
        total.append(left + right)
    return total


# Sanity check: component i of the result is v[i] + w[i]
assert add([1,2,3], [4,5,6]) == [5,7,9]

In [6]:
def subtract(v: Vector, w: Vector) -> Vector:
    """Return the element-wise difference v - w of two vectors of equal length.

    Raises AssertionError when the two vectors differ in length.
    """
    assert len(v) == len(w), 'vectors must be the same length'
    difference = []
    for left, right in zip(v, w):
        difference.append(left - right)
    return difference


# Sanity check: component i of the result is v[i] - w[i]
assert subtract([5,7,9], [4,5,6]) == [1,2,3]

In [None]:
"""
Sometimes we want to sum a list of vectors componentwise -- that is,
create a new vector whose first element is the sum of all the first elements,
whose second element is the sum of all the second elements, and so on.
    """

In [8]:
def vector_sum(vectors: List[Vector]) -> Vector:
    """Return the component-wise sum of a non-empty list of equal-length vectors.

    Element i of the result is the sum of element i of every input vector.
    Raises AssertionError if `vectors` is empty or the vectors differ in length.
    """
    # An empty list gives no way to know the result's length
    assert vectors, "no vectors provided"

    # Every vector must have as many elements as the first one
    expected_size = len(vectors[0])
    assert all(len(vec) == expected_size for vec in vectors), "different sizes"

    # zip(*vectors) regroups the inputs by position: one tuple per component
    return [sum(components) for components in zip(*vectors)]


assert vector_sum([[1,2], [3,4], [5,6], [7,8]]) == [16,20]

In [None]:
# Matrix --> a 2D collection of numbers
# Matrices are important to us to represent a dataset consisting of multiple vectors
# Matrices can be used to represent binary relationships

In [None]:
"""

Imagine you work for a movie recommendation app. Each user has preferences (genre scores), 
and each movie has genre attributes. You want to:
	1.	Represent users and movies as vectors
	2.	Use dot products to find how well a movie matches a user’s taste
	3.	Use distance to compare user similarity

"""

In [1]:
# Represent users and movies as vectors over the same genre axes.
# Component order: [Action, Comedy, Drama]

# User vectors: how strongly each user prefers each genre
user_1, user_2 = [5, 1, 2], [1, 4, 4]

# Movie vectors: how much of each genre each movie has
movie_1, movie_2 = [5, 2, 1], [1, 5, 3]

In [None]:
# Write Some Vector Functions

from typing import List
import math

# NOTE(review): this re-declares Vector as List[float], replacing the
# int-or-float alias defined in an earlier cell; the user/movie vectors
# used with these functions hold ints.
Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    """Return the dot product v_1 * w_1 + ... + v_n * w_n.

    Raises AssertionError when the vectors differ in length. Without this
    check zip() stops at the shorter input and the extra components of the
    longer vector would be dropped silently.
    """
    assert len(v) == len(w), 'vectors must be the same length'
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def magnitude(v: Vector) -> float:
    """Return the length of v: the square root of v dotted with itself."""
    sum_of_squares = dot(v, v)
    return math.sqrt(sum_of_squares)

def distance(v: Vector, w: Vector) -> float:
    """Return the distance between v and w, i.e. the magnitude of v - w.

    Raises AssertionError when the vectors differ in length. Without this
    check zip() stops at the shorter input, so the result would ignore the
    extra components of the longer vector.
    """
    assert len(v) == len(w), 'vectors must be the same length'
    difference = [v_i - w_i for v_i, w_i in zip(v, w)]
    return magnitude(difference)

def normalize(v: Vector) -> Vector:
    """Return a new vector pointing the same way as v, scaled by 1 / magnitude(v).

    A vector whose magnitude is 0 cannot be scaled, so an unchanged copy of
    it is returned. The result is always a new list, never the input object,
    so callers may mutate it without affecting `v`.
    """
    mag = magnitude(v)
    if mag == 0:
        # avoid division by 0; copy so the caller's list is not aliased
        return list(v)
    return [v_i / mag for v_i in v]

def cosine_similarity(v: Vector, w: Vector) -> float:
    """Return dot(v, w) divided by the product of the two magnitudes.

    The value measures how closely the directions of v and w agree,
    independent of their lengths.

    Returns 0.0 when the product of the magnitudes is 0 (for example when
    either input is a zero vector) instead of raising ZeroDivisionError.
    """
    norm_product = magnitude(v) * magnitude(w)
    if norm_product == 0:
        # A zero vector has no direction; report "no similarity", in the same
        # spirit as normalize()'s zero-magnitude guard.
        return 0.0
    return dot(v, w) / norm_product

# Chat says these operations are generalized

Refresher

A vector is just a list of numbers
Vector arithmetic is used in feature engineering, weight updates and movement through data space
Dot product lets you know how aligned two vectors are. It's used in similarity, projections, linear regression
Magnitude = length of the vector
Distance = how far two points are from each other.
Magnitude and Distance are used in clustering, nearest neighbors, recommendation systems


In [4]:
# Score how well each movie matches each user via the dot product
for label, user, movie in (
    ("User 1 x Movie 1:", user_1, movie_1),
    ("User 1 x Movie 2:", user_1, movie_2),
    ("User 2 x Movie 1:", user_2, movie_1),
    ("User 2 x Movie 2:", user_2, movie_2),
):
    print(label, dot(user, movie))

User 1 x Movie 1: 29
User 1 x Movie 2: 16
User 2 x Movie 1: 17
User 2 x Movie 2: 33


In [8]:
# Compare how similar two users are: a smaller distance means closer tastes.
# The value is rounded to two decimals for display only.
print(
    "Distance between User 1 and User2:",
    round(distance(user_1, user_2), 2),
)

Distance between User 1 and User2: 5.39


In [9]:
# Normalizing a Vector

# NOTE(review): normalize is also defined in the earlier vector-functions cell;
# this definition replaces it once this cell runs.
def normalize(v: Vector) -> Vector:
    """Return a new vector pointing the same way as v, scaled by 1 / magnitude(v).

    A vector whose magnitude is 0 cannot be scaled, so an unchanged copy of
    it is returned. The result is always a new list, never the input object,
    so callers may mutate it without affecting `v`.
    """
    mag = magnitude(v)
    if mag == 0:
        # avoid division by 0; copy so the caller's list is not aliased
        return list(v)
    return [v_i / mag for v_i in v]


# Normalize every user and movie vector in one pass
norm_user_1, norm_user_2, norm_movie_1, norm_movie_2 = (
    normalize(vec) for vec in (user_1, user_2, movie_1, movie_2)
)

# Show one normalized user and one normalized movie
print("Normalized User 1:", norm_user_1)
print("Normalized Movie 1:", norm_movie_1)

Normalized User 1: [0.9128709291752769, 0.18257418583505536, 0.3651483716701107]
Normalized Movie 1: [0.9128709291752769, 0.3651483716701107, 0.18257418583505536]


# Why normalize? When we care what someone likes, not how much they like it
# What is cosine similarity -> Cosine similarity measures how aligned two vectors are—i.e., how similar their direction is, not their size.

In [10]:
# NOTE(review): cosine_similarity is also defined in the earlier
# vector-functions cell; this definition replaces it once this cell runs.
def cosine_similarity(v: Vector, w: Vector) -> float:
    """Return dot(v, w) divided by the product of the two magnitudes.

    The value measures how closely the directions of v and w agree,
    independent of their lengths.

    Returns 0.0 when the product of the magnitudes is 0 (for example when
    either input is a zero vector) instead of raising ZeroDivisionError.
    """
    norm_product = magnitude(v) * magnitude(w)
    if norm_product == 0:
        # A zero vector has no direction; report "no similarity", in the same
        # spirit as normalize()'s zero-magnitude guard.
        return 0.0
    return dot(v, w) / norm_product

# Compare the direction of user 1's preferences with movie 1's genre mix
cos_sim = cosine_similarity(v=norm_user_1, w=norm_movie_1)
print("Cosine similarity:", cos_sim)

Cosine similarity: 0.9666666666666667


Cosine Similarity
Interpretation
1.0 -> Perfect match (same direction)
~0.9 -> Strong similarity
~0.5 -> Somewhat similar
0 -> No similarity (orthogonal)
< 0 -> Opposite preferences


In [11]:
# Group the individual vectors into lists. `movies` is the candidate pool
# handed to recommend_movie below; `users` is not referenced again in the
# visible cells.
users = [user_1, user_2]
movies = [movie_1, movie_2]

def recommend_movie(user: Vector, movies: List[Vector]) -> Vector:
    """Return the movie vector with the highest dot-product score for `user`.

    On a tie the earliest movie in `movies` wins, because a later movie must
    score strictly higher to replace the current best. The result is None
    when `movies` is empty or no score exceeds negative infinity.
    """
    top_score, top_movie = float('-inf'), None
    for candidate in movies:
        candidate_score = dot(user, candidate)
        if candidate_score > top_score:
            top_score, top_movie = candidate_score, candidate
    return top_movie

# Recommend the best-scoring movie for each user
for label, user in (
    ("Top movie for User 1:", user_1),
    ("Top movie for User 2:", user_2),
):
    print(label, recommend_movie(user, movies))

Top movie for User 1: [5, 2, 1]
Top movie for User 2: [1, 5, 3]
