<a href="https://colab.research.google.com/github/itsmepriyabrata/priyabrata_ai_python/blob/main/recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

collaborative flitering

In [None]:
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import train_test_split
from surprise import accuracy

data = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(data, test_size=0.25)

sim_options = {
    'name': 'cosine',
    'user_based': True
}

algo = KNNBasic(sim_options=sim_options)
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)

user_id = str(196)
item_id = str(302)
pred = algo.predict(user_id, item_id, r_ui=4, verbose=True)


Content-based filtering

In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

movies = {
    'title': ['The Matrix', 'The Matrix Reloaded', 'The Matrix Revolutions', 'The Godfather', 'The Dark Knight', 'Inception'],
    'genre': ['Action, Sci-Fi', 'Action, Sci-Fi', 'Action, Sci-Fi', 'Crime, Drama', 'Action, Crime, Drama', 'Action, Adventure, Sci-Fi']
}

df = pd.DataFrame(movies)
print("Movie DataFrame:")
print(df)

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['genre'])
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
print("\nCosine Similarity Matrix:")
print(cosine_sim)

def get_recommendations(title, cosine_sim=cosine_sim):
    idx = df[df['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:4]
    movie_indices = [i[0] for i in sim_scores]
    return df['title'].iloc[movie_indices]

recommendations = get_recommendations('The Matrix')
print("\nRecommendations for 'The Matrix':")
print(recommendations)


Movie DataFrame:
                    title                      genre
0              The Matrix             Action, Sci-Fi
1     The Matrix Reloaded             Action, Sci-Fi
2  The Matrix Revolutions             Action, Sci-Fi
3           The Godfather               Crime, Drama
4         The Dark Knight       Action, Crime, Drama
5               Inception  Action, Adventure, Sci-Fi

Cosine Similarity Matrix:
[[1.         1.         1.         0.         0.21060348 0.701038  ]
 [1.         1.         1.         0.         0.21060348 0.701038  ]
 [1.         1.         1.         0.         0.21060348 0.701038  ]
 [0.         0.         0.         1.         0.91471225 0.        ]
 [0.21060348 0.21060348 0.21060348 0.91471225 1.         0.14764104]
 [0.701038   0.701038   0.701038   0.         0.14764104 1.        ]]

Recommendations for 'The Matrix':
1       The Matrix Reloaded
2    The Matrix Revolutions
5                 Inception
Name: title, dtype: object


Matrix factorization techniques

In [5]:
import numpy as np

class MatrixFactorization:
    def __init__(self, R, K, alpha, beta, iterations):
        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])
        self.samples = [(i, j, self.R[i, j]) for i in range(self.num_users) for j in range(self.num_items) if self.R[i, j] > 0]
        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
        return training_process

    def mse(self):
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x, y] - predicted[x, y], 2)
        return np.sqrt(error)

    def sgd(self):
        for i, j, r in self.samples:
            prediction = self.get_prediction(i, j)
            e = (r - prediction)
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i, :])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j, :])

    def get_prediction(self, i, j):
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        return prediction

    def full_matrix(self):
        return self.b + self.b_u[:, np.newaxis] + self.b_i[np.newaxis:, ] + self.P.dot(self.Q.T)

R = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])

mf = MatrixFactorization(R, K=2, alpha=0.1, beta=0.01, iterations=20)
training_process = mf.train()
print(mf.full_matrix())


[[5.05723399 2.78316384 7.14249152 1.0587515 ]
 [3.89254245 1.6218153  5.78247828 1.00649681]
 [1.12315857 0.72153566 2.74280182 5.01059543]
 [0.99726485 0.34352795 2.65129873 3.98273841]
 [2.87701184 1.62466409 4.58310464 3.88803962]]
