In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
column_names = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(url, sep='\t', names=column_names)

In [3]:
epsilon = 0.1
n_arms = ratings['movie_id'].nunique()
movie_ids = ratings['movie_id'].unique()
value_estimates = np.zeros(n_arms)
movie_counts = np.zeros(n_arms)

In [4]:
def select_movie():
    if random.uniform(0, 1) < epsilon:
        return np.random.choice(movie_ids)
    else:
        return movie_ids[np.argmax(value_estimates)]

In [5]:
def update_estimates(movie_id, reward):
    movie_index = np.where(movie_ids == movie_id)[0][0]
    movie_counts[movie_index] += 1
    n = movie_counts[movie_index]
    value_estimates[movie_index] += (reward - value_estimates[movie_index]) / n

In [6]:
n_rounds = 10000
for _ in range(n_rounds):
    selected_movie = select_movie()
    sample_rating = ratings[ratings['movie_id'] == selected_movie].sample(1)['rating'].values[0]
    update_estimates(selected_movie, sample_rating)

In [8]:
top_movies = movie_ids[np.argsort(value_estimates)[-10:][::-1]]
print("Top recommended movies:", list(top_movies))

Top recommended movies: [914, 1653, 1467, 1643, 1159, 851, 1155, 1500, 1449, 753]
