Wczytanie danych

In [1]:
from surprise import Dataset
from surprise import Reader
import pandas as pd

In [4]:
import numpy as np
from surprise import Dataset

# Wczytanie danych MovieLens 100k
data = Dataset.load_builtin('ml-100k')

# Konwersja danych na macierz user-item
raw_data = data.build_full_trainset()
n_users = raw_data.n_users
n_items = raw_data.n_items

# Tworzenie macierzy user-item
user_item_matrix = np.zeros((n_users, n_items))

for user_id, item_id, rating in raw_data.all_ratings():
    user_item_matrix[int(user_id)][int(item_id)] = rating

# Standaryzacja: binarna macierz 1/0 (obejrzane/nieobejrzane)
user_item_matrix = (user_item_matrix > 0).astype(np.float32)


Definicja struktury RBM

In [5]:
class RBM:
    def __init__(self, visible_units, hidden_units, learning_rate=0.1):
        self.visible_units = visible_units
        self.hidden_units = hidden_units
        self.learning_rate = learning_rate

        # Inicjalizacja wag i biasów
        self.weights = np.random.normal(0, 0.01, size=(visible_units, hidden_units))
        self.visible_bias = np.zeros(visible_units)
        self.hidden_bias = np.zeros(hidden_units)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    def sample_hidden(self, visible):
        # Oblicz prawdopodobieństwo aktywacji ukrytej warstwy
        hidden_activations = np.dot(visible, self.weights) + self.hidden_bias
        hidden_probs = self.sigmoid(hidden_activations)
        return hidden_probs, (hidden_probs > np.random.random(hidden_probs.shape)).astype(np.float32)

    def sample_visible(self, hidden):
        # Oblicz prawdopodobieństwo aktywacji widocznej warstwy
        visible_activations = np.dot(hidden, self.weights.T) + self.visible_bias
        visible_probs = self.sigmoid(visible_activations)
        return visible_probs, (visible_probs > np.random.random(visible_probs.shape)).astype(np.float32)

    def train(self, data, epochs=10, batch_size=10):
        for epoch in range(epochs):
            for i in range(0, data.shape[0], batch_size):
                batch = data[i:i+batch_size]

                # Forward pass
                hidden_probs, hidden_states = self.sample_hidden(batch)

                # Reconstruction
                visible_probs, _ = self.sample_visible(hidden_states)
                hidden_probs_reconstructed, _ = self.sample_hidden(visible_probs)

                # Aktualizacja wag i biasów
                self.weights += self.learning_rate * (
                    np.dot(batch.T, hidden_probs) - np.dot(visible_probs.T, hidden_probs_reconstructed)
                ) / batch_size
                self.visible_bias += self.learning_rate * np.mean(batch - visible_probs, axis=0)
                self.hidden_bias += self.learning_rate * np.mean(hidden_probs - hidden_probs_reconstructed, axis=0)

            print(f"Epoch {epoch + 1}/{epochs} completed")


Trenowanie modelu RBM

In [6]:
# Parametry RBM
visible_units = user_item_matrix.shape[1]  # Liczba filmów
hidden_units = 128  # Liczba neuronów ukrytych (można dostosować)
learning_rate = 0.1
epochs = 10
batch_size = 32

# Inicjalizacja modelu RBM
rbm = RBM(visible_units, hidden_units, learning_rate)

# Trenowanie RBM
rbm.train(user_item_matrix, epochs=epochs, batch_size=batch_size)


Epoch 1/10 completed
Epoch 2/10 completed
Epoch 3/10 completed
Epoch 4/10 completed
Epoch 5/10 completed
Epoch 6/10 completed
Epoch 7/10 completed
Epoch 8/10 completed
Epoch 9/10 completed
Epoch 10/10 completed


Generowanie rekomendacji

In [7]:
def recommend_movies(rbm, user_vector, top_n=10):
    # Przewidzenie ocen dla użytkownika
    hidden_probs, hidden_states = rbm.sample_hidden(user_vector)
    visible_probs, _ = rbm.sample_visible(hidden_states)
    
    # Sortowanie według prawdopodobieństw
    recommendations = np.argsort(-visible_probs)
    
    return recommendations[:top_n]

# Przykład: Generowanie rekomendacji dla pierwszego użytkownika
user_id = 0
user_vector = user_item_matrix[user_id].reshape(1, -1)

recommendations = recommend_movies(rbm, user_vector)
print(f"Rekomendacje dla użytkownika {user_id}: {recommendations}")


Rekomendacje dla użytkownika 0: [[ 289   89   49 ... 1626 1545 1584]]


Ewaluacja modelu

In [8]:
from sklearn.metrics import mean_squared_error

def evaluate_rbm(rbm, test_data):
    predictions = []
    actuals = []
    
    for user_id in range(test_data.shape[0]):
        user_vector = test_data[user_id].reshape(1, -1)
        hidden_probs, hidden_states = rbm.sample_hidden(user_vector)
        visible_probs, _ = rbm.sample_visible(hidden_states)
        
        predictions.append(visible_probs)
        actuals.append(user_vector)
    
    # Obliczenie RMSE
    predictions = np.vstack(predictions)
    actuals = np.vstack(actuals)
    mse = mean_squared_error(actuals, predictions)
    rmse = np.sqrt(mse)
    return rmse

# Obliczenie RMSE dla zbioru testowego
rmse = evaluate_rbm(rbm, user_item_matrix)
print(f"RMSE modelu RBM: {rmse}")


RMSE modelu RBM: 0.19393752693677194



Przykład działania RBM na konkretnym użytkowniku

In [9]:
# Funkcja, która pokazuje ocenione filmy i rekomendacje
def show_user_recommendations(user_id, rbm, user_item_matrix, top_n=10):
    user_vector = user_item_matrix[user_id].reshape(1, -1)
    
    # Filmy już ocenione przez użytkownika
    watched = np.where(user_vector.flatten() > 0)[0]
    
    # Generowanie rekomendacji
    recommendations = recommend_movies(rbm, user_vector, top_n=top_n)
    
    print(f"Użytkownik {user_id} obejrzał filmy: {watched}")
    print(f"Rekomendacje dla użytkownika {user_id}: {recommendations}")

# Przykład dla użytkownika o ID 0
show_user_recommendations(0, rbm, user_item_matrix, top_n=5)


Użytkownik 0 obejrzał filmy: [   0   10   83   86   92  179  221  289  291  302  321  329  363  365
  377  380  389  423  431  432  438  466  487  491  512  521  522  528
  550  632  649  656  665  672  751  834  947 1006 1045]
Rekomendacje dla użytkownika 0: [[ 289   49   77 ... 1521 1087 1584]]


In [13]:
import numpy as np
import pandas as pd

# Wczytanie pliku z nazwami filmów
column_names = ['movie_id', 'title', 'release_date', 'video_release_date', 'url'] + [f'genre_{i}' for i in range(19)]
movies = pd.read_csv('../ml-100k/u.item', sep='|', header=None, encoding='latin-1', names=column_names, usecols=['movie_id', 'title'])

# Klasa RBM
class RBM:
    def __init__(self, visible_units, hidden_units, learning_rate=0.1):
        self.visible_units = visible_units
        self.hidden_units = hidden_units
        self.learning_rate = learning_rate
        self.weights = np.random.normal(0, 0.01, size=(visible_units, hidden_units))
        self.visible_bias = np.zeros(visible_units)
        self.hidden_bias = np.zeros(hidden_units)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sample_hidden(self, visible):
        hidden_activations = np.dot(visible, self.weights) + self.hidden_bias
        hidden_probs = self.sigmoid(hidden_activations)
        return hidden_probs, (hidden_probs > np.random.random(hidden_probs.shape)).astype(np.float32)

    def sample_visible(self, hidden):
        visible_activations = np.dot(hidden, self.weights.T) + self.visible_bias
        visible_probs = self.sigmoid(visible_activations)
        return visible_probs, (visible_probs > np.random.random(visible_probs.shape)).astype(np.float32)

    def train(self, data, epochs=10, batch_size=10):
        for epoch in range(epochs):
            for i in range(0, data.shape[0], batch_size):
                batch = data[i:i + batch_size]
                hidden_probs, hidden_states = self.sample_hidden(batch)
                visible_probs, _ = self.sample_visible(hidden_states)
                hidden_probs_reconstructed, _ = self.sample_hidden(visible_probs)
                self.weights += self.learning_rate * (
                    np.dot(batch.T, hidden_probs) - np.dot(visible_probs.T, hidden_probs_reconstructed)
                ) / batch_size
                self.visible_bias += self.learning_rate * np.mean(batch - visible_probs, axis=0)
                self.hidden_bias += self.learning_rate * np.mean(hidden_probs - hidden_probs_reconstructed, axis=0)
            print(f"Epoch {epoch + 1}/{epochs} completed")

# Wczytanie danych MovieLens
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')
raw_data = data.build_full_trainset()
n_users = raw_data.n_users
n_items = raw_data.n_items

# Tworzenie macierzy użytkownik-przedmiot
user_item_matrix = np.zeros((n_users, n_items))
for user_id, item_id, rating in raw_data.all_ratings():
    user_item_matrix[int(user_id)][int(item_id)] = rating
user_item_matrix = (user_item_matrix > 0).astype(np.float32)

# Trenowanie RBM
visible_units = user_item_matrix.shape[1]
hidden_units = 128
learning_rate = 0.1
epochs = 10
batch_size = 32

rbm = RBM(visible_units, hidden_units, learning_rate)
rbm.train(user_item_matrix, epochs=epochs, batch_size=batch_size)

# Funkcja rekomendacji
def recommend_movies(rbm, user_vector, top_n=5):
    hidden_probs, hidden_states = rbm.sample_hidden(user_vector)
    visible_probs, _ = rbm.sample_visible(hidden_states)
    recommendations = np.argsort(-visible_probs.flatten())
    return recommendations[:top_n]

# Wyświetlanie rekomendacji dla użytkownika
def show_user_recommendations_with_titles(user_id, rbm, user_item_matrix, movies, top_n=5):
    user_vector = user_item_matrix[user_id].reshape(1, -1)
    watched = np.where(user_vector.flatten() > 0)[0] + 1  # Dopasowanie indeksów
    watched_titles = movies[movies['movie_id'].isin(watched)]['title'].tolist()
    recommendations = recommend_movies(rbm, user_vector, top_n=top_n) + 1  # Dopasowanie indeksów
    recommended_titles = movies[movies['movie_id'].isin(recommendations)]['title'].tolist()

    print(f"Użytkownik {user_id} obejrzał filmy:")
    for title in watched_titles:
        print(f" - {title}")
    print("\nRekomendacje dla użytkownika:")
    for i, title in enumerate(recommended_titles, start=1):
        print(f"{i}. {title}")

# Przykład dla użytkownika o ID 0
show_user_recommendations_with_titles(0, rbm, user_item_matrix, movies, top_n=5)


Epoch 1/10 completed
Epoch 2/10 completed
Epoch 3/10 completed
Epoch 4/10 completed
Epoch 5/10 completed
Epoch 6/10 completed
Epoch 7/10 completed
Epoch 8/10 completed
Epoch 9/10 completed
Epoch 10/10 completed
Użytkownik 0 obejrzał filmy:
 - Toy Story (1995)
 - Seven (Se7en) (1995)
 - Robert A. Heinlein's The Puppet Masters (1994)
 - Searching for Bobby Fischer (1993)
 - Welcome to the Dollhouse (1995)
 - Apocalypse Now (1979)
 - Star Trek: First Contact (1996)
 - Fierce Creatures (1997)
 - Rosewood (1997)
 - Ulee's Gold (1997)
 - Murder at 1600 (1997)
 - 187 (1997)
 - Ace Ventura: When Nature Calls (1995)
 - Dangerous Minds (1995)
 - Miracle on 34th Street (1994)
 - Muriel's Wedding (1994)
 - Fear of a Black Hat (1993)
 - Children of the Corn: The Gathering (1996)
 - Fantasia (1940)
 - Heathers (1989)
 - Amityville: A New Generation (1993)
 - Bronx Tale, A (1993)
 - Sunset Blvd. (1950)
 - East of Eden (1955)
 - Third Man, The (1949)
 - Down by Law (1986)
 - Cool Hand Luke (1967)
 - M