Importing Libraries and Mounting Google Drive

In [None]:
import gym
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
from sklearn.preprocessing import StandardScaler


Loading Datasets

In [None]:
# Read the pre-filtered music, book and movie catalogues from the mounted
# Google Drive folder (in that order).
music_data, books_data, movies_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/FYP/music_filtered_data.csv',
        '/content/drive/MyDrive/FYP/book_filtered_data.csv',
        '/content/drive/MyDrive/FYP/movie_filtered_data.csv',
    )
)

# Preview the first rows of each catalogue, one table per line group.
print(music_data.head(), books_data.head(), movies_data.head(), sep='\n')


Preprocessing the Data

In [None]:
def preprocess_data(df):
    """Forward-fill gaps, standardise numeric columns and one-hot encode the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalogue table to transform. The caller's frame is left untouched;
        all changes are applied to the copy returned by ``ffill``.

    Returns
    -------
    pandas.DataFrame
        A new frame in which numeric columns have been passed through
        ``StandardScaler`` and the remaining categorical columns have been
        expanded by ``pd.get_dummies``.

    Notes
    -----
    Forward fill copies the previous row's value downwards, so a value that
    is missing in the very first row has nothing to copy from and stays
    missing.
    """
    # ``fillna(method='ffill')`` is deprecated in current pandas releases;
    # ``ffill()`` is the supported spelling of the same operation.
    df = df.ffill()

    # Identify numeric features
    numeric_features = df.select_dtypes(include=[np.number])

    # ``.empty`` is true when there are no numeric columns *or* no rows,
    # both of which the scaler cannot be fitted on.
    if not numeric_features.empty:
        # Normalize numeric features (zero mean, unit variance per column)
        scaler = StandardScaler()
        df[numeric_features.columns] = scaler.fit_transform(numeric_features)

    # One-hot encode categorical features
    return pd.get_dummies(df)

# Run every catalogue through the shared preprocessing pipeline
# (books first, then music, then movies) and rebind the same names.
books_data, music_data, movies_data = (
    preprocess_data(catalogue)
    for catalogue in (books_data, music_data, movies_data)
)

# Show the first rows of each transformed catalogue.
print(books_data.head(), music_data.head(), movies_data.head(), sep='\n')


Defining the Environment

In [None]:
class RecommenderEnv:
    """Toy recommendation environment with randomly simulated dynamics.

    The environment holds three item catalogues (music, books, movies). A
    state is a ``(1, len(emotional_states))`` array whose entries are drawn
    at random from ``emotional_states``; transitions do not depend on the
    chosen action.

    Attributes
    ----------
    data : dict
        Maps ``'music'`` / ``'books'`` / ``'movies'`` to the catalogue given
        to the constructor.
    action_space : dict
        Maps each item type to the number of entries (``len``) in its
        catalogue.
    state : numpy.ndarray or None
        Most recent state returned by ``reset`` or ``step``; ``None`` before
        the first ``reset``.
    """

    def __init__(self, music_data, books_data, movies_data, emotional_states):
        self.music_data = music_data
        self.books_data = books_data
        self.movies_data = movies_data
        self.emotional_states = emotional_states
        self.state = None
        self.data = {
            'music': self.music_data,
            'books': self.books_data,
            'movies': self.movies_data
        }
        self.action_space = {
            item_type: len(catalogue) for item_type, catalogue in self.data.items()
        }

    def _sample_state(self):
        """Draw a fresh random state of shape ``(1, len(emotional_states))``."""
        return np.random.choice(self.emotional_states, size=(1, len(self.emotional_states)))

    def reset(self):
        """Start a new episode and return its initial state."""
        self.state = self._sample_state()
        return self.state

    def step(self, action, item_type, user_feedback=None):
        """Apply ``action`` for ``item_type`` and return ``(next_state, reward, done)``.

        ``action`` is wrapped into the catalogue's index range with a modulo.

        Raises
        ------
        KeyError
            If ``item_type`` is not one of the known catalogues.
        ValueError
            If the selected catalogue is empty, so no item can be chosen.
        """
        n_items = self.action_space[item_type]
        if n_items == 0:
            # The modulo below would otherwise fail with a bare ZeroDivisionError.
            raise ValueError(f"Cannot recommend from empty catalogue: {item_type}")
        action = action % n_items  # Ensure the action is within the valid range
        next_state, reward, done = self._simulate_interaction(action, item_type, user_feedback)
        # Keep the tracked state in sync with what the caller observes.
        self.state = next_state
        return next_state, reward, done

    def _simulate_interaction(self, action, item_type, user_feedback):
        """Simulate one interaction.

        The reward is ``1`` for ``'like'`` feedback, ``-1`` for any other
        feedback value, and a uniform random number in ``[0, 1)`` when no
        feedback is given. The episode ends with probability 0.1 per step.
        """
        next_state = self._sample_state()

        if user_feedback is not None:
            reward = 1 if user_feedback == 'like' else -1
        else:
            reward = float(np.random.rand())

        # Return plain Python types so callers never have to deal with
        # NumPy scalar arithmetic quirks.
        done = bool(np.random.rand() < 0.1)
        return next_state, reward, done


Defining the Actor and Critic Networks

In [None]:
class Actor(nn.Module):
    """Policy network: one 128-unit ReLU hidden layer, softmax over actions."""

    def __init__(self, state_dim, action_dim):
        super().__init__()
        # state_dim -> 128 -> action_dim
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, action_dim)

    def forward(self, state):
        """Return action probabilities, normalised along the last dimension."""
        hidden = self.fc1(state).relu()
        logits = self.fc2(hidden)
        return logits.softmax(dim=-1)

class Critic(nn.Module):
    """State-value network: one 128-unit ReLU hidden layer, single scalar output."""

    def __init__(self, state_dim):
        super().__init__()
        # state_dim -> 128 -> 1
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, state):
        """Return the estimated value, with a trailing dimension of size 1."""
        hidden = nn.functional.relu(self.fc1(state))
        return self.fc2(hidden)


Training the Model

In [None]:
from torch.distributions import Categorical

def train_with_feedback(env, actor, critic, actor_optimizer, critic_optimizer, episodes=1000, gamma=0.99):
    """Train ``actor`` and ``critic`` with one-step TD actor-critic updates.

    For every step an item type and a like/dislike feedback are drawn at
    random, the actor samples an action, and both networks are updated from
    the resulting transition.

    Parameters
    ----------
    env
        Object exposing ``reset() -> state`` and
        ``step(action, item_type, user_feedback) -> (next_state, reward, done)``.
    actor
        Module mapping a state tensor to action probabilities.
    critic
        Module mapping a state tensor to a value estimate.
    actor_optimizer, critic_optimizer
        Optimizers over the actor's and critic's parameters respectively.
    episodes : int
        Number of episodes to run.
    gamma : float
        Discount factor applied to the bootstrapped next-state value.

    Returns
    -------
    None
        Progress is printed every 10 episodes.
    """
    item_types = ['music', 'books', 'movies']

    for episode in range(episodes):
        state = torch.FloatTensor(env.reset())
        done = False
        episode_reward = 0

        while not done:
            item_type = np.random.choice(item_types)
            m = Categorical(actor(state))
            action = m.sample()

            # Simulate user feedback
            user_feedback = np.random.choice(['like', 'dislike'])

            next_state, reward, done = env.step(action.item(), item_type, user_feedback)
            next_state = torch.FloatTensor(next_state)
            reward = torch.FloatTensor([reward])

            # TD target. It is a fixed regression target, so it is computed
            # without gradient tracking; otherwise the critic loss would also
            # push the next-state estimate towards the current one.
            with torch.no_grad():
                not_terminal = 1.0 - float(done)  # works for bool and numpy.bool_
                target = reward + not_terminal * gamma * critic(next_state)

            # Critic update
            value = critic(state)
            critic_loss = (target - value).pow(2).mean()
            critic_optimizer.zero_grad()
            critic_loss.backward()
            critic_optimizer.step()

            # Actor update: the advantage is a constant weight on the
            # log-probability, so it must not carry gradients.
            advantage = (target - value).detach()
            actor_loss = (-m.log_prob(action) * advantage).mean()
            actor_optimizer.zero_grad()
            actor_loss.backward()
            actor_optimizer.step()

            state = next_state
            episode_reward += reward.item()

        if episode % 10 == 0:
            print(f'Episode {episode}, Reward: {episode_reward}')

# Example usage
# Seed NumPy and PyTorch so that weight initialisation, sampled actions and
# the simulated feedback are repeatable on a fresh kernel.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

emotional_states = np.arange(5)  # Assume 5 different emotional states

env = RecommenderEnv(music_data, books_data, movies_data, emotional_states)
state_dim = len(emotional_states)
# One actor output per row of the largest catalogue; the environment wraps
# the action index with a modulo for the smaller catalogues.
action_dim = max(len(music_data), len(books_data), len(movies_data))

actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)

actor_optimizer = optim.Adam(actor.parameters(), lr=0.001)
critic_optimizer = optim.Adam(critic.parameters(), lr=0.001)

train_with_feedback(env, actor, critic, actor_optimizer, critic_optimizer)


Interactive Recommendation System

In [None]:
def get_user_feedback():
    """Ask the user whether they liked the recommendation.

    Accepts ``yes``/``y`` and ``no``/``n`` in any letter case and with
    surrounding whitespace. Any other answer (including an empty line) is
    rejected and the question is asked again, so a typo is never silently
    recorded as a dislike.

    Returns
    -------
    str
        ``'like'`` for a positive answer, ``'dislike'`` for a negative one.
    """
    while True:
        feedback = input("Did you like the recommendation? (yes/no): ").strip().lower()
        if feedback in ('yes', 'y'):
            return 'like'
        if feedback in ('no', 'n'):
            return 'dislike'
        print("Please answer 'yes' or 'no'.")

def interactive_recommendation_system(env, actor):
    """Recommend items interactively until the environment ends the episode.

    Each round samples an action from ``actor``, picks an item type at
    random, prints the corresponding catalogue row, asks the user for
    feedback and forwards that feedback to ``env.step``.

    Parameters
    ----------
    env
        Environment exposing ``reset``, ``step``, ``data`` (item type ->
        catalogue supporting ``.iloc``) and ``action_space`` (item type ->
        catalogue size).
    actor
        Module mapping a state tensor to action probabilities.
    """
    # Explicit singular labels for the printout: dropping the last character
    # works for 'books' and 'movies' but turns 'music' into 'musi'.
    singular_label = {'music': 'music', 'books': 'book', 'movies': 'movie'}

    state = torch.FloatTensor(env.reset())
    done = False

    while not done:
        # Inference only, so skip building the autograd graph.
        with torch.no_grad():
            action_probs = actor(state)
        sampled = Categorical(action_probs).sample()

        item_type = str(np.random.choice(['music', 'books', 'movies']))  # Randomly choose item type
        action = sampled.item() % env.action_space[item_type]  # Ensure the action is within the valid range
        recommendation = env.data[item_type].iloc[action]
        print(f"Recommended {singular_label[item_type]}: {recommendation}")

        user_feedback = get_user_feedback()
        # The reward is not needed here; only the next state and the
        # termination flag drive the loop.
        next_state, _reward, done = env.step(action, item_type, user_feedback)
        state = torch.FloatTensor(next_state)

# Example usage for interactive system: runs one interactive session with the
# `env` and trained `actor` created in the training cell above. The call waits
# for keyboard input after every recommendation and returns once the
# environment reports the episode as done.
interactive_recommendation_system(env, actor)
