<a href="https://colab.research.google.com/github/fionatjahjono/ml_project/blob/main/recommender_system_collaborative_filtering_ps133.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
!pip install scikit-surprise
from flask import Flask, request, jsonify
import pandas as pd
from surprise import Reader, Dataset
from surprise.model_selection import train_test_split, KFold, GridSearchCV
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, SVD
from surprise import accuracy


In [None]:
# Read the three source tables (events, per-user ratings, categories) from CSV.
event_table, sentiment_analysis, category_table = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/event_table.csv',
        '/content/sentiment_analysis.csv',
        '/content/category_table.csv',
    )
)

In [None]:
# Build one flat ratings table: each rating row is joined to its event
# (name + category id) and then to the category details.
merged_data = (
    sentiment_analysis[['event_id', 'user_id', 'rating']]
    .merge(event_table[['event_id', 'event_name', 'category_id']], on='event_id')
    .merge(category_table, on='category_id')
)

In [None]:
# Wrap the (user, item, rating) triples in a Surprise Dataset.
# Items are identified by event_name; ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    merged_data.loc[:, ['user_id', 'event_name', 'rating']],
    reader,
)

In [None]:
# Split data into training and testing sets
# test_size=0.2 holds out 20% of the ratings; random_state=42 pins the shuffle
# so the same split is produced on every run.
# NOTE(review): neither `trainset` nor `testset` is read by any later cell
# visible here (the grid search and the final fit both use `data`) — confirm
# whether a hold-out evaluation step is missing.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [None]:
# Define a cross-validation iterator
# 5 folds. The iterator is seeded so that fold assignment, and therefore the
# cross-validated scores reported by the grid search, are the same on every
# run; the seed matches the one used for train_test_split.
kf = KFold(n_splits=5, random_state=42)

In [None]:
# Train models with GridSearchCV to find the best parameters
# Every combination of the listed SVD hyper-parameters is cross-validated with
# `kf` and scored on RMSE and MAE.
param_grid = {
    'n_epochs': [5, 10],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.4, 0.6],
    # Single-valued entry: does not enlarge the search, it only pins SVD's
    # random factor initialisation so scores and the selected parameters are
    # reproducible between runs.
    'random_state': [42],
}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=kf)

gs.fit(data)

In [None]:
# Best RMSE score
# i.e. the best cross-validated RMSE found over all parameter combinations
# tried by the grid search (lower is better).
print(gs.best_score['rmse'])

In [None]:
# Combination of parameters that gave the best RMSE score
# Printed as a dict keyed by the parameter names used in `param_grid`.
print(gs.best_params['rmse'])

In [None]:
# We can now use the algorithm that yields the best rmse:
# Take the SVD instance configured with the best-RMSE parameters and train it
# on every available rating (full trainset, no hold-out).
# NOTE(review): `algo` is not used by any later cell visible here — the Flask
# endpoint serves popularity rankings only. Confirm whether personalised
# predictions were meant to be wired in.
algo = gs.best_estimator['rmse']
algo.fit(data.build_full_trainset())

In [None]:
# Function to generate event recommendations based on popularity
def generate_popularity_recommendations(n=10, data=None):
    """Return the ``n`` best-rated events.

    Ratings are grouped by ``(event_name, category_name)`` and summarised by
    their mean and their count. Events are ranked by mean rating, highest
    first; events with the same mean are ranked by number of ratings, highest
    first, so the ordering no longer depends on how an unstable single-key
    sort happens to arrange ties.

    Args:
        n: Maximum number of events to return. Must not be negative.
        data: Optional DataFrame with ``event_name``, ``category_name`` and
            ``rating`` columns. When omitted, the notebook-level
            ``merged_data`` frame is used.

    Returns:
        A DataFrame indexed by ``(event_name, category_name)`` with the
        columns ``mean`` and ``count``, containing at most ``n`` rows.

    Raises:
        ValueError: If ``n`` is negative. ``DataFrame.head`` would otherwise
            interpret it as "all rows except the last ``|n|``".
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n!r}")
    if data is None:
        data = merged_data

    event_popularity = (
        data.groupby(['event_name', 'category_name'])['rating']
        .agg(['mean', 'count'])
        .sort_values(by=['mean', 'count'], ascending=False)
    )
    return event_popularity.head(n)

In [None]:
# Flask app
app = Flask(__name__)

@app.route('/recommend', methods=['POST'])
def recommend():
    """Serve the top-``n`` popular events as JSON.

    Expects a JSON object body with:
        user_id: required. It is validated for presence only; the popularity
            ranking returned here does not depend on it.
        n: optional non-negative integer, default 10.

    Returns:
        200 with a JSON array ordered best-first, one object per event with
        the keys ``event_name``, ``category_name``, ``mean`` and ``count``.
        400 with ``{"error": ...}`` when the body is not a JSON object,
        ``user_id`` is missing, or ``n`` is not a non-negative integer.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'user_id' not in payload:
        return jsonify({'error': "a JSON object with a 'user_id' field is required"}), 400

    n = payload.get('n', 10)
    # bool is a subclass of int; reject it explicitly so `true` is not read as 1.
    if isinstance(n, bool) or not isinstance(n, int) or n < 0:
        return jsonify({'error': "'n' must be a non-negative integer"}), 400

    popularity_recommendations = generate_popularity_recommendations(n)
    # The frame is indexed by (event_name, category_name). A plain to_dict()
    # would therefore produce tuple dictionary keys, which the JSON encoder
    # rejects. Moving the index into columns and emitting one record per row
    # gives a serialisable payload that also preserves the ranking order.
    records = popularity_recommendations.reset_index().to_dict(orient='records')
    return jsonify(records)

if __name__ == '__main__':
    # Debug mode is deliberately off: it enables Werkzeug's interactive
    # debugger (which lets anyone who can reach the port execute code) and the
    # auto-reloader, which restarts the process and is not suited to running
    # inside a notebook kernel.
    # Note: this call blocks the cell until the server is interrupted.
    app.run(debug=False)

In [None]:
# Test run before Flask: read the three source tables from CSV again.
event_table, sentiment_analysis, category_table = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/event_table.csv',
        '/content/sentiment_analysis.csv',
        '/content/category_table.csv',
    )
)

In [None]:
# Build one flat ratings table: each rating row is joined to its event
# (name + category id) and then to the category details.
merged_data = (
    sentiment_analysis[['event_id', 'user_id', 'rating']]
    .merge(event_table[['event_id', 'event_name', 'category_id']], on='event_id')
    .merge(category_table, on='category_id')
)

In [None]:
# Wrap the (user, item, rating) triples in a Surprise Dataset.
# Items are identified by event_name; ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    merged_data.loc[:, ['user_id', 'event_name', 'rating']],
    reader,
)

In [None]:
# Split data into training and testing sets
# test_size=0.2 holds out 20% of the ratings; random_state=42 pins the shuffle
# so the same split is produced on every run.
# NOTE(review): neither `trainset` nor `testset` is read by any later cell
# visible here — confirm whether a hold-out evaluation step is missing.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [None]:
# Define a cross-validation iterator
# 5 folds. The iterator is seeded so that fold assignment, and therefore the
# cross-validated scores reported by the grid search, are the same on every
# run; the seed matches the one used for train_test_split.
kf = KFold(n_splits=5, random_state=42)

In [None]:
# Train models with GridSearchCV to find the best parameters
# Every combination of the listed SVD hyper-parameters is cross-validated with
# `kf` and scored on RMSE and MAE.
param_grid = {
    'n_epochs': [5, 10],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.4, 0.6],
    # Single-valued entry: does not enlarge the search, it only pins SVD's
    # random factor initialisation so scores and the selected parameters are
    # reproducible between runs.
    'random_state': [42],
}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=kf)

gs.fit(data)

In [None]:
# Best RMSE score
# i.e. the best cross-validated RMSE found over all parameter combinations
# tried by the grid search (lower is better).
print(gs.best_score['rmse'])

In [None]:
# Combination of parameters that gave the best RMSE score
# Printed as a dict keyed by the parameter names used in `param_grid`.
print(gs.best_params['rmse'])

In [None]:
# We can now use the algorithm that yields the best rmse:
# Take the SVD instance configured with the best-RMSE parameters and train it
# on every available rating (full trainset, no hold-out).
# NOTE(review): `algo` is not used by any later cell visible here — confirm
# whether personalised predictions were meant to be produced from it.
algo = gs.best_estimator['rmse']
algo.fit(data.build_full_trainset())

In [None]:
# Function to generate event recommendations based on popularity
# NOTE: this cell redefines a function of the same name from an earlier cell;
# whichever cell ran last is the definition in effect.
def generate_popularity_recommendations(n=10, data=None):
    """Return the ``n`` best-rated events.

    Ratings are grouped by ``(event_name, category_name)`` and summarised by
    their mean and their count. Events are ranked by mean rating, highest
    first; events with the same mean are ranked by number of ratings, highest
    first, so the ordering no longer depends on how an unstable single-key
    sort happens to arrange ties.

    Args:
        n: Maximum number of events to return. Must not be negative.
        data: Optional DataFrame with ``event_name``, ``category_name`` and
            ``rating`` columns. When omitted, the notebook-level
            ``merged_data`` frame is used.

    Returns:
        A DataFrame indexed by ``(event_name, category_name)`` with the
        columns ``mean`` and ``count``, containing at most ``n`` rows.

    Raises:
        ValueError: If ``n`` is negative. ``DataFrame.head`` would otherwise
            interpret it as "all rows except the last ``|n|``".
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n!r}")
    if data is None:
        data = merged_data

    event_popularity = (
        data.groupby(['event_name', 'category_name'])['rating']
        .agg(['mean', 'count'])
        .sort_values(by=['mean', 'count'], ascending=False)
    )
    return event_popularity.head(n)

In [None]:
# Generate and print popularity recommendations
# Called without arguments, so the function's default of n=10 applies.
# The result is a DataFrame indexed by (event_name, category_name) with the
# columns 'mean' and 'count'; print() shows its plain-text rendering.
popularity_recommendations = generate_popularity_recommendations()
print('\nTop Event Popularity Recommendations:')
print(popularity_recommendations)