In [3]:
# Enable IPython's autoreload extension so that edits to imported modules
# (e.g. the local data_generation library) are picked up without restarting
# the kernel.
%load_ext autoreload
# Mode 2: reload all modules before each cell is executed.
%autoreload 2

In [1]:
import sys
sys.path.append('../lib')  # Import the lib directory to access the functions

import data_generation as dg  # Importing the functions
import pandas as pd

# --- Source catalogue --------------------------------------------------------
# Read the cleaned IMDb movie table; the CSV must already exist at this path.
df_movies = pd.read_csv('../data/imdb_clean.csv')
# Shift the index up by one so it starts at 1 and can stand in for movie_id.
df_movies.index += 1

# --- Synthetic users ---------------------------------------------------------
# Number of user accounts to generate.
num_users = 1000
df_users = dg.generate_users(num_users)

# Emotion labels passed to the interaction generator.
emotions = [
    'Happy',
    'Sad',
    'Excited',
    'Relaxed',
    'Scared',
    'Inspired',
    'Sweet',
]

# Partition the users into an "active" and a "less active" group.
active_users, less_active_users = dg.classify_users(df_users)

# --- Synthetic activity ------------------------------------------------------
# Size argument handed to the interaction generator.
num_interactions = 50000
df_interactions = dg.generate_interactions(
    num_interactions,
    df_movies,
    df_users,
    active_users,
    less_active_users,
    emotions,
)

# Favorites are derived from the interactions and ratings from the favorites.
# NOTE(review): the sampling shares (reportedly ~30% and ~50%) are decided
# inside the dg helpers -- confirm there.
df_favorites = dg.generate_favorites(df_interactions)
df_ratings = dg.generate_ratings(df_favorites)

# --- Persist to disk ---------------------------------------------------------
# Write every generated table to its CSV file, omitting the DataFrame index.
for frame, csv_path in (
    (df_users, '../data/users.csv'),
    (df_interactions, '../data/interactions.csv'),
    (df_favorites, '../data/favorites.csv'),
    (df_ratings, '../data/ratings.csv'),
):
    frame.to_csv(csv_path, index=False)

# Open the MySQL connection; the helper returns a (connection, cursor) pair.
conn, cursor = dg.connect_to_mysql()

try:
    # Insert the generated tables into MySQL. Users go first, then the
    # tables derived from them.
    # NOTE(review): presumably the later tables reference users via foreign
    # keys, which would make this order mandatory -- confirm against the schema.
    # NOTE(review): no explicit conn.commit() is issued here; this assumes the
    # dg.insert_* helpers commit their own work -- confirm in the library.
    dg.insert_users(df_users, conn, cursor)
    dg.insert_interactions(df_interactions, conn, cursor)
    dg.insert_favorites(df_favorites, conn, cursor)
    dg.insert_ratings(df_ratings, conn, cursor)
finally:
    # Always release the connection, even when an insert raises; otherwise a
    # failed run leaves it open for the lifetime of the notebook kernel.
    conn.close()