In [5]:
import os

import numpy as np
import pymysql
from scipy.spatial.distance import cosine

def fetch_records_and_albums(connection):
    """Read all listening records and all album feature rows.

    Args:
        connection: Database connection whose ``cursor()`` can be used as a
            context manager yielding a cursor with ``execute`` and
            ``fetchall`` (a PyMySQL connection in this script).

    Returns:
        A ``(records, albums)`` pair. ``records`` is the result of selecting
        ``user_id, album_id`` from ``user_record``; ``albums`` is the result
        of selecting ``id`` followed by seven ``music_*`` columns from
        ``album``.
    """
    record_query = "SELECT user_id, album_id FROM user_record"
    album_query = """
            SELECT id, music_acousticness, music_danceability, music_energy, 
                   music_liveness, music_loudness,
                   music_tempo, music_valence 
            FROM album
        """

    fetched = []
    with connection.cursor() as cur:
        # Run the queries strictly in order on the same cursor, draining each
        # result set before issuing the next statement.
        for statement in (record_query, album_query):
            cur.execute(statement)
            fetched.append(cur.fetchall())

    listening_rows, album_rows = fetched
    return listening_rows, album_rows

def calculate_average_features(user_records, album_dict, feature_indices):
    """Average the selected album features over a set of listening records.

    Args:
        user_records: Iterable of ``(user_id, album_id)`` pairs. Only the
            album id is used; an album that appears in several records is
            counted once per record.
        album_dict: Mapping from album id to that album's feature sequence.
        feature_indices: Positions within each feature sequence to average.

    Returns:
        A NumPy array with one mean per entry of ``feature_indices``. Records
        whose album id is missing from ``album_dict`` are ignored; when no
        record matches, an all-zero array is returned.
    """
    width = len(feature_indices)
    running_total = np.zeros(width)
    matched = 0

    for _, album_id in user_records:
        if album_id not in album_dict:
            continue
        running_total += np.array(album_dict[album_id])[feature_indices]
        matched += 1

    if matched:
        return running_total / matched
    # Nothing matched: return zeros rather than dividing by zero.
    return np.zeros(width)

def _rank_albums_by_similarity(album_dict, target_vector, feature_indices):
    """Rank albums by cosine similarity to ``target_vector``.

    Args:
        album_dict: Mapping from album id to a NumPy feature array.
        target_vector: Reference vector, already restricted to
            ``feature_indices``.
        feature_indices: Positions of the album features to compare.

    Returns:
        A list of ``(album_id, similarity)`` pairs sorted from most to least
        similar. Albums whose similarity is undefined are ranked last.
    """
    scored = []
    for album_id, features in album_dict.items():
        candidate = features[feature_indices]
        similarity = float("-inf")
        # Cosine similarity is undefined for an all-zero vector; skip the
        # call in that case so no NaN reaches the sort below.
        if np.any(candidate) and np.any(target_vector):
            value = 1 - cosine(target_vector, candidate)
            if not np.isnan(value):
                similarity = value
        scored.append((album_id, similarity))
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored


def _select_top_albums(ranked_group1, ranked_group2, limit=20):
    """Pick up to ``limit`` distinct album ids by alternating two rankings.

    Taking entries alternately lets both rankings contribute. Concatenating
    them would never reach the second ranking once the first one already
    holds ``limit`` albums.
    """
    chosen = []
    seen = set()
    for pair in zip(ranked_group1, ranked_group2):
        for album_id, _ in pair:
            if album_id in seen:
                continue
            seen.add(album_id)
            chosen.append(album_id)
            if len(chosen) == limit:
                return chosen
    return chosen


def main():
    """Compute the top albums by cosine similarity and store them per user.

    Reads every listening record and album feature row, averages the features
    of the listened albums, ranks all albums against that average for two
    feature groups, picks the top 20 distinct albums, and inserts one
    ``(user_id, album_id)`` row into ``customized_rec`` for every distinct
    user. Rows rejected with ``IntegrityError`` are skipped.

    Connection settings come from the environment: ``MITI_DB_PASSWORD`` is
    required; ``MITI_DB_HOST``, ``MITI_DB_USER`` and ``MITI_DB_NAME`` are
    optional overrides.

    Raises:
        RuntimeError: If ``MITI_DB_PASSWORD`` is not set.
    """
    # The password must never live in source control; take it from the
    # environment instead.
    password = os.environ.get("MITI_DB_PASSWORD")
    if password is None:
        raise RuntimeError(
            "Set the MITI_DB_PASSWORD environment variable before running."
        )

    connection = pymysql.connect(
        host=os.environ.get(
            "MITI_DB_HOST",
            "mitidb.cvm64ss6y2xv.ap-northeast-2.rds.amazonaws.com",
        ),
        user=os.environ.get("MITI_DB_USER", "minseo"),
        password=password,
        database=os.environ.get("MITI_DB_NAME", "mitiDB"),
    )

    # Indices refer to the feature columns after ``id``, in SELECT order:
    # 0 acousticness, 1 danceability, 2 energy, 3 liveness, 4 loudness,
    # 5 tempo, 6 valence.
    features_group1_indices = [1, 2, 4, 5, 6]  # danceability, energy, loudness, tempo, valence
    features_group2_indices = [0, 3]           # acousticness, liveness
    top_n = 20

    try:
        records, albums = fetch_records_and_albums(connection)
        # Skip albums with NULL features and force a float dtype so the
        # vector arithmetic below never sees object arrays.
        album_dict = {
            album[0]: np.array(album[1:], dtype=float)
            for album in albums
            if None not in album[1:]
        }

        if not records or not album_dict:
            print("No user records or usable albums found; nothing was inserted.")
            return

        # NOTE(review): the average is taken over ALL users' records, so every
        # user receives the same albums. If per-user recommendations are
        # intended, group the records by user first -- confirm the intent.
        average_features_group1 = calculate_average_features(
            records, album_dict, features_group1_indices
        )
        average_features_group2 = calculate_average_features(
            records, album_dict, features_group2_indices
        )

        sorted_group1 = _rank_albums_by_similarity(
            album_dict, average_features_group1, features_group1_indices
        )
        sorted_group2 = _rank_albums_by_similarity(
            album_dict, average_features_group2, features_group2_indices
        )
        top_albums = _select_top_albums(sorted_group1, sorted_group2, limit=top_n)

        # A user appears once per listening record; de-duplicate (keeping
        # first-seen order) so each (user, album) pair is inserted only once.
        user_ids = list(dict.fromkeys(user_id for user_id, _ in records))

        with connection.cursor() as cursor:
            for user_id in user_ids:
                for album_id in top_albums:
                    try:
                        cursor.execute(
                            "INSERT INTO customized_rec (user_id, album_id) VALUES (%s, %s)",
                            (user_id, album_id)
                        )
                    except pymysql.IntegrityError:
                        # Row already exists or violates a constraint: skip
                        # it and keep inserting the rest.
                        continue

            connection.commit()

        print(f"Top {top_n} albums by cosine similarity have been inserted into customized_rec table.")

    except pymysql.MySQLError as e:
        print(f"Error: {e}")

    finally:
        connection.close()

# Run the recommendation job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Top 20 albums by cosine similarity have been inserted into customized_rec table.
