In [15]:
# Cell 1: Imports and Configurations
import pandas as pd
from annoy import AnnoyIndex
import os

# Constants
data_path = 'data/processed/processed_data.csv'
num_features = 11

In [16]:
# Cell 2: Functions for Building the Annoy Index
def build_annoy_index():
    print("Building Annoy index...")

    # Ensure the models directory exists
    os.makedirs('models', exist_ok=True)

    # Load processed data
    combined_df = pd.read_csv(data_path)

    # Extract features for Annoy
    feature_columns = ['listeners', 'playcount', 'danceability', 'energy', 'valence', 'popularity', 'tempo', 'acousticness', 'instrumentalness', 'liveness', 'speechiness']
    features = combined_df[feature_columns].fillna(0).values

    # Initialize Annoy index
    index = AnnoyIndex(num_features, 'angular')

    # Add items to the index
    for i, feature_vector in enumerate(features):
        index.add_item(i, feature_vector)

    # Build the index
    index.build(10)  # Number of trees
    index.save('models/annoy_index.ann')
    print("Annoy index built and saved successfully.")


In [17]:
# Cell 3: Build the Annoy Index
build_annoy_index()

Building Annoy index...
Annoy index built and saved successfully.


In [18]:
# Cell 4: Generating Recommendations
def generate_annoy_recommendations(item_id, num_recommendations=10):
    # Load the Annoy index
    index = AnnoyIndex(num_features, 'angular')
    index.load('models/annoy_index.ann')
    
    # Get nearest neighbors
    nearest_neighbors = index.get_nns_by_item(item_id, num_recommendations + 1)
    
    # Load processed data
    combined_df = pd.read_csv(data_path)
    
    # Get recommendations
    recommendations = combined_df.iloc[nearest_neighbors]
    
    return recommendations

def get_recommendations(artist_name, num_recommendations=10):
    # Load processed data
    combined_df = pd.read_csv(data_path)

    # Find the index of the artist
    artist_index = combined_df.index[combined_df['name'] == artist_name.lower().strip()].tolist()
    if not artist_index:
        print(f"Artist {artist_name} not found in the dataset.")
        return

    # Generate recommendations
    recommendations = generate_annoy_recommendations(artist_index[0], num_recommendations)
    print("Recommendations:\n", recommendations)

# Example usage
get_recommendations("Elis Regina", 5)

Recommendations:
            name  listeners  playcount  \
0   elis regina   4.358899   4.358899   
2             0  -0.229416  -0.229416   
19            0  -0.229416  -0.229416   
4             0  -0.229416  -0.229416   
15            0  -0.229416  -0.229416   
11            0  -0.229416  -0.229416   

                                                  bio  \
0   Elis Regina Carvalho Costa (March 17, 1945, Po...   
2                                                   0   
19                                                  0   
4                                                   0   
15                                                  0   
11                                                  0   

                           album_name release_date  total_tracks  \
0                           Trem Azul   2023-03-17            25   
2                       Dois Na Bossa   2022-08-19            10   
19                             Um Dia   2012-01-24            27   
4   Falso Brilhante (R