In [None]:



import pandas as pd import numpy as np import scipy.sparse as sp from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity from sklearn.model_selection import train_test_split from sklearn.preprocessing import MinMaxScaler import networkx as n



import pandas as pd
import numpy as np
import scipy.sparse as sp
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import networkx as nx
import surprise

class AdvancedMusicRecommendationSystem:
    """
    Track recommender offering content-based, collaborative, network-based
    and hybrid strategies over a single catalogue loaded from CSV.

    The methods read the catalogue columns ``Title``, ``Artist``, ``Genre``,
    ``Album``, ``Popularity``, ``Duration`` and ``Release Date``.

    Typical use: construct, call ``preprocess_data()`` once, then call any of
    the recommender methods. If ``preprocess_data()`` has not been called the
    recommenders run it themselves on first use.
    """

    # Columns returned by the catalogue-backed recommenders.
    _TRACK_COLUMNS = ['Title', 'Artist', 'Genre', 'Popularity_Score']

    def __init__(self, data_path):
        """
        Load the raw catalogue.

        Args:
            data_path: Location of the CSV file, passed straight to
                ``pandas.read_csv``.
        """
        self.raw_data = pd.read_csv(data_path)
        # Set by preprocess_data() to a dict with the keys 'dataframe',
        # 'user_item_matrix' and 'artist_genre_graph'.
        self.processed_data = None
        # Not read or written by any method of this class; presumably
        # reserved for caching fitted models - confirm before relying on it.
        self.recommendation_models = {}

    def _processed(self):
        """
        Return the preprocessing artefacts, building them on first use.

        Calling a recommender before ``preprocess_data()`` used to fail with
        a ``TypeError`` on ``None``; preprocessing lazily avoids that.
        """
        if self.processed_data is None:
            self.preprocess_data()
        return self.processed_data

    def preprocess_data(self):
        """
        Derive features, the artist/title matrix and the artist-genre graph.

        Stores the three artefacts in ``self.processed_data`` (keys
        ``'dataframe'``, ``'user_item_matrix'``, ``'artist_genre_graph'``).

        Returns:
            The feature-enriched copy of the raw catalogue.
        """
        df = self._engineer_features(self.raw_data)

        # Artists play the role of "users" and titles the role of "items";
        # cells hold Popularity (mean-aggregated by pivot_table), 0 if absent.
        user_item_matrix = df.pivot_table(
            index='Artist',
            columns='Title',
            values='Popularity',
            fill_value=0
        )

        artist_genre_graph = self._create_artist_genre_network(df)

        self.processed_data = {
            'dataframe': df,
            'user_item_matrix': user_item_matrix,
            'artist_genre_graph': artist_genre_graph
        }

        return df

    @staticmethod
    def _engineer_features(raw_df):
        """
        Return a copy of ``raw_df`` with the derived feature columns added.

        Added columns: ``text_features``, ``Popularity_Score``,
        ``Release_Date`` and ``Age_Factor``. Missing ``Artist`` / ``Genre``
        values are replaced by ``'Unknown'``. The input frame is not modified.
        """
        df = raw_df.copy()

        df['Artist'] = df['Artist'].fillna('Unknown')
        df['Genre'] = df['Genre'].fillna('Unknown')

        # A missing album must not turn the whole text feature into NaN
        # (TF-IDF rejects NaN documents). It contributes no token, so tracks
        # are not made artificially similar by a shared placeholder word.
        album_text = df['Album'].fillna('')
        df['text_features'] = df['Artist'] + ' ' + df['Genre'] + ' ' + album_text

        # Popularity scaled by the log of duration relative to the mean.
        # NOTE: a non-positive Duration yields -inf or NaN here.
        relative_duration = df['Duration'] / df['Duration'].mean()
        df['Popularity_Score'] = df['Popularity'] * (1 + np.log(relative_duration))

        # Temporal relevance: 1 for this year's releases, decaying with age.
        df['Release_Date'] = pd.to_datetime(df['Release Date'])
        current_year = pd.Timestamp.now().year
        # Future-dated releases are treated as age 0; without the clip an age
        # of -1 divides by zero and older "negative ages" flip the sign.
        age_years = (current_year - df['Release_Date'].dt.year).clip(lower=0)
        df['Age_Factor'] = 1 / (1 + age_years)

        return df

    def _create_artist_genre_network(self, df):
        """
        Build an undirected graph linking each artist to its genres.

        Nodes are the distinct ``Artist`` and ``Genre`` values, tagged with a
        ``type`` attribute of ``'artist'`` or ``'genre'``; an edge joins an
        artist and a genre whenever some row of ``df`` pairs them.

        NOTE: nodes are keyed by the raw name, so a value that occurs both as
        an artist and as a genre (e.g. the ``'Unknown'`` placeholder used for
        both columns) collapses into one node whose ``type`` is ``'genre'``.
        """
        G = nx.Graph()

        G.add_nodes_from(df['Artist'].unique(), type='artist')
        G.add_nodes_from(df['Genre'].unique(), type='genre')

        # One pass over the distinct (artist, genre) pairs instead of
        # re-filtering the frame once per genre.
        pairs = df[['Artist', 'Genre']].dropna().drop_duplicates()
        G.add_edges_from(pairs.itertuples(index=False, name=None))

        return G

    def content_based_recommender(self, seed_track, top_n=5):
        """
        Recommend the tracks whose text features are closest to the seed's.

        Similarity is the cosine similarity between TF-IDF vectors of the
        ``text_features`` column.

        Args:
            seed_track: ``Title`` of the reference track. If several rows
                share the title, the first one is used.
            top_n: Maximum number of tracks to return.

        Returns:
            DataFrame with ``Title``, ``Artist``, ``Genre`` and
            ``Popularity_Score``, most similar first, never containing the
            seed row itself.

        Raises:
            IndexError: If ``seed_track`` is not in the catalogue.
        """
        df = self._processed()['dataframe']

        # Work with positions, not index labels: both the TF-IDF matrix and
        # .iloc below are positional.
        seed_positions = np.flatnonzero((df['Title'] == seed_track).to_numpy())
        if seed_positions.size == 0:
            raise IndexError(
                f"seed track {seed_track!r} not found in the dataset"
            )
        seed_position = int(seed_positions[0])

        tfidf = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf.fit_transform(df['text_features'])

        similarity = cosine_similarity(
            tfidf_matrix[seed_position], tfidf_matrix
        ).ravel()

        # Drop the seed explicitly. Assuming it sorts first is wrong when
        # another track ties with it at similarity 1.0.
        ranked = np.argsort(-similarity, kind='stable')
        ranked = ranked[ranked != seed_position][:top_n]

        return df.iloc[ranked][self._TRACK_COLUMNS]

    def collaborative_filtering(self, target_artist, top_n=5):
        """
        Recommend titles for an artist with an SVD model from ``surprise``.

        Artists act as users, titles as items and ``Popularity`` as the
        rating (declared to the library as a 0-100 scale).

        Args:
            target_artist: Artist to recommend for.
            top_n: Maximum number of titles to return.

        Returns:
            DataFrame with ``Title`` and ``Predicted_Popularity``, highest
            prediction first, restricted to titles the artist does not
            already have. Empty when the artist has no tracks in the
            catalogue.
        """
        df = self._processed()['dataframe']
        result_columns = ['Title', 'Predicted_Popularity']

        own_titles = set(df.loc[df['Artist'] == target_artist, 'Title'])
        if not own_titles:
            # Unknown artist: nothing to base a recommendation on.
            return pd.DataFrame([], columns=result_columns)

        reader = surprise.Reader(rating_scale=(0, 100))
        # load_from_df needs the DataFrame itself (user, item, rating column
        # order); a bare ndarray has no itertuples() and is rejected.
        data = surprise.Dataset.load_from_df(
            df[['Artist', 'Title', 'Popularity']],
            reader
        )

        # No evaluation happens here, so train on every rating rather than
        # discarding a held-out split.
        algo = surprise.SVD()
        algo.fit(data.build_full_trainset())

        # Predictions depend only on (artist, title), so score each
        # candidate title exactly once.
        scored = [
            (title, algo.predict(target_artist, title).est)
            for title in df['Title'].unique()
            if title not in own_titles
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)

        return pd.DataFrame(scored[:top_n], columns=result_columns)

    def network_based_recommender(self, seed_artist, top_n=5):
        """
        Recommend tracks by artists reachable from the seed in the graph.

        Every other artist connected to ``seed_artist`` is scored as
        ``betweenness centrality / shortest-path distance``; all tracks of
        the ``top_n`` best-scoring artists are returned.

        Args:
            seed_artist: Artist node to start from.
            top_n: Number of artists (not tracks) to keep.

        Returns:
            DataFrame with ``Title``, ``Artist``, ``Genre`` and
            ``Popularity_Score`` for the selected artists, in catalogue order.

        Raises:
            networkx.NodeNotFound: If ``seed_artist`` is not in the graph.
        """
        processed = self._processed()
        G = processed['artist_genre_graph']
        df = processed['dataframe']

        centrality = nx.betweenness_centrality(G)

        # A single breadth-first search gives the distance to every
        # reachable node (replaces has_path + shortest_path_length per node).
        distances = dict(nx.single_source_shortest_path_length(G, seed_artist))

        # Only artists are valid candidates. Genre nodes sit one hop away
        # with the highest centrality and would otherwise win the ranking,
        # after which the Artist filter below would match nothing.
        artist_names = set(df['Artist'])
        scored = [
            (node, centrality.get(node, 0) / distance, distance)
            for node, distance in distances.items()
            if node != seed_artist and node in artist_names
        ]

        # Highest score first; equal scores (common, since an artist linked
        # to a single genre has betweenness 0) prefer the closer artist.
        scored.sort(key=lambda item: (-item[1], item[2]))
        top_artists = [node for node, _, _ in scored[:top_n]]

        recommendations = df[df['Artist'].isin(top_artists)]

        return recommendations[self._TRACK_COLUMNS]

    @classmethod
    def _rank_candidates(cls, catalogue, content_based, collaborative,
                         network_based, top_n):
        """
        Merge the three candidate lists and rank them by Popularity_Score.

        ``collaborative`` only carries ``Title`` and ``Predicted_Popularity``,
        so its titles are looked up in ``catalogue`` first. Concatenating it
        unchanged leaves its rows without score, artist and genre, so they
        never influence the ranking.
        """
        collaborative_tracks = catalogue.loc[
            catalogue['Title'].isin(collaborative['Title']),
            cls._TRACK_COLUMNS
        ]

        combined = pd.concat([
            content_based,
            collaborative_tracks,
            network_based
        ])

        # One row per title: mean score, first artist/genre encountered.
        scores = combined.groupby('Title').agg({
            'Popularity_Score': 'mean',
            'Artist': 'first',
            'Genre': 'first'
        }).reset_index()

        return scores.sort_values(
            'Popularity_Score',
            ascending=False
        ).head(top_n)

    def hybrid_recommender(self, seed_track, top_n=5):
        """
        Combine the content-based, collaborative and network strategies.

        The collaborative and network strategies are seeded with the artist
        of ``seed_track``. Candidates from all three are pooled, collapsed to
        one row per title and ranked by mean ``Popularity_Score``.

        Args:
            seed_track: ``Title`` of the reference track.
            top_n: Number of candidates requested from each strategy and
                maximum number of rows returned.

        Returns:
            DataFrame with ``Title``, ``Popularity_Score``, ``Artist`` and
            ``Genre``, best score first.

        Raises:
            IndexError: If ``seed_track`` is not in the catalogue.
        """
        df = self._processed()['dataframe']

        content_based = self.content_based_recommender(seed_track, top_n)

        seed_artist = df.loc[df['Title'] == seed_track, 'Artist'].values[0]

        collaborative = self.collaborative_filtering(seed_artist, top_n)
        network_based = self.network_based_recommender(seed_artist, top_n)

        return self._rank_candidates(
            df, content_based, collaborative, network_based, top_n
        )

# Example usage
def main():
    """
    Run every recommendation strategy once on 'music_data.csv' and print
    each result under its own heading.
    """
    recommender = AdvancedMusicRecommendationSystem('music_data.csv')
    recommender.preprocess_data()

    demo_track = 'Include name this.'
    demo_artist = 'Patrick Anderson'

    # (heading, strategy, seed) - executed strictly in this order so each
    # heading is printed before its strategy runs.
    demonstrations = (
        ("Content-Based Recommendations:",
         recommender.content_based_recommender, demo_track),
        ("\nCollaborative Filtering Recommendations:",
         recommender.collaborative_filtering, demo_artist),
        ("\nNetwork-Based Recommendations:",
         recommender.network_based_recommender, demo_artist),
        ("\nHybrid Recommendations:",
         recommender.hybrid_recommender, demo_track),
    )

    for heading, strategy, seed in demonstrations:
        print(heading)
        print(strategy(seed, top_n=5))


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
