In [7]:
# [Cell 1] - Import required libraries
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time
from datetime import datetime
import numpy as np

In [10]:
# [Cell 2] - Load and process Spotify data with genres
def get_track_genres(sp, track_name, artist_name):
    """
    Get genre information for a track using Spotify API with error handling
    """
    try:
        # Search for the track
        results = sp.search(q=f"track:{track_name} artist:{artist_name}", type='track', limit=1)
        
        if not results['tracks']['items']:
            return []
        
        # Get artist ID
        artist_id = results['tracks']['items'][0]['artists'][0]['id']
        
        # Get artist details including genres
        artist = sp.artist(artist_id)
        return artist['genres']
    
    except Exception as e:
        print(f"Error getting genres for {track_name} by {artist_name}: {str(e)}")
        return []

def process_spotify_data(client_id, client_secret):
    """
    Process Spotify data with API calls and error handling
    """
    # Initialize Spotify client
    client_credentials_manager = SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret
    )
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    
    # Read streaming history
    with open('StreamingHistory_music_2.json', 'r', encoding='utf-8') as f:
        spotify_data = json.load(f)
    
    # Convert to DataFrame
    df = pd.DataFrame(spotify_data)
    
    # Get unique artist-track combinations
    unique_tracks = df[['artistName', 'trackName']].drop_duplicates()
    
    # Process tracks in batches with delay to avoid timeout
    batch_size = 10
    processed_tracks = []
    
    for i in range(0, len(unique_tracks), batch_size):
        batch = unique_tracks.iloc[i:i+batch_size]
        
        for _, row in batch.iterrows():
            genres = get_track_genres(sp, row['trackName'], row['artistName'])
            processed_tracks.append({
                'artist': row['artistName'],
                'track': row['trackName'],
                'genres': genres
            })
        
        # Add delay between batches
        time.sleep(1)
    
    # Create genre mapping dictionary
    genre_mapping = {
        (track['artist'], track['track']): track['genres']
        for track in processed_tracks
    }
    
    # Add genres to original DataFrame
    df['date'] = pd.to_datetime(df['endTime']).dt.date
    df['genres'] = df.apply(lambda row: genre_mapping.get((row['artistName'], row['trackName']), []), axis=1)
    
    return df

In [11]:
# [Cell 3] - Process spending data
def get_spending_data():
    """
    Read and process bank transaction data
    """
    bank_df = pd.read_excel('HesapHareketleri_28.11.2024_0124383 (1).xlsx', 
                           skiprows=8,
                           names=['Date', 'Time', 'Amount', 'Balance', 'Description', 'Receipt'])
    
    bank_df = bank_df[bank_df['Date'].str.match(r'\d{2}\.\d{2}\.\d{4}', na=False)]
    bank_df['date'] = pd.to_datetime(bank_df['Date'], format='%d.%m.%Y').dt.date
    bank_df['Amount'] = pd.to_numeric(bank_df['Amount'], errors='coerce')
    
    daily_spending = bank_df.groupby('date')['Amount'].sum().reset_index()
    return daily_spending

In [12]:
# [Cell 4] - Analyze genre-spending relationships
def analyze_genre_spending(spotify_df, spending_df):
    """
    Analyze relationship between genres and spending
    """
    # Explode genres list to get one row per genre
    genre_df = spotify_df.explode('genres')
    genre_df = genre_df.rename(columns={'genres': 'genre'})
    
    # Remove rows where genre is None
    genre_df = genre_df.dropna(subset=['genre'])
    
    # Calculate daily listening time per genre
    genre_daily = genre_df.groupby(['date', 'genre'])['msPlayed'].sum().reset_index()
    genre_daily['minutes_played'] = genre_daily['msPlayed'] / (1000 * 60)
    
    # Merge with spending data
    merged_df = pd.merge(genre_daily, spending_df, on='date', how='inner')
    
    # Calculate statistics for each genre
    genre_stats = []
    min_days = 5  # Minimum number of days to consider
    
    for genre in merged_df['genre'].unique():
        genre_data = merged_df[merged_df['genre'] == genre]
        if len(genre_data) >= min_days:
            correlation = genre_data['minutes_played'].corr(genre_data['Amount'])
            avg_spending = genre_data['Amount'].mean()
            
            genre_stats.append({
                'genre': genre,
                'correlation': correlation,
                'total_minutes': genre_data['minutes_played'].sum(),
                'avg_spending': avg_spending,
                'days_listened': len(genre_data)
            })
    
    return pd.DataFrame(genre_stats)

In [13]:
# [Cell 5] - Create visualizations
def create_visualizations(genre_stats_df):
    """
    Create genre analysis visualizations
    """
    # Sort by absolute correlation
    genre_stats_df['abs_correlation'] = abs(genre_stats_df['correlation'])
    genre_stats_df = genre_stats_df.sort_values('abs_correlation', ascending=False)
    
    # Select top genres by listening time
    top_genres = genre_stats_df.nlargest(10, 'total_minutes')
    
    # Create figure
    plt.style.use('default')
    fig, axes = plt.subplots(2, 1, figsize=(12, 12))
    
    # Plot 1: Correlations
    sns.barplot(data=top_genres,
                x='correlation',
                y='genre',
                ax=axes[0])
    axes[0].set_title('Top 10 Genre-Spending Correlations')
    axes[0].set_xlabel('Correlation Coefficient')
    axes[0].axvline(x=0, color='black', linestyle='-', alpha=0.2)
    
    # Plot 2: Average spending by genre
    sns.barplot(data=top_genres,
                x='avg_spending',
                y='genre',
                ax=axes[1])
    axes[1].set_title('Average Daily Spending by Genre')
    axes[1].set_xlabel('Average Spending (TL)')
    
    plt.tight_layout()
    return fig

In [1]:
# [Cell 2] - Load and process Spotify data with genres
def get_track_genres(sp, track_name, artist_name):
    """
    Get genre information for a track using Spotify API with error handling
    """
    try:
        # Search for the track
        results = sp.search(q=f"track:{track_name} artist:{artist_name}", type='track', limit=1)
        
        if not results['tracks']['items']:
            print(f"No results found for {track_name} by {artist_name}")
            return None
        
        # Get artist ID
        artist_id = results['tracks']['items'][0]['artists'][0]['id']
        
        # Get artist details including genres
        artist = sp.artist(artist_id)
        print(f"Found genres for {artist_name}: {artist['genres']}")
        return artist['genres']
    
    except Exception as e:
        print(f"Error getting genres for {track_name} by {artist_name}: {str(e)}")
        return None

def process_spotify_data(client_id, client_secret):
    """
    Process Spotify data with API calls and error handling
    """
    # Initialize Spotify client with custom settings
    client_credentials_manager = SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret
    )
    sp = spotipy.Spotify(
        client_credentials_manager=client_credentials_manager,
        requests_timeout=30
    )
    
    # Read streaming history
    with open('StreamingHistory_music_2.json', 'r', encoding='utf-8') as f:
        spotify_data = json.load(f)
    
    # Convert to DataFrame
    df = pd.DataFrame(spotify_data)
    
    # Get unique artist-track combinations
    unique_tracks = df[['artistName', 'trackName']].drop_duplicates()
    print(f"Total unique tracks to process: {len(unique_tracks)}")
    
    # Process tracks in batches
    batch_size = 5  # Process 5 tracks at a time
    processed_tracks = []
    
    for i in range(0, len(unique_tracks), batch_size):
        batch = unique_tracks.iloc[i:i+batch_size]
        batch_num = i//batch_size + 1
        total_batches = len(unique_tracks)//batch_size + 1
        print(f"\nProcessing batch {batch_num}/{total_batches}")
        
        for _, row in batch.iterrows():
            genres = get_track_genres(sp, row['trackName'], row['artistName'])
            if genres:
                processed_tracks.append({
                    'artist': row['artistName'],
                    'track': row['trackName'],
                    'genres': genres
                })
            time.sleep(1)  # Wait 1 second between tracks
        
        time.sleep(2)  # Wait 2 seconds between batches
        
        # Save progress after each batch
        with open('genre_progress.json', 'w', encoding='utf-8') as f:
            json.dump(processed_tracks, f, ensure_ascii=False, indent=2)
    
    print(f"\nSuccessfully processed {len(processed_tracks)} tracks")
    
    # Create genre mapping dictionary
    genre_mapping = {
        (track['artist'], track['track']): track['genres']
        for track in processed_tracks
        if track['genres']
    }
    
    # Add genres to original DataFrame
    df['date'] = pd.to_datetime(df['endTime']).dt.date
    df['genres'] = df.apply(lambda row: genre_mapping.get((row['artistName'], row['trackName']), []), axis=1)
    
    return df# [Cell 1] - Import required libraries
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time
from datetime import datetime
import numpy as np

# [Cell 2] - Load and process Spotify data with genres
def get_track_genres(sp, track_name, artist_name):
    """
    Get genre information for a track using Spotify API with error handling
    """
    try:
        time.sleep(1)  # Add delay between API calls
        # Search for the track
        results = sp.search(q=f"track:{track_name} artist:{artist_name}", type='track', limit=1)
        
        if not results['tracks']['items']:
            print(f"No results found for {track_name} by {artist_name}")
            return None
        
        # Get artist ID
        artist_id = results['tracks']['items'][0]['artists'][0]['id']
        
        time.sleep(1)  # Add delay between API calls
        # Get artist details including genres
        artist = sp.artist(artist_id)
        return artist['genres']
    
    except Exception as e:
        print(f"Error getting genres for {track_name} by {artist_name}: {str(e)}")
        return None

def process_spotify_data(client_id, client_secret):
    """
    Process Spotify data with API calls and error handling
    """
    # Initialize Spotify client with custom settings
    client_credentials_manager = SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret
    )
    sp = spotipy.Spotify(
        client_credentials_manager=client_credentials_manager,
        requests_timeout=30  # Increased timeout for all requests
    )
    
    # Read streaming history
    with open('StreamingHistory_music_2.json', 'r', encoding='utf-8') as f:
        spotify_data = json.load(f)
    
    # Convert to DataFrame
    df = pd.DataFrame(spotify_data)
    
    # Get unique artist-track combinations and apply some filtering
    unique_tracks = df[['artistName', 'trackName']].drop_duplicates()
    print(f"Total unique tracks to process: {len(unique_tracks)}")
    
    # Process tracks in smaller batches with longer delay
    batch_size = 3  # Even smaller batch size
    processed_tracks = []
    
    for i in range(0, len(unique_tracks), batch_size):
        batch = unique_tracks.iloc[i:i+batch_size]
        batch_num = i//batch_size + 1
        total_batches = len(unique_tracks)//batch_size + 1
        print(f"\nProcessing batch {batch_num}/{total_batches}")
        
        for _, row in batch.iterrows():
            print(f"Getting genres for: {row['trackName']} by {row['artistName']}")
            genres = get_track_genres(sp, row['trackName'], row['artistName'])
            if genres:
                processed_tracks.append({
                    'artist': row['artistName'],
                    'track': row['trackName'],
                    'genres': genres
                })
        
        time.sleep(3)  # Even longer delay between batches
    
    print(f"\nSuccessfully processed {len(processed_tracks)} tracks")
    
    # Create genre mapping dictionary
    genre_mapping = {
        (track['artist'], track['track']): track['genres']
        for track in processed_tracks
        if track['genres']  # Only include tracks where we successfully got genres
    }
    
    # Add genres to original DataFrame
    df['date'] = pd.to_datetime(df['endTime']).dt.date
    df['genres'] = df.apply(lambda row: genre_mapping.get((row['artistName'], row['trackName']), []), axis=1)
    
    return df

# [Cell 3] - Process spending data
def get_spending_data():
    """
    Read and process bank transaction data
    """
    bank_df = pd.read_excel('HesapHareketleri_28.11.2024_0124383 (1).xlsx', 
                           skiprows=8,
                           names=['Date', 'Time', 'Amount', 'Balance', 'Description', 'Receipt'])
    
    bank_df = bank_df[bank_df['Date'].str.match(r'\d{2}\.\d{2}\.\d{4}', na=False)]
    bank_df['date'] = pd.to_datetime(bank_df['Date'], format='%d.%m.%Y').dt.date
    bank_df['Amount'] = pd.to_numeric(bank_df['Amount'], errors='coerce')
    
    daily_spending = bank_df.groupby('date')['Amount'].sum().reset_index()
    return daily_spending

# [Cell 4] - Analyze genre-spending relationships
def analyze_genre_spending(spotify_df, spending_df):
    """
    Analyze relationship between genres and spending
    """
    # Explode genres list to get one row per genre
    genre_df = spotify_df.explode('genres')
    genre_df = genre_df.rename(columns={'genres': 'genre'})
    
    # Remove rows where genre is None
    genre_df = genre_df.dropna(subset=['genre'])
    
    # Calculate daily listening time per genre
    genre_daily = genre_df.groupby(['date', 'genre'])['msPlayed'].sum().reset_index()
    genre_daily['minutes_played'] = genre_daily['msPlayed'] / (1000 * 60)
    
    # Merge with spending data
    merged_df = pd.merge(genre_daily, spending_df, on='date', how='inner')
    
    # Calculate statistics for each genre
    genre_stats = []
    min_days = 5  # Minimum number of days to consider
    
    for genre in merged_df['genre'].unique():
        genre_data = merged_df[merged_df['genre'] == genre]
        if len(genre_data) >= min_days:
            correlation = genre_data['minutes_played'].corr(genre_data['Amount'])
            avg_spending = genre_data['Amount'].mean()
            
            genre_stats.append({
                'genre': genre,
                'correlation': correlation,
                'total_minutes': genre_data['minutes_played'].sum(),
                'avg_spending': avg_spending,
                'days_listened': len(genre_data)
            })
    
    return pd.DataFrame(genre_stats)

# [Cell 5] - Create visualizations
def create_visualizations(genre_stats_df):
    """
    Create genre analysis visualizations
    """
    # Sort by absolute correlation
    genre_stats_df['abs_correlation'] = abs(genre_stats_df['correlation'])
    genre_stats_df = genre_stats_df.sort_values('abs_correlation', ascending=False)
    
    # Select top genres by listening time
    top_genres = genre_stats_df.nlargest(10, 'total_minutes')
    
    # Create figure
    plt.style.use('default')
    fig, axes = plt.subplots(2, 1, figsize=(12, 12))
    
    # Plot 1: Correlations
    sns.barplot(data=top_genres,
                x='correlation',
                y='genre',
                ax=axes[0])
    axes[0].set_title('Top 10 Genre-Spending Correlations')
    axes[0].set_xlabel('Correlation Coefficient')
    axes[0].axvline(x=0, color='black', linestyle='-', alpha=0.2)
    
    # Plot 2: Average spending by genre
    sns.barplot(data=top_genres,
                x='avg_spending',
                y='genre',
                ax=axes[1])
    axes[1].set_title('Average Daily Spending by Genre')
    axes[1].set_xlabel('Average Spending (TL)')
    
    plt.tight_layout()
    return fig

# [Cell 6] - Main execution
def main():
    # Spotify API credentials
    CLIENT_ID = '82d3f15f60ee4fd08d64f23a2b2bd8de'
    CLIENT_SECRET = '5b892d88f74a4465af1a66d81c571d15'
    
    try:
        # Get Spotify data with genres
        spotify_df = process_spotify_data(CLIENT_ID, CLIENT_SECRET)
        
        # Get spending data
        spending_df = get_spending_data()
        
        # Analyze relationships
        genre_stats = analyze_genre_spending(spotify_df, spending_df)
        
        # Create and save visualizations
        fig = create_visualizations(genre_stats)
        plt.savefig('genre_spending_analysis.png', dpi=300, bbox_inches='tight')
        plt.close()
        
        # Print results
        print("\nGenre-Spending Analysis Results:")
        print(f"\nTotal genres analyzed: {len(genre_stats)}")
        print("\nTop positive correlations:")
        print(genre_stats.nlargest(5, 'correlation')[['genre', 'correlation', 'avg_spending']].round(2))
        print("\nTop negative correlations:")
        print(genre_stats.nsmallest(5, 'correlation')[['genre', 'correlation', 'avg_spending']].round(2))
        
    except Exception as e:
        print(f"An error occurred: {str(e)}")

if __name__ == "__main__":
    main()

Total unique tracks to process: 1896

Processing batch 1/633
Getting genres for: Zor by Lvbel C5
Getting genres for: Beni Her Yerden Engellemiş by cakal
Getting genres for: Generationwhy by ZHU

Processing batch 2/633
Getting genres for: Hate It Or Love It by The Game
Getting genres for: Cuma by cakal
Getting genres for: Soktuğu Duruma Bak by Murat Dalkılıç

Processing batch 3/633
Getting genres for: Anlamazdın by Ayla Dikmen
Getting genres for: Uzun Lafın Kısası by Ozan Doğulu
Getting genres for: Birtanem - Night Club Mix by Yaşar

Processing batch 4/633
Getting genres for: Bir İhtimal Biliyorum by Gülşen
Getting genres for: Marlon Brando by Zeynep Bastık
Getting genres for: Salty Sip by Hayzei

Processing batch 5/633
Getting genres for: Vent by NOIT
Getting genres for: Something About You by Hayden James
Getting genres for: Sığamıyorum by Bengü

Processing batch 6/633
Getting genres for: Jest Oldu by Mustafa Sandal
Getting genres for: Kalbim (Slow Versiyon) by Bengü
Getting genres fo

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'track:5% TINT artist:Travis Scott', 'limit': 1, 'offset': 0, 'type': 'track', 'market': None} returned 400 due to Invalid string


Error getting genres for 5% TINT by Travis Scott: http status: 400, code:-1 - https://api.spotify.com/v1/search?q=track%3A5%25+TINT+artist%3ATravis+Scott&limit=1&offset=0&type=track:
 Invalid string, reason: None
Getting genres for: I KNOW ? by Travis Scott
Getting genres for: Break from Toronto by PARTYNEXTDOOR

Processing batch 425/633
Getting genres for: OUT WEST (feat. Young Thug) by JACKBOYS
Getting genres for: Sprinter by Dave
Getting genres for: ASTROTHUNDER by Travis Scott

Processing batch 426/633
Getting genres for: BAND4BAND (feat. Lil Baby) by Central Cee
Getting genres for: LOT OF ME by Lil Tecca
Getting genres for: Raindrops (Insane) [with Travis Scott] by Metro Boomin

Processing batch 427/633
Getting genres for: sdp interlude by Travis Scott
Getting genres for: Let Me Know (I Wonder Why Freestyle) by Juice WRLD
Getting genres for: Surround Sound (feat. 21 Savage & Baby Tate) by JID

Processing batch 428/633
Getting genres for: goosebumps by Travis Scott
Getting genres f

In [2]:
# [Cell 1] - Import required libraries
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time
from datetime import datetime
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
import requests
import numpy as np

class SpotifyGenreAnalyzer:
    def __init__(self, client_id, client_secret):
        # Configure retry strategy
        retry_strategy = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504]
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        
        # Initialize Spotify client with retry strategy
        self.client_credentials_manager = SpotifyClientCredentials(
            client_id=client_id,
            client_secret=client_secret
        )
        self.sp = spotipy.Spotify(
            client_credentials_manager=self.client_credentials_manager,
            requests_timeout=30
        )
        self.sp._session.mount("https://", adapter)
        
        # Initialize cache for genres
        self.genre_cache = {}
        
    def get_track_genres(self, track_name, artist_name):
        """Get genre information with caching and error handling"""
        cache_key = f"{artist_name}:{track_name}"
        
        # Check cache first
        if cache_key in self.genre_cache:
            return self.genre_cache[cache_key]
        
        try:
            # Clean track name and artist name
            track_name = track_name.replace('%', '')
            artist_name = artist_name.replace('%', '')
            
            # Search for the track
            results = self.sp.search(
                q=f"track:{track_name} artist:{artist_name}",
                type='track',
                limit=1
            )
            
            if not results['tracks']['items']:
                print(f"No results found for {track_name} by {artist_name}")
                return None
            
            # Get artist ID and details
            artist_id = results['tracks']['items'][0]['artists'][0]['id']
            artist = self.sp.artist(artist_id)
            genres = artist['genres']
            
            # Cache the result
            self.genre_cache[cache_key] = genres
            print(f"Found genres for {artist_name}: {genres}")
            
            return genres
            
        except Exception as e:
            if "429" in str(e):
                print("Rate limit hit, waiting 30 seconds...")
                time.sleep(30)
                return self.get_track_genres(track_name, artist_name)
            print(f"Error getting genres for {track_name} by {artist_name}: {str(e)}")
            return None
    
    def process_streaming_history(self):
        """Process streaming history with optimized batch processing"""
        # Load streaming history
        with open('StreamingHistory_music_2.json', 'r', encoding='utf-8') as f:
            spotify_data = json.load(f)
        
        # Convert to DataFrame and get unique tracks
        df = pd.DataFrame(spotify_data)
        df['msPlayed'] = pd.to_numeric(df['msPlayed'])
        
        # Sort by most played and get unique tracks
        track_stats = df.groupby(['trackName', 'artistName'])['msPlayed'].sum().reset_index()
        track_stats = track_stats.sort_values('msPlayed', ascending=False)
        
        # Process tracks
        processed_tracks = []
        
        for i, row in track_stats.iterrows():
            print(f"\nProcessing track {i+1}/{len(track_stats)}")
            genres = self.get_track_genres(row['trackName'], row['artistName'])
            
            if genres:
                processed_tracks.append({
                    'artist': row['artistName'],
                    'track': row['trackName'],
                    'genres': genres,
                    'total_ms': row['msPlayed']
                })
            
            # Save progress after each track
            if (i + 1) % 10 == 0:
                with open('genre_progress.json', 'w', encoding='utf-8') as f:
                    json.dump(processed_tracks, f, ensure_ascii=False, indent=2)
            
            # Controlled delay
            time.sleep(1)
        
        # Final save
        with open('genre_progress.json', 'w', encoding='utf-8') as f:
            json.dump(processed_tracks, f, ensure_ascii=False, indent=2)
        
        return processed_tracks

def main():
    # Initialize analyzer with your credentials
    analyzer = SpotifyGenreAnalyzer(
        client_id='82d3f15f60ee4fd08d64f23a2b2bd8de',
        client_secret='5b892d88f74a4465af1a66d81c571d15'
    )
    
    try:
        # Process streaming history
        processed_tracks = analyzer.process_streaming_history()
        print(f"\nSuccessfully processed {len(processed_tracks)} tracks")
        
    except Exception as e:
        print(f"An error occurred in main execution: {str(e)}")
        raise

if __name__ == "__main__":
    main()


Processing track 398/1896
Found genres for Murat Dalkılıç: ['turkish pop']

Processing track 1619/1896
Found genres for Fred again..: ['edm', 'pop dance', 'progressive electro house']

Processing track 1430/1896
Found genres for Murat Dalkılıç: ['turkish pop']

Processing track 1339/1896
Found genres for Bengü: ['turkish pop']

Processing track 1853/1896
Found genres for niteboi: ['drift phonk']

Processing track 849/1896
Found genres for Hande Yener: ['turkish pop']

Processing track 1733/1896
Found genres for AFRO: []

Processing track 1887/1896
Found genres for Sezen Aksu: ['turkish pop']

Processing track 585/1896
Found genres for ZHU: ['edm', 'electro house']

Processing track 258/1896
Found genres for Ozan Çolakoğlu: ['turkish pop']

Processing track 257/1896
Found genres for Teoman: ['turkish rock']

Processing track 1444/1896
Found genres for Yalın: ['turkish pop']

Processing track 125/1896
Found genres for Serdar Ortaç: ['turkish pop']

Processing track 1622/1896
Found genre

In [4]:
# [Cell 1] - Import libraries and load data
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np

def analyze_genre_spending():
    # Load collected genre data
    with open('genre_progress.json', 'r', encoding='utf-8') as f:
        genre_data = json.load(f)
    
    # Create genre mapping
    genre_mapping = {
        (track['artist'], track['track']): track['genres']
        for track in genre_data
        if track['genres']
    }
    
    # Load streaming history
    with open('StreamingHistory_music_2.json', 'r', encoding='utf-8') as f:
        streaming_data = json.load(f)
    
    # Process streaming data
    streaming_df = pd.DataFrame(streaming_data)
    streaming_df['date'] = pd.to_datetime(streaming_df['endTime']).dt.date
    streaming_df['genres'] = streaming_df.apply(
        lambda row: genre_mapping.get((row['artistName'], row['trackName']), []),
        axis=1
    )
    
    # Explode genres
    genre_df = streaming_df.explode('genres')
    genre_df = genre_df[genre_df['genres'].notna()]
    
    # Calculate daily listening time per genre
    genre_daily = genre_df.groupby(['date', 'genres'])['msPlayed'].sum().reset_index()
    genre_daily['minutes_played'] = genre_daily['msPlayed'] / (1000 * 60)
    
    # Load spending data
    bank_df = pd.read_excel('HesapHareketleri_28.11.2024_0124383 (1).xlsx', 
                           skiprows=8,
                           names=['Date', 'Time', 'Amount', 'Balance', 'Description', 'Receipt'])
    
    bank_df = bank_df[bank_df['Date'].str.match(r'\d{2}\.\d{2}\.\d{4}', na=False)]
    bank_df['date'] = pd.to_datetime(bank_df['Date'], format='%d.%m.%Y').dt.date
    bank_df['Amount'] = pd.to_numeric(bank_df['Amount'], errors='coerce')
    
    # Merge data
    merged_df = pd.merge(genre_daily, bank_df[['date', 'Amount']], on='date', how='inner')
    
    # Calculate genre statistics
    genre_stats = []
    min_days = 5
    
    for genre in merged_df['genres'].unique():
        genre_data = merged_df[merged_df['genres'] == genre]
        if len(genre_data) >= min_days:
            correlation = genre_data['minutes_played'].corr(genre_data['Amount'])
            avg_spending = genre_data['Amount'].mean()
            total_minutes = genre_data['minutes_played'].sum()
            
            genre_stats.append({
                'genre': genre,
                'correlation': correlation,
                'avg_spending': avg_spending,
                'total_minutes': total_minutes,
                'days_counted': len(genre_data)
            })
    
    return pd.DataFrame(genre_stats)

def create_visualizations(genre_stats_df):
    # Sort and select top genres
    genre_stats_df['abs_correlation'] = abs(genre_stats_df['correlation'])
    top_genres = genre_stats_df.nlargest(15, 'total_minutes')
    
    # Create figure
    plt.style.use('default')
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 15))
    
    # Plot 1: Correlations
    corr_plot = sns.barplot(
        data=top_genres,
        x='correlation',
        y='genre',
        ax=ax1,
        palette='RdBu_r'
    )
    ax1.set_title('Genre-Spending Correlations\n(Top 15 Most Listened Genres)', fontsize=14)
    ax1.set_xlabel('Correlation Coefficient', fontsize=12)
    ax1.axvline(x=0, color='black', linestyle='-', alpha=0.2)
    
    # Add correlation values
    for i, v in enumerate(top_genres['correlation']):
        ax1.text(v, i, f'{v:.2f}', va='center', fontsize=10)
    
    # Plot 2: Average spending
    spend_plot = sns.barplot(
        data=top_genres,
        x='avg_spending',
        y='genre',
        ax=ax2,
        palette='viridis'
    )
    ax2.set_title('Average Daily Spending by Genre', fontsize=14)
    ax2.set_xlabel('Average Spending (TL)', fontsize=12)
    
    # Add spending values
    for i, v in enumerate(top_genres['avg_spending']):
        ax2.text(v, i, f'{v:,.0f} TL', va='center', fontsize=10)
    
    plt.tight_layout()
    return fig

def main():
    # Analyze data
    genre_stats = analyze_genre_spending()
    
    # Create visualizations
    fig = create_visualizations(genre_stats)
    plt.savefig('genre_spending_results.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # Print results
    print("\nTop 5 genres most positively correlated with spending:")
    print(genre_stats.nlargest(5, 'correlation')[['genre', 'correlation', 'avg_spending']].round(2))
    
    print("\nTop 5 genres most negatively correlated with spending:")
    print(genre_stats.nsmallest(5, 'correlation')[['genre', 'correlation', 'avg_spending']].round(2))
    
    print("\nGenres with highest average daily spending:")
    print(genre_stats.nlargest(5, 'avg_spending')[['genre', 'avg_spending', 'correlation']].round(2))

if __name__ == "__main__":
    main()

  c /= stddev[:, None]
  c /= stddev[None, :]

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  corr_plot = sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  spend_plot = sns.barplot(



Top 5 genres most positively correlated with spending:
                genre  correlation  avg_spending
178      austrian pop         1.00        603.23
275        mahraganat         1.00       -396.77
199      metropopolis         0.99        384.13
209  stomp and holler         0.99        384.13
82      trap italiana         0.80        157.79

Top 5 genres most negatively correlated with spending:
               genre  correlation  avg_spending
165  electronic trap        -0.94       -234.93
28     karadeniz pop        -0.74        789.64
266  dancehall queen        -0.72        724.33
267     uk dancehall        -0.72        724.33
0       albanian pop        -0.64       1556.17

Genres with highest average daily spending:
                 genre  avg_spending  correlation
240  christian hip hop       1928.57         0.26
261              grime       1606.50         0.60
262         london rap       1606.50         0.60
0         albanian pop       1556.17        -0.64
200   moder