In [8]:
# Import required libraries with explicit seaborn import
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np

def create_improved_visualizations(data):
    """
    Create enhanced visualizations with better readability and clarity
    """
    # Set Seaborn style and color palette
    sns.set_style("darkgrid")
    sns.set_context("notebook", font_scale=1.2)
    
    # Create figure
    fig = plt.figure(figsize=(20, 16))
    
    # Use Seaborn color palette
    palette = sns.color_palette("husl", 2)
    colors = {'main_blue': palette[0], 'main_red': palette[1]}
    
    # 1. Enhanced Scatter Plot
    ax1 = fig.add_subplot(221)
    sns.regplot(data=data, 
                x='repetition_rate', 
                y='Amount',
                scatter_kws={'alpha': 0.6, 's': 80, 'color': colors['main_blue']},
                line_kws={'color': colors['main_red'], 'linewidth': 2},
                ax=ax1)
    
    ax1.set_title('Relationship Between Song Repetition and Spending', 
                  fontsize=16, pad=20)
    ax1.set_xlabel('Repetition Rate\n(0: All New Songs â†’ 1: All Repeated Songs)', 
                  fontsize=12)
    ax1.set_ylabel('Daily Spending (TL)', fontsize=12)
    
    # Add correlation with styled box
    correlation = data['repetition_rate'].corr(data['Amount'])
    ax1.text(0.05, 0.95, 
             f'Correlation: {correlation:.3f}', 
             transform=ax1.transAxes,
             bbox=dict(facecolor='white', 
                      edgecolor=colors['main_blue'],
                      boxstyle='round,pad=0.5',
                      alpha=0.9),
             fontsize=12)
    
    # 2. Enhanced Time Series
    ax2 = fig.add_subplot(222)
    data_sorted = data.sort_values('date')
    
    # Create twin axes with clear separation
    ax2_twin = ax2.twinx()
    
    # Plot with enhanced styling
    line1 = ax2.plot(data_sorted['date'], data_sorted['repetition_rate'], 
                     color=colors['main_blue'], 
                     linewidth=2,
                     label='Repetition Rate')
    ax2.set_ylabel('Repetition Rate', 
                   color=colors['main_blue'], 
                   fontsize=12,
                   fontweight='bold')
    ax2.tick_params(axis='y', labelcolor=colors['main_blue'])
    
    line2 = ax2_twin.plot(data_sorted['date'], data_sorted['Amount'],
                         color=colors['main_red'], 
                         linewidth=2,
                         label='Spending')
    ax2_twin.set_ylabel('Spending (TL)',
                       color=colors['main_red'], 
                       fontsize=12,
                       fontweight='bold')
    ax2_twin.tick_params(axis='y', labelcolor=colors['main_red'])
    
    # Combine legends with better placement
    lines = line1 + line2
    labels = ['Song Repetition', 'Daily Spending']
    ax2.legend(lines, labels, 
              loc='upper center',
              bbox_to_anchor=(0.5, 1.15),
              ncol=2,
              fontsize=12)
    
    ax2.set_title('Trends in Song Repetition and Spending Over Time', 
                  fontsize=16, 
                  pad=30)
    
    # 3. Enhanced Box Plot
    ax3 = fig.add_subplot(223)
    
    # Create meaningful categories
    data['repetition_category'] = pd.cut(
        data['repetition_rate'],
        bins=[0, 0.25, 0.5, 0.75, 1],
        labels=['Mostly New\n(0-25%)', 'Some Repeats\n(25-50%)', 
                'Many Repeats\n(50-75%)', 'Mostly Repeats\n(75-100%)']
    )
    
    sns.boxplot(data=data, 
                x='repetition_category', 
                y='Amount',
                palette='Blues',
                ax=ax3)
    
    ax3.set_title('Spending Distribution by Song Repetition Level', 
                  fontsize=16, 
                  pad=20)
    ax3.set_xlabel('Song Repetition Category', fontsize=12)
    ax3.set_ylabel('Daily Spending (TL)', fontsize=12)
    ax3.tick_params(axis='x', rotation=0)
    
    # 4. Enhanced Violin Plot
    ax4 = fig.add_subplot(224)
    
    # Create meaningful spending quartiles
    data['spending_quartile'] = pd.qcut(
        data['Amount'], 
        q=4,
        labels=['Lowest\nSpending\n(Q1)', 'Below\nAverage\n(Q2)', 
                'Above\nAverage\n(Q3)', 'Highest\nSpending\n(Q4)']
    )
    
    sns.violinplot(data=data, 
                   x='spending_quartile', 
                   y='repetition_rate',
                   palette='Blues',
                   ax=ax4)
    
    ax4.set_title('Song Repetition Patterns by Spending Level', 
                  fontsize=16, 
                  pad=20)
    ax4.set_xlabel('Spending Category', fontsize=12)
    ax4.set_ylabel('Repetition Rate', fontsize=12)
    ax4.tick_params(axis='x', rotation=0)
    
    # Adjust layout
    plt.tight_layout()
    return fig

def main():
    # Load and process data (same as before)
    with open('StreamingHistory_music_2.json', 'r', encoding='utf-8') as f:
        streaming_data = json.load(f)
    
    df = pd.DataFrame(streaming_data)
    df['date'] = pd.to_datetime(df['endTime']).dt.date
    
    # Calculate daily metrics
    daily_metrics = []
    for date in df['date'].unique():
        daily_data = df[df['date'] == date]
        total_plays = len(daily_data)
        unique_tracks = len(daily_data[['trackName', 'artistName']].drop_duplicates())
        repetition_rate = 1 - (unique_tracks / total_plays)
        
        daily_metrics.append({
            'date': date,
            'repetition_rate': repetition_rate
        })
    
    daily_df = pd.DataFrame(daily_metrics)
    
    # Load spending data
    bank_df = pd.read_excel('HesapHareketleri_28.11.2024_0124383 (1).xlsx', 
                           skiprows=8,
                           names=['Date', 'Time', 'Amount', 'Balance', 'Description', 'Receipt'])
    
    bank_df = bank_df[bank_df['Date'].str.match(r'\d{2}\.\d{2}\.\d{4}', na=False)]
    bank_df['date'] = pd.to_datetime(bank_df['Date'], format='%d.%m.%Y').dt.date
    bank_df['Amount'] = pd.to_numeric(bank_df['Amount'], errors='coerce')
    
    daily_spending = bank_df.groupby('date')['Amount'].sum().reset_index()
    
    # Merge data
    merged_df = pd.merge(daily_df, daily_spending, on='date', how='inner')
    
    # Create and save visualizations
    fig = create_improved_visualizations(merged_df)
    plt.savefig('repetition_spending_analysis_improved.png', dpi=300, bbox_inches='tight')
    plt.close()

if __name__ == "__main__":
    main()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=data,

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.violinplot(data=data,
