In [47]:
#  Import required libraries
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.stats import pearsonr

In [48]:
# Read and prepare Spotify data
def prepare_spotify_data(path='StreamingHistory_music_2.json'):
    """
    Load a Spotify streaming-history JSON export and total play time per day.

    The file is expected to hold a JSON array of play records, each with an
    ``endTime`` value parseable by ``pd.to_datetime`` and an ``msPlayed``
    value in milliseconds. A play is attributed to the calendar date of its
    ``endTime`` exactly as stored; no timezone conversion is applied.

    Args:
        path: Location of the JSON export. Defaults to the file name the
            notebook was originally written against.

    Returns:
        DataFrame with one row per day, ordered by date, and two columns:
        ``date`` (``datetime.date`` objects) and ``minutes_played`` (float).
        An empty export yields an empty frame with the same two columns.

    Raises:
        KeyError: if the records lack ``endTime`` or ``msPlayed``.
    """
    ms_per_minute = 1000 * 60

    with open(path, 'r', encoding='utf-8') as f:
        records = json.load(f)

    spotify_df = pd.DataFrame(records)

    # An empty export produces a frame with no columns at all; return the
    # expected schema instead of failing on the missing 'endTime' column.
    if spotify_df.empty:
        return pd.DataFrame(columns=['date', 'minutes_played'])

    daily_listening = (
        spotify_df
        .assign(date=pd.to_datetime(spotify_df['endTime']).dt.date)
        .groupby('date', as_index=False)['msPlayed']
        .sum()
    )
    daily_listening['minutes_played'] = daily_listening['msPlayed'] / ms_per_minute

    return daily_listening[['date', 'minutes_played']]

In [49]:
# Read and prepare bank transaction data
def prepare_bank_data(path='HesapHareketleri_28.11.2024_0124383 (1).xlsx'):
    """
    Load a bank statement spreadsheet and sum the transaction amounts per day.

    The first 8 rows of the sheet are skipped and the columns are renamed to
    ``Date, Time, Amount, Balance, Description, Receipt``. Only rows whose
    ``Date`` text starts with a ``dd.mm.yyyy`` pattern are kept, which drops
    any non-transaction rows in the sheet. Amounts that cannot be parsed as
    numbers are discarded.

    Args:
        path: Location of the Excel export. Defaults to the file name the
            notebook was originally written against.

    Returns:
        DataFrame with one row per day, ordered by date, and two columns:
        ``date`` (``datetime.date`` objects) and ``Amount`` (the sum of all
        amounts recorded on that day).

    NOTE(review): the sum is over signed values, so if the export lists
    incoming transfers as positive amounts the result is a daily net flow
    rather than pure spending - confirm against the statement format.
    """
    bank_df = pd.read_excel(
        path,
        skiprows=8,
        names=['Date', 'Time', 'Amount', 'Balance', 'Description', 'Receipt'],
    )

    # Keep only rows that carry a transaction date. Take an explicit copy so
    # the column assignments below write to an independent frame instead of a
    # slice of bank_df (avoids pandas' SettingWithCopyWarning).
    is_transaction = bank_df['Date'].str.match(r'\d{2}\.\d{2}\.\d{4}', na=False)
    transactions = bank_df.loc[is_transaction].copy()

    transactions['date'] = pd.to_datetime(transactions['Date'], format='%d.%m.%Y').dt.date

    # NOTE(review): errors='coerce' turns unparseable amounts into NaN, which
    # are then dropped. If the export stores amounts as locale-formatted text
    # (e.g. '1.234,56') those rows would vanish silently - confirm the column
    # arrives as numbers.
    transactions['Amount'] = pd.to_numeric(transactions['Amount'], errors='coerce')
    transactions = transactions.dropna(subset=['Amount'])

    daily_spending = transactions.groupby('date')['Amount'].sum().reset_index()

    return daily_spending

In [50]:
# Analysis and visualization
def analyze_relationship(listening_df, spending_df):
    """
    Correlate daily listening time with daily bank amounts and plot both.

    The two frames are inner-joined on ``date``, so only days present in both
    inputs contribute to the statistics and the figure.

    Args:
        listening_df: DataFrame with columns ``date`` and ``minutes_played``.
        spending_df: DataFrame with columns ``date`` and ``Amount``.

    Returns:
        Tuple ``(fig, stats_dict)``. ``fig`` is a Matplotlib figure with two
        stacked panels: a scatter plot with a fitted straight line, and a
        dual-axis time series. ``stats_dict`` maps the labels
        ``'Correlation'``, ``'P-value'``, ``'Average Daily Listening (min)'``,
        ``'Average Daily Spending (TL)'`` and ``'Total Days'`` to numbers.

    Raises:
        ValueError: if fewer than two days are present in both inputs, since
            neither the correlation nor the line fit is defined then.

    Side effects:
        Resets the global Matplotlib style to ``'default'``.
    """
    merged_df = pd.merge(listening_df, spending_df, on='date', how='inner')

    # Fail early with a clear message, before any figure is created.
    if len(merged_df) < 2:
        raise ValueError(
            "analyze_relationship needs at least two days present in both "
            f"inputs; found {len(merged_df)}"
        )

    minutes = merged_df['minutes_played']
    amounts = merged_df['Amount']

    # Pearson correlation between listening minutes and daily amount
    correlation, p_value = pearsonr(minutes, amounts)

    plt.style.use('default')
    fig, (scatter_ax, listening_ax) = plt.subplots(2, 1, figsize=(12, 12))

    # 1. Scatter plot with least-squares line
    scatter_ax.scatter(minutes, amounts, alpha=0.5)
    slope, intercept = np.polyfit(minutes, amounts, 1)
    # A straight line only needs its two end points; drawing it through every
    # unsorted x value would retrace the same segment back and forth.
    x_line = np.array([minutes.min(), minutes.max()])
    scatter_ax.plot(x_line, slope * x_line + intercept, "r--", alpha=0.8)
    scatter_ax.set_xlabel('Daily Listening Time (minutes)')
    scatter_ax.set_ylabel('Daily Spending Amount (TL)')
    scatter_ax.set_title('Relationship Between Music Listening and Spending')
    scatter_ax.grid(True, alpha=0.3)

    # Annotate the panel with the correlation summary
    correlation_text = f'Correlation: {correlation:.3f}\np-value: {p_value:.3f}'
    scatter_ax.text(
        0.05, 0.95, correlation_text,
        transform=scatter_ax.transAxes,
        verticalalignment='top',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
    )

    # 2. Time series: the two quantities have different scales, so each gets
    #    its own y-axis on a shared date axis.
    by_date = merged_df.sort_values('date')
    spending_ax = listening_ax.twinx()

    listening_lines = listening_ax.plot(
        by_date['date'], by_date['minutes_played'], 'b-', label='Listening Time'
    )
    listening_ax.set_xlabel('Date')
    listening_ax.set_ylabel('Listening Time (minutes)', color='b')
    listening_ax.tick_params(axis='y', labelcolor='b')

    spending_lines = spending_ax.plot(
        by_date['date'], by_date['Amount'], 'r-', label='Spending', alpha=0.7
    )
    spending_ax.set_ylabel('Spending Amount (TL)', color='r')
    spending_ax.tick_params(axis='y', labelcolor='r')

    # One legend covering the lines from both axes
    all_lines = listening_lines + spending_lines
    listening_ax.legend(
        all_lines, [line.get_label() for line in all_lines], loc='upper left'
    )

    listening_ax.set_title('Daily Listening Time and Spending Over Time')
    plt.tight_layout()

    stats_dict = {
        'Correlation': correlation,
        'P-value': p_value,
        'Average Daily Listening (min)': by_date['minutes_played'].mean(),
        'Average Daily Spending (TL)': by_date['Amount'].mean(),
        'Total Days': len(by_date)
    }

    return fig, stats_dict

In [51]:
# Main execution
def main(output_path='listening_spending_analysis.png'):
    """
    Run the full pipeline: load both data sets, analyse them, report and save.

    Prints every entry of the statistics dictionary with two decimals
    (including the integer day count) and writes the figure to disk.

    Args:
        output_path: Where the figure is saved. Defaults to the file name the
            notebook originally used.
    """
    # Process data
    listening_data = prepare_spotify_data()
    spending_data = prepare_bank_data()

    # Analyze relationship. The result is named `summary` so it does not
    # shadow the `stats` module imported from scipy at the top of the notebook.
    fig, summary = analyze_relationship(listening_data, spending_data)

    # Print results
    print("\nAnalysis Results:")
    for key, value in summary.items():
        print(f"{key}: {value:.2f}")

    # Save and release the figure that was returned, rather than whatever
    # pyplot currently considers the "current" figure.
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

if __name__ == "__main__":
    main()


Analysis Results:
Correlation: 0.08
P-value: 0.54
Average Daily Listening (min): 192.45
Average Daily Spending (TL): -74.37
Total Days: 58.00


In [54]:
# Visualization functions
def create_visualizations(df, amount_col='Tutar'):
    """
    Draw a 2x2 overview figure of listening time and spending.

    Panels: scatter of listening minutes against amount, listening minutes
    over time, and one histogram each for listening minutes and amount.

    Args:
        df: DataFrame with columns ``date``, ``minutes_played`` and the
            column named by ``amount_col``.
        amount_col: Name of the spending column. Defaults to ``'Tutar'``;
            pass ``'Amount'`` for frames that use the column name produced
            by ``prepare_bank_data`` in this notebook.

    Returns:
        The Matplotlib figure holding the four panels.

    Raises:
        KeyError: if ``df`` lacks any of the required columns.

    Side effects:
        Switches the global Matplotlib style to the seaborn style sheet.

    NOTE(review): this function uses ``sns``, but no ``import seaborn as sns``
    appears in the visible import cell - confirm it is imported before this
    cell runs on a fresh kernel.
    """
    # Validate up front so a wrong column name fails before a figure exists.
    required = ('date', 'minutes_played', amount_col)
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"create_visualizations: missing column(s) {missing}")

    # Matplotlib 3.6 renamed the bundled seaborn style sheets to
    # 'seaborn-v0_8*' and later releases dropped the old names; prefer the
    # new name and fall back to the old one on older installations.
    style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
    plt.style.use(style_name)

    # Create figure with subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # 1. Scatter plot
    sns.scatterplot(data=df, x='minutes_played', y=amount_col, ax=axes[0,0])
    axes[0,0].set_title('Dinleme Süresi ve Harcama İlişkisi')
    axes[0,0].set_xlabel('Günlük Dinleme (dakika)')
    axes[0,0].set_ylabel('Günlük Harcama (TL)')

    # 2. Time series plot
    df_sorted = df.sort_values('date')
    axes[0,1].plot(df_sorted['date'], df_sorted['minutes_played'], label='Dinleme Süresi')
    axes[0,1].set_title('Günlük Dinleme Süresi Trendi')
    axes[0,1].set_xlabel('Tarih')
    axes[0,1].set_ylabel('Günlük Dinleme (dakika)')
    axes[0,1].tick_params(axis='x', rotation=45)

    # 3. Distribution plots
    sns.histplot(data=df, x='minutes_played', ax=axes[1,0])
    axes[1,0].set_title('Günlük Dinleme Süresi Dağılımı')

    sns.histplot(data=df, x=amount_col, ax=axes[1,1])
    axes[1,1].set_title('Günlük Harcama Dağılımı')

    plt.tight_layout()
    return fig