# Music Sentiment Analysis: Final Visualizations and Summary

This notebook creates comprehensive visualizations and provides a final summary of the music sentiment analysis project spanning 1950-2019.

## Project Summary
- **Dataset**: Music lyrics and metadata from 1950-2019
- **Analysis**: Multi-method sentiment analysis (VADER, TextBlob, Transformers)
- **Focus**: Temporal trends in music sentiment over seven decades (1950-2019)
- **Scope**: Genre-specific analysis and statistical validation

## Key Visualizations
1. **Interactive timeline** of sentiment trends
2. **Genre comparison** heatmaps
3. **Statistical significance** plots
4. **Historical context** overlays
5. **Method comparison** visualizations


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Configure the shared matplotlib / seaborn appearance (style sheet first,
# then the colour palette, in the same order as before).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# The scored dataset is produced by the sentiment-analysis notebook; stop
# right away with an explicit error when it has not been generated yet.
sentiment_path = Path('../data/processed/music_with_sentiment.csv')
if not sentiment_path.exists():
    print("Error: Sentiment analysis not found. Please run the sentiment analysis notebook first.")
    raise FileNotFoundError("Sentiment analysis file not found")

print("Loading dataset with sentiment analysis...")
df = pd.read_csv(sentiment_path)
print(f"Loaded {len(df)} songs with sentiment scores")

# Per-year aggregates used by the plots and the summary below. Named
# aggregation yields the flat "<column>_<statistic>" column names directly,
# so no MultiIndex flattening step is needed.
yearly_sentiment = (
    df.groupby('year')
    .agg(
        composite_sentiment_mean=('composite_sentiment', 'mean'),
        composite_sentiment_std=('composite_sentiment', 'std'),
        composite_sentiment_count=('composite_sentiment', 'count'),
        vader_compound_mean=('vader_compound', 'mean'),
        textblob_polarity_mean=('textblob_polarity', 'mean'),
        transformer_positive_mean=('transformer_positive', 'mean'),
    )
    .round(4)
    .reset_index()
)

# Quick sanity report on what was loaded.
first_year, last_year = df['year'].min(), df['year'].max()
print(f"Dataset ready for visualization: {len(df)} songs from {first_year}-{last_year}")
print(f"Genres: {df['genre'].nunique()}")
print(f"Artists: {df['artist_name'].nunique()}")


## 1. Interactive Timeline Visualization


In [None]:
# Create interactive timeline visualization
#
# Builds a 2x2 plotly dashboard from `yearly_sentiment` and `df`:
#   (1,1) yearly composite sentiment with a +/- one standard deviation band
#   (1,2) number of songs per year
#   (2,1) mean composite sentiment per decade
#   (2,2) yearly means of the three individual sentiment methods
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Sentiment Trends Over Time', 'Number of Songs per Year',
                    'Sentiment by Decade', 'Method Comparison'),
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": False}]]
)

timeline_years = yearly_sentiment['year']
composite_mean = yearly_sentiment['composite_sentiment_mean']
composite_std = yearly_sentiment['composite_sentiment_std']

# 1. Main sentiment trend
fig.add_trace(
    go.Scatter(
        x=timeline_years,
        y=composite_mean,
        mode='lines+markers',
        name='Composite Sentiment',
        line=dict(color='blue', width=3),
        marker=dict(size=6),
        hovertemplate='<b>Year:</b> %{x}<br><b>Sentiment:</b> %{y:.3f}<extra></extra>'
    ),
    row=1, col=1
)

# Variability band: mean +/- one standard deviation of the songs in each year.
# This is a spread measure, not a confidence interval of the mean, so the
# legend entry is labelled accordingly.
# The invisible upper bound is added first; the lower bound then fills up to
# it via fill='tonexty'.
fig.add_trace(
    go.Scatter(
        x=timeline_years,
        y=composite_mean + composite_std,
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        hoverinfo='skip'
    ),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(
        x=timeline_years,
        y=composite_mean - composite_std,
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.2)',
        name='±1 Std Dev',
        hoverinfo='skip'
    ),
    row=1, col=1
)

# 2. Number of songs per year
fig.add_trace(
    go.Bar(
        x=timeline_years,
        y=yearly_sentiment['composite_sentiment_count'],
        name='Songs per Year',
        marker_color='green',
        opacity=0.7,
        hovertemplate='<b>Year:</b> %{x}<br><b>Songs:</b> %{y}<extra></extra>'
    ),
    row=1, col=2
)

# 3. Sentiment by decade
decade_sentiment = df.groupby('decade')['composite_sentiment'].mean().reset_index()
fig.add_trace(
    go.Bar(
        x=decade_sentiment['decade'],
        y=decade_sentiment['composite_sentiment'],
        name='Decade Average',
        marker_color='purple',
        opacity=0.7,
        hovertemplate='<b>Decade:</b> %{x}s<br><b>Sentiment:</b> %{y:.3f}<extra></extra>'
    ),
    row=2, col=1
)

# 4. Method comparison: one line per individual sentiment method.
method_series = [
    ('VADER', 'vader_compound_mean', 'red'),
    ('TextBlob', 'textblob_polarity_mean', 'orange'),
    ('Transformer', 'transformer_positive_mean', 'green'),
]
for method_label, method_column, method_color in method_series:
    fig.add_trace(
        go.Scatter(
            x=timeline_years,
            y=yearly_sentiment[method_column],
            mode='lines',
            name=method_label,
            line=dict(color=method_color, width=2),
            # Doubled braces keep plotly's %{x} / %{y} placeholders literal.
            hovertemplate=(
                f'<b>Year:</b> %{{x}}<br><b>{method_label}:</b> '
                f'%{{y:.3f}}<extra></extra>'
            )
        ),
        row=2, col=2
    )

# Update layout
fig.update_layout(
    title={
        'text': 'Music Sentiment Analysis: Interactive Timeline (1950-2019)',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 20}
    },
    height=800,
    showlegend=True,
    hovermode='x unified'
)

# Update axes: (row, col) -> (x-axis title, y-axis title)
axis_titles = {
    (1, 1): ("Year", "Sentiment Score"),
    (1, 2): ("Year", "Number of Songs"),
    (2, 1): ("Decade", "Sentiment Score"),
    (2, 2): ("Year", "Sentiment Score"),
}
for (grid_row, grid_col), (x_title, y_title) in axis_titles.items():
    fig.update_xaxes(title_text=x_title, row=grid_row, col=grid_col)
    fig.update_yaxes(title_text=y_title, row=grid_row, col=grid_col)

fig.show()

print("Interactive timeline created successfully!")


## 2. Genre Analysis Heatmap


In [None]:
# Genre analysis: a genre-by-decade heatmap for the most common genres and a
# ranked bar chart of average sentiment per genre.

# Restrict the heatmap to the ten genres with the most songs.
top_genres = df['genre'].value_counts().index[:10]
top_genre_rows = df[df['genre'].isin(top_genres)]
genre_decade_sentiment = (
    top_genre_rows
    .groupby(['genre', 'decade'])['composite_sentiment']
    .mean()
    .reset_index()
)

# Reshape to a genre (rows) x decade (columns) grid of mean sentiment.
pivot_data = genre_decade_sentiment.pivot(
    index='genre',
    columns='decade',
    values='composite_sentiment',
)

# Interactive heatmap of the grid.
fig = px.imshow(
    pivot_data,
    title='Genre Sentiment Heatmap by Decade',
    labels=dict(x="Decade", y="Genre", color="Sentiment Score"),
    color_continuous_scale='RdBu_r',
    aspect='auto',
)
fig.update_layout(
    title=dict(
        text='Genre Sentiment Evolution by Decade (1950-2019)',
        x=0.5,
        xanchor='center',
        font=dict(size=16),
    ),
    height=600,
    width=800,
)
fig.show()

# Average sentiment per genre: keep genres with at least 20 scored songs,
# order them from highest to lowest mean, and retain the first 15 rows.
genre_avg_sentiment = (
    df.groupby('genre')['composite_sentiment']
    .agg(['mean', 'count'])
    .reset_index()
    .loc[lambda stats_frame: stats_frame['count'] >= 20]
    .sort_values('mean', ascending=False)
    .head(15)
)

fig2 = px.bar(
    genre_avg_sentiment,
    x='mean',
    y='genre',
    orientation='h',
    title='Average Sentiment by Genre (Top 15)',
    labels={'mean': 'Average Sentiment Score', 'genre': 'Genre'},
    color='mean',
    color_continuous_scale='RdYlBu_r',
)
fig2.update_layout(
    title=dict(
        text='Average Sentiment by Genre (1950-2019)',
        x=0.5,
        xanchor='center',
        font=dict(size=16),
    ),
    height=600,
    width=800,
)
fig2.show()

print("Genre analysis visualizations created successfully!")


## 3. Project Summary and Conclusions


In [None]:
# Generate final project summary
#
# Prints headline statistics computed from `df` (one row per song) and
# `yearly_sentiment` (one row per year): sentiment distribution, linear trend
# over time, best/worst genres, agreement between methods, and confidence.
from scipy import stats

total_songs = len(df)

print("MUSIC SENTIMENT ANALYSIS PROJECT SUMMARY")
print("=" * 60)
print("Project: Music Sentiment Analysis (1950-2019)")
print(f"Dataset: {total_songs:,} songs from {df['year'].min()}-{df['year'].max()}")
print(f"Genres: {df['genre'].nunique()}")
print(f"Artists: {df['artist_name'].nunique()}")
print("Analysis Methods: VADER, TextBlob, Transformers")

print("\nKEY FINDINGS:")
print("-" * 30)

# Overall sentiment statistics. Songs are bucketed with a +/-0.1 dead zone
# around zero that counts as neutral.
composite = df['composite_sentiment']
overall_mean = composite.mean()
overall_std = composite.std()
positive_songs = int((composite > 0.1).sum())
negative_songs = int((composite < -0.1).sum())
neutral_songs = int(((composite >= -0.1) & (composite <= 0.1)).sum())

print("1. Overall Sentiment Distribution:")
print(f"   Average sentiment: {overall_mean:.4f} (±{overall_std:.4f})")
print(f"   Positive songs: {positive_songs:,} ({positive_songs/total_songs*100:.1f}%)")
print(f"   Neutral songs: {neutral_songs:,} ({neutral_songs/total_songs*100:.1f}%)")
print(f"   Negative songs: {negative_songs:,} ({negative_songs/total_songs*100:.1f}%)")

# Temporal trends: ordinary least-squares fit of yearly mean sentiment on year.
# Years without a usable mean are dropped so a single NaN cannot turn every
# regression output into NaN.
trend_points = yearly_sentiment[['year', 'composite_sentiment_mean']].dropna()
years = trend_points['year'].values
sentiment = trend_points['composite_sentiment_mean'].values
slope, intercept, r_value, p_value, std_err = stats.linregress(years, sentiment)

# A slope of exactly zero is neither increasing nor decreasing.
if slope > 0:
    trend_direction = 'Increasing'
elif slope < 0:
    trend_direction = 'Decreasing'
else:
    trend_direction = 'Flat'

print("\n2. Temporal Trends:")
print(f"   Sentiment change per year: {slope:.6f}")
print(f"   Trend strength (R²): {r_value**2:.4f}")
print(f"   Statistical significance: {'Yes' if p_value < 0.05 else 'No'} (p = {p_value:.4f})")
print(f"   Overall trend: {trend_direction} sentiment over time")

# Genre insights. The ranking is rebuilt here over ALL genres with at least
# 20 scored songs. The chart table `genre_avg_sentiment` only holds the 15
# most positive genres, so its last row is not the most negative genre.
genre_ranking = (
    df.groupby('genre')['composite_sentiment']
    .agg(['mean', 'count'])
    .reset_index()
)
genre_ranking = genre_ranking[genre_ranking['count'] >= 20].sort_values(
    'mean', ascending=False
)

print("\n3. Genre Insights:")
if genre_ranking.empty:
    # Nothing to rank; avoid an IndexError from .iloc on an empty frame.
    print("   No genre has at least 20 songs; genre ranking skipped.")
else:
    top_positive_genre = genre_ranking.iloc[0]
    top_negative_genre = genre_ranking.iloc[-1]
    print(f"   Most positive genre: {top_positive_genre['genre']} ({top_positive_genre['mean']:.4f})")
    print(f"   Most negative genre: {top_negative_genre['genre']} ({top_negative_genre['mean']:.4f})")

# Method agreement: pairwise correlation of per-song scores
# (Series.corr with its default method).
vader_textblob_corr = df['vader_compound'].corr(df['textblob_polarity'])
vader_transformer_corr = df['vader_compound'].corr(df['transformer_positive'])
textblob_transformer_corr = df['textblob_polarity'].corr(df['transformer_positive'])

print("\n4. Method Agreement:")
print(f"   VADER vs TextBlob: {vader_textblob_corr:.3f}")
print(f"   VADER vs Transformer: {vader_transformer_corr:.3f}")
print(f"   TextBlob vs Transformer: {textblob_transformer_corr:.3f}")

# Data quality: songs whose composite confidence exceeds 0.7 count as
# high confidence.
avg_confidence = df['composite_confidence'].mean()
high_confidence_songs = int((df['composite_confidence'] > 0.7).sum())

print("\n5. Analysis Quality:")
print(f"   Average confidence: {avg_confidence:.3f}")
print(f"   High confidence songs: {high_confidence_songs:,} ({high_confidence_songs/total_songs*100:.1f}%)")

print("\n" + "=" * 60)
print("PROJECT COMPLETED SUCCESSFULLY!")
print("=" * 60)
print("All notebooks executed:")
print("✓ 01_eda_music_dataset.ipynb - Exploratory Data Analysis")
print("✓ 02_sentiment_analysis.ipynb - Multi-method Sentiment Analysis")
print("✓ 03_temporal_trends.ipynb - Temporal Trend Analysis")
print("✓ 04_final_visualizations.ipynb - Final Visualizations")
print("\nResults saved to:")
print("✓ data/processed/cleaned_music_dataset.csv")
print("✓ data/processed/music_with_sentiment.csv")
print("\nReady for further analysis and reporting!")
