In [1]:
pwd

'/Users/spartan/Downloads/kafka_2.13-3.8.0/spotify_realtime/enhanced_spotify_streaming'

In [3]:
# spotify_dashboard.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from kafka import KafkaConsumer
import json
from datetime import datetime
import time
from collections import deque
import threading

# Initialize session state
if 'track_history' not in st.session_state:
    st.session_state.track_history = deque(maxlen=100)
    st.session_state.artist_counts = {}
    st.session_state.popularity_history = []
    st.session_state.energy_history = []
    st.session_state.danceability_history = []
    st.session_state.forgotten_items = set()
    st.session_state.processing = True

class SpotifyDashboard:
    def __init__(self):
        st.set_page_config(
            page_title="Spotify Real-Time Analytics",
            page_icon="🎵",
            layout="wide"
        )
        
        # Dashboard title
        st.title("🎵 Spotify Real-Time Analytics Dashboard")
        
        # Initialize Kafka consumer in a separate thread
        self.kafka_thread = threading.Thread(
            target=self.consume_kafka_messages,
            daemon=True
        )
        self.kafka_thread.start()
        
        # Create dashboard layout
        self.create_dashboard_layout()
    
    def consume_kafka_messages(self):
        """Consume messages from Kafka in background"""
        try:
            consumer = KafkaConsumer(
                'spotify_stream',
                bootstrap_servers=['localhost:9092'],
                auto_offset_reset='latest',
                value_deserializer=lambda x: json.loads(x.decode('utf-8')),
                group_id='dashboard_group'
            )
            
            for message in consumer:
                if not st.session_state.processing:
                    break
                    
                track_data = message.value
                st.session_state.track_history.append(track_data)
                
                # Update artist counts
                artist = track_data['artist']
                st.session_state.artist_counts[artist] = \
                    st.session_state.artist_counts.get(artist, 0) + 1
                
                # Update metric histories
                st.session_state.popularity_history.append(track_data['popularity'])
                st.session_state.energy_history.append(track_data['energy'])
                st.session_state.danceability_history.append(track_data['danceability'])
                
        except Exception as e:
            st.error(f"Error consuming Kafka messages: {e}")
    
    def create_dashboard_layout(self):
        """Create the main dashboard layout"""
        # Create tabs
        tabs = st.tabs([
            "Real-Time Metrics",
            "Artist Analysis",
            "Track Features",
            "Unlearning Status"
        ])
        
        # Real-Time Metrics Tab
        with tabs[0]:
            self.create_realtime_metrics()
        
        # Artist Analysis Tab
        with tabs[1]:
            self.create_artist_analysis()
        
        # Track Features Tab
        with tabs[2]:
            self.create_track_features()
        
        # Unlearning Status Tab
        with tabs[3]:
            self.create_unlearning_status()
    
    def create_realtime_metrics(self):
        """Create real-time metrics section"""
        st.header("Real-Time Metrics")
        
        # Create three columns
        col1, col2, col3 = st.columns(3)
        
        with col1:
            # Current track info
            if st.session_state.track_history:
                latest_track = list(st.session_state.track_history)[-1]
                st.metric(
                    "Now Playing",
                    latest_track['name'],
                    latest_track['artist']
                )
        
        with col2:
            # Unique artists count
            st.metric(
                "Unique Artists",
                len(st.session_state.artist_counts)
            )
        
        with col3:
            # Average popularity
            if st.session_state.popularity_history:
                avg_popularity = np.mean(st.session_state.popularity_history)
                st.metric("Average Popularity", f"{avg_popularity:.2f}")
        
        # Create popularity trend chart
        if st.session_state.popularity_history:
            fig = px.line(
                y=st.session_state.popularity_history[-50:],
                title="Popularity Trend",
                labels={'value': 'Popularity', 'index': 'Tracks'}
            )
            st.plotly_chart(fig, use_container_width=True)
    
    def create_artist_analysis(self):
        """Create artist analysis section"""
        st.header("Artist Analysis")
        
        # Artist distribution
        if st.session_state.artist_counts:
            fig = px.bar(
                x=list(st.session_state.artist_counts.keys()),
                y=list(st.session_state.artist_counts.values()),
                title="Artist Track Distribution",
                labels={'x': 'Artist', 'y': 'Number of Tracks'}
            )
            fig.update_layout(xaxis_tickangle=-45)
            st.plotly_chart(fig, use_container_width=True)
        
        # Artist metrics table
        if st.session_state.track_history:
            artist_df = pd.DataFrame(list(st.session_state.track_history))
            artist_metrics = artist_df.groupby('artist').agg({
                'popularity': 'mean',
                'energy': 'mean',
                'danceability': 'mean'
            }).round(2)
            
            st.subheader("Artist Metrics")
            st.dataframe(artist_metrics)
    
    def create_track_features(self):
        """Create track features section"""
        st.header("Track Features Analysis")
        
        # Feature distributions
        if st.session_state.track_history:
            track_df = pd.DataFrame(list(st.session_state.track_history))
            
            col1, col2 = st.columns(2)
            
            with col1:
                # Energy vs Danceability scatter plot
                fig = px.scatter(
                    track_df,
                    x='energy',
                    y='danceability',
                    color='popularity',
                    hover_data=['name', 'artist'],
                    title="Energy vs Danceability"
                )
                st.plotly_chart(fig, use_container_width=True)
            
            with col2:
                # Feature correlation heatmap
                corr = track_df[['popularity', 'energy', 'danceability']].corr()
                fig = px.imshow(
                    corr,
                    title="Feature Correlations",
                    labels=dict(color="Correlation")
                )
                st.plotly_chart(fig, use_container_width=True)
    
    def create_unlearning_status(self):
        """Create unlearning status section"""
        st.header("Machine Unlearning Status")
        
        # Unlearning controls
        st.subheader("Forget Items")
        col1, col2 = st.columns(2)
        
        with col1:
            track_to_forget = st.selectbox(
                "Select Track to Forget",
                options=[t['name'] for t in st.session_state.track_history]
            )
            if st.button("Forget Track"):
                st.session_state.forgotten_items.add(track_to_forget)
                st.success(f"Track '{track_to_forget}' forgotten")
        
        with col2:
            if st.session_state.artist_counts:
                artist_to_forget = st.selectbox(
                    "Select Artist to Forget",
                    options=list(st.session_state.artist_counts.keys())
                )
                if st.button("Forget Artist"):
                    st.session_state.forgotten_items.add(artist_to_forget)
                    st.success(f"Artist '{artist_to_forget}' forgotten")
        
        # Unlearning status
        st.subheader("Forgotten Items")
        if st.session_state.forgotten_items:
            st.write(list(st.session_state.forgotten_items))
        else:
            st.write("No items forgotten yet")

if __name__ == "__main__":
    dashboard = SpotifyDashboard()

2024-11-26 02:04:37.866 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


AttributeError: st.session_state has no attribute "track_history". Did you forget to initialize it? More info: https://docs.streamlit.io/library/advanced-features/session-state#initialization