## MSDS 696  Notebook 6 Plotly Dash Interactive Dashboard
## Project Title:
Create and Build a Data Engineering Pipeline to Collect, Process, and Store Spotify Data. This is intended to be a fun project that looks at who the most popular artists are, what their most popular tracks are, and some characteristics of the songs.

### Mary J Hollon
### Due 8-22-2024

### Notebook Purpose

The purpose of this notebook is to create an interactive dashboard using Plotly Dash. Guidance for building the dashboard and its charts can be found in the Dash tutorial (https://dash.plotly.com/tutorial) and in the Plotly Express documentation (https://plotly.com/python/plotly-express/).

In [1]:
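# Import all necessary libraries:
# Dash (the app, dcc/html components, dash_table, and callback Input/Output),
# pandas and sqlite3 for loading the data, and Plotly Express for the charts.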

import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output
import pandas as pd
import sqlite3
import plotly.express as px

In [3]:
# Create the Dash app
app = dash.Dash(__name__)


# Step 1: Load Data from the SQLite Database
# -------------------------------------------
# Join the Tracks and Artists tables so that every track row carries its
# audio features together with its artist's name, popularity, and genre.
# The result is loaded once, at start-up, into the module-level DataFrame `df`.

# SQL query performing the JOIN between Tracks and Artists on artist_id.
# It selects every column the dashboard's charts and table read.
query = """
SELECT 
    T.track_name, T.year, T.energy, T.danceability, T.instrumentalness, T.valence, T.tempo, 
    T.tempo_category, T.danceability_category, T.valence_category, T.energy_category, T.danceability_valence_interaction,
    T.loudness_scaled, T.custom_score, A.artist_name, A.popularity, A.simplified_genre 
FROM Tracks T
JOIN Artists A ON T.artist_id = A.artist_id;
"""

# Open the database, run the query, and always close the connection --
# even if the query raises (e.g. a missing table), so the handle is not leaked.
conn = sqlite3.connect('spotify_music.db')  # database name
try:
    # Load the query results into a pandas DataFrame
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Parse the year column as a four-digit year and keep only the year number,
# so it can be compared directly with the value selected in the dropdown.
df['year'] = pd.to_datetime(df['year'], format='%Y').dt.year


# Step 2: Layout of the Dashboard
# ---------------------------------
# Define the structure and components of the dashboard: a year dropdown,
# six graphs arranged in rows, and a table of top hits at the bottom.
# Every graph and the table start empty; their contents are supplied by the
# callback that fires on the dropdown's value.

# Years offered in the dropdown: missing values dropped, duplicates removed,
# sorted ascending, and cast to plain Python ints so labels and values are
# stable regardless of the row order returned by the database.
available_years = sorted(int(year) for year in df['year'].dropna().unique())

# Default to the earliest year; None when the database returned no usable years.
default_year = available_years[0] if available_years else None

app.layout = html.Div([
    html.H1("Spotify Music Data Dashboard"),

    # Dropdown to select the year
    dcc.Dropdown(
        id='year-dropdown',
        options=[{'label': str(year), 'value': year} for year in available_years],
        value=default_year,  # Default value
        clearable=False,
        style={'width': '50%', 'margin': 'auto'}
    ),

    # Treemap on its own line
    html.Div([
        html.Div(dcc.Graph(id='plot-treemap'), style={'width': '100%', 'display': 'inline-block'}),
    ]),

    # Full-width row: Top 10 Artists by Popularity
    html.Div([
        html.Div(dcc.Graph(id='plot-top-artists'), style={'width': '100%', 'display': 'inline-block'}),
    ]),

    # Two-column row: Tempo and Danceability by Simplified Genre
    html.Div([
        # Left half: Tempo by Simplified Genre
        html.Div(dcc.Graph(id='plot-genre-tempo'), style={'width': '50%', 'display': 'inline-block'}),

        # Right half: Danceability by Simplified Genre
        html.Div(dcc.Graph(id='plot-genre-danceability'), style={'width': '50%', 'display': 'inline-block'}),
    ]),

    # Two-column row: Valence and Energy vs. Simplified Genre
    html.Div([
        # Left half: Valence Category vs. Simplified Genre
        html.Div(dcc.Graph(id='plot-valence-category'), style={'width': '50%', 'display': 'inline-block'}),

        # Right half: Energy Category vs. Simplified Genre
        html.Div(dcc.Graph(id='plot-energy-category'), style={'width': '50%', 'display': 'inline-block'}),
    ]),

    # Table at the bottom of the dashboard
    html.Div([
        html.H3("Top Hits by Most Popular Artists"),
        dash_table.DataTable(
            id='hits-table',
            columns=[
                {"name": "Track Name", "id": "track_name"},
                {"name": "Artist Name", "id": "artist_name"},
                {"name": "Simplified Genre", "id": "simplified_genre"},
                {"name": "Popularity", "id": "popularity"},
            ],
            # Colour-code the popularity column. The first rule has no filter
            # and targets every popularity cell; the next two target cells
            # with popularity >= 80 and popularity in [60, 80) respectively.
            style_data_conditional=[
                {
                    'if': {'column_id': 'popularity'},
                    'backgroundColor': 'rgba(0, 128, 0, 0.7)',
                    'color': 'white',
                },
                {
                    'if': {
                        'filter_query': '{popularity} >= 80',
                        'column_id': 'popularity'
                    },
                    'backgroundColor': 'rgba(0, 128, 0, 1)',
                    'color': 'white',
                },
                {
                    'if': {
                        'filter_query': '{popularity} < 80 && {popularity} >= 60',
                        'column_id': 'popularity'
                    },
                    'backgroundColor': 'rgba(144, 238, 144, 1)',
                    'color': 'black',
                },
            ],
            style_header={
                'backgroundColor': 'black',
                'color': 'white',
                'fontWeight': 'bold',
            },
            # Fixed height with vertical scrolling so the table does not
            # push the page length out.
            style_table={'height': '300px', 'overflowY': 'auto'},
            style_cell={
                'textAlign': 'left',
                'backgroundColor': 'black',
                'color': 'white',
            },
            # Per-column widths
            style_cell_conditional=[
                {
                    'if': {'column_id': 'track_name'},
                    'width': '150px',  # Adjust the width as needed
                    'whiteSpace': 'normal',  # Allows wrapping of text within the cell
                    'overflow': 'hidden',
                    'textOverflow': 'ellipsis',  # Add ellipsis (...) for overflowed text
                },
                {
                    'if': {'column_id': 'artist_name'},
                    'width': '120px',
                },
                {
                    'if': {'column_id': 'simplified_genre'},
                    'width': '120px',
                },
                {
                    'if': {'column_id': 'popularity'},
                    'width': '80px',
                },
            ]
        )
    ], style={'width': '100%', 'margin': 'auto'}),
])


# Step 3: Define the Callback Function
# --------------------------------------
# The callback function updates all the graphs and tables based on the selected year.

# Background and font colours applied to every figure on the dashboard.
_DARK_THEME = dict(plot_bgcolor='black', paper_bgcolor='black', font_color='white')


def _genre_category_bar(frame, category_column, title, title_font, top_n=None):
    """Build a bar chart of track counts per simplified genre, coloured by a category column.

    Args:
        frame: DataFrame holding at least 'simplified_genre' and `category_column`.
        category_column: Name of the column used to split and colour the bars.
        title: Chart title (may contain '<br>' line breaks).
        title_font: Dict of font settings applied to the title.
        top_n: When given, keep only the `top_n` largest (genre, category)
            combinations by count; when None, plot every combination.

    Returns:
        The Plotly figure with the dashboard's dark theme applied.
    """
    counts = (
        frame.groupby(['simplified_genre', category_column])
        .size()
        .reset_index(name='count')
    )
    if top_n is not None:
        counts = counts.sort_values(by='count', ascending=False).head(top_n)

    # NOTE(review): color_continuous_scale only matters if the colour column is
    # numeric; presumably the *_category columns hold labels -- confirm.
    fig = px.bar(
        counts,
        x='simplified_genre',
        y='count',
        color=category_column,
        title=title,
        color_continuous_scale=px.colors.sequential.Viridis
    )

    fig.update_layout(
        title=dict(font=title_font),
        xaxis_title="Simplified Genre",
        yaxis_title="Count",
        # Order genres by their total count, largest first.
        xaxis=dict(categoryorder='total descending', showgrid=False, color='white'),
        yaxis=dict(showgrid=False, color='white'),
        **_DARK_THEME
    )
    return fig


@app.callback(
    [Output('plot-treemap', 'figure'),
     Output('hits-table', 'data'),
     Output('plot-top-artists', 'figure'),
     Output('plot-genre-tempo', 'figure'),
     Output('plot-genre-danceability', 'figure'),
     Output('plot-valence-category', 'figure'),
     Output('plot-energy-category', 'figure')],
    [Input('year-dropdown', 'value')]
)
def update_plots(selected_year):
    """Rebuild every chart and the hits table for the year chosen in the dropdown.

    Args:
        selected_year: Value of the 'year-dropdown' component.

    Returns:
        A 7-tuple matching the callback's Output list, in order: treemap
        figure, hits-table records, top-artists figure, tempo figure,
        danceability figure, valence figure, energy figure.

    Raises:
        dash.exceptions.PreventUpdate: If no year is selected, so the
            dashboard keeps its current contents instead of drawing charts
            titled "for None".
    """
    if selected_year is None:
        raise dash.exceptions.PreventUpdate

    # Filter the data by the selected year
    filtered_df = df[df['year'] == selected_year]

    # Treemap: Simplified Genre Distribution
    genre_distribution = filtered_df['simplified_genre'].value_counts().reset_index()
    # Name the two columns explicitly: the names reset_index() produces here
    # depend on the pandas version.
    genre_distribution.columns = ['simplified_genre', 'count']

    fig_treemap = px.treemap(
        genre_distribution,
        path=['simplified_genre'],
        values='count',
        title=f"Treemap of Simplified Genres Distribution for {selected_year}",
        color='count',
        color_continuous_scale=px.colors.sequential.Viridis
    )
    fig_treemap.update_layout(
        title=dict(font=dict(color='white')),
        **_DARK_THEME
    )

    # Hits table: the 50 rows with the highest popularity for the year
    top_hits_df = (
        filtered_df[['track_name', 'artist_name', 'simplified_genre', 'popularity']]
        .sort_values(by='popularity', ascending=False)
        .head(50)
    )
    hits_table_data = top_hits_df.to_dict('records')

    # Top Artists by Popularity: per-artist means, ten highest by popularity
    top_artists_df = (
        filtered_df.groupby('artist_name')
        .agg({
            'popularity': 'mean',
            'danceability_valence_interaction': 'mean'
        })
        .reset_index()
        .sort_values(by='popularity', ascending=False)
        .head(10)
    )

    fig_top_artists = px.bar(
        top_artists_df,
        x='popularity',
        y='artist_name',
        orientation='h',  # Horizontal bar plot
        color='danceability_valence_interaction',
        title=f"Top 10 Artists by Popularity for {selected_year}",
        color_continuous_scale=px.colors.sequential.Viridis
    )
    fig_top_artists.update_layout(
        xaxis_title="Popularity",
        yaxis_title="Artist Name",
        xaxis=dict(showgrid=False, color='white'),
        # 'total ascending' puts the most popular artist at the top of a horizontal bar chart.
        yaxis=dict(showgrid=False, color='white', categoryorder='total ascending'),
        title=dict(font=dict(color='white')),
        **_DARK_THEME
    )

    # Tempo by Simplified Genre
    fig_genre_tempo = _genre_category_bar(
        filtered_df,
        'tempo_category',
        f"Tempo Distribution<br>by Simplified Genre for {selected_year}",
        dict(color='white'),
    )

    # Danceability by Simplified Genre.
    # Unlike the other category charts, only the 15 largest
    # (genre, danceability category) combinations are plotted.
    # NOTE(review): confirm this truncation is intentional.
    fig_genre_danceability = _genre_category_bar(
        filtered_df,
        'danceability_category',
        f"Danceability Distribution<br>by Simplified Genre for {selected_year}",
        dict(color='white'),
        top_n=15,
    )

    # Valence Category vs. Simplified Genre
    fig_valence_category = _genre_category_bar(
        filtered_df,
        'valence_category',
        f"Valence Category Distribution vs.<br> Simplified Genre for {selected_year}",
        dict(size=18),
    )

    # Energy Category vs. Simplified Genre
    fig_energy_category = _genre_category_bar(
        filtered_df,
        'energy_category',
        f"Energy Category Distribution by<br>Simplified Genre for {selected_year}",
        dict(size=18),
    )

    # Order must match the Output list in the decorator.
    return (
        fig_treemap,
        hits_table_data,
        fig_top_artists,
        fig_genre_tempo,
        fig_genre_danceability,
        fig_valence_category,
        fig_energy_category
    )



# Step 4: Run the App
# ---------------------
# Run the Dash app on the local development server with debug mode enabled.
# The app will be accessible at http://127.0.0.1:8050/

if __name__ == '__main__':
    # app.run is the current entry point; app.run_server is the legacy alias
    # and is no longer available in Dash 3.
    app.run(debug=True, port=8050)



## END of Notebook