# Lineup Guide: Festival Schedule Planner for Tomorrowland Belgium Weekend 2

## Project Outline
### Read Data:
- Extract the stage, host, artist, and day information
### API Integration:
- Use Spotipy to get artist popularity, top 3 songs, and genre from the Spotify API
### Dashboard:
- Create an interactive dashboard
- Allow users to filter by day, stage, and popularity
- Display info for the artists that match the selected filters

In [3]:
import os
import time

import dash_bootstrap_components as dbc
import pandas as pd
import spotipy
from dash import Dash, dcc, html, Input, Output
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyOAuth

In [4]:
# Load the Excel file and parse the data from each sheet/day.
# The context manager closes the workbook handle once every sheet is parsed.
lineup_data = {}
with pd.ExcelFile("../Data Science Projects/Tomorrowland_W2_2024.xlsx") as excel_data:
    for sheet_name in excel_data.sheet_names:
        # header=[0, 1] reads the first two rows as a two-level column header
        df = excel_data.parse(sheet_name, header=[0, 1])
        lineup_data[sheet_name] = df

In [5]:
# Function to parse the lineup and extract artist and stage host information
def parse_lineup(data):
    """Flatten per-day lineup sheets into one row per artist.

    Args:
        data: Mapping of day name -> DataFrame whose columns are a two-level
            MultiIndex (level 0 is used as the stage, level 1 as the stage
            host).

    Returns:
        DataFrame with the columns 'day', 'stage', 'stage_host' and 'artist'.
        The columns are present even when no artist was found, so callers
        can always select them.
    """
    columns = ['day', 'stage', 'stage_host', 'artist']
    artists = []
    for day, df in data.items():
        stage_names = df.columns.get_level_values(0)
        host_names = df.columns.get_level_values(1)
        for stage in df.columns.levels[0]:
            # A stage may span several columns (one per host). Handle each
            # column on its own so a blank cell in one column does not drop
            # the artist sitting next to it in another column.
            positions = [pos for pos, name in enumerate(stage_names) if name == stage]
            for pos in positions:
                stage_host = host_names[pos]
                for artist in df.iloc[:, pos].dropna():
                    if artist:
                        artists.append({
                            'day': day,
                            'stage': stage,
                            'stage_host': stage_host,
                            'artist': artist
                        })

    return pd.DataFrame(artists, columns=columns)

# Flatten every parsed sheet into a single DataFrame of artist rows
lineup_df = parse_lineup(lineup_data)

In [6]:
# Read the Spotify app credentials from the environment (populated from .env)
load_dotenv()
client_id = os.environ.get('CLIENT_ID')
client_secret = os.environ.get('CLIENT_SECRET')
redirect_uri = os.environ.get('REDIRECT_URI')

# Spotify client authenticated through SpotifyOAuth with those credentials
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
        scope="user-library-read",
    )
)

In [7]:
# Function to extract artist popularity, genre, and top 3 tracks from Spotify API
def get_spotify_data(artist_name, max_retries=5):
    """Look up an artist on Spotify and return popularity, genres and top tracks.

    Args:
        artist_name: Name to search for; the first search hit is used.
        max_retries: How many times to retry after an HTTP 429 (rate limit)
            response before giving up.

    Returns:
        dict with the keys 'popularity', 'genres' and 'top_tracks' (at most
        three track names). When the search has no hits, a non-429 API error
        occurs, or the retries are exhausted, 'popularity' is None and both
        lists are empty.
    """
    # A loop (rather than recursion) keeps persistent rate limiting from
    # growing the call stack without bound.
    for attempt in range(max_retries + 1):
        try:
            results = sp.search(q=artist_name, type='artist')
            if results['artists']['items']:
                artist = results['artists']['items'][0]
                top_tracks = sp.artist_top_tracks(artist['id'], country='US')['tracks']
                return {
                    'popularity': artist['popularity'],
                    'genres': artist['genres'],
                    'top_tracks': [track['name'] for track in top_tracks[:3]]  # Limit to top 3 tracks
                }
            break  # no search hit: fall through to the placeholder
        except spotipy.exceptions.SpotifyException as e:
            if e.http_status != 429:
                print(f"Spotify API error: {e}")
                break
            if attempt == max_retries:
                print(f"Rate limited. Giving up on {artist_name!r} after {max_retries} retries.")
                break
            # Wait for the interval the API asked for, then try again
            retry_after = int(e.headers.get('Retry-After', 1))
            print(f"Rate limited. Retrying after {retry_after} seconds.")
            time.sleep(retry_after)
    return {
        'popularity': None,
        'genres': [],
        'top_tracks': []
    }

def enrich_artist_data(artists_df):
    """Add Spotify popularity, genres and top tracks to a lineup DataFrame.

    Args:
        artists_df: DataFrame with an 'artist' column. It is modified in
            place: the columns 'popularity', 'genres' and 'top_tracks' are
            added (or overwritten).

    Returns:
        The same DataFrame object, for convenience. 'genres' and
        'top_tracks' hold comma-separated strings.
    """
    # Query each distinct artist only once, even when they appear on several rows
    cache = {}
    rows = []
    for artist in artists_df['artist']:
        if artist not in cache:
            cache[artist] = get_spotify_data(artist)
        rows.append(cache[artist])

    # Assign whole columns aligned on the frame's own index, so the result is
    # correct whatever the index labels are (not only 0..n-1). dtype=object
    # keeps missing popularity as None instead of coercing the column to float.
    index = artists_df.index
    artists_df['popularity'] = pd.Series(
        [row['popularity'] for row in rows], index=index, dtype=object
    )
    artists_df['genres'] = pd.Series(
        [', '.join(row['genres']) for row in rows], index=index, dtype=object
    )
    artists_df['top_tracks'] = pd.Series(
        [', '.join(row['top_tracks']) for row in rows], index=index, dtype=object
    )
    return artists_df

In [8]:
# Run the enrichment process (Spotify lookups for the lineup). This will take a while ~5mins.
# enrich_artist_data adds its columns to lineup_df in place and returns that same DataFrame.
enriched_lineup_df = enrich_artist_data(lineup_df)

In [9]:
# Write the enriched lineup to enriched_lineup.csv, leaving out the DataFrame index
enriched_lineup_df.to_csv('enriched_lineup.csv', index=False)

In [10]:
# Create Dash app with Bootstrap theme
app = Dash(__name__, external_stylesheets=[dbc.themes.QUARTZ])


def _column_options(column):
    """Return dropdown options (label equals value) for each distinct value of a lineup column."""
    return [
        {'label': value, 'value': value}
        for value in enriched_lineup_df[column].unique()
    ]


# Page heading, spanning the full width
_title_row = dbc.Row([
    dbc.Col(
        html.H1("Festival Lineup Optimizer", className='text-center text-primary mb-4'),
        width=12,
    )
])

# Filter controls (day, stage, popularity) stacked inside one card
_filter_card = dbc.Card([
    dbc.CardBody([
        html.H4("Filter by Day", className="card-title"),
        dcc.Dropdown(
            id='day-dropdown',
            options=_column_options('day'),
            multi=True,
            placeholder="Select days",
            className='mb-4',
        ),
        html.H4("Filter by Stage", className="card-title"),
        dcc.Dropdown(
            id='stage-dropdown',
            options=_column_options('stage'),
            multi=True,
            placeholder="Select stages",
            className='mb-4',
        ),
        html.H4("Filter by Popularity", className="card-title"),
        dcc.Slider(
            id='popularity-slider',
            min=0,
            max=100,
            step=1,
            marks={tick: str(tick) for tick in range(0, 101, 10)},
            value=0,
            className='mb-4',
        ),
    ])
])

# Layout of the app: heading on top, filters on the left, results on the right
app.layout = dbc.Container(
    [
        _title_row,
        dbc.Row([
            dbc.Col([_filter_card], width=4),
            dbc.Col([html.Div(id='artist-output')], width=8),
        ]),
    ],
    fluid=True,
    className="p-4",
)

# Callback to update artist list based on filters
@app.callback(
    Output('artist-output', 'children'),
    [
        Input('day-dropdown', 'value'),
        Input('stage-dropdown', 'value'),
        Input('popularity-slider', 'value')
    ]
)
def update_artists(days, stages, popularity_threshold):
    """Build the artist list shown for the current filter selection.

    Args:
        days: Selected day values; an empty or missing selection means no
            day filter.
        stages: Selected stage values; an empty or missing selection means
            no stage filter.
        popularity_threshold: Minimum popularity. At 0 (the slider minimum)
            or None no popularity filter is applied, so artists whose
            popularity is missing stay visible.

    Returns:
        A dbc.ListGroup with one item per matching lineup row.
    """
    filtered_df = enriched_lineup_df
    if days:
        filtered_df = filtered_df[filtered_df['day'].isin(days)]
    if stages:
        filtered_df = filtered_df[filtered_df['stage'].isin(stages)]
    if popularity_threshold:
        # Missing popularity becomes NaN, which never satisfies >=, so such
        # artists are hidden only once a real threshold is set.
        popularity = pd.to_numeric(filtered_df['popularity'], errors='coerce')
        filtered_df = filtered_df[popularity >= popularity_threshold]

    return dbc.ListGroup([
        dbc.ListGroupItem(f"Artist: {row['artist']} - Popularity: {row['popularity']} - Genres: {row['genres']} - Top Tracks: {row['top_tracks']}")
        for _, row in filtered_df.iterrows()
    ])

if __name__ == '__main__':
    # Prefer `app.run` where this Dash version provides it and fall back to
    # the older `run_server` name otherwise.
    # NOTE(review): `run_server` is believed to be removed in Dash 3; confirm
    # against the installed Dash version.
    run = app.run if hasattr(app, 'run') else app.run_server
    run(debug=True, use_reloader=False)