![banner](./imgs/banner.png)

In [256]:
from dotenv import load_dotenv
import requests
import pandas as pd
import numpy as np
from datetime import date
import os
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

# Load variables from a .env file (python-dotenv) so the os.environ lookups
# used for the Spotify credentials can find them.
load_dotenv()

# None removes the cap on how many DataFrame columns pandas will display.
pd.options.display.max_columns = None

In [58]:
# Request an app-level access token (grant_type=client_credentials).
url = 'https://accounts.spotify.com/api/token'
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
payload = {
    'grant_type': 'client_credentials',
    'client_id': os.environ["client_id"],
    'client_secret': os.environ["client_secret"],
}

# The credentials travel in the form-encoded body (data=), which is what the
# declared Content-Type describes; params= would place the client secret in
# the URL query string instead.
r = requests.post(url, headers=headers, data=payload, timeout=30)

# Stop here on a rejected request rather than carrying a missing token into
# the next call as "Bearer None".
r.raise_for_status()
access_token = r.json()['access_token']

In [59]:
# Fetch the playlist's tracks. Only a single page of up to 50 items is
# requested; nothing here follows further pages.
url_busca = 'https://api.spotify.com/v1/playlists/5oKUZ4iFD04mtcuA8LTgma/tracks'
headers = {'Authorization': f'Bearer {access_token}'}
payload = {'limit': '50'}

response = requests.get(url_busca, headers=headers, params=payload, timeout=30)

# Surface HTTP errors (expired token, rate limit, ...) at the request itself
# instead of as a KeyError on 'items' when the payload is parsed later.
response.raise_for_status()
api_response = response.json()

In [216]:
# Accumulator for the flattened per-track records built from the API response.
tracks_data = list()

In [217]:
# Flatten every playlist entry into one row of track / album / artist fields.
# The accumulator is rebuilt here so that re-running this cell cannot append
# the same tracks a second time.
tracks_data = []

for item in api_response['items']:
    # Skip entries whose 'track' value is missing or null; there is nothing to
    # flatten and calling .get on None would raise.
    track = item.get('track')
    if not track:
        continue

    # `or` fallbacks also cover keys that are present but explicitly null.
    album = track.get('album') or {}
    artists = track.get('artists') or []
    first_artist = artists[0] if artists else {}
    artist_names = [artist.get('name') for artist in artists if artist.get('name')]

    tracks_data.append({
        'track_id': track.get('id'),
        'track_name': track.get('name'),
        'track_popularity': track.get('popularity'),
        'track_duration_ms': track.get('duration_ms'),
        'track_explicit': track.get('explicit'),
        'track_spotify_url': (track.get('external_urls') or {}).get('spotify'),
        'album_id': album.get('id'),
        'album_name': album.get('name'),
        'album_type': album.get('album_type'),
        'album_release_date': album.get('release_date'),
        'album_total_tracks': album.get('total_tracks'),
        # The first listed artist is used as the track's primary artist.
        'artist_id': first_artist.get('id'),
        'artist_name': first_artist.get('name'),
        'total_available_markets': len(track.get('available_markets') or []),
        # Comma-separated names of all credited artists; None when there are none.
        'all_artists': ', '.join(artist_names) if artists else None,
    })

df = pd.DataFrame(tracks_data)


In [255]:
# Preview the first three rows of the flattened track table.
df.head(n=3)

Unnamed: 0,track_id,track_name,track_popularity,track_duration_ms,track_explicit,track_spotify_url,album_id,album_name,album_type,album_release_date,album_total_tracks,artist_id,artist_name,total_available_markets,all_artists,track_duration_min,period_launch
0,7yRlmQTjZ5pdtlq7ska9mj,Artista Genérico,84,150861,Explicits,https://open.spotify.com/track/7yRlmQTjZ5pdtlq...,21FP96bn6KdYTSe5zCbhZT,Eu Venci o Mundo,album,2025-06-26,16,4YqwRbMLqGHRHLS1w2ZKse,Veigh,185,"Veigh, Supernova Ent",2.51,16
1,5Nj2UVsxXAvhfYNpcre3al,FAMOSINHA,87,132922,Explicits,https://open.spotify.com/track/5Nj2UVsxXAvhfYN...,1hOcmOgX024gR2IkB7Xg26,FAMOSINHA,single,2025-02-28,1,4Kv35Xnw1ODjO2nIHLhdBY,Dj Caio Vieira,185,"Dj Caio Vieira, MC Meno K, Mc Rodrigo do CN",2.22,134
2,7EknynLJTt9YMm1HL37s5D,P do Pecado - Ao Vivo,86,192367,No-Explicits,https://open.spotify.com/track/7EknynLJTt9YMm1...,2JyYavPMFsaTRZFrc77bc3,P do Pecado (Ao Vivo),single,2025-04-29,1,6vTqEFbTtTRJsuIpzZgjxi,Grupo Menos É Mais,183,"Grupo Menos É Mais, Simone Mendes",3.21,74


In [219]:
# Summary statistics for the numeric columns. The minutes column is derived
# on a temporary copy (df itself is not modified) so the summary includes
# track_duration_min no matter which cells have already been run.
df.assign(track_duration_min=(df['track_duration_ms'] / 60000).round(2)).describe()

Unnamed: 0,track_popularity,track_duration_ms,album_total_tracks,total_available_markets,track_duration_min
count,50.0,50.0,50.0,50.0,50.0
mean,80.4,171790.02,8.6,183.8,2.8628
std,5.198116,50680.013476,7.431318,2.996597,0.843997
min,68.0,103727.0,1.0,164.0,1.73
25%,78.0,146867.75,1.0,183.0,2.45
50%,80.5,162580.0,9.0,185.0,2.71
75%,83.75,183630.75,15.75,185.0,3.0575
max,95.0,459692.0,28.0,185.0,7.66


In [218]:
# Track length converted from milliseconds to minutes, rounded to 2 decimals.
df['track_duration_min'] = df['track_duration_ms'].div(60000).round(2)

In [220]:
# One row of four indicator tiles summarising track duration in minutes.
duration_indicators = make_subplots(
    rows=1,
    cols=4,
    specs=[[{"type": "indicator"} for _ in range(4)]],
)

duration_minutes = df['track_duration_min']
duration_summary = (
    ('Minimum', duration_minutes.min()),
    ('Mean', duration_minutes.mean()),
    ('Median', duration_minutes.median()),
    ('Maximum', duration_minutes.max()),
)

# Tiles are placed left to right in the order listed above.
for tile_col, (tile_title, tile_value) in enumerate(duration_summary, start=1):
    duration_indicators.add_trace(
        go.Indicator(
            value=tile_value,
            number={'suffix': 'min', 'valueformat': '.2f'},
            title=tile_title,
        ),
        row=1,
        col=tile_col,
    )

duration_layout = {
    'title_text': "<b>Track Duration</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'height': 250,
    'margin': {'t': 130, 'b': 30},
}
duration_indicators.update_layout(**duration_layout)

duration_indicators.show()

In [221]:
# Horizontal box plot of track durations (minutes).
duration_box = px.box(data_frame=df, x='track_duration_min')

duration_box_layout = {
    'title_text': "<b>Distribution: Track Duration</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'xaxis_title': "Track Duration In Minutes",
    'margin': {'t': 130, 'b': 80},
}
duration_box.update_layout(**duration_box_layout)

duration_box.show()

In [222]:
# One row of four indicator tiles summarising the track popularity score.
popularity_indicators = make_subplots(
    rows=1,
    cols=4,
    specs=[[{"type": "indicator"} for _ in range(4)]],
)

popularity_scores = df['track_popularity']
popularity_summary = (
    ("Minimum", popularity_scores.min()),
    ("Mean", popularity_scores.mean()),
    ("Median", popularity_scores.median()),
    ("Maximum", popularity_scores.max()),
)

# Tiles are placed left to right in the order listed above.
for tile_col, (tile_title, tile_value) in enumerate(popularity_summary, start=1):
    popularity_indicators.add_trace(
        go.Indicator(value=tile_value, title=tile_title),
        row=1,
        col=tile_col,
    )

popularity_layout = {
    'title_text': "<b>Track Popularity</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'height': 250,
    'margin': {'t': 130, 'b': 30},
}
popularity_indicators.update_layout(**popularity_layout)

popularity_indicators.show()

In [223]:
# Violin plot showing how the popularity scores are distributed.
popularity_violin = px.violin(data_frame=df, x='track_popularity')

popularity_violin_layout = {
    'title_text': "<b>Distribution: Track Popularity</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'xaxis_title': "Track Popularity Score (0-100)",
    'margin': {'t': 130, 'b': 80},
}
popularity_violin.update_layout(**popularity_violin_layout)

popularity_violin.show()

In [224]:
# Duration (x) against popularity (y); marker size follows duration and
# marker colour follows popularity.
correlation_scatter = px.scatter(
    df,
    x="track_duration_min",
    y="track_popularity",
    size='track_duration_min',
    color='track_popularity',
)

duration_scatter_layout = {
    'title_text': "<b>Track Duration vs Track Popularity</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'xaxis_title': "Track Duration In Minutes",
    'yaxis_title': "Track Popularity Score (0-100)",
    # Blank title on the colour bar.
    'coloraxis_colorbar': {'title': ""},
    'margin': {'t': 130, 'b': 80},
}
correlation_scatter.update_layout(**duration_scatter_layout)

correlation_scatter.show()

In [280]:
# Simple linear regression: popularity explained by duration in minutes.
# The model is fitted and scored on the same rows (no train/test split).
y = df['track_popularity']
X = df[['track_duration_min']]

# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)

# Pearson correlation between the single feature column and the target,
# plus R² of the fitted line on the same data.
correlation = X.iloc[:, 0].corr(y)
r2 = r2_score(y, y_pred)

In [281]:
# Four number-only tiles reporting the duration/popularity regression.
regression_indicators = make_subplots(
    rows=1,
    cols=4,
    specs=[[{"type": "indicator"} for _ in range(4)]],
)

# (title, value, number format) for each tile, left to right.
regression_tiles = (
    ("R²", r2, '.3f'),
    ("Correlation", correlation, '.3f'),
    ("Coefficient", model.coef_[0], '.3f'),
    ("Intercept", model.intercept_, '.1f'),
)

for tile_col, (tile_title, tile_value, tile_format) in enumerate(regression_tiles, start=1):
    regression_indicators.add_trace(
        go.Indicator(
            value=tile_value,
            title=tile_title,
            mode="number",
            number={'valueformat': tile_format},
        ),
        row=1,
        col=tile_col,
    )

regression_layout = {
    'title_text': "<b>Track Duration vs Popularity Analysis</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'height': 250,
    'margin': {'t': 130, 'b': 30},
}
regression_indicators.update_layout(**regression_layout)

regression_indicators.show()

In [291]:
# Observed points (sized by duration, coloured by popularity) with the fitted
# regression line drawn over them in red.
duration_feature = X.iloc[:, 0]

regression_scatter = px.scatter(
    df,
    x=duration_feature,
    y=y,
    size=duration_feature,
    color=y,
)

regression_scatter.add_scatter(
    x=duration_feature,
    y=y_pred,
    mode='lines',
    name='Linear Regression',
    line={'color': 'red', 'width': 3},
)

regression_scatter_layout = {
    'title_text': "<b>Duration vs Popularity - Linear Regression</b>",
    'title_font_size': 40,
    'title_x': 0.5,
    'xaxis_title': "Track Duration In Minutes",
    'yaxis_title': "Track Popularity Score (0-100)",
    'coloraxis_colorbar': {'title': ""},
    'showlegend': False,
    'margin': {'t': 130, 'b': 80},
}
regression_scatter.update_layout(**regression_scatter_layout)

regression_scatter.show()

In [225]:
def _complete_release_date(value):
    """Pad a 'YYYY' or 'YYYY-MM' string to 'YYYY-MM-DD'; return anything else unchanged."""
    if isinstance(value, str) and len(value) in (4, 7):
        # '-01-01'[0:] == '-01-01' for a bare year, '-01-01'[3:] == '-01' for year-month.
        return value + '-01-01'[len(value) - 4:]
    return value


# Spotify can report release_date with only year or year-month precision.
# Mixing those with full dates can make a single to_datetime call fail, so
# partial dates are first completed to the 1st of the month / 1st of January.
# Values that are already timestamps (cell re-run) pass through unchanged.
df['album_release_date'] = pd.to_datetime(
    df['album_release_date'].map(_complete_release_date)
)

# Whole days elapsed between the album release date and today's date.
df['period_launch'] = (pd.to_datetime(date.today()) - df['album_release_date']).dt.days

In [226]:
# One row of four indicator tiles summarising days since release.
release_indicators = make_subplots(
    rows=1,
    cols=4,
    specs=[[{"type": "indicator"} for _ in range(4)]],
)

days_since_release = df['period_launch']
release_summary = (
    ("Minimum", days_since_release.min()),
    ("Mean", days_since_release.mean()),
    ("Median", days_since_release.median()),
    ("Maximum", days_since_release.max()),
)

# Tiles are placed left to right in the order listed above.
for tile_col, (tile_title, tile_value) in enumerate(release_summary, start=1):
    release_indicators.add_trace(
        go.Indicator(value=tile_value, title=tile_title),
        row=1,
        col=tile_col,
    )

release_layout = {
    'title_text': "<b>Track Launch Days</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'height': 250,
    'margin': {'t': 130, 'b': 30},
}
release_indicators.update_layout(**release_layout)

release_indicators.show()

In [227]:
# Horizontal box plot of the number of days since each track's release.
release_box = px.box(data_frame=df, x='period_launch')

release_box_layout = {
    'title_text': "<b>Distribution: Track Launch</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'xaxis_title': "Days Since Launch Date",
    'margin': {'t': 130, 'b': 80},
}
release_box.update_layout(**release_box_layout)

release_box.show()

In [228]:
# Days since release (x) against popularity (y), coloured by popularity.
# Note: this rebinds the name correlation_scatter to a new figure.
correlation_scatter = px.scatter(
    df,
    x="period_launch",
    y="track_popularity",
    color='track_popularity',
)

launch_scatter_layout = {
    'title_text': "<b>Track Launch Days vs Track Popularity</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'xaxis_title': "Days Since Track Launch",
    'yaxis_title': "Track Popularity Score (0-100)",
    # Blank title on the colour bar.
    'coloraxis_colorbar': {'title': ""},
    'margin': {'t': 130, 'b': 80},
}
correlation_scatter.update_layout(**launch_scatter_layout)

correlation_scatter.show()

In [293]:
# Simple linear regression: popularity explained by days since release.
# The model is fitted and scored on the same rows (no train/test split).
y2 = df['track_popularity']
X2 = df[['period_launch']]

# fit() returns the estimator itself, so construction and fitting are chained.
model2 = LinearRegression().fit(X2, y2)
y_pred2 = model2.predict(X2)

# Pearson correlation between the single feature column and the target,
# plus R² of the fitted line on the same data.
correlation2 = X2.iloc[:, 0].corr(y2)
r2_2 = r2_score(y2, y_pred2)

In [294]:
# Four number-only tiles reporting the launch-days/popularity regression.
regression_indicators2 = make_subplots(
    rows=1,
    cols=4,
    specs=[[{"type": "indicator"} for _ in range(4)]],
)

# (title, value, number format) for each tile, left to right.
regression_tiles2 = (
    ("R²", r2_2, '.3f'),
    ("Correlation", correlation2, '.3f'),
    ("Coefficient", model2.coef_[0], '.3f'),
    ("Intercept", model2.intercept_, '.1f'),
)

for tile_col, (tile_title, tile_value, tile_format) in enumerate(regression_tiles2, start=1):
    regression_indicators2.add_trace(
        go.Indicator(
            value=tile_value,
            title=tile_title,
            mode="number",
            number={'valueformat': tile_format},
        ),
        row=1,
        col=tile_col,
    )

regression_layout2 = {
    'title_text': "<b>Track Launch vs Popularity Analysis</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'height': 250,
    'margin': {'t': 130, 'b': 30},
}
regression_indicators2.update_layout(**regression_layout2)

regression_indicators2.show()

In [299]:
# Observed points (coloured by popularity) with the fitted regression line
# drawn over them in red.
launch_feature = X2.iloc[:, 0]

regression2_scatter = px.scatter(df, x=launch_feature, y=y2, color=y2)

regression2_scatter.add_scatter(
    x=launch_feature,
    y=y_pred2,
    mode='lines',
    name='Linear Regression',
    line={'color': 'red', 'width': 3},
)

regression2_scatter_layout = {
    'title_text': "<b>Launch vs Popularity - Linear Regression</b>",
    'title_font_size': 40,
    'title_x': 0.5,
    'xaxis_title': "Days Since Track Launch",
    'yaxis_title': "Track Popularity Score (0-100)",
    'coloraxis_colorbar': {'title': ""},
    'showlegend': False,
    'margin': {'t': 130, 'b': 80},
}
regression2_scatter.update_layout(**regression2_scatter_layout)

regression2_scatter.show()

In [230]:
# Count tracks per explicit / non-explicit label. The raw boolean flags are
# translated to their display labels here, and values that are already labels
# are left as they are, so the counts carry the same index whether or not the
# column has been relabelled in df yet.
tracks_explicit = (
    df['track_explicit']
    .map(lambda flag: {True: 'Explicits', False: 'No-Explicits'}.get(flag, flag))
    .value_counts()
)

In [229]:
# Replace the boolean explicit flag with a display label. Values that are not
# True/False (for example labels produced by an earlier run of this cell) are
# kept as they are, so running the cell again does not turn the column into NaN.
explicit_labels = {
    True: 'Explicits',
    False: 'No-Explicits',
}
df['track_explicit'] = df['track_explicit'].map(
    lambda flag: explicit_labels.get(flag, flag)
)

In [237]:
# Donut chart of the share of tracks per explicit label.
explicit_pie = px.pie(
    names=tracks_explicit.index,
    values=tracks_explicit.values,
    hole=0.4,
    color_discrete_sequence=['#accdf5', '#3e8ff1'],
)

# Label and percentage are drawn inside each slice; the hover box shows the
# label, the raw count and the percentage.
explicit_pie.update_traces(
    hovertemplate='<b>%{label}</b><br>Quantity: %{value}<br>Percent: %{percent}<extra></extra>',
    textinfo='percent+label',
    textposition='inside',
)

explicit_pie_layout = {
    'showlegend': False,
    'title_text': '<b>Explicits vs No Explicits</b>',
    'title_font_size': 50,
    'title_x': 0.5,
    'margin': {'t': 130, 'b': 40},
}
explicit_pie.update_layout(**explicit_pie_layout)
explicit_pie.show()

In [242]:
# The five primary artists with the most rows in df.
artist_track_counts = df['artist_name'].value_counts()
top_5_artists = artist_track_counts.head(5).index.tolist()

# Keep only those artists' tracks, then count rows per (artist, track) pair.
df_top5 = df.loc[df['artist_name'].isin(top_5_artists)]
sunburst_data = (
    df_top5
    .groupby(['artist_name', 'track_name'])
    .size()
    .reset_index(name='count')
)

In [253]:
# Artist -> track hierarchy for the top artists. Sectors are sized by the
# 'count' column of sunburst_data, so a track that occurs more than once
# contributes its full count instead of being drawn as a single occurrence.
sunburst = px.sunburst(
    sunburst_data,
    path=['artist_name', 'track_name'],
    values='count',
)

sunburst.update_layout(
    height=650,
    title_text='<b>Top 5 Artist With More Tracks</b>',
    title_font_size=50,
    title_x=0.5,
    margin=dict(t=130, b=40)
)
sunburst.show()

In [257]:
# Number of credited artists, taken as (commas in all_artists) + 1.
# NOTE(review): a name that itself contains a comma would be counted as more
# than one artist, and a missing all_artists value yields NaN — confirm that
# neither occurs in this playlist.
df['num_artists'] = df['all_artists'].str.count(',').add(1)

In [301]:
# Simple linear regression: popularity explained by number of credited artists.
# The model is fitted and scored on the same rows (no train/test split).
y3 = df['track_popularity']
X3 = df[['num_artists']]

# fit() returns the estimator itself, so construction and fitting are chained.
model3 = LinearRegression().fit(X3, y3)
y_pred3 = model3.predict(X3)

# Pearson correlation between the single feature column and the target,
# plus R² of the fitted line on the same data.
correlation3 = X3.iloc[:, 0].corr(y3)
r2_3 = r2_score(y3, y_pred3)

In [304]:
# Four number-only tiles reporting the artist-count/popularity regression.
regression_indicators3 = make_subplots(
    rows=1,
    cols=4,
    specs=[[{"type": "indicator"} for _ in range(4)]],
)

# (title, value, number format) for each tile, left to right.
regression_tiles3 = (
    ("R²", r2_3, '.3f'),
    ("Correlation", correlation3, '.3f'),
    ("Coefficient", model3.coef_[0], '.3f'),
    ("Intercept", model3.intercept_, '.1f'),
)

for tile_col, (tile_title, tile_value, tile_format) in enumerate(regression_tiles3, start=1):
    regression_indicators3.add_trace(
        go.Indicator(
            value=tile_value,
            title=tile_title,
            mode="number",
            number={'valueformat': tile_format},
        ),
        row=1,
        col=tile_col,
    )

regression_layout3 = {
    'title_text': "<b>Qty of Artists vs Popularity Analysis</b>",
    'title_font_size': 50,
    'title_x': 0.5,
    'height': 250,
    'margin': {'t': 130, 'b': 30},
}
regression_indicators3.update_layout(**regression_layout3)

regression_indicators3.show()

In [306]:
# Observed points (coloured by popularity) with the fitted regression line
# drawn over them in red.
artist_count_feature = X3.iloc[:, 0]

regression3_scatter = px.scatter(df, x=artist_count_feature, y=y3, color=y3)

regression3_scatter.add_scatter(
    x=artist_count_feature,
    y=y_pred3,
    mode='lines',
    name='Linear Regression',
    line={'color': 'red', 'width': 3},
)

regression3_scatter_layout = {
    'title_text': "<b>Qty of Artists vs Popularity - Linear Regression</b>",
    'title_font_size': 40,
    'title_x': 0.5,
    'xaxis_title': "Quantity of Artist",
    'yaxis_title': "Track Popularity Score (0-100)",
    'coloraxis_colorbar': {'title': ""},
    'showlegend': False,
    'margin': {'t': 130, 'b': 80},
}
regression3_scatter.update_layout(**regression3_scatter_layout)

regression3_scatter.show()