## Spotify analysis on Italy

In [4]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import re, glob
import os, sys
from scipy import stats

import spotipy      
from spotipy.oauth2 import SpotifyClientCredentials

# Import Keys from the Dashboard client
from config import ckey, skey


In [5]:
# Setting up Spotify API info
cid = ckey
secret = skey

client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Instructions
* TIP: This next section assumes that you have already downloaded the csv files with the Top200 charts for the country you are working on:

* Create a subfolder in the folder this notebook is located in called "input_files". Add the owid-covid-data.csv file there, you'll need that later. Then make another subfolder inside input_files called "spotify_top200_charts". Save the csv files you download there.
* Go to https://spotifycharts.com
* Choose the country you want to work on.
* Download Weekly Top200 charts for 2019 and 2020, 1 chart per month. We agreed as a group to download the chart from last week of each month, to keep things consistent. Save them in the "spotify_top200_charts" folder you set up.

In [6]:
# Create a Dataframe from weekly chart data

path= r"input_files/spotify_top200_charts/*.csv"

# Set up main dataframe with data from FIRST file in each folder
country_tracks_df = pd.read_csv(glob.glob(path)[0], header=1) 

# This line extracts the year and month from the file name, 
# and sets up new columns called 'Year' and 'Moth' with that info
string = str(glob.glob(path)[0])
year_month =  re.findall(r"ly-(\d\d\d\d)-(\d\d)-\d\d", string)                                         
country_tracks_df[['Year']] = year_month[0][0]
country_tracks_df[['Month']] = year_month[0][1]

for file in glob.glob(path)[1:]:
        temp_df = pd.read_csv(file, header=1)
        string = str(file) 
        year_month = re.findall(r"ly-(\d\d\d\d)-(\d\d)-\d\d", string)
        
        # Print (year_month)
        temp_df[['Year']] = year_month[0][0]
        temp_df[['Month']] = year_month[0][1]
        country_tracks_df = pd.concat([country_tracks_df,temp_df])
             
country_tracks_df = country_tracks_df.sort_values(['Year','Month'])                                  
country_tracks_df       


Unnamed: 0,Position,Track Name,Artist,Streams,URL,Year,Month
0,1,IL CIELO NELLA STANZA (feat. NSTASIA),Salmo,2885889,https://open.spotify.com/track/2fkVZYFv9hOejIs...,2018,12
1,2,Happy Birthday,Sfera Ebbasta,2182720,https://open.spotify.com/track/7Cw97917dvg5xm6...,2018,12
2,3,Torna a casa,Måneskin,1857579,https://open.spotify.com/track/3590AAEoqH50z4U...,2018,12
3,4,La fine del mondo,Anastasio,1836968,https://open.spotify.com/track/3FhWEh7VTAOYK4e...,2018,12
4,5,All I Want for Christmas Is You,Mariah Carey,1599867,https://open.spotify.com/track/0bYg9bo50gSsH3L...,2018,12
...,...,...,...,...,...,...,...
195,196,È sempre bello,Coez,215634,https://open.spotify.com/track/6IdiuMw1FSAvU3e...,2021,01
196,197,BABY (feat. Rosa Chemical),BLOODY VINYL,215551,https://open.spotify.com/track/3jtMheZu2P0tbWz...,2021,01
197,198,Lacri-ma,Gazzelle,215159,https://open.spotify.com/track/0N6QZADGv1zv3Jb...,2021,01
198,199,Cyborg (feat. Geolier),Guè Pequeno,214811,https://open.spotify.com/track/54wRUYPMEHLttFd...,2021,01


'2019'

Unnamed: 0,Position,Track Name,Artist,Streams,URL
0,1,È sempre bello,Coez,3438470,https://open.spotify.com/track/18LXrvm337xqvFx...
1,2,IL CIELO NELLA STANZA (feat. NSTASIA),Salmo,2571770,https://open.spotify.com/track/2fkVZYFv9hOejIs...
2,3,Calma - Remix,Pedro Capó,2051691,https://open.spotify.com/track/5iwz1NiezX7WWjn...
3,4,Happy Birthday,Sfera Ebbasta,1680645,https://open.spotify.com/track/7Cw97917dvg5xm6...
4,5,Holding out for You (feat. Zara Larsson),Fedez,1584324,https://open.spotify.com/track/4EZhkTdGDdlLoHI...
...,...,...,...,...,...
195,196,Imposible,Luis Fonsi,235055,https://open.spotify.com/track/3ao3OVxHlb3C08v...
196,197,Sister (Pastiglie),Chadia Rodriguez,234699,https://open.spotify.com/track/3PJFpY6SkQ0juJA...
197,198,God's Plan,Drake,233303,https://open.spotify.com/track/6DCZcSspjsKoFjz...
198,199,Centro (feat. Coez),MadMan,233119,https://open.spotify.com/track/6DGl9TtxYTpMFrm...


In [9]:
print(glob.glob(path)[0])


input_files/spotify_top200_charts/regional-it-weekly-2019-01-11--2019-01-18.csv


In [10]:
# Get Track ID's

# Set up list of tacks to iterate through 
track_names = country_tracks_df['Track Name'].to_list()   
# Empty list to record track IDs into                
track_ids = []                                                    
 # Heads up: with 4800 tracks to process, this takes a while 
for track in track_names:                                        
    song_results = sp.search(q=track, type='track', limit=1)      
    try:
        # Prevents program from blowing up - few tracks
        track_ids.append(song_results['tracks']['items'][0]['id']) 
        # Just to let you know it's working lack track ids
        print (f"{track} song ID : {song_results['tracks']['items'][0]['id']}")   
    except IndexError:                                            
        track_ids.append(np.nan)

t. Dermot Kennedy) song ID : 6ft4hAq6yde8jPZY2i5zLr
Whoopty song ID : 5vGLcdRuSbUhD8ScwsGSdA
VENT'ANNI song ID : 34r4voojt51Kk6ax5FJI1M
Cuore Nero - prod. Frenetik&Orang3 song ID : 0ROJX4yr64AlknCaX71JDQ
Chico (feat. Rose Villain & Luchè) song ID : 3Xhl7OqWAFgVxQKrWhyVVO
A Un Passo Dalla Luna song ID : 3YNcQUeVOpM3SDmwBeGfMK
MIA song ID : 116H0KvKr2Zl4RPuVBruDO
The Business song ID : 6f3Slt0GbA2bPZlz0aIFXN
Estate song ID : 18NxSIKcBgidhFiY37s9tC
M' Manc (con Geolier & Sfera Ebbasta) song ID : 0MLu1cFYL4ikdRTdasCQT9
$€ Freestyle song ID : 5BbdKBZO0TH0GhfxUfyhL9
guccy bag song ID : 755xh1CnKw8zg2m3Uv4TEX
Blue Jeans (feat. Calcutta) song ID : 3teW1cB7e8HMmtHUeWF7Je
WILDPIRATA (feat. Tedua) – prod. Garelli & Chryverde song ID : 6iFxX0SdHKxUhiyLW9PtCy
Hypnotized song ID : 3LN41NUdHkyNqQhi9gExMm
Crepe song ID : 6L671UzJPyAzbHEhaNhDUJ
Hollywood (feat. Diplo) song ID : 0WXU7I74NEN1DMLIT406Wx
Take You Dancing song ID : 59qrUpoplZxbIZxk6X0Bm3
Therefore I Am song ID : 5YopkHj2UkfCdUNA5oPsI7
Barce

## Add an "output_files" folder to export 

In [16]:
# Add Track IDs to dataframe
# Add new column with track IDs
country_tracks_df['Track ID'] = track_ids                  

In [17]:
# Drop empty songs and export dataframe to csv to back it up

# Use .dropna() to remove rows with missing data
clean_country_tracks_df = country_tracks_df.dropna(how='any')    
# Back up to .csv       
clean_country_tracks_df.to_csv("output_files/1_tracks_with_track_ids.csv", index = False)  

In [14]:
clean_country_tracks_df

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Year,Month,Track ID
0,1,IL CIELO NELLA STANZA (feat. NSTASIA),Salmo,2885889,https://open.spotify.com/track/2fkVZYFv9hOejIs...,2018,12,2fkVZYFv9hOejIsLzZy8ad
1,2,Happy Birthday,Sfera Ebbasta,2182720,https://open.spotify.com/track/7Cw97917dvg5xm6...,2018,12,2bnTgPFD75vbq1AhxLJfMu
2,3,Torna a casa,Måneskin,1857579,https://open.spotify.com/track/3590AAEoqH50z4U...,2018,12,3590AAEoqH50z4UmhMIY85
3,4,La fine del mondo,Anastasio,1836968,https://open.spotify.com/track/3FhWEh7VTAOYK4e...,2018,12,3c5ZPBOMDO1HLHRrQEwpNW
4,5,All I Want for Christmas Is You,Mariah Carey,1599867,https://open.spotify.com/track/0bYg9bo50gSsH3L...,2018,12,0bYg9bo50gSsH3LtXe2SQn
...,...,...,...,...,...,...,...,...
195,196,È sempre bello,Coez,215634,https://open.spotify.com/track/6IdiuMw1FSAvU3e...,2021,01,6IdiuMw1FSAvU3e6bgWQVX
196,197,BABY (feat. Rosa Chemical),BLOODY VINYL,215551,https://open.spotify.com/track/3jtMheZu2P0tbWz...,2021,01,3jtMheZu2P0tbWzAYYWhvS
197,198,Lacri-ma,Gazzelle,215159,https://open.spotify.com/track/0N6QZADGv1zv3Jb...,2021,01,0N6QZADGv1zv3JbVIX6ctx
198,199,Cyborg (feat. Geolier),Guè Pequeno,214811,https://open.spotify.com/track/54wRUYPMEHLttFd...,2021,01,54wRUYPMEHLttFdLImOlAt


In [18]:
# Continue from the backup csv file in case there is some kind of interruption to the notebook and you lose the 
# data from the API calls.

country_track_ids_df = pd.read_csv("output_files/1_tracks_with_track_ids.csv")
country_track_ids_df

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Year,Month,Track ID
0,1,IL CIELO NELLA STANZA (feat. NSTASIA),Salmo,2885889,https://open.spotify.com/track/2fkVZYFv9hOejIs...,2018,12,2fkVZYFv9hOejIsLzZy8ad
1,2,Happy Birthday,Sfera Ebbasta,2182720,https://open.spotify.com/track/7Cw97917dvg5xm6...,2018,12,2bnTgPFD75vbq1AhxLJfMu
2,3,Torna a casa,Måneskin,1857579,https://open.spotify.com/track/3590AAEoqH50z4U...,2018,12,3590AAEoqH50z4UmhMIY85
3,4,La fine del mondo,Anastasio,1836968,https://open.spotify.com/track/3FhWEh7VTAOYK4e...,2018,12,3c5ZPBOMDO1HLHRrQEwpNW
4,5,All I Want for Christmas Is You,Mariah Carey,1599867,https://open.spotify.com/track/0bYg9bo50gSsH3L...,2018,12,0bYg9bo50gSsH3LtXe2SQn
...,...,...,...,...,...,...,...,...
3963,196,È sempre bello,Coez,215634,https://open.spotify.com/track/6IdiuMw1FSAvU3e...,2021,1,6IdiuMw1FSAvU3e6bgWQVX
3964,197,BABY (feat. Rosa Chemical),BLOODY VINYL,215551,https://open.spotify.com/track/3jtMheZu2P0tbWz...,2021,1,3jtMheZu2P0tbWzAYYWhvS
3965,198,Lacri-ma,Gazzelle,215159,https://open.spotify.com/track/0N6QZADGv1zv3Jb...,2021,1,0N6QZADGv1zv3JbVIX6ctx
3966,199,Cyborg (feat. Geolier),Guè Pequeno,214811,https://open.spotify.com/track/54wRUYPMEHLttFd...,2021,1,54wRUYPMEHLttFdLImOlAt


In [24]:
# Use API again to get audio features

danceability = []                                     # Set up empty lists to store data in
energy = []
valence = []
loudness = []
key = []
mode = []
speechiness = []
acousticness = []
instrumentalness =[]
liveness = []
tempo = []
time_signature = []

for track in country_track_ids_df['Track ID']:           # Heads up: this takes a long time
    
    try:
        feat_results = sp.audio_features([track])
        danceability.append(feat_results[0]['danceability'])
        energy.append(feat_results[0]['energy'])
        valence.append(feat_results[0]['valence'])
        loudness.append(feat_results[0]['loudness'])
        key.append(feat_results[0]['key'])
        mode.append(feat_results[0]['mode'])
        speechiness.append(feat_results[0]['speechiness'])
        acousticness.append(feat_results[0]['acousticness'])
        instrumentalness.append(feat_results[0]['instrumentalness'])
        liveness.append(feat_results[0]['liveness'])
        tempo.append(feat_results[0]['tempo'])
        time_signature.append(feat_results[0]['time_signature'])


        #print (f"{track} Valence Score: {feat_results[0]['valence']}")  # Just to let you see it working                                 
    
    except TypeError:                                 # Covers you in case there is missing data
        danceability.append(np.nan)
        energy.append(np.nan)
        valence.append(np.nan)
        loudness.append(np.nan)
        key.append(np.nan)
        mode.append(np.nan)
        speechiness.append(np.nan)
        acousticness.append(np.nan)
        instrumentalness.append(np.nan)
        liveness.append(np.nan)
        tempo.append(np.nan)
        time_signature.append(np.nan)


In [25]:
# Add audio features to dataframe

# Add new columns with audio features
country_track_ids_df['Danceability'] = danceability     
country_track_ids_df['Valence'] = energy
country_track_ids_df['Energy'] = valence

country_track_ids_df['Loudness'] = loudness     
country_track_ids_df['Key'] = key
country_track_ids_df['Mode'] = mode
country_track_ids_df['Speechiness'] = speechiness     
country_track_ids_df['Acousticness'] = acousticness
country_track_ids_df['Instrumentalness'] = instrumentalness
country_track_ids_df['Liveness'] = liveness     
country_track_ids_df['Tempo'] = tempo
country_track_ids_df['Time_Signature'] = time_signature

In [26]:
# Back up dataframe again to .csv

 # Use .dropna() to remove rows with missing data
clean_country_track_ids_df = country_track_ids_df.dropna(how='any')     
#Back up the dataframe to csv again    
clean_country_track_ids_df.to_csv("output_files/2_tracks_with_audio_features.csv", index=False) 

clean_country_track_ids_df

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Year,Month,Track ID,Danceability,Valence,Energy,Loudness,Key,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Tempo,Time_Signature
0,1,IL CIELO NELLA STANZA (feat. NSTASIA),Salmo,2885889,https://open.spotify.com/track/2fkVZYFv9hOejIs...,2018,12,2fkVZYFv9hOejIsLzZy8ad,0.755,0.501,0.267,-9.551,1,1,0.1890,0.0867,0.000001,0.1460,134.998,4
1,2,Happy Birthday,Sfera Ebbasta,2182720,https://open.spotify.com/track/7Cw97917dvg5xm6...,2018,12,2bnTgPFD75vbq1AhxLJfMu,0.866,0.387,0.790,-11.011,4,1,0.0345,0.4030,0.000000,0.1450,94.976,3
2,3,Torna a casa,Måneskin,1857579,https://open.spotify.com/track/3590AAEoqH50z4U...,2018,12,3590AAEoqH50z4UmhMIY85,0.425,0.638,0.450,-3.184,1,0,0.0759,0.4260,0.000000,0.1770,81.396,4
3,4,La fine del mondo,Anastasio,1836968,https://open.spotify.com/track/3FhWEh7VTAOYK4e...,2018,12,3c5ZPBOMDO1HLHRrQEwpNW,0.830,0.667,0.406,-8.125,8,0,0.1680,0.3250,0.000006,0.1170,114.989,4
4,5,All I Want for Christmas Is You,Mariah Carey,1599867,https://open.spotify.com/track/0bYg9bo50gSsH3L...,2018,12,0bYg9bo50gSsH3LtXe2SQn,0.336,0.627,0.350,-7.463,7,1,0.0384,0.1640,0.000000,0.0708,150.273,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3963,196,È sempre bello,Coez,215634,https://open.spotify.com/track/6IdiuMw1FSAvU3e...,2021,1,6IdiuMw1FSAvU3e6bgWQVX,0.692,0.630,0.499,-6.792,8,1,0.1640,0.0732,0.000000,0.0865,110.919,5
3964,197,BABY (feat. Rosa Chemical),BLOODY VINYL,215551,https://open.spotify.com/track/3jtMheZu2P0tbWz...,2021,1,3jtMheZu2P0tbWzAYYWhvS,0.843,0.731,0.627,-4.636,1,1,0.1050,0.0469,0.000000,0.1980,129.954,4
3965,198,Lacri-ma,Gazzelle,215159,https://open.spotify.com/track/0N6QZADGv1zv3Jb...,2021,1,0N6QZADGv1zv3JbVIX6ctx,0.645,0.719,0.574,-5.515,3,0,0.0226,0.1360,0.000883,0.1880,100.004,4
3966,199,Cyborg (feat. Geolier),Guè Pequeno,214811,https://open.spotify.com/track/54wRUYPMEHLttFd...,2021,1,54wRUYPMEHLttFdLImOlAt,0.830,0.606,0.740,-5.917,7,1,0.2830,0.1320,0.000000,0.0877,139.972,4


In [34]:
# Continue from the backup csv file in case there is some kind of interruption to the notebook and you lose the 
# data from the API calls.

country_tracks_data_df = pd.read_csv("output_files/2_tracks_with_audio_features.csv")

country_tracks_data_df.head()

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Year,Month,Track ID,Danceability,Valence,Energy,Loudness,Key,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Tempo,Time_Signature
0,1,IL CIELO NELLA STANZA (feat. NSTASIA),Salmo,2885889,https://open.spotify.com/track/2fkVZYFv9hOejIs...,2018,12,2fkVZYFv9hOejIsLzZy8ad,0.755,0.501,0.267,-9.551,1,1,0.189,0.0867,1e-06,0.146,134.998,4
1,2,Happy Birthday,Sfera Ebbasta,2182720,https://open.spotify.com/track/7Cw97917dvg5xm6...,2018,12,2bnTgPFD75vbq1AhxLJfMu,0.866,0.387,0.79,-11.011,4,1,0.0345,0.403,0.0,0.145,94.976,3
2,3,Torna a casa,Måneskin,1857579,https://open.spotify.com/track/3590AAEoqH50z4U...,2018,12,3590AAEoqH50z4UmhMIY85,0.425,0.638,0.45,-3.184,1,0,0.0759,0.426,0.0,0.177,81.396,4
3,4,La fine del mondo,Anastasio,1836968,https://open.spotify.com/track/3FhWEh7VTAOYK4e...,2018,12,3c5ZPBOMDO1HLHRrQEwpNW,0.83,0.667,0.406,-8.125,8,0,0.168,0.325,6e-06,0.117,114.989,4
4,5,All I Want for Christmas Is You,Mariah Carey,1599867,https://open.spotify.com/track/0bYg9bo50gSsH3L...,2018,12,0bYg9bo50gSsH3LtXe2SQn,0.336,0.627,0.35,-7.463,7,1,0.0384,0.164,0.0,0.0708,150.273,4


## Stats
* Danceability: A value of 0.0 is least danceable and 1.0 is most danceable.
* Acousticness: A measure from 0.0 to 1.0 of whether the track is acoustic.
* Energy: Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity.
* Instrumentalness: Predicts whether a track contains no vocals. The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content.
* Liveness: Detects the presence of an audience in the recording. Higher liveness values represent an increased probability that the track was performed live.
* Loudness: The overall loudness of a track in decibels (dB). Loudness values are averaged across the entire track. Values typical range between -60 and 0 db.

* Speechiness: Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value.
* Tempo: The overall estimated tempo of a track in beats per minute (BPM). In musical terminology, tempo is the speed or pace of a given piece and derives directly from the average beat duration.
* Valence: A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry).



In [37]:
country_tracks_data_df.pivot()


AttributeError: 'DataFrame' object has no attribute 'column'

In [35]:
stats = country_tracks_data_df.describe()
stats

Unnamed: 0,Position,Streams,Year,Month,Danceability,Valence,Energy,Loudness,Key,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Tempo,Time_Signature
count,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0,3968.0
mean,100.230595,569204.3,2019.101562,4.205141,0.682665,0.649155,0.483845,-6.507061,5.218498,0.602067,0.12889,0.246267,0.016008,0.174507,120.275976,3.965726
std,57.814416,491993.5,0.701123,4.022467,0.13604,0.151804,0.217106,2.770585,3.670496,0.489533,0.108246,0.226275,0.104766,0.135991,28.640095,0.277434
min,1.0,209308.0,2018.0,1.0,0.102,0.029,0.036,-32.465,0.0,0.0,0.0226,9e-06,0.0,0.0257,62.007,1.0
25%,50.0,300173.0,2019.0,1.0,0.593,0.55125,0.323,-7.513,1.0,0.0,0.0481,0.0699,0.0,0.0927,96.052,4.0
50%,100.0,404589.0,2019.0,3.0,0.7045,0.669,0.48,-6.097,5.0,1.0,0.07805,0.175,0.0,0.124,120.042,4.0
75%,150.0,629501.2,2019.0,10.0,0.777,0.762,0.643,-5.001,8.0,1.0,0.187,0.362,5e-06,0.208,140.038,4.0
max,200.0,8223614.0,2021.0,12.0,0.939,0.986,0.981,-1.284,11.0,1.0,0.622,0.996,0.961,0.921,211.842,5.0


In [42]:
# Use groupby to get average valence of the 200 songs in each month

country_tracks_data_groupby = country_tracks_data_df.groupby(["Year", 'Month'], as_index=False)['Tempo'].mean()

country_tracks_data_groupby

Unnamed: 0,Year,Month,Tempo
0,2018,12,120.904982
1,2019,1,120.734573
2,2019,2,120.148231
3,2019,3,120.38682
4,2019,10,118.657473
5,2019,11,117.65071
6,2021,1,121.659398


In [43]:
# Set up some basic plt formatting configurations

plt.rc('font', size=12)
plt.rc('axes', labelsize=15)
plt.rc('axes', titlesize=20)