## Table of Contents
<ul>
<li><a href="#Data Gathering">Data Gathering</a></li>
<li><a href="#Data Assessment">Data Assessment</a></li>
<li><a href="#Data Cleaning">Data Cleaning</a></li>
<li><a href="#Storing Data">Storing Data</a></li>
</ul>

<a id='Intro'></a>
# Revelation in Sound: Analyzing Nathaniel Bassey's Tracks on Spotify

---
---
---

In [1]:
# Importing necessary libraries

import spotipy
import pandas as pd
import numpy as np
from timeit import default_timer as timer
from datetime import timedelta
from pandas.api.types import CategoricalDtype

<a id='Data Gathering'></a>
# Data Gathering

In [25]:
# Spotify API tokens for access
from spotipy.oauth2 import SpotifyClientCredentials

client_id= 'CLIENT_ID'
client_secret = 'CLIENT_SECRET'
client_credentials_manager = SpotifyClientCredentials(
                                client_id = client_id, client_secret = client_secret)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [26]:
# function to know how long it'll take to scrape the data

def format_time(seconds):
    minutes = seconds // 60
    seconds = seconds % 60
    if minutes > 0:
        return f"{minutes} minutes, {seconds} seconds"
    else:
        return f"{seconds} seconds"

In [27]:
# create empty lists to store data
artist_name = []
track_name = []
track_id = []
album_name = []
album_id = []
release_date = []
duration_ms = []
popularity = []
explicit = []
danceability = []
energy = []
key = []
loudness = []
genres = []
mode = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
time_signature = []
featured_artists = []

In [28]:
# start time
start_time = timer()

# For Nathaniel Bassey as a main artist
# loop through results, using offset to get all tracks
for i in range(0, 1000, 50):
    try:
        track_results = sp.search(q='artist:nathaniel bassey', type='track', limit=50, offset=i)
        for i, t in enumerate(track_results['tracks']['items']):
    
            # get track details
            artist_name.append(t['artists'][0]['name'])
            track_name.append(t['name'])
            track_id.append(t['id'])
            album_name.append(t['album']['name'])
            album_id.append(t['album']['id'])
            release_date.append(t['album']['release_date'])
            popularity.append(t['popularity'])
            explicit.append(t['explicit'])

            # get audio features for track
            audio_features = sp.audio_features(t['id'])[0]
            danceability.append(audio_features['danceability'])
            duration_ms.append(audio_features['duration_ms'])
            energy.append(audio_features['energy'])
            key.append(audio_features['key'])
            loudness.append(audio_features['loudness'])
            mode.append(audio_features['mode'])
            speechiness.append(audio_features['speechiness'])
            acousticness.append(audio_features['acousticness'])
            instrumentalness.append(audio_features['instrumentalness'])
            liveness.append(audio_features['liveness'])
            valence.append(audio_features['valence'])
            tempo.append(audio_features['tempo'])
            time_signature.append(audio_features['time_signature'])


            # get featured artists
            if len(t['artists']) > 1:
                feat_artists = []
                for j in range(1, len(t['artists'])):
                    feat_artists.append(t['artists'][j]['name'])
                featured_artists.append(feat_artists)
            else:
                featured_artists.append([])
    except ReadTimeout as e:
        print(f"Error: {e}. Retrying in 5 seconds...")
        time.sleep(5)  # Retry after a short delay
            
                
                
# create dataframe from lists
df = pd.DataFrame({
    'artist_name': artist_name,
    'track_name': track_name,
    'track_id': track_id,
    'album_name': album_name,
    'album_id': album_id,
    'release_date': release_date,
    'duration_ms': duration_ms,
    'popularity': popularity,
    'explicit': explicit,
    'danceability': danceability,
    'energy': energy,
    'key': key,
    'loudness': loudness,
    'mode': mode,
    'speechiness': speechiness,
    'acousticness': acousticness,
    'instrumentalness': instrumentalness,
    'liveness': liveness,
    'valence': valence,
    'tempo': tempo,
    'time_signature': time_signature,
    'featured_artists': featured_artists
})



# end time and print
end_time = timer()
elapsed_time = int(end_time - start_time)
print(f"Elapsed time: {format_time(elapsed_time)}")

Elapsed time: 41 seconds


In [29]:
# store dataframe as csv file
df.to_csv('Nathaniel Bassey Spotify Tracks.csv', index=False)

---
<a id='Data Assessment'></a>
# Data Assessment

In [60]:
# loading the metadata of the tracks that exist as at the time this notebook is being written

df = pd.read_csv('Nathaniel Bassey Spotify Tracks.csv')

In [61]:
df.info()

shape = df.shape
print(f"\n\n The data set has {shape[0]} rows and {shape[1]} columns.")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   artist_name       150 non-null    object 
 1   track_name        150 non-null    object 
 2   track_id          150 non-null    object 
 3   album_name        150 non-null    object 
 4   album_id          150 non-null    object 
 5   release_date      150 non-null    object 
 6   duration_ms       150 non-null    int64  
 7   popularity        150 non-null    int64  
 8   explicit          150 non-null    bool   
 9   danceability      150 non-null    float64
 10  energy            150 non-null    float64
 11  key               150 non-null    int64  
 12  loudness          150 non-null    float64
 13  mode              150 non-null    int64  
 14  speechiness       150 non-null    float64
 15  acousticness      150 non-null    float64
 16  instrumentalness  150 non-null    float64
 1

In [62]:
# View first 15 rows with all columns
pd.options.display.max_columns = None 
df.head(15)

Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,duration_ms,popularity,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists
0,Nathaniel Bassey,Olorun Agbaye - You Are Mighty,0gDyskuhux7JfUcGzn0gY9,Hallelujah Again (Revelation 19:3),5HjyQX3GmSbNBL1bG3D6Gf,2021-04-01,436321,58,False,0.345,0.698,10,-6.186,1,0.0465,0.116,1e-06,0.348,0.188,123.922,3,"['Chandler Moore', 'Oba']"
1,Nathaniel Bassey,You Are God (feat. Chigozie Achugo),78D1ZERxVW4XvelhO1jG4K,This God Is Too Good,5RCbwQMe3LwArxqneTQMTg,2016-10-03,271438,55,False,0.487,0.641,0,-4.704,1,0.0351,0.0501,2e-06,0.183,0.434,123.955,4,['Chigozie Achugo']
2,Nathaniel Bassey,Adonai,7axctArGc7dtaEJSl3IZge,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,511207,56,False,0.269,0.711,0,-5.951,0,0.0379,0.00366,0.00215,0.442,0.157,129.583,3,[]
3,Nathaniel Bassey,Olorun Agbaye - You Are Mighty,05o0RoSQ1pQFvHe54uFUZf,Olorun Agbaye - You Are Mighty,0bquI98KbNHqOrpGFXpNyS,2020-10-30,438744,49,False,0.346,0.696,10,-6.207,1,0.0464,0.116,1e-06,0.335,0.184,123.468,3,"['Chandler Moore', 'Oba']"
4,Nathaniel Bassey,Imela (feat. Enitan Adaba),50OHI0yJ1ti2jTWySMz6jY,The Son of God (& Imela),17QFKUXHOJcDzNoDijDcXn,2014-12-01,354573,53,False,0.366,0.522,6,-8.552,1,0.0319,0.146,0.00051,0.12,0.325,150.101,4,['Enitan Adaba']
5,Nathaniel Bassey,See What the Lord Has Done,3BPPakEzZWw0pQQzuQfvBt,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,454829,54,False,0.265,0.539,8,-7.433,1,0.0315,0.025,0.0,0.371,0.321,139.699,4,[]
6,Nathaniel Bassey,Yeshua Hamashiach,6xmcjy9fiHc5UZlUZDg5uy,The King Is Coming,1zi8tByoRTWt088vDbI6OG,2019-11-29,464306,52,False,0.551,0.886,9,-4.766,0,0.0578,0.0726,1.7e-05,0.0896,0.346,147.995,4,[]
7,Nathaniel Bassey,Onise Iyanu (feat. Micah Stampley),0S7l0Nz5MztXjkNTppknRp,This God Is Too Good,5RCbwQMe3LwArxqneTQMTg,2016-10-03,366498,52,False,0.284,0.796,7,-4.604,1,0.0504,0.203,0.0,0.391,0.309,144.214,4,['Micah Stampley']
8,Nathaniel Bassey,Tobechukwu - Live,79zFqU5mmz9wnuKfjaiPJp,HALLELUJAH LIVE,5xe3knii9RbZzikwkiIqBY,2023-03-24,488339,53,False,0.425,0.715,10,-6.247,1,0.0733,0.381,0.0,0.0956,0.686,75.374,4,['MERCY CHINWO BLESSED']
9,Nathaniel Bassey,Ebenezer - Live,1NIsNb7J8yfy9TYE1ycKMp,HALLELUJAH LIVE,5xe3knii9RbZzikwkiIqBY,2023-03-24,511197,53,False,0.544,0.858,7,-4.976,1,0.0454,0.156,0.0,0.47,0.48,143.979,3,['VICTORIA ORENZE']


In [63]:
df.album_name.sort_values(ascending=True).unique()

array(['Alagbada Ina (feat. Victoria Orenze)', 'Call on Me',
       'Glorious God (feat. Chimdi Ochei)', 'God of Love',
       'Green Worship 1.0 (Live)', 'Green Worship 2.0',
       'Green Worship 3.0', 'Green Worship 4.0', 'HALLELUJAH LIVE',
       'Hallelujah Again (Revelation 19:3)', 'Halleluya Eh (Live)',
       'I Love You / Ama Medley', 'Iba',
       'Imela. "Thank You" (feat. Enitan Adaba)',
       'Incredible Love (feat. Chris Morgan & Simpa Adaba)',
       'Jesus: The Resurrection & the Life', 'Names of God',
       'New Jazz Stew Project: Revelation',
       'Olorun Agbaye - You Are Mighty', 'Olowogbogboro',
       'Onise Iyanu (feat. Glorious Fountain Choir & Micah Stampley)',
       'Powerful Worship', 'Revival Flames',
       'See What the Lord Has Done (Live)', "Someone's At The Door",
       'TOBECHUKWU', 'The Blood', 'The King Is Coming',
       'The Son of God (& Imela)', 'This God Is Too Good',
       'This God Is Too Good (feat. Micah Stampley)', 'Wonderful Wonder',

A random sample of the featured artists.

In [64]:
df.featured_artists.sample(n=20, random_state=123)

72     ['NTOKOZO MBAMBO', 'MERCY CHINWO BLESSED']
112                                            []
132                                            []
88                                             []
37                                  ['AIDEE IME']
138                                            []
87                                             []
42                            ['Jumoke Oshoboke']
8                        ['MERCY CHINWO BLESSED']
90                                             []
141                                            []
33                       ['OYINKAN KOKU-BAZUAYE']
59             ['Mahalia Buchanan', 'Joe Mettle']
116               ['Chris Morgan', 'Simpa Adaba']
135                                            []
104                             ['Mayra Alvarez']
36                                             []
13           ['Dunsin Oyekan', 'Dasola Akinbule']
63                                             []
45                                             []


In [65]:
df.artist_name.unique()

array(['Nathaniel Bassey'], dtype=object)

In [66]:
df[df['artist_name'] != 'Nathaniel Bassey'].head(50)

Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,duration_ms,popularity,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists


In [67]:
df[df['track_name'].duplicated(keep=False)].sort_values(by=['track_name', 'popularity'], ascending=[True, False])

Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,duration_ms,popularity,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists
16,Nathaniel Bassey,Alagbada Ina (feat. Victoria Orenze),5nYZcBMWmElPweuzKs4ANs,Revival Flames,3ouj0BXXkGizxWYTtcPK3y,2017-05-08,271935,47,False,0.486,0.74,0,-4.644,1,0.053,0.275,0.0,0.285,0.526,148.297,4,['Victoria Orenze']
131,Nathaniel Bassey,Alagbada Ina (feat. Victoria Orenze),1lJlXs8viNMFRzQbr4TYKo,Alagbada Ina (feat. Victoria Orenze),7A9rwfo0fffXMyQuesY6w2,2017-04-07,269949,18,False,0.492,0.752,0,-4.601,1,0.0656,0.299,0.0,0.326,0.452,148.218,4,['Victoria Orenze']
100,Nathaniel Bassey,Call on Me,3onAjULDuptX9CSYhTqdyg,Hallelujah Again (Revelation 19:3),5HjyQX3GmSbNBL1bG3D6Gf,2021-04-01,420870,25,False,0.595,0.766,0,-6.141,1,0.0364,0.00988,0.0,0.145,0.182,136.06,4,[]
140,Nathaniel Bassey,Call on Me,2nVe8nupIbNG7UL0zsKTI4,Call on Me,6gMnsFpJxVHrBtTd0F27SW,2020-11-13,421598,10,False,0.6,0.771,0,-5.393,1,0.0372,0.0443,0.0,0.145,0.233,136.077,4,[]
22,Nathaniel Bassey,Casting Crowns,6qZapm4LgqfEhVHcAM2OsK,God of Love,2T2gLRnY1ylgkEWaEVuL1H,2013-12-01,436400,45,False,0.511,0.454,5,-8.357,1,0.0302,0.547,0.00266,0.075,0.363,140.075,4,['Love Song']
51,Nathaniel Bassey,Casting Crowns,2AXoz82FM7qwhFwEKztGdm,The Son of God (& Imela),17QFKUXHOJcDzNoDijDcXn,2014-12-01,394031,39,False,0.516,0.493,5,-8.344,1,0.0264,0.0119,0.00104,0.139,0.168,139.953,4,[]
86,Nathaniel Bassey,Incredible Love (feat. Chris Morgan & Simpa Ad...,6FfULQbBi21vftYz7W8ZeH,Revival Flames,3ouj0BXXkGizxWYTtcPK3y,2017-05-08,346880,28,False,0.585,0.741,1,-5.419,1,0.0453,0.525,1e-06,0.313,0.576,155.899,4,"['Chris Morgan', 'Simpa Adaba']"
116,Nathaniel Bassey,Incredible Love (feat. Chris Morgan & Simpa Ad...,2FuWP3EjzImKRiGgF0GQVE,Incredible Love (feat. Chris Morgan & Simpa Ad...,1hOfadhpmXMxC5nURVwr0h,2017-04-08,346880,25,False,0.586,0.738,1,-5.419,1,0.0458,0.533,1e-06,0.313,0.581,155.905,4,"['Chris Morgan', 'Simpa Adaba']"
0,Nathaniel Bassey,Olorun Agbaye - You Are Mighty,0gDyskuhux7JfUcGzn0gY9,Hallelujah Again (Revelation 19:3),5HjyQX3GmSbNBL1bG3D6Gf,2021-04-01,436321,58,False,0.345,0.698,10,-6.186,1,0.0465,0.116,1e-06,0.348,0.188,123.922,3,"['Chandler Moore', 'Oba']"
3,Nathaniel Bassey,Olorun Agbaye - You Are Mighty,05o0RoSQ1pQFvHe54uFUZf,Olorun Agbaye - You Are Mighty,0bquI98KbNHqOrpGFXpNyS,2020-10-30,438744,49,False,0.346,0.696,10,-6.207,1,0.0464,0.116,1e-06,0.335,0.184,123.468,3,"['Chandler Moore', 'Oba']"


Some tracks like **Alagbada Ina (feat. Victoria Orenze)** appear twice because they were released as singles and also a part of an album. Others like **Casting Crowns** appear both in an EP _(in this case with a featured artist)_ and part of an album _(in this case, no featured artist)_.

ALso, it is obobserved that that the duplicated trakcs that are also found in albums have more popularity as the album verions. This further reinforces the fact that the album verions are the better tracks to keep.

In [68]:
df.track_name.duplicated().sum()

10

In [69]:
df.album_id.duplicated().sum()

116

In [70]:
df.album_name.unique().size, df.album_id.unique().size

(34, 34)

As at the time the data was scrapped, there are 34 album IDs appearing 150 times. For Album IDs, it is expected to have duplicate values, as many tracks from one album would all have the same Album ID. After further cleaning, it is expected that redundant, duplicate IDs would've been removed.

In [71]:
df[['track_name', 'track_id', 'album_name', 'album_id']].isna().any()

track_name    False
track_id      False
album_name    False
album_id      False
dtype: bool

---

### Quality Issues

1. Track name column has duplicate enteries.
2. Duration_ms should have it's values in minutes and seconds, as oppossed to milliseconds for more readability.
3. Change key values from numerals to alphabets (A, B, C and so on).
4. Change mode values from 0 and 1 to Minor and Major respectively.
5. Featured artists column has square brackets and quotes anomng the artist names.


### Tidiness Issues
1. Apart from the release date column, there should be column for month and year.
2. Albums, EPs and Singles should be indicated in  column.

---

<a id='Data CLeaning'></a>
#  Data Cleaning

In [72]:
# Duplicate the whoe dataset to have the original intact

df_copy = df.copy()

## Issue 1

Duplicate tracks

### Code

In [73]:
# Remove duplicate rows and keep the first occurrence
df_copy = df_copy.drop_duplicates(keep='first')

# Reset the index after removing duplicates
df_copy = df_copy.reset_index(drop=True)

In [74]:
# Sort the DataFrame in descending order by 'popularity'
df_copy_sorted = df_copy.sort_values(by='popularity', ascending=False)

# Drop duplicates based on 'track_name' and 'featured_artists', keeping the first occurrence (higher popularity)
df_copy_unique_tracks = df_copy_sorted.drop_duplicates(subset=['track_name', 'featured_artists'], keep='first')

# Reset the index after dropping duplicates
df_copy = df_copy_unique_tracks.sort_index()
df_copy.reset_index(drop=True, inplace=True)

### Test

In [75]:
df_copy['track_name'].duplicated().sum()

4

In [76]:
df_copy[df_copy['track_name'].duplicated(keep=False)].sort_values(by=['track_name', 'popularity'], ascending=[True, False])

Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,duration_ms,popularity,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists
21,Nathaniel Bassey,Casting Crowns,6qZapm4LgqfEhVHcAM2OsK,God of Love,2T2gLRnY1ylgkEWaEVuL1H,2013-12-01,436400,45,False,0.511,0.454,5,-8.357,1,0.0302,0.547,0.00266,0.075,0.363,140.075,4,['Love Song']
50,Nathaniel Bassey,Casting Crowns,2AXoz82FM7qwhFwEKztGdm,The Son of God (& Imela),17QFKUXHOJcDzNoDijDcXn,2014-12-01,394031,39,False,0.516,0.493,5,-8.344,1,0.0264,0.0119,0.00104,0.139,0.168,139.953,4,[]
9,Nathaniel Bassey,Wonderful Wonder,5sSoA18jYUkQkvC64o0IDS,God of Love,2T2gLRnY1ylgkEWaEVuL1H,2013-12-01,337160,52,False,0.668,0.575,8,-7.929,1,0.0632,0.491,0.0,0.0919,0.409,172.059,4,['Love Song']
89,Nathaniel Bassey,Wonderful Wonder,04RTQWoTLUNcIzBqlnpXdZ,Wonderful Wonder,32JB9skOJnLfvWdq8J2DVa,2023-02-17,293779,31,False,0.687,0.847,7,-4.756,0,0.0693,0.313,0.0,0.237,0.73,99.966,4,[]
5,Nathaniel Bassey,Yeshua Hamashiach,6xmcjy9fiHc5UZlUZDg5uy,The King Is Coming,1zi8tByoRTWt088vDbI6OG,2019-11-29,464306,52,False,0.551,0.886,9,-4.766,0,0.0578,0.0726,1.7e-05,0.0896,0.346,147.995,4,[]
138,Nathaniel Bassey,Yeshua Hamashiach,26i6YjkTW3OMObpR9TBTR3,Green Worship 4.0,4C18rGHHTOjL4OaUt9rSiF,2021-09-18,328625,10,False,0.401,0.78,9,-3.861,0,0.0419,0.0552,0.0,0.53,0.409,149.997,4,['Various Artists']
58,Nathaniel Bassey,You Are Holy,4RMbIqcWkxgoilODIW2nEy,The King Is Coming,1zi8tByoRTWt088vDbI6OG,2019-11-29,450475,33,False,0.404,0.549,3,-6.232,1,0.0309,0.402,0.0,0.0806,0.25,106.116,4,"['Mahalia Buchanan', 'Joe Mettle']"
128,Nathaniel Bassey,You Are Holy,3SBHK8iUSvnpQaSExvaQzw,Someone's At The Door,7tmA7AWF5AsD8s74QnFmeL,2017-04-07,341342,16,False,0.588,0.407,0,-8.029,1,0.0296,0.508,0.0,0.0782,0.248,117.964,4,[]


A look at this shows that the only duplicate tracks are diffrent version of each other, and are different by either featured artists, and Album, EP or Single where they were released.

## Issue 2

Albums, EPs and Singles should be indicated in column.

### Code

In [77]:
# Define lists of albums for each release type
album_list = ["The Son of God (& Imela)", "This God Is Too Good", "Someone's At The Door", "Revival Flames",
             "Jesus: The Resurrection & the Life", "The King Is Coming", "Hallelujah Again (Revelation 19:3)", "Names of God",
             ""]
ep_list = ["God of Love"]
single_list = ["Imela. \"Thank You\" (feat. Enitan Adaba)", "Glorious God (feat. Chimdi Ochei)", "Onise Iyanu (feat. Glorious Fountain Choir & Micah Stampley)"
              "Olowogbogboro", "You Are Holy (Worship Medly)", "Powerful Worship", "I Love You / Ama Medley",
              "See What the Lord Has Done (Live)", "TOBECHUKWU", "Wonderful Wonder", "Iba"]
live_album_list = ["HALLELUJAH LIVE"]

# Function to determine the release type based on the album name
def get_release_type(album_name):
    if album_name in album_list:
        return "Album"
    elif album_name in ep_list:
        return "EP"
    elif album_name in single_list:
        return "Single"
    elif album_name in live_album_list:
        return "Live Album"
    else:
        return "Unknown"  # You can customize this for other cases if needed

# Apply the function to the 'album_name' column to create the 'release_type' column
df_copy['release_type'] = df_copy['album_name'].apply(get_release_type)

### Test

In [78]:
df_copy[['track_name', 'album_name', 'release_date', 'release_type']].sample(10)

Unnamed: 0,track_name,album_name,release_date,release_type
10,There Is a Place,The King Is Coming,2019-11-29,Album
5,Yeshua Hamashiach,The King Is Coming,2019-11-29,Album
91,Take Your Glory,The Son of God (& Imela),2014-12-01,Album
25,"Imela. ""Thank You"" (feat. Enitan Adaba)","Imela. ""Thank You"" (feat. Enitan Adaba)",2012-04-01,Single
24,See What the Lord Has Done (Live),See What the Lord Has Done (Live),2022-01-07,Single
136,Halleluyah Eh,Green Worship 2.0,2019-11-29,Unknown
63,Awamaridi,Jesus: The Resurrection & the Life,2018-12-02,Album
121,Intro (Doxology: Praise God from Whom All Bles...,Jesus: The Resurrection & the Life,2018-12-02,Album
85,Incredible Love (feat. Chris Morgan & Simpa Ad...,Revival Flames,2017-05-08,Album
16,Hallelujah Challenge Praise Medley,Hallelujah Again (Revelation 19:3),2021-04-01,Album


## Issue 3

'duration_ms' should have it's values in minutes and seconds

### Code

In [79]:
# convert the duration column to a timedelta format
df_copy['duration_ms'] = pd.to_timedelta(df_copy['duration_ms'], unit='ms')

# extract the minutes and seconds components of the duration as strings
df_copy['duration'] = df_copy['duration_ms'].dt.components['minutes'].astype(str).str.zfill(2) + ':' + df_copy['duration_ms'].dt.components['seconds'].astype(str).str.zfill(2)
df_copy.drop(columns=['duration_ms'])

Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,popularity,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists,release_type,duration
0,Nathaniel Bassey,Olorun Agbaye - You Are Mighty,0gDyskuhux7JfUcGzn0gY9,Hallelujah Again (Revelation 19:3),5HjyQX3GmSbNBL1bG3D6Gf,2021-04-01,58,False,0.345,0.698,10,-6.186,1,0.0465,0.11600,0.000001,0.348,0.188,123.922,3,"['Chandler Moore', 'Oba']",Album,07:16
1,Nathaniel Bassey,You Are God (feat. Chigozie Achugo),78D1ZERxVW4XvelhO1jG4K,This God Is Too Good,5RCbwQMe3LwArxqneTQMTg,2016-10-03,55,False,0.487,0.641,0,-4.704,1,0.0351,0.05010,0.000002,0.183,0.434,123.955,4,['Chigozie Achugo'],Album,04:31
2,Nathaniel Bassey,Adonai,7axctArGc7dtaEJSl3IZge,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,56,False,0.269,0.711,0,-5.951,0,0.0379,0.00366,0.002150,0.442,0.157,129.583,3,[],Album,08:31
3,Nathaniel Bassey,Imela (feat. Enitan Adaba),50OHI0yJ1ti2jTWySMz6jY,The Son of God (& Imela),17QFKUXHOJcDzNoDijDcXn,2014-12-01,53,False,0.366,0.522,6,-8.552,1,0.0319,0.14600,0.000510,0.120,0.325,150.101,4,['Enitan Adaba'],Album,05:54
4,Nathaniel Bassey,See What the Lord Has Done,3BPPakEzZWw0pQQzuQfvBt,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,54,False,0.265,0.539,8,-7.433,1,0.0315,0.02500,0.000000,0.371,0.321,139.699,4,[],Album,07:34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,Nathaniel Bassey,Groovin’,4U952wffpOwPoAUYm9eayL,New Jazz Stew Project: Revelation,4FCTp4KLgCoTpXwS4OoUbg,2004-10-06,0,False,0.587,0.811,9,-10.367,0,0.0518,0.20100,0.891000,0.188,0.545,112.990,4,[],Unknown,06:30
140,Nathaniel Bassey,Halleluya Eh - Live,3ifmkDsCrTTk1He0JhkJgl,Halleluya Eh (Live),4AXWuUOlp8KeH89wkWn9bC,2022-10-09,0,False,0.463,0.704,5,-6.926,1,0.1270,0.71200,0.000000,0.390,0.672,140.675,4,[],Unknown,09:36
141,Nathaniel Bassey,Powerful Worship,2fre7Ki1MBebwE0kWNSydk,Powerful Worship,2Bjc8sWDPry9VhBHXp7rQY,2020,3,False,0.337,0.594,10,-14.529,1,0.1200,0.03040,0.000000,0.489,0.354,139.210,4,[],Single,57:09
142,Nathaniel Bassey,Good Times,6kPjIrmVWeM75c4SA8e0Te,New Jazz Stew Project: Revelation,4FCTp4KLgCoTpXwS4OoUbg,2004-10-06,0,False,0.599,0.687,10,-8.147,0,0.0328,0.06130,0.618000,0.126,0.608,141.930,4,[],Unknown,05:51


### Test

In [80]:
df_copy[['duration_ms', 'duration']].head()

Unnamed: 0,duration_ms,duration
0,0 days 00:07:16.321000,07:16
1,0 days 00:04:31.438000,04:31
2,0 days 00:08:31.207000,08:31
3,0 days 00:05:54.573000,05:54
4,0 days 00:07:34.829000,07:34


## Issue 4

Correct Key values

### Code

In [81]:
# create a dictionary to map the key numbers to their corresponding letters
key_dict = {
    0: "C",
    1: "C#/Db",
    2: "D",
    3: "D#/Eb",
    4: "E",
    5: "F",
    6: "F#/Gb",
    7: "G",
    8: "G#/Ab",
    9: "A",
    10: "A#/Bb",
    11: "B",
    -1: "NaN"
}

df_copy['key'] = df_copy['key'].apply(lambda x: key_dict[x])

### Test

In [82]:
df_copy.key.head()

0    A#/Bb
1        C
2        C
3    F#/Gb
4    G#/Ab
Name: key, dtype: object

## Issue 5

Correct Mode values

### Code

In [83]:
# create a dictionary to map the key numbers to their corresponding letters
mode_dict = {
    0: "Minor",
    1: "Major"
}

df_copy['mode'] = df['mode'].apply(lambda x: mode_dict[x])

### Test

In [84]:
df_copy['mode'].head()

0    Major
1    Major
2    Minor
3    Major
4    Major
Name: mode, dtype: object

## Issue 6

Release date should have day month and year columns

In [85]:
# convert release_date column to datetime format
df_copy['release_date'] = pd.to_datetime(df_copy['release_date'])

# extract month and year from release_date column
df_copy['release_day'] = df_copy['release_date'].dt.strftime('%A')
df_copy['release_month'] = df_copy['release_date'].dt.strftime('%B')
df_copy['release_year'] = df_copy['release_date'].dt.year

# Order the Days and Months.
days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
          'September', 'October', 'November', 'December']

df_copy['release_month'] = df_copy['release_month'].astype(CategoricalDtype(categories = months, ordered = True))
df_copy['release_day'] = df_copy['release_day'].astype(CategoricalDtype(categories = days, ordered = True))

### Test

In [86]:
df_copy.head()

Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,duration_ms,popularity,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists,release_type,duration,release_day,release_month,release_year
0,Nathaniel Bassey,Olorun Agbaye - You Are Mighty,0gDyskuhux7JfUcGzn0gY9,Hallelujah Again (Revelation 19:3),5HjyQX3GmSbNBL1bG3D6Gf,2021-04-01,0 days 00:07:16.321000,58,False,0.345,0.698,A#/Bb,-6.186,Major,0.0465,0.116,1e-06,0.348,0.188,123.922,3,"['Chandler Moore', 'Oba']",Album,07:16,Thursday,April,2021
1,Nathaniel Bassey,You Are God (feat. Chigozie Achugo),78D1ZERxVW4XvelhO1jG4K,This God Is Too Good,5RCbwQMe3LwArxqneTQMTg,2016-10-03,0 days 00:04:31.438000,55,False,0.487,0.641,C,-4.704,Major,0.0351,0.0501,2e-06,0.183,0.434,123.955,4,['Chigozie Achugo'],Album,04:31,Monday,October,2016
2,Nathaniel Bassey,Adonai,7axctArGc7dtaEJSl3IZge,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,0 days 00:08:31.207000,56,False,0.269,0.711,C,-5.951,Minor,0.0379,0.00366,0.00215,0.442,0.157,129.583,3,[],Album,08:31,Saturday,February,2022
3,Nathaniel Bassey,Imela (feat. Enitan Adaba),50OHI0yJ1ti2jTWySMz6jY,The Son of God (& Imela),17QFKUXHOJcDzNoDijDcXn,2014-12-01,0 days 00:05:54.573000,53,False,0.366,0.522,F#/Gb,-8.552,Major,0.0319,0.146,0.00051,0.12,0.325,150.101,4,['Enitan Adaba'],Album,05:54,Monday,December,2014
4,Nathaniel Bassey,See What the Lord Has Done,3BPPakEzZWw0pQQzuQfvBt,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,0 days 00:07:34.829000,54,False,0.265,0.539,G#/Ab,-7.433,Major,0.0315,0.025,0.0,0.371,0.321,139.699,4,[],Album,07:34,Saturday,February,2022


## Issue 7

Featured artists column has square brackets and quotes anomng the artist names.

### Code

In [87]:
# Function to remove square brackets and quotes from featured_artists
def clean_featured_artists(featured_artists):
    if len(featured_artists) > 0:
        # Remove square brackets and split by comma
        artist_list = featured_artists.strip('[]').split(',')
        # Remove leading and trailing spaces and quotes from each artist name
        cleaned_artists = [artist.strip().strip("'") for artist in artist_list]
        return ', '.join(cleaned_artists)
    else:
        return ''

# Apply the function to the 'featured_artists' column to clean the values
df_copy['featured_artists'] = df_copy['featured_artists'].apply(clean_featured_artists)

### Test

In [88]:
df_copy.head(15)

Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,duration_ms,popularity,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists,release_type,duration,release_day,release_month,release_year
0,Nathaniel Bassey,Olorun Agbaye - You Are Mighty,0gDyskuhux7JfUcGzn0gY9,Hallelujah Again (Revelation 19:3),5HjyQX3GmSbNBL1bG3D6Gf,2021-04-01,0 days 00:07:16.321000,58,False,0.345,0.698,A#/Bb,-6.186,Major,0.0465,0.116,1e-06,0.348,0.188,123.922,3,"Chandler Moore, Oba",Album,07:16,Thursday,April,2021
1,Nathaniel Bassey,You Are God (feat. Chigozie Achugo),78D1ZERxVW4XvelhO1jG4K,This God Is Too Good,5RCbwQMe3LwArxqneTQMTg,2016-10-03,0 days 00:04:31.438000,55,False,0.487,0.641,C,-4.704,Major,0.0351,0.0501,2e-06,0.183,0.434,123.955,4,Chigozie Achugo,Album,04:31,Monday,October,2016
2,Nathaniel Bassey,Adonai,7axctArGc7dtaEJSl3IZge,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,0 days 00:08:31.207000,56,False,0.269,0.711,C,-5.951,Minor,0.0379,0.00366,0.00215,0.442,0.157,129.583,3,,Album,08:31,Saturday,February,2022
3,Nathaniel Bassey,Imela (feat. Enitan Adaba),50OHI0yJ1ti2jTWySMz6jY,The Son of God (& Imela),17QFKUXHOJcDzNoDijDcXn,2014-12-01,0 days 00:05:54.573000,53,False,0.366,0.522,F#/Gb,-8.552,Major,0.0319,0.146,0.00051,0.12,0.325,150.101,4,Enitan Adaba,Album,05:54,Monday,December,2014
4,Nathaniel Bassey,See What the Lord Has Done,3BPPakEzZWw0pQQzuQfvBt,Names of God,3WCZbOcvzRlzyEnRVPtKQF,2022-02-05,0 days 00:07:34.829000,54,False,0.265,0.539,G#/Ab,-7.433,Major,0.0315,0.025,0.0,0.371,0.321,139.699,4,,Album,07:34,Saturday,February,2022
5,Nathaniel Bassey,Yeshua Hamashiach,6xmcjy9fiHc5UZlUZDg5uy,The King Is Coming,1zi8tByoRTWt088vDbI6OG,2019-11-29,0 days 00:07:44.306000,52,False,0.551,0.886,A,-4.766,Major,0.0578,0.0726,1.7e-05,0.0896,0.346,147.995,4,,Album,07:44,Friday,November,2019
6,Nathaniel Bassey,Onise Iyanu (feat. Micah Stampley),0S7l0Nz5MztXjkNTppknRp,This God Is Too Good,5RCbwQMe3LwArxqneTQMTg,2016-10-03,0 days 00:06:06.498000,52,False,0.284,0.796,G,-4.604,Minor,0.0504,0.203,0.0,0.391,0.309,144.214,4,Micah Stampley,Album,06:06,Monday,October,2016
7,Nathaniel Bassey,Tobechukwu - Live,79zFqU5mmz9wnuKfjaiPJp,HALLELUJAH LIVE,5xe3knii9RbZzikwkiIqBY,2023-03-24,0 days 00:08:08.339000,53,False,0.425,0.715,A#/Bb,-6.247,Major,0.0733,0.381,0.0,0.0956,0.686,75.374,4,MERCY CHINWO BLESSED,Live Album,08:08,Friday,March,2023
8,Nathaniel Bassey,Ebenezer - Live,1NIsNb7J8yfy9TYE1ycKMp,HALLELUJAH LIVE,5xe3knii9RbZzikwkiIqBY,2023-03-24,0 days 00:08:31.197000,53,False,0.544,0.858,G,-4.976,Major,0.0454,0.156,0.0,0.47,0.48,143.979,3,VICTORIA ORENZE,Live Album,08:31,Friday,March,2023
9,Nathaniel Bassey,Wonderful Wonder,5sSoA18jYUkQkvC64o0IDS,God of Love,2T2gLRnY1ylgkEWaEVuL1H,2013-12-01,0 days 00:05:37.160000,52,False,0.668,0.575,G#/Ab,-7.929,Major,0.0632,0.491,0.0,0.0919,0.409,172.059,4,Love Song,EP,05:37,Sunday,December,2013


In [89]:
df_copy.columns

Index(['artist_name', 'track_name', 'track_id', 'album_name', 'album_id',
       'release_date', 'duration_ms', 'popularity', 'explicit', 'danceability',
       'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature',
       'featured_artists', 'release_type', 'duration', 'release_day',
       'release_month', 'release_year'],
      dtype='object')

---
<a id='Storing Data'></a>
# Storing Data

In [91]:
# Store dataset
df_copy.to_csv('Nathaniel Bassey Spotify Tracks Cleaned.csv', index=False)