In [2]:
import pandas as pd
import re
from datetime import datetime

In [3]:
# Show all rows and cols
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

## David Crosby

In [None]:
def clean_dataset(path): # Function to clean dataset - multiple techniques are applied, such as dropping duplicates and unwanted columns, renaming columns, changing the order of columns.
  df_spotify = pd.read_csv(path)

  # Drop duplicates based on track_id
  df_spotify = df_spotify.drop_duplicates(subset='track_id')
  df_spotify.reset_index(drop=True, inplace=True)

  # Drop columns
  columns_to_drop = ['Unnamed', 'artist_id', 'duration_ms_y', 'track_id', 'album_id']
  df_spotify = df_spotify.drop(columns=[col for col in df_spotify.columns if any(x in col for x in columns_to_drop)])

  # Rename columns
  df_spotify.rename(columns={'track_name' : 'track', 'album_name' : 'album', 'album_release_date' : 'release_date', 'duration_ms_x' : 'duration', 'artist_name' : 'artist'}, inplace=True)

  # Change order
  new_order = [
      'track',
      'artist',
      'album',
      'album_type',
      'duration',
      'release_date',
      'popularity',
      'danceability',
      'energy',
      'key',
      'loudness',
      'mode',
      'speechiness',
      'acousticness',
      'instrumentalness',
      'liveness',
      'valence',
      'tempo',
      'time_signature'
  ]

  # Reorder the DataFrame
  df_spotify = df_spotify[new_order]

  return df_spotify

david_crosby_spotify = clean_dataset('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-david-crosby.csv')

# Remove album 'Another Stoney Evening', because it belongs to 'Crosby & Nash' discography
another_stoney_evening_subset = david_crosby_spotify[david_crosby_spotify['album'] == 'Another Stoney Evening']
another_stoney_evening_subset.to_csv('spotify-another-stoney-evening.csv', index=False)

david_crosby_spotify = david_crosby_spotify[david_crosby_spotify['album'] != 'Another Stoney Evening']

# Sort DataFrame by 'track'
david_crosby_spotify = david_crosby_spotify.sort_values(by='track', ascending=True)

# Formatting track names to extract duplicates easier
david_crosby_spotify['track'] = david_crosby_spotify['track'].str.replace(r'\[.*?\]', '', regex=True) # remove all rows that contain square brackets in 'tracks'
david_crosby_spotify['track'] = david_crosby_spotify['track'].str.replace(r'\((.*?(Live|Demo|feat|Alternate).*?)\)', '', regex=True)
david_crosby_spotify['track'] = david_crosby_spotify['track'].str.split('-').str[0].str.strip()
david_crosby_spotify['track'] = david_crosby_spotify['track'].str.strip()

# Remove excess albums
tracks_to_keep = ['Regina', 'Guinnevere', 'Carry Me']
david_crosby_spotify = david_crosby_spotify[
    (david_crosby_spotify['album'] != 'Live at the Capitol Theatre') |
    (david_crosby_spotify['track'].isin(tracks_to_keep))
]

tracks_to_keep_anniversary_edititon = ['Bach Mode (Pre', 'Coast Road', 'Dancer', 'Fugue', 'Games', 'Kids And Dogs', 'Riff 1', 'The Wall Song', 'Where Will I Be?']
david_crosby_spotify = david_crosby_spotify[
    (david_crosby_spotify['album'] != 'If I Could Only Remember My Name (50th Anniversary Edition; 2021 Remaster)') |
    (david_crosby_spotify['track'].isin(tracks_to_keep_anniversary_edititon))
]

# Remove duplicated tracks
david_crosby_spotify = david_crosby_spotify.drop_duplicates(subset='track', keep='first')
david_crosby_spotify.reset_index(drop=True, inplace=True)

david_crosby_spotify.drop(columns=['album_type'], inplace=True)

# Show duplicated tracks
duplicated_tracks = david_crosby_spotify[david_crosby_spotify.duplicated(subset='track', keep=False)]
print('duplicated tracks ', duplicated_tracks)

david_crosby_spotify.to_csv('spotify-david-crosby-v2.csv', index=False)
david_crosby_spotify

## Stephen Stills

In [None]:
stephen_stills_spotify = clean_dataset('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-stephen-stills.csv')

# Sort DataFrame by 'track'
stephen_stills_spotify = stephen_stills_spotify.sort_values(by='track', ascending=True)

stephen_stills_spotify['track'] = stephen_stills_spotify['track'].str.split(' - ').str[0].str.strip()

# Remove excess albums (live, compilations, by other bands)
songs_to_keep = [
    'Crossroads / You Can\'t Catch Me',
    'Everybody\'s Talkin at Me',
    'Four Days Gone',
    'Jet Set / Rocky Mountain Way / Jet Set (Sigh)',
    'Special Care'
]
stephen_stills_spotify = stephen_stills_spotify[
    (stephen_stills_spotify['album'] != 'Live') |
    (stephen_stills_spotify['track'].isin(songs_to_keep))
]

stephen_stills_spotify = stephen_stills_spotify[~stephen_stills_spotify['album'].str.contains('manassas', case=False, na=False)]
stephen_stills_spotify = stephen_stills_spotify[~stephen_stills_spotify['album'].str.contains('down the road', case=False, na=False)]
stephen_stills_spotify = stephen_stills_spotify[~stephen_stills_spotify['album'].str.contains('winterland', case=False, na=False)]

stephen_stills_spotify.reset_index(drop=True, inplace=True)

stephen_stills_spotify.drop(columns=['album_type'], inplace=True)

# Show duplicated tracks
duplicated_tracks = stephen_stills_spotify[stephen_stills_spotify.duplicated(subset='track', keep=False)]
print('duplicated tracks ', duplicated_tracks)

stephen_stills_spotify.to_csv('spotify-stephen-stills-v2.csv', index=False)
stephen_stills_spotify

## Graham Nash

In [None]:
graham_nash_spotify = clean_dataset('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-graham-nash.csv')

# Sort DataFrame by 'track'
graham_nash_spotify = graham_nash_spotify.sort_values(by='track', ascending=True)

# # Remove excess albums (live, compilations, by other bands)
graham_nash_spotify = graham_nash_spotify[~graham_nash_spotify['album'].str.contains('Live: Songs For Beginners / Wild Tales', case=False, na=False)]
graham_nash_spotify = graham_nash_spotify[~graham_nash_spotify['album'].str.contains('mix', case=False, na=False)]
graham_nash_spotify = graham_nash_spotify[~graham_nash_spotify['artist'].isin(['The Hollies', 'Crosby, Stills & Nash', 'Crosby, Stills, Nash & Young'])] # filter album 'Reflections', which is a box set containing multiple highlighted songs

tracks_to_keep = ['Love Is the Reason - 2008 Stereo Mix', 'Magical Child - 2008 Stereo Mix']
graham_nash_spotify = graham_nash_spotify[~(graham_nash_spotify['track'].str.contains('2008 Stereo Mix') & ~graham_nash_spotify['track'].isin(tracks_to_keep))]

graham_nash_spotify.reset_index(drop=True, inplace=True)

graham_nash_spotify.drop(columns=['album_type'], inplace=True)

# Show duplicated tracks
duplicated_tracks = graham_nash_spotify[graham_nash_spotify.duplicated(subset='track', keep=False)]
print('duplicated tracks ', duplicated_tracks)

graham_nash_spotify.to_csv('spotify-graham-nash-v2.csv', index=False)
graham_nash_spotify

## Neil Young - albums

In [None]:
neil_young_spotify = clean_dataset('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-neil-young.csv')

# Sort DataFrame by 'track'
neil_young_spotify = neil_young_spotify.sort_values(by='track', ascending=True)

# Remove excess albums (live, compilations, by other bands)
neil_young_spotify['album'] = neil_young_spotify['album'].str.strip()

tracks_to_keep = [ "Old Laughing Lady",
    "Songs for Judy - Intro",
    "Sugar Mountain"]
neil_young_spotify = neil_young_spotify[
    (neil_young_spotify['album'] != "Songs for Judy") |
    (neil_young_spotify['track'].isin(tracks_to_keep))
]

neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains('decade', case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains('zuma', case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("early daze", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("fu##in' up", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("world record", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Noise and Flowers (Live)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Way Down In The Rust Bucket (Live)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("barn", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("sleeps with angels", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("ragged glory", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("greendale", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("americana", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("toast", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("broken arrow", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("are you passionate", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("psychedelic pill", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("colorado", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("American Stars 'N Bars", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("rust never sleeps", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("live rust", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Everybody Knows This Is Nowhere", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Re-ac-tor (2003 Remaster)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Everybody's Rockin", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains('Life (Live)', case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("This Note's for You", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Arc (Live)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Weld (Live)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("year of the horse", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("the monsanto years", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("earth", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("earth", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("the visitor", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Tuscaloosa (Live)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['track'].str.contains("Tuscaloosa", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['track'].str.contains("live from the roxy", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Harvest (2009 Remaster)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Young Shakespeare (Live)", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Live at the Fillmore East 1970", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("carnegie hall", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("Live at Massey Hall 1971", case=False, na=False, regex=False)]
neil_young_spotify = neil_young_spotify[~neil_young_spotify['album'].str.contains("After the Gold Rush (2009 Remaster)	", case=False, na=False, regex=False)]

neil_young_spotify.reset_index(drop=True, inplace=True)

neil_young_spotify.drop(columns=['album_type'], inplace=True)
neil_young_spotify.to_csv('spotify-neil-young-v2.csv', index=False)
neil_young_spotify

## The Stills Young Band

In [109]:
stills_young_band_spotify = pd.read_csv('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-stills-young-band.csv')

# # Drop columns
columns_to_drop = ['Unnamed']
stills_young_band_spotify = stills_young_band_spotify.drop(columns=[col for col in stills_young_band_spotify.columns if any(x in col for x in columns_to_drop)])

# Rename columns
stills_young_band_spotify.rename(columns={'track_id' : 'track', 'album_name' : 'album', 'album_release_date' : 'release_date', 'duration_ms' : 'duration', 'artist_name' : 'artist'}, inplace=True)

# Change order
new_order = [
      'track',
      'artist',
      'album',
      'album_type',
      'duration',
      'release_date',
      'popularity',
      'danceability',
      'energy',
      'key',
      'loudness',
      'mode',
      'speechiness',
      'acousticness',
      'instrumentalness',
      'liveness',
      'valence',
      'tempo',
      'time_signature'
  ]

# Reorder the DataFrame
stills_young_band_spotify = stills_young_band_spotify[new_order]
stills_young_band_spotify.drop(columns=['album_type'], inplace=True)

stills_young_band_spotify.to_csv('spotify-stills-young-band-v2.csv', index=False)
stills_young_band_spotify


Unnamed: 0,track,artist,album,duration,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Long May You Run,The Stills-Young Band,Long May You Run,236413,1976-20-09,37,0.565,0.482,2,-9.535,1,0.026,0.117,2.2e-05,0.112,0.697,146.168,4
1,Fontainebleau,The Stills-Young Band,Long May You Run,239493,1976-20-09,20,0.485,0.549,0,-10.028,1,0.0287,0.114,0.0898,0.142,0.145,120.484,4
2,Let It Shine,The Stills-Young Band,Long May You Run,286213,1976-20-09,12,0.413,0.596,4,-9.331,1,0.026,0.125,0.0123,0.382,0.645,156.87,4
3,Make Love to You,The Stills-Young Band,Long May You Run,416599,1976-20-09,4,0.216,0.744,8,-11.146,1,0.11,0.293,2.2e-05,0.556,0.46,93.381,4
4,Midnight on the Bay,The Stills-Young Band,Long May You Run,242573,1976-20-09,20,0.557,0.337,2,-13.268,1,0.0354,0.537,0.0943,0.1,0.454,150.162,4
5,Ocean Girl,The Stills-Young Band,Long May You Run,203720,1976-20-09,9,0.661,0.484,7,-10.597,1,0.0252,0.659,0.0127,0.0841,0.766,87.101,4


## Crosby & Nash

In [None]:
cn_spotify = clean_dataset('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-crosby-nash.csv')

# Sort DataFrame by 'track'
cn_spotify = cn_spotify.sort_values(by='track', ascending=False)

# Remove excess albums (live)
cn_spotify = cn_spotify[~cn_spotify['album'].str.contains('live', case=False, na=False)]

cn_spotify.drop(columns=['album_type'], inplace=True)

# Merge album 'Another Stoney Evening' to spotify-cn
another_stoney_evening_subset = pd.read_csv('/content/spotify-another-stoney-evening.csv')

merged_df = pd.concat([cn_spotify, another_stoney_evening_subset])

# Show duplicated tracks
duplicated_tracks = cn_spotify[cn_spotify.duplicated(subset='track', keep=False)]
print('duplicated tracks ', duplicated_tracks)

merged_df.to_csv('spotify-cn-v2.csv', index=False)
merged_df.reset_index(drop=True, inplace=True)
merged_df


## Crosby, Stills & Nash

In [None]:
csn_spotify = clean_dataset('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-csn.csv')

# Sort DataFrame by 'track'
csn_spotify = csn_spotify.sort_values(by='track', ascending=False)

# csn_spotify = csn_spotify[csn_spotify['album_type'] != 'compilation'] # Remove 'compilation' from 'album_type'
# csn_spotify = csn_spotify[csn_spotify['album'] != 'Decade'] # Remove 'Decade' compilation
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains('archives', case=False, na=False)] # Remove archives
# csn_spotify = csn_spotify[~csn_spotify['track'].str.contains('archives', case=False, na=False)]
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains('deluxe', case=False, na=False)] # Remove deluxe editions
# csn_spotify = csn_spotify[~csn_spotify['track'].str.contains('deluxe', case=False, na=False)]
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains('remaster', case=False, na=False)] # Remove remastered version
# csn_spotify = csn_spotify[~csn_spotify['track'].str.contains('remaster', case=False, na=False)]
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains('early', case=False, na=False)] # Remove early version
# csn_spotify = csn_spotify[~csn_spotify['track'].str.contains('early', case=False, na=False)]
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains('demo', case=False, na=False)] # Remove demo version
# csn_spotify = csn_spotify[~csn_spotify['track'].str.contains('demo', case=False, na=False)]
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains('mix', case=False, na=False)] # Remove remix version
# csn_spotify = csn_spotify[~csn_spotify['track'].str.contains('mix', case=False, na=False)]
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains('live', case=False, na=False)] # Remove live albums
# csn_spotify = csn_spotify[~csn_spotify['album'].str.contains("this one's for bill", case=False, na=False)] # Remove live albums

csn_spotify.drop(columns=['album_type'], inplace=True)
csn_spotify.reset_index(drop=True, inplace=True)

# Show duplicated tracks
duplicated_tracks = csn_spotify[csn_spotify.duplicated(subset='track', keep=False)]
print('duplicated tracks ', duplicated_tracks)

csn_spotify.to_csv('spotify-csn-v2.csv', index=False)
csn_spotify

## Crosby, Stills, Nash & Young

In [126]:
csny_spotify = clean_dataset('/content/drive/MyDrive/Eksplorativna analiza podataka sa Spotify-a/Spotify Web Scraping/Spotify Datasets/raw/spotify-csny.csv')

# Sort DataFrame by 'track'
csny_spotify = csny_spotify.sort_values(by='track', ascending=False)

# csny_spotify = csny_spotify[~csny_spotify['album'].str.contains('archives', case=False, na=False)] # Remove archives
# csny_spotify = csny_spotify[~csny_spotify['track'].str.contains('archives', case=False, na=False)]
# csny_spotify = csny_spotify[~csny_spotify['album'].str.contains('early', case=False, na=False)] # Remove early version
# csny_spotify = csny_spotify[~csny_spotify['track'].str.contains('early', case=False, na=False)]
csny_spotify = csny_spotify[~csny_spotify['album'].str.contains('Déjà vu (2021 Remaster)', case=False, na=False, regex=False)] # Remove remastered version
# csny_spotify = csny_spotify[~csny_spotify['track'].str.contains('remaster', case=False, na=False)]
# csny_spotify = csny_spotify[~csny_spotify['album'].str.contains('mix', case=False, na=False)] # Remove remix version
# csny_spotify = csny_spotify[~csny_spotify['track'].str.contains('mix', case=False, na=False)]
# csny_spotify = csny_spotify[~csny_spotify['album'].str.contains('deluxe', case=False, na=False)] # Remove deluxe anniversary version
# csny_spotify = csny_spotify[~csny_spotify['track'].str.contains('deluxe', case=False, na=False)]
# csny_spotify = csny_spotify[~csny_spotify['album'].str.contains('demo', case=False, na=False)] # Remove demo version
# csny_spotify = csny_spotify[~csny_spotify['track'].str.contains('demo', case=False, na=False)]
# csny_spotify = csny_spotify[~csny_spotify['track'].str.contains('live', case=False, na=False)]
# csny_spotify = csny_spotify[~csny_spotify['album'].str.contains('live', case=False, na=False)]

csny_spotify.drop(columns=['album_type'], inplace=True)
csny_spotify.reset_index(drop=True, inplace=True)

# Show duplicated tracks
duplicated_tracks = csn_spotify[csny_spotify.duplicated(subset='track', keep=False)]
print('duplicated tracks ', duplicated_tracks)

csny_spotify.to_csv('spotify-csny-v2.csv', index=False)
csny_spotify

duplicated tracks                                      track                 artist  \
12  Too Much Love to Hide - 2005 Remaster  Crosby, Stills & Nash   
13                Tomorrow Is Another Day  Crosby, Stills & Nash   
80                              Cold Rain  Crosby, Stills & Nash   
81                              Cathedral  Crosby, Stills & Nash   

                              album  duration release_date  popularity  \
12  Daylight Again (Deluxe Edition)    237013   1982-06-21          31   
13  Daylight Again (Deluxe Edition)    245400   1982-06-21          14   
80                              CSN    154600   1977-06-17          25   
81                         CSN 2012    478280   2012-07-10          15   

    danceability  energy  key  loudness  mode  speechiness  acousticness  \
12         0.688  0.7400    2   -10.276     1       0.0287        0.0281   
13         0.646  0.4910    7   -11.515     1       0.0380        0.1460   
80         0.350  0.0444    9   -21.421  

  duplicated_tracks = csn_spotify[csny_spotify.duplicated(subset='track', keep=False)]


Unnamed: 0,track,artist,album,duration,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Woodstock - 2021 Remaster,"Crosby, Stills, Nash & Young",Déjà Vu (50th Anniversary Deluxe Edition),234462,1970-03-11,25,0.46,0.533,0,-12.507,1,0.0531,0.0147,0.0,0.147,0.54,118.568,4
1,Woodstock (Alternate Vocals),"Crosby, Stills, Nash & Young",Déjà Vu (50th Anniversary Deluxe Edition),258952,1970-03-11,13,0.449,0.575,0,-12.304,1,0.037,0.0292,0.0,0.319,0.646,122.501,4
2,Woodstock,"Crosby, Stills, Nash & Young",Deja Vu,233773,1970-03-11,49,0.458,0.539,4,-12.403,0,0.057,0.122,0.00946,0.17,0.56,118.95,4
3,Wooden Ships - Live,"Crosby, Stills, Nash & Young",CSNY / Deja Vu (Live),498293,2008-07-22,22,0.295,0.604,0,-9.848,1,0.0545,0.601,0.0129,0.99,0.4,154.428,4
4,What Are Their Names? - Live,"Crosby, Stills, Nash & Young",CSNY / Deja Vu (Live),148493,2008-07-22,26,0.636,0.568,9,-13.587,0,0.233,0.828,0.0,0.944,0.25,114.69,4
5,Triad - Live,"Crosby, Stills, Nash & Young",4 Way Street,414960,1971-04-07,25,0.433,0.133,2,-21.08,1,0.0744,0.838,0.000244,0.747,0.217,133.657,4
6,Triad (Demo) - 2021 Remaster,David Crosby,Déjà Vu (50th Anniversary Deluxe Edition),330946,1970-03-11,16,0.514,0.114,7,-14.821,1,0.0349,0.848,2e-05,0.127,0.178,129.781,4
7,This Old House,"Crosby, Stills, Nash & Young",American Dream,289666,1988-11-11,28,0.662,0.0974,5,-20.016,1,0.0321,0.358,0.0,0.111,0.433,117.483,4
8,The Restless Consumer - Live,"Crosby, Stills, Nash & Young",CSNY / Deja Vu (Live),383985,2008-07-22,16,0.431,0.692,9,-7.949,1,0.0352,0.117,0.0,0.698,0.169,125.759,4
9,The Lee Shore - Live,"Crosby, Stills, Nash & Young",4 Way Street,268706,1971-04-07,28,0.349,0.206,7,-17.54,1,0.043,0.824,4e-06,0.694,0.143,83.132,4


## Concat all DataFrames to 'spotify-v2.csv'

In [128]:
df_spotify_v2 = pd.concat([david_crosby_spotify, stephen_stills_spotify, graham_nash_spotify, neil_young_spotify, stills_young_band_spotify, csn_spotify, csny_spotify]).reset_index(drop=True)
df_spotify_v2.to_csv('spotify-v2.csv', index=False)
df_spotify_v2

Unnamed: 0,track,artist,album,duration,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,1967,David Crosby,Here If You Listen,199626,2018-10-26,9,0.487,0.307,2,-11.278,0,0.0338,0.878,0.00722,0.182,0.375,81.392,4
1,1974,David Crosby,Here If You Listen,180106,2018-10-26,13,0.418,0.268,8,-11.004,0,0.04,0.894,2e-06,0.127,0.357,179.884,4
2,Almost Cut My Hair,David Crosby,It's All Coming Back to Me Now (Live),369733,1995-01-24,14,0.321,0.605,7,-9.636,1,0.0566,0.597,2.2e-05,0.677,0.487,124.999,4
3,Amelia,David Crosby,Sky Trails,337346,2017-09-29,11,0.659,0.148,9,-13.794,1,0.0405,0.877,0.0093,0.117,0.165,122.088,4
4,Bach Mode (Pre,David Crosby,If I Could Only Remember My Name (50th Anniver...,119626,1971,6,0.23,0.00306,5,-17.789,1,0.0462,0.942,2e-06,0.11,0.365,70.981,1
5,Balanced on a Pin,David Crosby,Here If You Listen,270120,2018-10-26,11,0.665,0.208,2,-15.086,0,0.0377,0.956,0.233,0.119,0.167,117.064,4
6,Before Tomorrow Falls On Love,David Crosby,Sky Trails,230760,2017-09-29,11,0.528,0.104,0,-16.774,1,0.0341,0.938,0.0205,0.112,0.104,104.301,4
7,Boxes,David Crosby,For Free,254853,2021-07-23,15,0.669,0.587,0,-9.388,1,0.0406,0.277,0.0256,0.114,0.55,159.995,4
8,Buddha on a Hill,David Crosby,Here If You Listen,247253,2018-10-26,12,0.586,0.314,7,-11.417,1,0.0342,0.68,0.176,0.115,0.139,118.081,4
9,By the Light of Common Day,David Crosby,Lighthouse,375293,2016-10-21,12,0.686,0.152,0,-14.101,1,0.0403,0.918,0.00045,0.171,0.245,121.811,4
