In [14]:
library(ggplot2)
library(knitr)
library(spotifyr)
library(dplyr)
library(lubridate)
library(plotly)

# Spotify API credentials must come from the environment (for example from
# ~/.Renviron or the notebook server's configuration), never from literals in
# the source file.
# SECURITY: a client ID/secret pair was previously hard-coded at this spot;
# treat that pair as leaked and rotate it in the Spotify developer dashboard.
required_env_vars <- c("SPOTIFY_CLIENT_ID", "SPOTIFY_CLIENT_SECRET")
unset_env_vars <- required_env_vars[!nzchar(Sys.getenv(required_env_vars))]

# Fail early with a clear message instead of a confusing authentication error
# later on.
if (length(unset_env_vars) > 0) {
  stop(
    "Missing environment variable(s): ",
    paste(unset_env_vars, collapse = ", "),
    ". Define them in ~/.Renviron and restart R.",
    call. = FALSE
  )
}

In [59]:
# Request a Spotify API access token
access_token <- get_spotify_access_token()

# Artists whose audio features are collected below
artists <- c(
  "Drake", "Beyonce", "Harry Styles", "Taylor Swift",
  "Twice", "BTS", "EXO", "BlackPink"
)

#' Fetch and combine audio features for several artists
#'
#' Calls `get_artist_audio_features()` once per artist, keeps a fixed set of
#' columns, and stacks the per-artist results into a single data frame.
#'
#' @param artist_names Character vector of artist names; each element is
#'   passed on its own to `get_artist_audio_features()`.
#' @return A data frame holding the rows returned for every artist, restricted
#'   to the columns listed in `selected_columns`. An empty data frame is
#'   returned when `artist_names` is empty.
get_combined_artists_data <- function(artist_names) {
  # Defined once: the column set does not depend on the artist.
  selected_columns <- c(
    "artist_name", "instrumentalness", "valence", "danceability",
    "energy", "loudness", "speechiness", "acousticness",
    "liveness", "tempo", "track_name", "track_id", "album_name",
    "album_release_year", "album_release_date"
  )

  # Collect one data frame per artist. Subsetting by name already yields the
  # wanted column names, so no renaming step is needed.
  per_artist <- lapply(artist_names, function(artist_name) {
    artist_data <- get_artist_audio_features(artist_name)
    data.frame(artist_data[selected_columns])
  })

  # Keep the plain empty data frame for empty input.
  if (length(per_artist) == 0) {
    return(data.frame())
  }

  # Bind once at the end instead of growing a data frame inside the loop,
  # which copies all accumulated rows on every iteration.
  bind_rows(per_artist)
}

# Download the audio features for every artist in `artists`
spotify_data <- get_combined_artists_data(artist_names = artists)


In [60]:
# Load the popularity table from disk; the following steps use its
# `track_id` and `popularity` columns
popularity_data <- read.csv(file = "artists_top_song_popularity.csv")

In [61]:
# Keep only the join key and the popularity score
popularity_data <- select(popularity_data, track_id, popularity)

In [62]:
# Attach popularity to each track; tracks without a match get NA
spotify_data <- left_join(
  x = spotify_data,
  y = popularity_data,
  by = "track_id"
)

In [63]:
# Impute missing popularity values from the audio features, then drop
# popularity outliers.
missing_rows <- is.na(spotify_data$popularity)

# Separate the data into two sets: one with NAs in 'popularity' and one
# without NAs
data_with_na <- spotify_data[missing_rows, ]
data_without_na <- spotify_data[!missing_rows, ]

# Build a linear regression model using rows without NAs
model <- lm(
  popularity ~ instrumentalness + valence + danceability + energy +
    loudness + speechiness + acousticness + liveness + tempo,
  data = data_without_na
)

# Predict popularity for rows with NAs
predicted_popularity <- predict(model, newdata = data_with_na)

# Round to the nearest whole number (as.integer() on its own truncates toward
# zero) and clamp to the 0-100 range Spotify uses for popularity, since a
# linear model can extrapolate beyond it.
predicted_popularity <- pmin(pmax(round(predicted_popularity), 0), 100)

# Replace NAs with predicted values
spotify_data$popularity[missing_rows] <- predicted_popularity
spotify_data$popularity <- as.integer(spotify_data$popularity)

# Remove rows more than three standard deviations from the mean popularity.
# A row whose audio features are NA cannot be predicted and keeps an NA
# popularity; na.rm = TRUE stops such rows from turning the mean/sd into NA,
# and `%in% TRUE` keeps them instead of letting an NA row index replace every
# row with NAs.
popularity_mean <- mean(spotify_data$popularity, na.rm = TRUE)
popularity_sd <- sd(spotify_data$popularity, na.rm = TRUE)
is_outlier <- abs(spotify_data$popularity - popularity_mean) >
  (3 * popularity_sd)
spotify_data <- spotify_data[!(is_outlier %in% TRUE), ]

In [64]:
# Show the data frame after imputation and outlier removal as the cell output
spotify_data

Unnamed: 0_level_0,artist_name,instrumentalness,valence,danceability,energy,loudness,speechiness,acousticness,liveness,tempo,track_name,track_id,album_name,album_release_year,album_release_date,popularity
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<chr>,<int>
1,Drake,0.00e+00,0.2000,0.402,0.514,-7.322,0.0471,0.1060,0.1810,120.009,Virginia Beach,6YV2AI87l1n2fzqU8Dyo05,For All The Dogs Scary Hours Edition,2023,2023-11-17,83
2,Drake,0.00e+00,0.3090,0.646,0.605,-7.590,0.1880,0.0636,0.2530,90.156,Amen (feat. Teezo Touchdown),59ZmQR4pwCaa5iM3veM7Cs,For All The Dogs Scary Hours Edition,2023,2023-11-17,83
3,Drake,0.00e+00,0.1040,0.571,0.550,-6.567,0.1060,0.0548,0.1790,120.947,Calling For You (feat. 21 Savage),2R30S0W4JCM9JaQWlpmeWn,For All The Dogs Scary Hours Edition,2023,2023-11-17,81
4,Drake,0.00e+00,0.0658,0.697,0.320,-9.347,0.2140,0.0127,0.1800,136.976,Fear Of Heights,6LFW4dEsLeiGluniXRgVYr,For All The Dogs Scary Hours Edition,2023,2023-11-17,86
5,Drake,0.00e+00,0.0667,0.566,0.564,-7.880,0.2770,0.0149,0.1040,139.920,Daylight,1us5wNgZc0YLT8RQQs2Q7L,For All The Dogs Scary Hours Edition,2023,2023-11-17,85
6,Drake,0.00e+00,0.2450,0.475,0.655,-7.342,0.3300,0.0306,0.3750,163.997,First Person Shooter (feat. J. Cole),6xIsHPRHdbzU6UMVFn4wh8,For All The Dogs Scary Hours Edition,2023,2023-11-17,84
7,Drake,2.84e-04,0.1400,0.673,0.672,-8.577,0.2240,0.0466,0.1930,136.902,IDGAF (feat. Yeat),2uvBprdlMpzeN5Bq0PzMBI,For All The Dogs Scary Hours Edition,2023,2023-11-17,88
8,Drake,0.00e+00,0.3250,0.487,0.490,-9.373,0.0649,0.5540,0.1260,141.389,7969 Santa,0pdMOh52apEWAS1xELJY7Q,For All The Dogs Scary Hours Edition,2023,2023-11-17,88
9,Drake,0.00e+00,0.1050,0.483,0.408,-9.243,0.0502,0.5080,0.2590,88.880,Slime You Out (feat. SZA),4gQBXN2GBRpemMuxg5y3h9,For All The Dogs Scary Hours Edition,2023,2023-11-17,86
10,Drake,1.31e-05,0.3690,0.626,0.309,-10.397,0.0433,0.8500,0.1170,117.940,Bahamas Promises,3nHat22UwPywIevUrXIhy1,For All The Dogs Scary Hours Edition,2023,2023-11-17,87


In [65]:
# Save the result to disk. Row names are written as an unnamed first column
# (the write.csv default, spelled out here so it is visible to readers).
write.csv(
  x = spotify_data,
  file = "spotify_data_cleaned.csv",
  row.names = TRUE
)