In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress
import scipy.stats as st

In [2]:
# Store filepath in a variable
data = "Resources/data.csv"
data_by_artist = "Resources/data_by_artist.csv"
data_by_genres = "Resources/data_by_genres.csv"
data_by_year = "Resources/data_by_year.csv"
data_w_genres = "Resources/data_w_genres.csv"

In [3]:
data_df = pd.read_csv(data, encoding="ISO-8859-1")
data_by_artist_df = pd.read_csv(data_by_artist, encoding="ISO-8859-1")
data_by_genres_df = pd.read_csv(data_by_genres, encoding="ISO-8859-1")
data_by_year_df = pd.read_csv(data_by_year, encoding="ISO-8859-1")
data_w_genres_df = pd.read_csv(data_w_genres, encoding="ISO-8859-1")

In [4]:
def tidy_split(df, column, sep='|', keep=False):
    """
    Split the values of a column and expand so the new DataFrame has one split
    value per row. Filters rows where the column is missing.

    Params
    ------
    df : pandas.DataFrame
        dataframe with the column to split and expand
    column : str
        the column to split and expand
    sep : str
        the string used to split the column's values
    keep : bool
        whether to retain the presplit value as it's own row

    Returns
    -------
    pandas.DataFrame
        Returns a dataframe with the same columns as `df`.
    """
    indexes = list()
    new_values = list()
    df = df.dropna(subset=[column])
    for i, presplit in enumerate(df[column].astype(str)):
        values = presplit.split(sep)
        if keep and len(values) > 1:
            indexes.append(i)
            new_values.append(presplit)
        for value in values:
            indexes.append(i)
            new_values.append(value)
    new_df = df.iloc[indexes, :].copy()
    new_df[column] = new_values
    return new_df

In [5]:
data_df["artists"] = data_df["artists"].str.strip("[")
data_df["artists"] = data_df["artists"].str.strip("]")
data_df["artists"]

artists = tidy_split(data_df,"artists",sep=', ',keep=True)
artists["artists"] = artists["artists"].str.strip("'")
artists["artists"] = artists["artists"].str.strip('"')
artists

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0.991000,Mamie Smith,0.598,168333,0.224,0,0cS0A1fUEUd1EW3FcF8AEI,0.000522,5,0.3790,-12.628,0,Keep A Song In Your Soul,12,1920,0.0936,149.976,0.6340,1920
1,0.643000,Screamin' Jay Hawkins,0.852,150200,0.517,0,0hbkKFIJm7Z05H8Zl9w30f,0.026400,5,0.0809,-7.261,0,I Put A Spell On You,7,1920-01-05,0.0534,86.889,0.9500,1920
2,0.993000,Mamie Smith,0.647,163827,0.186,0,11m7laMUgmOKqI3oYzuhne,0.000018,0,0.5190,-12.098,1,Golfing Papa,4,1920,0.1740,97.600,0.6890,1920
3,0.000173,Oscar Velazquez,0.730,422087,0.798,0,19Lc5SfJJ5O1oaxY0fpwfh,0.801000,2,0.1280,-7.311,1,True House Music - Xavier Santos & Carlos Gomi...,17,1920-01-01,0.0425,127.997,0.0422,1920
4,0.295000,Mixe,0.704,165224,0.707,1,2hJjbsLCytGsnAHfdsLejp,0.000246,10,0.4020,-6.036,0,Xuniverxe,2,1920-10-01,0.0768,122.076,0.2990,1920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174384,0.009170,Tony T,0.792,147615,0.866,0,46LhBf6TvYjZU2SMvGZAbn,0.000060,6,0.1780,-5.089,0,The One,0,2020-12-25,0.0356,125.972,0.1860,2020
174385,0.795000,Alessia Cara,0.429,144720,0.211,0,7tue2Wemjd0FZzRtDrQFZd,0.000000,4,0.1960,-11.665,1,A Little More,0,2021-01-22,0.0360,94.710,0.2280,2021
174386,0.806000,Roger Fly,0.671,218147,0.589,0,48Qj61hOdYmUCFJbpQ29Ob,0.920000,4,0.1130,-12.393,0,Together,0,2020-12-09,0.0282,108.058,0.7140,2020
174387,0.920000,Taylor Swift,0.462,244000,0.240,1,1gcyHQpBQ1lfXGdhZmWrHP,0.000000,0,0.1130,-12.077,1,champagne problems,69,2021-01-07,0.0377,171.319,0.3200,2021


In [6]:
merge_df = pd.merge(artists, data_w_genres_df, on="artists", how="outer")
merge_df

Unnamed: 0,acousticness_x,artists,danceability_x,duration_ms_x,energy_x,explicit,id,instrumentalness_x,key_x,liveness_x,...,liveness_y,loudness_y,speechiness_y,tempo_y,valence_y,popularity_y,key_y,mode_y,count,genres
0,0.991,Mamie Smith,0.598,168333.0,0.2240,0.0,0cS0A1fUEUd1EW3FcF8AEI,0.000522,5.0,0.379,...,0.372625,-12.800313,0.121438,113.009938,0.641062,3.1875,0.0,1.0,16.0,"['harlem renaissance', 'traditional blues']"
1,0.993,Mamie Smith,0.647,163827.0,0.1860,0.0,11m7laMUgmOKqI3oYzuhne,0.000018,0.0,0.519,...,0.372625,-12.800313,0.121438,113.009938,0.641062,3.1875,0.0,1.0,16.0,"['harlem renaissance', 'traditional blues']"
2,0.992,Mamie Smith,0.782,195200.0,0.0573,0.0,5DlCyqLyX2AOVDTjjkDZ8x,0.000002,5.0,0.176,...,0.372625,-12.800313,0.121438,113.009938,0.641062,3.1875,0.0,1.0,16.0,"['harlem renaissance', 'traditional blues']"
3,0.995,Mamie Smith,0.482,198000.0,0.2290,0.0,0lqEx4vktZP1y9hnwfF27Y,0.000061,7.0,0.549,...,0.372625,-12.800313,0.121438,113.009938,0.641062,3.1875,0.0,1.0,16.0,"['harlem renaissance', 'traditional blues']"
4,0.992,Mamie Smith,0.574,189800.0,0.1380,0.0,4HYmmG8uHL2hP4zSFWavKF,0.000492,3.0,0.233,...,0.372625,-12.800313,0.121438,113.009938,0.641062,3.1875,0.0,1.0,16.0,"['harlem renaissance', 'traditional blues']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273323,,"Victor Yturbe ""El Piruli""",,,,,,,,,...,0.176600,-12.227800,0.029280,98.380400,0.509000,34.2000,9.0,1.0,10.0,"['bolero', 'bolero mexicano', 'ranchera']"
273324,,"Weiss, George D.",,,,,,,,,...,0.184000,-9.793000,0.032600,171.248000,0.242000,0.0000,3.0,1.0,1.0,[]
273325,,"Yessister, Jazzsister",,,,,,,,,...,0.082567,-9.682333,0.032133,147.673000,0.249000,0.0000,5.0,0.0,3.0,[]
273326,,\,,,,,,,,,...,0.332000,-15.643000,0.048800,113.235000,0.487000,0.0000,4.0,1.0,1.0,[]


In [7]:
merge_df.columns

Index(['acousticness_x', 'artists', 'danceability_x', 'duration_ms_x',
       'energy_x', 'explicit', 'id', 'instrumentalness_x', 'key_x',
       'liveness_x', 'loudness_x', 'mode_x', 'name', 'popularity_x',
       'release_date', 'speechiness_x', 'tempo_x', 'valence_x', 'year',
       'acousticness_y', 'danceability_y', 'duration_ms_y', 'energy_y',
       'instrumentalness_y', 'liveness_y', 'loudness_y', 'speechiness_y',
       'tempo_y', 'valence_y', 'popularity_y', 'key_y', 'mode_y', 'count',
       'genres'],
      dtype='object')

In [8]:
reordered_merge_df = merge_df[["artists", "year", "release_date", "genres", "name", "id", "popularity_x", "popularity_y", "danceability_x", "danceability_y", "tempo_x", "tempo_y", "energy_x", "energy_y", "acousticness_x", "acousticness_y", "duration_ms_x", "duration_ms_y", "explicit", "instrumentalness_x", "instrumentalness_y", "key_x", "key_y", "liveness_x", "liveness_y", "loudness_x", "loudness_y", "speechiness_x", "speechiness_y", "valence_x", "valence_y"]]


In [9]:
renamed_merge_df = reordered_merge_df.rename(columns={"popularity_x": "Popularity of Song",
                                        "popularity_y": "Avg Popularity of Artist",
                                        "danceability_x": "Danceability of Song",
                                        "danceability_y": "Avg Danceability of Artist",
                                        "tempo_x": "Tempo of Song",
                                        "tempo_y": "Avg Tempo of Artist",
                                        "energy_x": "Energy of Song",
                                        "energy_y": "Avg Energy of Artist",
                                        "acousticness_x": "Acousticness of Song", 
                                        "acousticness_y": "Avg Acousticness of Artist",
                                        "duration_ms_x": "Duration of Song",
                                        "duration_ms_y": "Avg Duration of Artist",
                                        "danceability_x": "Danceability of Song",
                                        "danceability_y": "Avg Danceability of Artist",
                                        "tempo_x": "Tempo of Song",
                                        "tempo_y": "Avg Tempo of Artist",
                                        "instrumentalness_x": "Three Star Reviews",
                                        "instrumentalness_y": "Four Star Reviews",
                                        "key_x": "Key of Song",
                                        "key_y": "Avg Key of Artist",
                                        "loudness_x": "Loudness of Song",
                                        "loudness_y": "Avg Loudness of Artist",
                                        "speechiness_x": "Speechiness of Song",
                                        "speechiness_y": "Avg Speechiness of Artist",
                                        "valence_x": "Valence of Song",
                                        "valence_y": "Avg Valence of Artist",
                                        "liveness_x": "Liveness of Song",
                                        "liveness_y": "Avg Liveness of Artist",
                                       })

In [10]:
renamed_merge_df

Unnamed: 0,artists,year,release_date,genres,name,id,Popularity of Song,Avg Popularity of Artist,Danceability of Song,Avg Danceability of Artist,...,Key of Song,Avg Key of Artist,Liveness of Song,Avg Liveness of Artist,Loudness of Song,Avg Loudness of Artist,Speechiness of Song,Avg Speechiness of Artist,Valence of Song,Avg Valence of Artist
0,Mamie Smith,1920.0,1920,"['harlem renaissance', 'traditional blues']",Keep A Song In Your Soul,0cS0A1fUEUd1EW3FcF8AEI,12.0,3.1875,0.598,0.627250,...,5.0,0.0,0.379,0.372625,-12.628,-12.800313,0.0936,0.121438,0.634,0.641062
1,Mamie Smith,1920.0,1920,"['harlem renaissance', 'traditional blues']",Golfing Papa,11m7laMUgmOKqI3oYzuhne,4.0,3.1875,0.647,0.627250,...,0.0,0.0,0.519,0.372625,-12.098,-12.800313,0.1740,0.121438,0.689,0.641062
2,Mamie Smith,1920.0,1920,"['harlem renaissance', 'traditional blues']",Don't You Advertise Your Man,5DlCyqLyX2AOVDTjjkDZ8x,5.0,3.1875,0.782,0.627250,...,5.0,0.0,0.176,0.372625,-12.453,-12.800313,0.0592,0.121438,0.487,0.641062
3,Mamie Smith,1920.0,1920,"['harlem renaissance', 'traditional blues']",Kansas City Man Blues - 78rpm Version,0lqEx4vktZP1y9hnwfF27Y,4.0,3.1875,0.482,0.627250,...,7.0,0.0,0.549,0.372625,-12.619,-12.800313,0.0812,0.121438,0.461,0.641062
4,Mamie Smith,1920.0,1920,"['harlem renaissance', 'traditional blues']",Miss Jenny's Ball (aka There'll Be No Freebies...,4HYmmG8uHL2hP4zSFWavKF,1.0,3.1875,0.574,0.627250,...,3.0,0.0,0.233,0.372625,-14.171,-12.800313,0.1090,0.121438,0.764,0.641062
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273323,"Victor Yturbe ""El Piruli""",,,"['bolero', 'bolero mexicano', 'ranchera']",,,,34.2000,,0.521000,...,,9.0,,0.176600,,-12.227800,,0.029280,,0.509000
273324,"Weiss, George D.",,,[],,,,0.0000,,0.296000,...,,3.0,,0.184000,,-9.793000,,0.032600,,0.242000
273325,"Yessister, Jazzsister",,,[],,,,0.0000,,0.256333,...,,5.0,,0.082567,,-9.682333,,0.032133,,0.249000
273326,\,,,[],,,,0.0000,,0.335000,...,,4.0,,0.332000,,-15.643000,,0.048800,,0.487000


In [11]:
renamed_merge_df["genres"].replace('\'','', regex=True, inplace=True)
renamed_merge_df["genres"] = renamed_merge_df["genres"].str.strip("[")
renamed_merge_df["genres"] = renamed_merge_df["genres"].str.strip("]")
renamed_merge_df

Unnamed: 0,artists,year,release_date,genres,name,id,Popularity of Song,Avg Popularity of Artist,Danceability of Song,Avg Danceability of Artist,...,Key of Song,Avg Key of Artist,Liveness of Song,Avg Liveness of Artist,Loudness of Song,Avg Loudness of Artist,Speechiness of Song,Avg Speechiness of Artist,Valence of Song,Avg Valence of Artist
0,Mamie Smith,1920.0,1920,"harlem renaissance, traditional blues",Keep A Song In Your Soul,0cS0A1fUEUd1EW3FcF8AEI,12.0,3.1875,0.598,0.627250,...,5.0,0.0,0.379,0.372625,-12.628,-12.800313,0.0936,0.121438,0.634,0.641062
1,Mamie Smith,1920.0,1920,"harlem renaissance, traditional blues",Golfing Papa,11m7laMUgmOKqI3oYzuhne,4.0,3.1875,0.647,0.627250,...,0.0,0.0,0.519,0.372625,-12.098,-12.800313,0.1740,0.121438,0.689,0.641062
2,Mamie Smith,1920.0,1920,"harlem renaissance, traditional blues",Don't You Advertise Your Man,5DlCyqLyX2AOVDTjjkDZ8x,5.0,3.1875,0.782,0.627250,...,5.0,0.0,0.176,0.372625,-12.453,-12.800313,0.0592,0.121438,0.487,0.641062
3,Mamie Smith,1920.0,1920,"harlem renaissance, traditional blues",Kansas City Man Blues - 78rpm Version,0lqEx4vktZP1y9hnwfF27Y,4.0,3.1875,0.482,0.627250,...,7.0,0.0,0.549,0.372625,-12.619,-12.800313,0.0812,0.121438,0.461,0.641062
4,Mamie Smith,1920.0,1920,"harlem renaissance, traditional blues",Miss Jenny's Ball (aka There'll Be No Freebies...,4HYmmG8uHL2hP4zSFWavKF,1.0,3.1875,0.574,0.627250,...,3.0,0.0,0.233,0.372625,-14.171,-12.800313,0.1090,0.121438,0.764,0.641062
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273323,"Victor Yturbe ""El Piruli""",,,"bolero, bolero mexicano, ranchera",,,,34.2000,,0.521000,...,,9.0,,0.176600,,-12.227800,,0.029280,,0.509000
273324,"Weiss, George D.",,,,,,,0.0000,,0.296000,...,,3.0,,0.184000,,-9.793000,,0.032600,,0.242000
273325,"Yessister, Jazzsister",,,,,,,0.0000,,0.256333,...,,5.0,,0.082567,,-9.682333,,0.032133,,0.249000
273326,\,,,,,,,0.0000,,0.335000,...,,4.0,,0.332000,,-15.643000,,0.048800,,0.487000


In [12]:
new_artists = tidy_split(renamed_merge_df,"genres",sep=', ',keep=False)
renamed_merge_df["genres"] = renamed_merge_df["genres"].str.lstrip()
new_artists

Unnamed: 0,artists,year,release_date,genres,name,id,Popularity of Song,Avg Popularity of Artist,Danceability of Song,Avg Danceability of Artist,...,Key of Song,Avg Key of Artist,Liveness of Song,Avg Liveness of Artist,Loudness of Song,Avg Loudness of Artist,Speechiness of Song,Avg Speechiness of Artist,Valence of Song,Avg Valence of Artist
0,Mamie Smith,1920.0,1920,harlem renaissance,Keep A Song In Your Soul,0cS0A1fUEUd1EW3FcF8AEI,12.0,3.1875,0.598,0.627250,...,5.0,0.0,0.379,0.372625,-12.628,-12.800313,0.0936,0.121438,0.634,0.641062
0,Mamie Smith,1920.0,1920,traditional blues,Keep A Song In Your Soul,0cS0A1fUEUd1EW3FcF8AEI,12.0,3.1875,0.598,0.627250,...,5.0,0.0,0.379,0.372625,-12.628,-12.800313,0.0936,0.121438,0.634,0.641062
1,Mamie Smith,1920.0,1920,harlem renaissance,Golfing Papa,11m7laMUgmOKqI3oYzuhne,4.0,3.1875,0.647,0.627250,...,0.0,0.0,0.519,0.372625,-12.098,-12.800313,0.1740,0.121438,0.689,0.641062
1,Mamie Smith,1920.0,1920,traditional blues,Golfing Papa,11m7laMUgmOKqI3oYzuhne,4.0,3.1875,0.647,0.627250,...,0.0,0.0,0.519,0.372625,-12.098,-12.800313,0.1740,0.121438,0.689,0.641062
2,Mamie Smith,1920.0,1920,harlem renaissance,Don't You Advertise Your Man,5DlCyqLyX2AOVDTjjkDZ8x,5.0,3.1875,0.782,0.627250,...,5.0,0.0,0.176,0.372625,-12.453,-12.800313,0.0592,0.121438,0.487,0.641062
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273323,"Victor Yturbe ""El Piruli""",,,ranchera,,,,34.2000,,0.521000,...,,9.0,,0.176600,,-12.227800,,0.029280,,0.509000
273324,"Weiss, George D.",,,,,,,0.0000,,0.296000,...,,3.0,,0.184000,,-9.793000,,0.032600,,0.242000
273325,"Yessister, Jazzsister",,,,,,,0.0000,,0.256333,...,,5.0,,0.082567,,-9.682333,,0.032133,,0.249000
273326,\,,,,,,,0.0000,,0.335000,...,,4.0,,0.332000,,-15.643000,,0.048800,,0.487000


In [13]:
new_artists.to_csv(path_or_buf="Resources/artist_genres.csv", index=False)

In [14]:
new_artists.value_counts("genres").head(30)

genres
                         46306
rock                     23455
adult standards          17711
classic rock             17367
classical                16160
mellow gold              14514
album rock               12542
folk rock                12147
soft rock                12056
country rock             10952
vocal jazz               10510
classical performance    10458
lounge                    9219
soul                      8968
art rock                  8120
hard rock                 7861
cool jazz                 7822
rap                       7709
jazz                      7693
brill building pop        7571
bebop                     7558
folk                      7159
hip hop                   7117
dance pop                 6975
pop                       6689
psychedelic rock          6476
roots rock                6262
swing                     6108
blues rock                5977
pop rap                   5578
dtype: int64

In [15]:
smaller_artists = new_artists[["artists", "release_date", "genres", "name", "id", "Popularity of Song", "Avg Popularity of Artist"]].copy()
smaller_artists

Unnamed: 0,artists,release_date,genres,name,id,Popularity of Song,Avg Popularity of Artist
0,Mamie Smith,1920,harlem renaissance,Keep A Song In Your Soul,0cS0A1fUEUd1EW3FcF8AEI,12.0,3.1875
0,Mamie Smith,1920,traditional blues,Keep A Song In Your Soul,0cS0A1fUEUd1EW3FcF8AEI,12.0,3.1875
1,Mamie Smith,1920,harlem renaissance,Golfing Papa,11m7laMUgmOKqI3oYzuhne,4.0,3.1875
1,Mamie Smith,1920,traditional blues,Golfing Papa,11m7laMUgmOKqI3oYzuhne,4.0,3.1875
2,Mamie Smith,1920,harlem renaissance,Don't You Advertise Your Man,5DlCyqLyX2AOVDTjjkDZ8x,5.0,3.1875
...,...,...,...,...,...,...,...
273323,"Victor Yturbe ""El Piruli""",,ranchera,,,,34.2000
273324,"Weiss, George D.",,,,,,0.0000
273325,"Yessister, Jazzsister",,,,,,0.0000
273326,\,,,,,,0.0000


In [16]:
smaller_artists.to_csv(path_or_buf="Resources/artists_and_genres.csv", index=False)

In [17]:
rock = smaller_artists[smaller_artists['genres'].str.contains('rock', regex=False)]
rock

Unnamed: 0,artists,release_date,genres,name,id,Popularity of Song,Avg Popularity of Artist
22058,H.P. Lovecraft,1928,psychedelic rock,CapÃ­tulo 7.3 - la Casa Maldita,04Bu0bNqImbszwh9OnvJD1,0.0,0.000000
22059,H.P. Lovecraft,1928,psychedelic rock,CapÃ­tulo 1.4 - la Casa Maldita,07akD0l9xOzvuosXQJx4xI,0.0,0.000000
22060,H.P. Lovecraft,1928,psychedelic rock,CapÃ­tulo 1.1 - la Casa Maldita,08i6R3zRuUMSiH0XgrcJYt,0.0,0.000000
22061,H.P. Lovecraft,1928,psychedelic rock,CapÃ­tulo 9.2 - la Casa Maldita,09V1w0C5nldAH5Se6Dsr0Q,0.0,0.000000
22062,H.P. Lovecraft,1928,psychedelic rock,CapÃ­tulo 9.5 - la Casa Maldita,0GiRp9XdFoM1Xhh6uya2EM,0.0,0.000000
...,...,...,...,...,...,...,...
273078,The Unlikely Candidates,2019-03-22,rock,Novocaine,4GKcaqt6PFor4siHXMO42e,65.0,65.000000
273082,Badflower,2019-02-22,modern alternative rock,Ghost,7w09r53aPON8ZUvEAnPe94,63.0,63.000000
273082,Badflower,2019-02-22,modern rock,Ghost,7w09r53aPON8ZUvEAnPe94,63.0,63.000000
273082,Badflower,2019-02-22,rock,Ghost,7w09r53aPON8ZUvEAnPe94,63.0,63.000000


In [18]:
rock.replace(
    {"psychedelic rock": "rock", "modern alternative rock": "rock"})


Unnamed: 0,artists,release_date,genres,name,id,Popularity of Song,Avg Popularity of Artist
22058,H.P. Lovecraft,1928,rock,CapÃ­tulo 7.3 - la Casa Maldita,04Bu0bNqImbszwh9OnvJD1,0.0,0.000000
22059,H.P. Lovecraft,1928,rock,CapÃ­tulo 1.4 - la Casa Maldita,07akD0l9xOzvuosXQJx4xI,0.0,0.000000
22060,H.P. Lovecraft,1928,rock,CapÃ­tulo 1.1 - la Casa Maldita,08i6R3zRuUMSiH0XgrcJYt,0.0,0.000000
22061,H.P. Lovecraft,1928,rock,CapÃ­tulo 9.2 - la Casa Maldita,09V1w0C5nldAH5Se6Dsr0Q,0.0,0.000000
22062,H.P. Lovecraft,1928,rock,CapÃ­tulo 9.5 - la Casa Maldita,0GiRp9XdFoM1Xhh6uya2EM,0.0,0.000000
...,...,...,...,...,...,...,...
273078,The Unlikely Candidates,2019-03-22,rock,Novocaine,4GKcaqt6PFor4siHXMO42e,65.0,65.000000
273082,Badflower,2019-02-22,rock,Ghost,7w09r53aPON8ZUvEAnPe94,63.0,63.000000
273082,Badflower,2019-02-22,modern rock,Ghost,7w09r53aPON8ZUvEAnPe94,63.0,63.000000
273082,Badflower,2019-02-22,rock,Ghost,7w09r53aPON8ZUvEAnPe94,63.0,63.000000


In [19]:
genres = smaller_artists.genres.unique().tolist()
genres
    



['harlem renaissance',
 'traditional blues',
 'blues',
 'circuit',
 'glitchcore',
 '',
 'tango',
 'vintage tango',
 'electro trash',
 'bouzouki',
 'rebetiko',
 'adult standards',
 'big band',
 'deep adult standards',
 'easy listening',
 'lounge',
 'swing',
 'houston rap',
 'pop',
 'pop rap',
 'trap queen',
 'vintage chanson',
 'cabaret',
 'vintage hollywood',
 'boy band',
 'dance pop',
 '"canadian childrens music"',
 '"childrens folk"',
 '"childrens music"',
 'ghoststep',
 'greek clarinet',
 'french hip hop',
 'british soul',
 'funk 150 bpm',
 'funk carioca',
 'funk ostentacao',
 'torch song',
 'classical',
 'post-romantic era',
 'russian romanticism',
 'ukrainian classical',
 'classical performance',
 'opera',
 'orchestral performance',
 'german orchestra',
 'orchestra',
 'ilahiler',
 'sufi chant',
 'american classical piano',
 'early modern classical',
 'french romanticism',
 'historic orchestral performance',
 'bow pop',
 'classify',
 'irish ballad',
 'irish folk',
 'vintage classic

In [None]:
for i, row in smaller_artists.iterrows():
    ifor_val = something
    if <condition>:
        ifor_val = something_else
    df.set_value(i,'ifor',ifor_val)

In [23]:
df['genres'].apply(lambda v: "rock" if smaller_artists['genres'].str.contains('rock', regex=False)])


SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' (<ipython-input-23-c003b5b6f72f>, line 1)

In [None]:
smaller_artists["genres"].apply(lamda v: "")

In [20]:
for i, row in smaller_artists.iterrows():
   
    
    if "folk" in row["genres"]:
        row["genres"] = "Folk"
    
    if "rock" in row["genres"]:
        row["genres"] = "Rock"
     
    if "classical" in row["genres"]:
        row["genres"] = "Classical"
        
    if "jazz" in row["genres"]:
        row["genres"] = "Jazz"
        
    if "rap" in row["genres"]:
        row["genres"] = "Hip-Hop"
        
    if "country" in row["genres"]:
        row["genres"] = "country"

In [21]:
new_artists.value_counts("genres").head(30)

genres
                         46306
rock                     23455
adult standards          17711
classic rock             17367
classical                16160
mellow gold              14514
album rock               12542
folk rock                12147
soft rock                12056
country rock             10952
vocal jazz               10510
classical performance    10458
lounge                    9219
soul                      8968
art rock                  8120
hard rock                 7861
cool jazz                 7822
rap                       7709
jazz                      7693
brill building pop        7571
bebop                     7558
folk                      7159
hip hop                   7117
dance pop                 6975
pop                       6689
psychedelic rock          6476
roots rock                6262
swing                     6108
blues rock                5977
pop rap                   5578
dtype: int64

In [22]:

for genre in smaller_artists["genres"]:
    if smaller_artists[smaller_artists['genres'].str.contains('rock', regex=False)]:
        replace
        

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().