In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [2]:
from datetime import datetime

def log(message):
    print(datetime.now().strftime("%H:%M:%S -"), message)
    
def printnow():
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print("Current Time =", current_time)

In [3]:
# Define constants for the dataset
section_headers = ['Intro','Verse','Refrain','Pre-Chorus','Chorus','Post-Chorus','Hooks','Riffs/Basslines','Scratches','Sampling','Bridge','Interlude','Skit','Collision','Instrumental','Solo','Ad-lib','Segue','Outro']
header_bigrams = ['|'.join([a,b]) for a in ['START'] + section_headers for b in section_headers + ['END']]

# a function to extract header bigrams quickly from a given lyrics
def lyrics_to_bigrams(lyrics):
    headers = [word[1:-1] for word in lyrics.split() if word[0] == '[' and word[-1] == ']' and word[1:-1] in section_headers]
    bigrams = ['|'.join(map(str, vals)) for vals in zip(headers[:-1], headers[1:])]
    bigram_counts = {bigram:0 for bigram in header_bigrams}
    for bigram in bigrams: bigram_counts[bigram] = bigram_counts[bigram] + 1
    return bigram_counts

### For Nonredundant Data, Create the Following Encodings:
#### One-Hot
#### TF-IDF with SVD
#### Word2Vec with Google Embeddings

In [7]:
# Load nonredundant data
country_df = pd.read_pickle(r'train_test_data/country_data.zip')
country_df['genre'] = 'country'
hiphop_df = pd.read_pickle(r'train_test_data/hiphop_data.zip')
hiphop_df['genre'] = 'hiphop'
pop_df = pd.read_pickle(r'train_test_data/pop_data.zip')
pop_df['genre'] = 'pop'
rock_df = pd.read_pickle(r'train_test_data/rock_data.zip')
rock_df['genre'] = 'rock'

full_df = pd.concat([country_df,hiphop_df, pop_df,rock_df])
full_df.reset_index(inplace=True, drop=True)
full_df

Unnamed: 0,title-artist,lyrics,genre
0,No HurryZac Brown Band,[START]\n[Verse]\nYou know my old car needs wa...,country
1,A Little More SummertimeJason Aldean,[START]\n[Verse]\nThey boarding up this water ...,country
2,Yeah BoyKelsea Ballerini,"[START]\n[Verse]\nCaptured my attention, make ...",country
3,Wild In Your SmileDustin Lynch,"[START]\n[Verse]\nHey girl what's up, looks li...",country
4,Real ThingZac Brown Band,[START]\n[Verse]\nIt was in a crystal bottle o...,country
...,...,...,...
4916,Rolling In The DeepAdele,[START]\n[Verse]\nThere's a fire starting in m...,rock
4917,Simple SongThe Shins,[START]\n[Verse]\nWell this is just a simple s...,rock
4918,Still SwingingPapa Roach,[START]\n[Intro]\nYeah\nThat's right\nYeah yea...,rock
4919,The Divine ZeroPierce The Veil,[START]\n[Verse]\nMaybe I could swim into your...,rock


In [95]:
# Split into training and testing and save as pickles
train_df = pd.DataFrame()
test_df = pd.DataFrame()
lyrics_train, lyrics_test, genres_train, genres_test = train_test_split(full_df['lyrics'], full_df['genre'], test_size=0.2)
lyrics_train.reset_index(inplace=True, drop=True)
genres_train.reset_index(inplace=True, drop=True)
lyrics_test.reset_index(inplace=True, drop=True)
genres_test.reset_index(inplace=True, drop=True)
assert len(lyrics_train) == len(genres_train)
assert len(lyrics_test) == len(genres_test)
train_df['lyrics'] = lyrics_train
train_df['genre'] = genres_train
test_df['lyrics'] = lyrics_test
test_df['genre'] = genres_test
train_df.to_pickle(r'train_test_data/train.zip')
test_df.to_pickle(r'train_test_data/test.zip')

In [11]:
train_df = pd.read_pickle(r'train_test_data/train.zip')
test_df = pd.read_pickle(r'train_test_data/test.zip')
lyrics_train = train_df['lyrics']
lyrics_test = test_df['lyrics']
genres_train = train_df['genre']
genres_test = test_df['genre']

In [108]:
# Generate the vocabulary with training data
log('Creating vocabulary')
vocab_counts = {}
for lyric in train_df['lyrics']:
    for word in lyric.lower().split():
        vocab_counts[word] = vocab_counts.get(word, 0) + 1
# - set all words that appear fewer than 3 times as <UNK>
log('Setting words with counts less than 3 to <UNK>')
vocab_counts_unk = {}
for word,count in vocab_counts.items():
    if count < 3: vocab_counts_unk['<UNK>'] = vocab_counts_unk.get('<UNK>', 0) + count
    else: vocab_counts_unk[word] = count
log(f'Done, vocab length: {len(vocab_counts_unk)}\n')

# a function that tokenizes given lyrics using the vocabulary
def lyrics_to_vocab(lyrics):
    words = [word if word in vocab_counts_unk else '<UNK>' for word in lyrics.lower().split()]
    return words


14:31:49 - Creating vocabulary
14:31:50 - Setting words with counts less than 3 to <UNK>
14:31:50 - Done, vocab length: 14358



In [111]:
# Create One-Hot encoding for nonredundant data
def encode_onehot(lyrics):
    bigram_counts = lyrics_to_bigrams(lyrics)
    words = lyrics_to_vocab(lyrics)
    onehot = [1 if vocab_word in words else 0 for vocab_word in vocab_counts_unk.keys()]
    assert len(onehot) == len(vocab_counts_unk)
    encoding = onehot + [count for count in bigram_counts.values()]
    return encoding

In [112]:
# Encoding One-Hot training and testing data
log('Encoding training data')
onehotX_train = train_df['lyrics'].map(encode_onehot)
onehotX_train.reset_index(inplace=True, drop=True)
log('Done\n')

log('Encoding testing data')
onehotX_test = test_df['lyrics'].map(encode_onehot)
onehotX_test.reset_index(inplace=True, drop=True)
log('Done\n')

14:32:49 - Encoding training data
14:37:34 - Done

14:37:34 - Encoding testing data
14:38:45 - Done



In [113]:
# Saving One-Hot training and testing data as a .zip pickle
log('Loading training and testing data into dataframes')
onehot_train = pd.DataFrame(onehotX_train.values.tolist(), index=onehotX_train.index)
onehot_train['y'] = genres_train
onehot_test = pd.DataFrame(onehotX_test.values.tolist(), index=onehotX_test.index)
onehot_test['y'] = genres_train
log('Done\n')

log('Saving onehot encodings as pickles')
onehot_train.to_pickle('train_test_encoded/onehot_train.zip')
onehot_test.to_pickle('train_test_encoded/onehot_test.zip')
log('Done\n')

14:38:45 - Loading training and testing data into dataframes
14:39:17 - Done

14:39:17 - Saving onehot encodings as pickles
14:39:21 - Done



In [123]:
pd.read_pickle('train_test_encoded/onehot_train.zip')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14749,14750,14751,14752,14753,14754,14755,14756,14757,y
0,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,pop
1,1,0,0,0,1,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,hiphop
2,1,0,0,0,1,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,pop
3,1,1,0,1,1,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,hiphop
4,1,0,0,0,1,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,country
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3931,1,0,0,0,1,0,1,1,1,0,...,0,0,0,0,0,0,0,0,0,rock
3932,1,1,1,0,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,pop
3933,1,0,0,0,1,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,country
3934,1,1,0,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,hiphop


In [116]:
# Create Raw Count encoding for nonredundant data
def encode_rawcount(lyrics):
    bigram_counts = lyrics_to_bigrams(lyrics)
    words = lyrics_to_vocab(lyrics)
    lyrics_counts = {}
    for word in words:
        lyrics_counts[word] = lyrics_counts.get(word, 0) + 1
    rawcount = [lyrics_counts.get(vocab_word, lyrics_counts.get('<UNK>', 0)) for vocab_word in vocab_counts_unk.keys()]
    assert len(rawcount) == len(vocab_counts_unk)
    encoding = rawcount + [count for count in bigram_counts.values()]
    return encoding

In [117]:
# Encoding Raw Count training and testing data
log('Encoding training data')
rawcountX_train = train_df['lyrics'].map(encode_rawcount)
rawcountX_train.reset_index(inplace=True, drop=True)
log('Done\n')

log('Encoding testing data')
rawcountX_test = test_df['lyrics'].map(encode_rawcount)
rawcountX_test.reset_index(inplace=True, drop=True)
log('Done\n')

14:42:05 - Encoding training data
14:42:24 - Done

14:42:24 - Encoding testing data
14:42:28 - Done



In [118]:
# Saving Raw Count training and testing data as a .zip pickle
log('Loading training and testing data into dataframes')
rawcount_train = pd.DataFrame(rawcountX_train.values.tolist(), index=rawcountX_train.index)
rawcount_train['y'] = genres_train
rawcount_test = pd.DataFrame(rawcountX_test.values.tolist(), index=rawcountX_test.index)
rawcount_test['y'] = genres_train
log('Done\n')

log('Saving rawcount encodings as pickles')
rawcount_train.to_pickle('train_test_encoded/rawcount_train.zip')
rawcount_test.to_pickle('train_test_encoded/rawcount_test.zip')
log('Done\n')

14:43:19 - Loading training and testing data into dataframes
14:43:42 - Done

14:43:42 - Saving rawcount encodings as pickles
14:43:51 - Done



In [124]:
pd.read_pickle('train_test_encoded/rawcount_train.zip')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14749,14750,14751,14752,14753,14754,14755,14756,14757,y
0,1,1,5,4,14,4,8,11,4,4,...,0,0,0,0,0,0,0,0,0,pop
1,1,1,1,1,19,1,1,1,4,4,...,0,0,0,0,0,0,0,0,0,hiphop
2,1,3,3,3,31,3,22,10,3,1,...,0,0,0,0,0,0,0,0,0,pop
3,1,1,36,1,8,36,36,10,36,2,...,0,0,0,0,0,0,0,0,0,hiphop
4,1,7,7,7,1,7,7,3,3,7,...,0,0,0,0,0,0,0,0,0,country
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3931,1,3,3,3,7,3,3,1,2,3,...,0,0,0,0,0,0,0,0,0,rock
3932,1,1,11,43,20,43,4,18,1,15,...,0,0,0,0,0,0,0,0,0,pop
3933,1,0,0,0,13,0,0,4,8,2,...,0,0,0,0,0,0,0,0,0,country
3934,1,1,4,4,9,4,4,4,4,2,...,0,0,0,0,0,0,0,0,0,hiphop


In [63]:
import gensim.models as gm
log("Generating vord2vec model")
google_kv = gm.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)
log("Done\n")

13:54:50 - Generating vord2vec model
13:58:32 - Done



In [119]:
# Create Word2Vec encoding for nonredundant data
def encode_word2vec(lyrics):
    def sent_vector(sentence):
        vectors = []
        for word in sentence:
            try: vectors.append(google_kv.get_vector(word))
            except KeyError: vectors.append([0] * 300)
        vectors_ave = np.mean(vectors, axis=0)
        return vectors_ave
    
    bigram_counts = lyrics_to_bigrams(lyrics)
    word2vec = sent_vector(lyrics.lower().split())
    assert len(word2vec) == 300
    encoding = list(word2vec) + [count for count in bigram_counts.values()]
    return encoding

In [120]:
# pickle the encodings in .zip files to save space
log('Encoding training data')
word2vecX_train = lyrics_train.map(encode_word2vec)
word2vecX_train.reset_index(inplace=True, drop=True)
log('Done\n')

log('Encoding testing data')
word2vecX_test = lyrics_test.map(encode_word2vec)
word2vecX_test.reset_index(inplace=True, drop=True)
log('Done\n')

14:43:55 - Encoding training data
14:44:12 - Done

14:44:12 - Encoding testing data
14:44:16 - Done



In [121]:
log('Loading training and testing data into dataframes')
word2vec_train = pd.DataFrame(word2vecX_train.values.tolist(), index=word2vecX_train.index)
word2vec_train['y'] = genres_train
word2vec_test = pd.DataFrame(word2vecX_test.values.tolist(), index=word2vecX_test.index)
word2vec_test['y'] = genres_test
log('Done\n')

log('Saving onehot encodings as pickles')
word2vec_train.to_pickle('train_test_encoded/word2vec_train.zip')
word2vec_test.to_pickle('train_test_encoded/word2vec_test.zip')
log('Done\n')

14:44:16 - Loading training and testing data into dataframes
14:44:17 - Done

14:44:17 - Saving onehot encodings as pickles
14:44:18 - Done



In [122]:
pd.read_pickle('train_test_encoded/word2vec_train.zip')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,691,692,693,694,695,696,697,698,699,y
0,0.043266,0.000405,0.039034,0.091339,-0.043811,0.006128,0.048711,-0.045578,0.028493,0.058403,...,0,0,0,0,0,0,0,0,0,pop
1,0.063111,0.024554,0.043612,0.100200,-0.061247,-0.015591,0.042263,-0.079348,0.033227,0.063542,...,0,0,0,0,0,0,0,0,0,hiphop
2,0.014981,0.002022,0.061864,0.132557,-0.050852,0.012515,0.042124,-0.070243,0.032086,0.041842,...,0,0,0,0,0,0,0,0,0,pop
3,0.034050,0.017641,0.026798,0.096935,-0.052599,-0.015467,0.017258,-0.054707,0.022486,0.067634,...,0,0,0,0,0,0,0,0,0,hiphop
4,-0.006355,0.017172,0.025928,0.084886,-0.019035,-0.032178,0.007823,-0.079723,0.036854,0.059281,...,0,0,0,0,0,0,0,0,0,country
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3931,0.032377,0.020014,0.041138,0.107918,-0.060333,-0.014298,0.028753,-0.065094,0.038230,0.062111,...,0,0,0,0,0,0,0,0,0,rock
3932,0.051308,0.003736,0.029400,0.110860,-0.055710,0.000451,0.011525,-0.055293,0.026736,0.061777,...,0,0,0,0,0,0,0,0,0,pop
3933,0.046522,0.009164,0.052938,0.113449,-0.039134,-0.010338,0.018407,-0.070982,0.030533,0.079900,...,0,0,0,0,0,0,0,0,0,country
3934,0.065731,0.029908,0.011826,0.119596,-0.014202,-0.027390,0.015258,-0.086349,0.006270,0.094559,...,0,0,0,0,0,0,0,0,0,hiphop


In [152]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

# Combine lyrics into one list to be input for tf-idf vectorizer
full_lyrics = [lyrics.lower() for lyrics in full_df['lyrics']]
log(f'Training TF-IDF Vectorizer on all {len(full_lyrics)} lyrics')
tfidf_vectorizer = TfidfVectorizer().fit(full_lyrics)
tfidf_data = tfidf_vectorizer.transform(full_lyrics)
log('Fitting SVD on all lyrics')
svd = TruncatedSVD(n_components=500).fit(tfidf_data)
log('Done\n')

15:15:26 - Training TF-IDF Vectorizer on all 4921 lyrics
15:15:29 - Fitting SVD on all lyrics
15:15:37 - Done



In [158]:
# Create TFIDF encoding for nonredundant data
def encode_tfidf_svd(lyrics):
    bigram_counts = lyrics_to_bigrams(lyrics)
    tfidf_vector = tfidf_vectorizer.transform([lyrics])
    svd_vector = svd.transform(tfidf_vector)[0]
    assert len(svd_vector) == 500
    encoding = list(svd_vector) + [count for count in bigram_counts.values()]
    return encoding

In [160]:
# pickle the encodings in .zip files to save space
log('Encoding training data')
tfidfX_train = lyrics_train.map(encode_tfidf_svd)
tfidfX_train.reset_index(inplace=True, drop=True)
log('Done\n')

log('Encoding testing data')
tfidfX_test = lyrics_test.map(encode_tfidf_svd)
tfidfX_test.reset_index(inplace=True, drop=True)
log('Done\n')

15:20:11 - Encoding training data
15:23:55 - Done

15:23:55 - Encoding testing data
15:24:53 - Done



In [161]:
log('Loading training and testing data into dataframes')
tfidf_train = pd.DataFrame(tfidfX_train.values.tolist(), index=tfidfX_train.index)
tfidf_train['y'] = genres_train
tfidf_test = pd.DataFrame(tfidfX_test.values.tolist(), index=tfidfX_test.index)
tfidf_test['y'] = genres_test
log('Done\n')

log('Saving onehot encodings as pickles')
tfidf_train.to_pickle('train_test_encoded/tfidf_train.zip')
tfidf_test.to_pickle('train_test_encoded/tfidf_test.zip')
log('Done\n')

15:24:53 - Loading training and testing data into dataframes
15:24:54 - Done

15:24:54 - Saving onehot encodings as pickles
15:24:55 - Done



In [162]:
pd.read_pickle('train_test_encoded/tfidf_train.zip')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,891,892,893,894,895,896,897,898,899,y
0,0.383487,-0.000157,0.111061,0.036923,0.093074,0.024800,0.121577,0.141905,-0.130408,-0.076053,...,0,0,0,0,0,0,0,0,0,pop
1,0.458451,0.094264,-0.020039,0.032636,0.131480,0.256676,0.120940,0.000969,0.295753,0.028793,...,0,0,0,0,0,0,0,0,0,hiphop
2,0.305338,0.203174,0.172656,-0.039756,-0.036502,-0.019353,-0.037714,-0.006561,0.020564,0.022470,...,0,0,0,0,0,0,0,0,0,pop
3,0.396890,-0.020145,-0.104981,0.007337,0.001328,-0.016435,0.010096,0.040158,-0.006629,-0.082973,...,0,0,0,0,0,0,0,0,0,hiphop
4,0.153078,0.003226,-0.081552,0.005649,-0.000248,0.050439,-0.015349,-0.023108,-0.011885,-0.053865,...,0,0,0,0,0,0,0,0,0,country
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3931,0.351787,-0.049385,-0.059190,-0.005184,-0.018368,-0.008119,-0.010358,-0.026639,-0.025164,-0.095865,...,0,0,0,0,0,0,0,0,0,rock
3932,0.374776,-0.009993,-0.004814,0.011731,-0.002231,0.084878,-0.014148,0.085929,-0.051916,-0.006933,...,0,0,0,0,0,0,0,0,0,pop
3933,0.297178,-0.025890,-0.002132,-0.013385,-0.009894,0.033824,-0.029264,-0.072205,-0.012764,0.017885,...,0,0,0,0,0,0,0,0,0,country
3934,0.165281,0.018196,-0.031456,0.014466,0.019947,0.048514,0.001224,0.042415,-0.002664,-0.158949,...,0,0,0,0,0,0,0,0,0,hiphop


In [102]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

# Combine lyrics into one list to be input for tf-idf vectorizer WITH UNIGRAMS UP TO TRIGRAMS
full_lyrics = [lyrics.lower() for lyrics in full_df['lyrics']]
log(f'Training TF-IDF Vectorizer on all {len(full_lyrics)} lyrics')
tfidf_ngram_vectorizer = TfidfVectorizer(min_df=2, ngram_range=(1,3)).fit(full_lyrics)
tfidf_ngram_features = tfidf_ngram_vectorizer.get_feature_names()
tfidf_ngram_data = tfidf_ngram_vectorizer.transform(full_lyrics)
log('Fitting SVD on all lyrics')
svd_ngram = TruncatedSVD(n_components=500).fit(tfidf_ngram_data)
log('Done\n')

14:40:25 - Training TF-IDF Vectorizer on all 4921 lyrics
14:40:47 - Fitting SVD on all lyrics
14:43:19 - Done



In [103]:
# Create TFIDF up to trigram encoding for nonredundant data
def encode_tfidf_ngram_svd(df):
    def section_header_bigram(lyrics):
        bigram_counts = lyrics_to_bigrams(lyrics)
        return [count for count in bigram_counts.values()]
    
    tfidf_ngram_vec = tfidf_ngram_vectorizer.transform(df['lyrics'].values)
    svd_ngram_vec = svd_ngram.transform(tfidf_ngram_vec)
    tfidf_ngram_df = pd.DataFrame(svd_ngram_vec)
    header_bigram_vec = [section_header_bigram(lyrics) for lyrics in df['lyrics'].values]
    header_bigram_df = pd.DataFrame(header_bigram_vec, columns=header_bigrams)
    encoding = pd.concat([tfidf_ngram_df, header_bigram_df], axis=1)
    return encoding

In [106]:
# pickle the encodings in .zip files to save space
log('Encoding training data')
tfidf_ngram_train = encode_tfidf_ngram_svd(train_df)
log('Done\n')

log('Encoding testing data')
tfidf_ngram_test = encode_tfidf_ngram_svd(test_df)
log('Done\n')

15:35:59 - Encoding training data
15:36:05 - Done

15:36:05 - Encoding testing data
15:36:08 - Done



In [107]:
log('Loading training and testing data into dataframes')
tfidf_ngram_train['y'] = genres_train
tfidf_ngram_test['y'] = genres_test
log('Done\n')

log('Saving onehot encodings as pickles')
tfidf_ngram_train.to_pickle('train_test_encoded/tfidf_ngram_train.zip')
tfidf_ngram_test.to_pickle('train_test_encoded/tfidf_ngram_test.zip')
log('Done\n')

15:36:09 - Loading training and testing data into dataframes
15:36:09 - Done

15:36:09 - Saving onehot encodings as pickles
15:36:10 - Done



In [108]:
pd.read_pickle('train_test_encoded/tfidf_ngram_train.zip')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,Outro|Interlude,Outro|Skit,Outro|Collision,Outro|Instrumental,Outro|Solo,Outro|Ad-lib,Outro|Segue,Outro|Outro,Outro|END,y
0,0.184129,-0.018197,-0.005099,-0.005399,0.037754,0.045131,-0.001461,0.028579,0.005085,0.032772,...,0,0,0,0,0,0,0,0,0,pop
1,0.193870,0.018573,-0.009362,-0.005969,0.090915,-0.045035,-0.025991,0.047837,-0.055979,-0.081396,...,0,0,0,0,0,0,0,0,0,hiphop
2,0.127733,0.044721,-0.006019,-0.005293,-0.005966,0.055150,-0.003077,-0.002170,0.000687,0.001120,...,0,0,0,0,0,0,0,0,0,pop
3,0.164444,-0.035732,-0.010279,-0.003538,-0.014275,-0.030779,0.004200,-0.004996,0.007137,0.018081,...,0,0,0,0,0,0,0,0,0,hiphop
4,0.130536,-0.029946,-0.010128,-0.002602,-0.015442,-0.067553,-0.005651,0.025295,-0.022155,-0.022960,...,0,0,0,0,0,0,0,0,0,country
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3931,0.128744,-0.031099,-0.007913,-0.005776,-0.012949,-0.006316,-0.004541,-0.013524,-0.001318,-0.006042,...,0,0,0,0,0,0,0,0,0,rock
3932,0.228627,-0.043051,-0.012646,-0.009522,-0.015776,0.000420,0.006476,0.049308,-0.019672,-0.010922,...,0,0,0,0,0,0,0,0,0,pop
3933,0.134692,-0.028000,-0.010212,-0.005202,-0.007446,0.012089,-0.006490,-0.008085,-0.006383,-0.025853,...,0,0,0,0,0,0,0,0,0,country
3934,0.096240,-0.005852,-0.004261,-0.000245,-0.000341,-0.021529,-0.012501,0.249173,-0.031426,0.296219,...,0,0,0,0,0,0,0,0,0,hiphop
