# Modeling Strategies for Songwriter Index

In [1]:
import json
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.preprocessing import StandardScaler 

Determining strategies to use in feature selection & engineering when building the songwriter recommender.

Current thought about ideating through the dataset is looking at each "chunk" of information that I have, and seeing specifically how I can engineer each portion

## Incorporating Audio Analysis

The audio analysis endpoint is made up of an assortment of distinct datasets within a single object:

|Key|Value Type|Value Description|
|---|---|---|
|bars|an array of time interval objects|The time intervals of the bars throughout the track. A bar (or measure) is a segment of time defined as a given number of beats. Bar offsets also indicate downbeats, the first beat of the measure|
|beats|an array of time interval objects|The time intervals of beats throughout the track. A beat is the basic time unit of a piece of music; for example, each tick of a metronome. Beats are typically multiples of tatums.|
|sections|an array of section objects|Sections are defined by large variations in rhythm or timbre, e.g. chorus, verse, bridge, guitar solo, etc. Each section contains its own descriptions of tempo, key, mode, time_signature, and loudness.|
segments|an array of segment objects|Audio segments attempts to subdivide a song into many segments, with each segment containing a roughly consitent sound throughout its duration.|
tatums|an array of time interval objects|A tatum represents the lowest regular pulse train that a listener intuitively infers from the timing of perceived musical events (segments). For more information about tatums, see Rhythm (below).|

So essentially, we're looking at **time interval objects**, **section objects**, and **segment objects**.

### Potential Ideas

#### SVD

Performing SVD on segment objects per song, perhaps limiting each to 50 components. 

Grabbing the Mean and Variance of tatum duration per song

Afterwards, performing SVD again on each of the songs tatum and segment objects tied to a single writer (no weighting applied for writing percentage). And then capturing the other dimensions with some other sort of aggregation:

- Genres?
- Key?

With each of the below, perhaps I can just use the mean and variance of all songs tied to a particular writer:

- Tempo (perhaps mean & variance?)
- Mode (perhaps just mean)
- Time Signature?
- Danceability?
- Energy?
- Valence?
- Speechiness?
- Acousticness?

##### Pros
- Seems like it would capture the necessary latent factors that make a song a song.

##### Cons
- Less interpretable, then say, computing the mean and variance of those values

#### Testing SVD

In [2]:
with open('../data/interim/new_tracks/new_tracks_20190103.json', 'r') as f:
    tracks = json.load(f)

In [3]:
len(tracks)

651517

In [4]:
tracks_to_sample = ['60xY7UGCLpsVS2vMjdgCBG',
 '1DJZOOC5AnIOkUWI5vSGR3',
 '3WpkrJzB5CqQQjqMkUR3K6',
 '3ik1CApwKh3NsUxUjWVc08',
 '5gc3A0YStseOxpUIqHBPbo',
 '1ax9G4k500dUkI1cNPMdyM',
 '29SHG7WFpiBEq1gd2S3W7j',
 '6nV4H8Ezf1qXhh7752rV1Y',
 '0Urh9a0OyVLmTIw2IzM0TH',
 '0u1lI5hmTs3hjxV4Zanv5Y']

In [5]:
tracks_to_sample

['60xY7UGCLpsVS2vMjdgCBG',
 '1DJZOOC5AnIOkUWI5vSGR3',
 '3WpkrJzB5CqQQjqMkUR3K6',
 '3ik1CApwKh3NsUxUjWVc08',
 '5gc3A0YStseOxpUIqHBPbo',
 '1ax9G4k500dUkI1cNPMdyM',
 '29SHG7WFpiBEq1gd2S3W7j',
 '6nV4H8Ezf1qXhh7752rV1Y',
 '0Urh9a0OyVLmTIw2IzM0TH',
 '0u1lI5hmTs3hjxV4Zanv5Y']

In [6]:
def retreive_json(lst):
    track_dict = {}
    for i in lst:
        with open(f'../data/interim/example_song_analysis/{i}.json', 'r') as f:
            track = json.load(f)
        track_dict[i] = track
    return track_dict

In [7]:
tracks_dict = retreive_json(tracks_to_sample)

In [8]:
tracks_dict['60xY7UGCLpsVS2vMjdgCBG']

{'meta': {'analyzer_version': '4.0.0',
  'platform': 'Linux',
  'detailed_status': 'OK',
  'status_code': 0,
  'timestamp': 1444471710,
  'analysis_time': 7.71189,
  'input_process': 'libvorbisfile L+R 44100->22050'},
 'track': {'num_samples': 4327680,
  'duration': 196.26667,
  'sample_md5': '',
  'offset_seconds': 0,
  'window_seconds': 0,
  'analysis_sample_rate': 22050,
  'analysis_channels': 1,
  'end_of_fade_in': 0.11701,
  'start_of_fade_out': 189.19039,
  'loudness': -12.827,
  'tempo': 122.691,
  'tempo_confidence': 0.842,
  'time_signature': 4,
  'time_signature_confidence': 0.687,
  'key': 5,
  'key_confidence': 0.81,
  'mode': 1,
  'mode_confidence': 0.765,
  'codestring': 'eJxVmgmO7TYMBK_iI2hf7n-xVLXeJAgQ_Iz0bIni0mxSXqOtM-9qX_lOH23MOfp31zfr3qutVb5VzjdPHXXtXr-zxtfGuqfXcb861_nqGfuWucvXyinfLLO2WvbX5nLRvtZu43xtn_Ht2VYvbXy9jOKbq7Wz59fbW3feuSbD2Yqb1lkvf41SihLNOse932CD75zSVlmTX3dH3jLqXrXcb47dfHiM0tv-5mXbVXdZx1XWYCOXQszdv7WOktXSWe58u3qestud-_b-7X6z0ym9bKRnDdU09in7fpx6fLfutktHawepH

In [31]:
np.hsplit(np.array(tracks_dict['60xY7UGCLpsVS2vMjdgCBG']['segments'][0]['pitches']), 1)

[array([0.787, 1.   , 0.729, 0.699, 0.844, 0.965, 0.989, 0.895, 0.68 ,
        0.665, 0.345, 0.234])]

In [30]:
np.array(tracks_dict['60xY7UGCLpsVS2vMjdgCBG']['segments'][0]['pitches'])

array([0.787, 1.   , 0.729, 0.699, 0.844, 0.965, 0.989, 0.895, 0.68 ,
       0.665, 0.345, 0.234])

##### Extracting Pitch and Timbre Values for each Song 

In [19]:
def pitch_timbre_grabber(dct):
    pitches = np.hsplit(np.array(dct['segments'][0]['pitches']), 12)
    timbre = np.hsplit(np.array(dct['segments'][0]['pitches']), 12)
    for i in range(1, len(dct['segments'])):
        pitches = np.hstack([pitches, np.hsplit(np.array(dct['segments'][i]['pitches']), 12)])
        timbre = np.hstack([timbre, np.hsplit(np.array(dct['segments'][i]['timbre']), 12)])
    return pitches, timbre

In [20]:
p,t = pitch_timbre_grabber(tracks_dict['0Urh9a0OyVLmTIw2IzM0TH'])

In [23]:
pca = PCA(10, random_state=333)

Pitch vector is transposed below, because the number of components must be less than the number of features (columns).

In [34]:
t.shape

(12, 723)

In [36]:
t2.shape

(8676,)

In [25]:
test_x = pca.fit_transform(t)

In [26]:
test_x.shape

(12, 10)

In [27]:
test_x

array([[ 5.63972626e+02, -2.51663799e+02, -2.68035628e+02,
        -2.84854262e+02, -9.80521118e+00,  2.34052109e+02,
        -1.16665088e+02,  2.24152930e+02,  1.15923448e+02,
        -9.93800743e+01],
       [ 7.93419731e+02,  7.67708283e+02,  9.27149942e+02,
        -6.37970561e+01, -2.18858562e+01,  2.74609425e+01,
        -3.90575203e+01,  3.99752553e+01, -6.28084771e+01,
         1.39700950e+01],
       [ 1.79343382e+03, -4.21251751e+02, -1.69859815e+02,
         1.66620985e+02,  5.87507930e+01, -9.10647343e+01,
         1.00503502e+02, -9.66143348e+01,  3.93062367e+01,
         8.28923786e+00],
       [-1.03635454e+02,  1.17408193e+03, -6.92571956e+02,
         9.21634599e+01, -4.64398643e+01, -4.09259230e+01,
         6.58098715e+01, -4.27178623e+01,  6.52303830e+01,
        -1.54332506e+01],
       [ 1.09284083e+02, -2.27479042e+02, -2.73819854e+02,
        -3.61384156e+02,  6.76747277e+01, -1.36276676e+02,
        -6.67366399e+00, -1.70292959e+02, -1.71077036e+02,
         5.

#### Doing What I Did Before

Building a pipeline with how I processed songs before. Here's what I need:

- Record key changes for each song section
- One-Hot Encoded Genres 
  - Then apply SVD
  - This should be easy; There shouldn't be any new artists, so I should be able to apply what I've got on a song level
- Gathering Mean & Variance of Audio Analysis Values