In [23]:
import pandas as pd
import math
import random
"""
Data acquisition of data for sonification, plus some preprocessing
"""
# Read the data in
df = pd.read_csv('data/backward_all.csv')

In [24]:
# Convert to negative weeks to death, so numbers are in chronological order
df = df.sort_values(by='weeks_to_death', ascending=False)

In [25]:
#df.head()

In [26]:

# Select averages for FEMALES in FMC_EUROPE
# Get subset of the dataframe to sonify - already sorted by weeks_to_death desc
test_var = df.loc[(df['database'] == 'FMC_EUROPE') & (df['MALE'] == 0), ['idwg_percent_avg']]
test_var
#test_var = df.loc[(df['database'] == 'FMC_EUROPE') & (df['MALE'] == 0), ['albumin_avg', 'idwg_percent_avg', 'pre_sbp_avg', 'crp_avg']]

#test_var = df.loc[(df['database'] == 'FMC_EUROPE') & (df['MALE'] == 0), ['albumin_avg', 'idwg_percent_avg', 'pre_sbp_avg', 'crp_avg']]

Unnamed: 0,idwg_percent_avg
310,3.2663
309,3.2370
308,3.2242
307,3.2086
306,3.2215
305,3.2374
304,3.2560
303,3.1842
302,3.2233
301,3.2112


In [27]:
#take 5 week rolling average, then normalize the values
test_var = test_var.rolling(5).mean()[5:]
test_var = (test_var - test_var.min())/(test_var.max() - test_var.min())

In [28]:
"""
Global Variables
"""
# Music Config
BPM = 120 # Beats per minute, e.g. 60, 75, 100, 120, 150
DIVISIONS_PER_BEAT = 4 # e.g. 4 = quarter notes, 8 = eighth notes
VARIANCE_MS = 20 # +/- milliseconds an instrument note should be off by to give it a little more "natural" feel
VARIANCE_RATE = 0 # for adding variance to the playback rate
BEAT_MS = round(60.0 / BPM * 1000) # ms in a beat - 500
# File config
INSTRUMENTS_INPUT_FILE = 'son_instruments.csv'
INSTRUMENTS_OUTPUT_FILE = 'data/chk_instruments.csv'
SEQUENCE_OUTPUT_FILE = 'data/chk_sequence.csv'
INSTRUMENTS_DIR = 'instruments/'

In [29]:
"""
Data acquisition of instruments
"""
# Read in instruments data, do some preprocessing
instruments = pd.read_table(INSTRUMENTS_INPUT_FILE)
instruments['Type'] = instruments['Type'].str.lower().str.replace(' ','_')
instruments['File'] = INSTRUMENTS_DIR + instruments['File']

In [30]:
"""
Writing to the sequence file that will create the musical sequence (the song)
@param  gain        The gain (volume) of the instrument - this will be equivilent to the value of the variable that week
@param  instrument  The current instrument being added
@param  ms          The current location in the sequence (song)
@param  duration    The number of ms that makes up a data point (week) in the sequence (song)
"""
"""
Explanation of Tempo Offset
    Tempo Offset of 0 will play the instrument on the fitst beat
    Tempo Offset of 0.5 will play the instrument on the offbeat
    Tempo Offset of 1 will play the instrument on the second beat
"""
def addBeatsToSequence(gain, instrument, ms, duration):
    # Will this instrument be offset? Add the amount of ms the instrument will be offset
    ms += int(instrument['Tempo Offset'] * BEAT_MS)
    # Generate a pseudo-random number to create a pseudo-random variance
    h = random.uniform(0, 1)
    variance = int(h * VARIANCE_MS * 2 - VARIANCE_MS)
    rate_variance = float(h * VARIANCE_RATE * 2 - VARIANCE_RATE) # This is currently always 0
    return({
        'instrument_index': instrument['index'],
        'instrument': instrument,
        'position': 0,
        'gain': round(gain, 2),
        'rate': 1 + rate_variance, # CRK try as 1.0 as well; do we need variance_rate
        'elapsed_ms': max([ms + variance, 0])
    })

In [31]:
"""
This is where the sequence is built
    Make a set duration for how long we will spend on each data point (in this case, each data point represents a week)
    Loop through each instrument in the given instrument csv
    For each instrument, loop through the variable values
        An instrument is played for the variable if the value of the variable is greater than the allowed variable for the instrument
            If an instrument is played, add that beat to the sequence file
        Incrument where we are in the song (ms defines where we are in the song)
"""
# Amount of time, in MS, to spend on each data point
week_duration = 500
sequence = []

# Loop through each instrument
for index, instrument in enumerate(instruments.iterrows()):
    # ms represents the current place in the sequence (song)
    ms = 0
    # Skip the first line of the instrument file (headers)
    instrument = instrument[1]
    instrument['index'] = index
    min_var = instrument['Min Var']
    #Loop through each week's value
    """"
    for week_val in test_var:
        # clarients represent idwg_percent_avg
        play_clarinet = (week_val['idwg_percent_avg']*100) > min_var
        if play_clarinet:
            sequence.append(addBeatsToSequence(week_val, instrument, ms, week_duration))
        ms += week_duration
    """
    for index, 

TypeError: '>' not supported between instances of 'str' and 'int'

In [None]:
# Sort sequence so beats happen in chronological order
sequence = sorted(sequence, key=lambda k: k['elapsed_ms'])

# Add milliseconds to sequence
elapsed = 0
for index, step in enumerate(sequence):
    sequence[index]['milliseconds'] = step['elapsed_ms'] - elapsed
    elapsed = step['elapsed_ms']


In [None]:
"""
Write to the files that will be used by ChucK
"""
import csv

# Write instruments to file
with open(INSTRUMENTS_OUTPUT_FILE, 'w') as f:
    w = csv.writer(f)
    for index, instrument in enumerate(instruments.iterrows()):
        w.writerow([index])
        w.writerow([instrument[1]['File']])
    print('Successfully wrote instruments to file: '+ INSTRUMENTS_OUTPUT_FILE)

# Write sequence to file
with open(SEQUENCE_OUTPUT_FILE, 'w') as f:
    w = csv.writer(f)
    for step in sequence:
        w.writerow([step['instrument_index']])
        w.writerow([step['position']])
        w.writerow([step['gain']])
        w.writerow([step['rate']])
        w.writerow([step['milliseconds']])
    print('Successfully wrote sequence to file: '+ SEQUENCE_OUTPUT_FILE)


In [None]:
import matplotlib.pyplot as plt

test_var.plot()
plt.show()

In [None]:
#df.head()

In [None]:
#instruments

In [None]:
#sequence