In [22]:
import pandas as pd
import numpy as np
from mido import Message, MidiFile, MidiTrack, MetaMessage
from collections import namedtuple

## Data processing: 

In [21]:
# Data sources: the ECDC case-distribution CSV (one row per country per
# reporting date) and a local lookup table whose 'iso3' column is joined
# against the ECDC 'countryterritoryCode' column to obtain a BBC region code.
ECDC_CASES_URL = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'
REGION_LOOKUP_PATH = (
    '~/Dropbox (BBC)/Visual Journalism/Data/2020/vjdata.26866.global.covid.interactive/'
    'derived/bbc_world_v2_4326_centroid_region.csv'
)

df = pd.read_csv(ECDC_CASES_URL)
# 'dateRep' arrives as day/month/year text; parse it with an explicit format.
df['dateRep'] = pd.to_datetime(df['dateRep'], format='%d/%m/%Y')

continent_names_df = pd.read_csv(REGION_LOOKUP_PATH)

# Attach the region code to every country row. merge() defaults to an inner
# join, so rows whose country code has no match in the lookup are dropped.
region_lookup = continent_names_df[['iso3', 'bbc-region-code']]
cases_with_region = df.merge(
    region_lookup,
    left_on='countryterritoryCode',
    right_on='iso3',
)

# Total cases and deaths per reporting date and region.
continent_df = (
    cases_with_region
    .groupby(['dateRep', 'bbc-region-code'])[['cases', 'deaths']]
    .sum()
    .reset_index()
)

# Display name for each BBC region code.
BBC_REGION_NAMES = dict(
    region_as='Asia',
    region_af='Africa',
    region_am_na='North America',
    region_am_lac='Latin America & Caribbean',
    region_me='Middle East',
    region_eu='Europe',
    region_oc='Oceania',
)


def rename_continent(bbc_region):
    """Return the display name for a BBC region code.

    Parameters
    ----------
    bbc_region : str
        One of the seven 'region_*' codes listed in BBC_REGION_NAMES.

    Raises
    ------
    KeyError
        If the code is not one of the known regions.
    """
    return BBC_REGION_NAMES[bbc_region]

# Replace region codes with display names, keep only the columns used later,
# and order the rows by continent and then by date.
continent_df = (
    continent_df
    .assign(continent_name=continent_df['bbc-region-code'].map(rename_continent))
    .loc[:, ['dateRep', 'continent_name', 'cases', 'deaths']]
    .rename(columns={'dateRep': 'date'})
    .sort_values(by=['continent_name', 'date'])
    .reset_index(drop=True)
)

# 7-row rolling mean within each continent. The grouped result comes back
# ordered by continent and then by original row, which matches the sort above;
# that is why it can be re-attached positionally after dropping its index.
for source_column, rolling_column in (
    ('cases', 'cases_rolling_average'),
    ('deaths', 'deaths_rolling_average'),
):
    per_continent_windows = continent_df.groupby('continent_name').rolling(7)
    continent_df[rolling_column] = (
        per_continent_windows[source_column].mean().reset_index(drop=True)
    )

# Running death total within each continent.
continent_df['cumulative_deaths'] = (
    continent_df.groupby(['continent_name'])['deaths'].cumsum()
)

# Weekly roll-up keyed on the '%W' week-of-year string. The year is not part
# of the key, so equal week numbers from different years share one bucket.
week_of_year = continent_df['date'].dt.strftime('%W')
weekly_totals = (
    continent_df
    .groupby(['continent_name', week_of_year])[['cases', 'deaths', 'cases_rolling_average']]
    .agg({'cases': 'sum', 'deaths': 'sum', 'cases_rolling_average': 'mean'})
    .reset_index()
)

# Week '52' is discarded -- presumably the stray end-of-December-2019 rows;
# TODO confirm this is still right if the data ever reaches a later week 52.
keep_week = weekly_totals['date'] != '52'
weekly_continent_df = (
    weekly_totals.loc[keep_week]
    .sort_values(by=['continent_name', 'date'])
    .reset_index(drop=True)
)

# Running death total per continent, recomputed on the weekly sums.
weekly_continent_df['cumulative_deaths'] = (
    weekly_continent_df.groupby(['continent_name'])['deaths'].cumsum()
)

The cell above produces two dataframes: `continent_df`, which holds daily data aggregated by continent name, and `weekly_continent_df`, which holds the same data aggregated by week and by continent name. The current plan is to use the latter.

In [29]:
# Display the weekly per-continent table that the MIDI cell below reads from.
weekly_continent_df

Unnamed: 0,continent_name,date,cases,deaths,cases_rolling_average,cumulative_deaths
0,Africa,00,0,0,,0
1,Africa,01,0,0,0.000000,0
2,Africa,02,0,0,0.000000,0
3,Africa,03,0,0,0.000000,0
4,Africa,04,0,0,0.000000,0
...,...,...,...,...,...,...
261,Oceania,33,2089,109,341.000000,521
262,Oceania,34,1810,120,269.714286,641
263,Oceania,35,1286,154,210.714286,795
264,Oceania,36,919,69,152.163265,864


In [75]:
# Largest weekly mean of the rolling case average across all continents;
# used further down to scale each pulse's volume.
max_cases_value = weekly_continent_df['cases_rolling_average'].max()

# Per-continent voice settings:
#   name       -- must match 'continent_name' in weekly_continent_df
#   shimmer    -- 'out' plays the chord's second note before its fourth;
#                 any other value reverses that order
#   root       -- MIDI note number of the chord root
#   pan        -- value sent on controller 10 (pan) for the continent's channel
#   instrument -- program number sent in the program_change message
#                 (General MIDI reference: https://en.wikipedia.org/wiki/General_MIDI)
#   channel    -- MIDI channel; the same number is also used as the beat
#                 offset at which the continent's pulse starts within the bar
# Roots run 41 48 55 62 55 48: up in steps of seven semitones, then back down.
Continent = namedtuple('Continent', ['name', 'shimmer', 'root', 'pan', 'instrument', 'channel'])

asia = Continent(
    name='Asia', shimmer='out', root=41, pan=120, instrument=42, channel=0)  # cello
middle_east = Continent(
    name='Middle East', shimmer='out', root=48, pan=100, instrument=73, channel=1)  # flute
africa = Continent(
    name='Africa', shimmer='out', root=55, pan=80, instrument=41, channel=2)  # viola
europe = Continent(
    name='Europe', shimmer='out', root=62, pan=63, instrument=71, channel=3)  # clarinet
latin_america = Continent(
    name='Latin America & Caribbean', shimmer='out', root=55, pan=40, instrument=40, channel=4)  # violin
north_america = Continent(
    name='North America', shimmer='out', root=48, pan=10, instrument=40, channel=5)  # violin

# Order here fixes the order of the tracks in the MIDI file. Oceania is present
# in the data but is not given a voice.
continents = [asia, middle_east, africa, europe, latin_america, north_america]

class Pulse():
    """One six-beat bar of a single voice: two short notes framed by silence.

    Parameters
    ----------
    chord : str
        One of the keys of ``CHORD_INTERVALS``.
    volume : int
        Value sent on controller 7 (channel volume) for this bar.
    root : int
        MIDI note number of the chord root.
    offset : int
        Number of beats of silence before the first note. The bar is padded
        with ``5 - offset`` beats afterwards, so every bar lasts six beats.
    shimmer : str
        ``'out'`` plays the chord's second note and then its fourth; any
        other value plays them in the opposite order.
    channel : int, optional
        MIDI channel for every message emitted by this pulse. Defaults to 0,
        which is what mido uses when no channel is given.

    Raises
    ------
    ValueError
        If ``chord`` is not a supported chord name.
    """

    # Semitone offsets above the root for each supported chord name.
    CHORD_INTERVALS = {
        'maj7': (0, 4, 7, 11),
        'min/maj7': (0, 3, 7, 11),
        'dim/maj7': (0, 3, 6, 11),
        'ø7': (0, 3, 6, 10),
        'dim': (0, 3, 6, 9),
    }

    def __init__(self, chord, volume, root, offset, shimmer, channel=0):
        self.chord = chord
        self.volume = volume
        self.root = root
        self.offset = offset
        self.shimmer = shimmer
        self.channel = channel
        # Ticks per beat; 480 is also mido's default MidiFile.ticks_per_beat.
        self.beat_duration = 480
        self.notes = self.define_notes()

    def define_notes(self):
        """Return the four MIDI note numbers of ``self.chord`` built on ``self.root``."""
        try:
            intervals = self.CHORD_INTERVALS[self.chord]
        except KeyError:
            # Fail at construction time instead of returning None and breaking
            # later inside add_to_track with an unrelated-looking TypeError.
            raise ValueError(
                'unknown chord {!r}; expected one of {}'.format(
                    self.chord, ', '.join(self.CHORD_INTERVALS))
            ) from None
        return [self.root + interval for interval in intervals]

    def add_to_track(self, track):
        """Append this bar's messages to ``track``.

        Message ``time`` values are delta times in ticks relative to the
        previous message on the track. Every message carries ``self.channel``
        so that it is affected by the program change and pan sent on that
        channel.
        """
        half_beat = int(self.beat_duration / 2)
        second_note, fourth_note = self.notes[1], self.notes[3]
        if self.shimmer == 'out':
            first, last = second_note, fourth_note
        else:
            first, last = fourth_note, second_note

        def emit(kind, **fields):
            track.append(Message(kind, channel=self.channel, **fields))

        # Lead-in silence, then the first note with this bar's volume.
        emit('note_on', note=first, time=self.offset * self.beat_duration)
        emit('control_change', control=7, value=self.volume, time=0)
        emit('note_off', note=first, time=half_beat)

        # Second note follows immediately.
        emit('note_on', note=last, time=0)
        emit('note_off', note=last, time=half_beat)

        # Padding to complete the six-beat bar. This note_off appears to serve
        # only as a carrier for the delta time: with the configured roots,
        # note 55 is never one of the two notes played above.
        emit('note_off', note=55, time=(5 - self.offset) * self.beat_duration)


# Chords in order of rising death toll: one chord per fifth of the maximum.
CHORD_LADDER = ['maj7', 'min/maj7', 'dim/maj7', 'ø7', 'dim']


def chord_for_deaths(deaths, threshold_width):
    """Return the chord name for a cumulative death count.

    The k-th chord of CHORD_LADDER is used while ``deaths <= threshold_width * k``.
    Anything above the last threshold (for example the maximum itself, if
    floating-point rounding leaves it marginally above ``threshold_width * 5``)
    falls back to the last chord, so a bar is never silently skipped.
    """
    for step, chord in enumerate(CHORD_LADDER, start=1):
        if deaths <= threshold_width * step:
            return chord
    return CHORD_LADDER[-1]


# Cumulative deaths per week summed over every continent in the table. The same
# global series drives the chord choice for all tracks, so it is computed once.
# (To use each continent's own toll instead, read filtered_df['cumulative_deaths']
# inside the loop below.)
deaths_data = weekly_continent_df.groupby('date')['cumulative_deaths'].sum().values
max_death_value = deaths_data.max()
threshold_width = max_death_value / 5

mid = MidiFile()
for continent in continents:
    print(continent)
    filtered_df = weekly_continent_df.loc[weekly_continent_df['continent_name'] == continent.name]
    # Weekly case level as whole numbers; weeks with no rolling average yet count as 0.
    cases_data = filtered_df['cases_rolling_average'].fillna(0).astype(int)

    track = MidiTrack()
    mid.tracks.append(track)
    # Set up track:
    track.append(MetaMessage('track_name', name=continent.name, time=0))
    track.append(MetaMessage('time_signature', numerator=6, denominator=8, time=0)) # does not work
    # Instrumentation:
    track.append(Message('program_change', channel=continent.channel, program=continent.instrument, time=0))
    # Pan:
    track.append(Message('control_change', channel=continent.channel, control=10, value=continent.pan, time=0))

    # One bar per week. zip() stops at the shorter of the two series.
    for cases, deaths in zip(cases_data, deaths_data):
        # Map cases onto 26..126 relative to the busiest week anywhere; silent when no cases.
        volume = int(cases / max_cases_value * 100 + 26) if cases > 0 else 0
        pulse = Pulse(chord=chord_for_deaths(deaths, threshold_width),
                      volume=volume,
                      root=continent.root,
                      offset=continent.channel,
                      shimmer=continent.shimmer,
                      channel=continent.channel)
        pulse.add_to_track(track)

# 016 is global deaths data, 017 is continent-level deaths data
mid.save('./output/v5_test_019.mid')

Continent(name='Asia', shimmer='out', root=41, pan=120, instrument=42, channel=0)
Continent(name='Middle East', shimmer='out', root=48, pan=100, instrument=73, channel=1)
Continent(name='Africa', shimmer='out', root=55, pan=80, instrument=41, channel=2)
Continent(name='Europe', shimmer='out', root=62, pan=63, instrument=71, channel=3)
Continent(name='Latin America & Caribbean', shimmer='out', root=55, pan=40, instrument=40, channel=4)
Continent(name='North America', shimmer='out', root=48, pan=10, instrument=40, channel=5)
