In [1]:
import pandas as pd
import numpy as np
from mido import Message, MidiFile, MidiTrack, MetaMessage

In [2]:
# Daily per-country COVID-19 case and death counts; the file name dates this
# snapshot 2020-06-09.
df = pd.read_excel(
    io='./data/COVID-19-geographic-disbtribution-worldwide-2020-06-09.xlsx',
    sheet_name='COVID-19-geographic-disbtributi',
)

In [3]:
# Peek at the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018,continentExp
0,2020-06-09,9,6,2020,575,12,Afghanistan,AF,AFG,37172386.0,Asia
1,2020-06-08,8,6,2020,791,30,Afghanistan,AF,AFG,37172386.0,Asia
2,2020-06-07,7,6,2020,582,18,Afghanistan,AF,AFG,37172386.0,Asia
3,2020-06-06,6,6,2020,915,9,Afghanistan,AF,AFG,37172386.0,Asia
4,2020-06-05,5,6,2020,787,6,Afghanistan,AF,AFG,37172386.0,Asia


In [5]:
# Column dtypes and non-null counts. In the output below geoId,
# countryterritoryCode and popData2018 have fewer non-null values than rows.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22383 entries, 0 to 22382
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   dateRep                  22383 non-null  datetime64[ns]
 1   day                      22383 non-null  int64         
 2   month                    22383 non-null  int64         
 3   year                     22383 non-null  int64         
 4   cases                    22383 non-null  int64         
 5   deaths                   22383 non-null  int64         
 6   countriesAndTerritories  22383 non-null  object        
 7   geoId                    22296 non-null  object        
 8   countryterritoryCode     22063 non-null  object        
 9   popData2018              22047 non-null  float64       
 10  continentExp             22383 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(5), object(4)
memory usage: 1.9+ MB


In [4]:
# Country centroid table; the columns used later are adm0_a3, Longitude and Latitude.
centroids_df = pd.read_csv(filepath_or_buffer='./data/country_centroids_az8.csv')

In [6]:
# Print every country code from the case data that has no row in the centroid
# table (matching is done on the adm0_a3 column).
known_codes = centroids_df['adm0_a3'].values
for code in df['countryterritoryCode'].unique():
    if code not in known_codes:
        print(code)

nan
GIB
XKX
PSE
SSD


In [7]:
# Attach each country's centroid coordinates. The join is an inner one, so
# rows whose country code has no centroid entry are dropped.
df = df.merge(
    centroids_df[['adm0_a3', 'Longitude', 'Latitude']],
    how='inner',
    left_on='countryterritoryCode',
    right_on='adm0_a3',
)

In [8]:
# Order rows chronologically within each country so the running totals
# computed afterwards accumulate in date order.
df = df.sort_values(by=['countryterritoryCode', 'dateRep'])

In [9]:
# Running per-country totals: cumulative_deaths first, then cumulative_cases.
for daily_column in ('deaths', 'cases'):
    df['cumulative_' + daily_column] = (
        df.groupby(['countryterritoryCode'])[daily_column].cumsum()
    )

In [10]:
# Spot-check one country's rows, including the new cumulative columns.
df[df['countryterritoryCode'].eq('GBR')]

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018,continentExp,adm0_a3,Longitude,Latitude,cumulative_deaths,cumulative_cases
20769,2019-12-31,31,12,2019,0,0,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,0,0
20768,2020-01-01,1,1,2020,0,0,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,0,0
20767,2020-01-02,2,1,2020,0,0,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,0,0
20766,2020-01-03,3,1,2020,0,0,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,0,0
20765,2020-01-04,4,1,2020,0,0,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20612,2020-06-05,5,6,2020,1805,176,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,39904,281661
20611,2020-06-06,6,6,2020,1650,357,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,40261,283311
20610,2020-06-07,7,6,2020,1557,204,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,40465,284868
20609,2020-06-08,8,6,2020,1326,77,United_Kingdom,UK,GBR,66488991.0,Europe,GBR,-2.865632,54.123872,40542,286194


In [12]:
# Country codes of the 20 countries with the most reported deaths, highest first.
deaths_by_country = df.groupby('countryterritoryCode')['deaths'].sum()
top_20_worst_hit_countries = deaths_by_country.sort_values(ascending=False).head(20).index

In [13]:
# Country codes of the 50 countries with the most reported deaths, highest first.
deaths_by_country = df.groupby('countryterritoryCode')['deaths'].sum()
top_50_worst_hit_countries = deaths_by_country.sort_values(ascending=False).head(50).index

In [14]:
# Country codes of the 30 countries with the most reported deaths, highest
# first. This is the selection the MIDI cells below iterate over.
deaths_by_country = df.groupby('countryterritoryCode')['deaths'].sum()
top_30_worst_hit_countries = deaths_by_country.sort_values(ascending=False).head(30).index

In [15]:
# Time span between the earliest and the latest report date in the data.
report_dates = df['dateRep']
diff = report_dates.max() - report_dates.min()

In [16]:
# Whole days between the first and last report date. This is one less than
# the number of distinct calendar days the data spans.
diff.days

161

In [17]:
# Which continents the 30 worst-hit countries belong to.
in_top_30 = df['countryterritoryCode'].isin(top_30_worst_hit_countries)
df.loc[in_top_30, 'continentExp'].unique()

array(['Europe', 'America', 'Asia', 'Africa'], dtype=object)

## Version one below: chord progression from maj7 to diminished, daily pulse, velocity changes with cases


In [319]:
# Highest cumulative case count reached by any of the 30 worst-hit countries;
# it is the ceiling of the logarithmic velocity scale used further down.
# (Death thresholds are not global: they are derived per country in the
# track-building loop.)
in_top_30 = df['countryterritoryCode'].isin(top_30_worst_hit_countries)
max_cases_value = df.loc[in_top_30, 'cumulative_cases'].max()

# MIDI program number per continent.
# NOTE(review): the other three labels match General MIDI programs counted
# from zero; on that numbering 74 is Recorder and Flute is 73 - confirm which
# instrument was intended for Europe.
instrumentation = {
    'Asia': 68,     # oboe
    'Europe': 74,   # labelled "flute" by the author, see note above
    'America': 70,  # bassoon
    'Africa': 12,   # marimba
}

# Every continent shares the same root note (MIDI note 55). An earlier
# variant gave each continent its own root (55 / 60 / 65 / 70) but was judged
# "not so harmonious".
root_notes = dict.fromkeys(instrumentation, 55)

# One MIDI channel per continent, numbered in the order listed above.
channel_numbers = dict(zip(instrumentation, range(len(instrumentation))))

# Length of one pulse; Pulse multiplies this by 120 to get MIDI ticks.
duration = 4

class Pulse():
    """A four-note chord that sounds for a fixed time on one MIDI channel.

    Parameters
    ----------
    chord : str
        Chord quality; one of the keys of ``CHORD_INTERVALS``.
    duration : int
        Length of the pulse; multiplied by 120 to obtain MIDI ticks.
    velocity : number
        Note-on velocity; truncated to ``int``.
    root : int, default 55
        MIDI note number of the chord root.

    Raises
    ------
    ValueError
        If ``chord`` is not a known chord quality.
    """

    # Semitone offsets above the root for every supported chord quality.
    CHORD_INTERVALS = {
        'maj7': (0, 4, 7, 11),
        'min/maj7': (0, 3, 7, 11),
        'dim/maj7': (0, 3, 6, 11),
        'ø7': (0, 3, 6, 10),
        'dim': (0, 3, 6, 9),
    }

    def __init__(self, chord, duration, velocity, root = 55):
        self.chord = chord
        self.root = root
        self.notes = self.define_notes()
        self.duration = duration
        self.velocity = int(velocity)

    def define_notes(self):
        """Return the chord's MIDI note numbers, lowest first."""
        try:
            offsets = self.CHORD_INTERVALS[self.chord]
        except KeyError:
            # Previously an unknown chord produced None and only failed later,
            # when the notes were iterated.
            raise ValueError(
                f"Unknown chord {self.chord!r}; expected one of {sorted(self.CHORD_INTERVALS)}"
            ) from None
        return [self.root + offset for offset in offsets]

    def add_to_track(self, track, channel = 0):
        """Append the chord's note-on and note-off messages to ``track``.

        All notes start together (delta time 0). The first note-off carries
        the whole pulse length as its delta time and the remaining note-offs
        follow immediately. Note-offs are written as ``note_on`` messages
        with velocity 0.

        ``channel`` selects the MIDI channel of every message; it defaults to
        0, which is what the messages used before the parameter existed.
        """
        for note in self.notes:
            track.append(Message('note_on', channel = channel, note = note,
                                 velocity = self.velocity, time = 0))
        for idx, note in enumerate(self.notes):
            time = 120 * self.duration if idx == 0 else 0
            track.append(Message('note_on', channel = channel, note = note,
                                 velocity = 0, time = time))


# Chords ordered from the lowest to the highest fifth of a country's final
# death toll.
chord_progression = ['maj7', 'min/maj7', 'dim/maj7', 'ø7', 'dim']

# `diff` runs from the first to the last report date, so the number of
# calendar days covered is one more than diff.days. Using diff.days directly
# left padded countries one day shorter than countries with a full history.
total_days = diff.days + 1

mid = MidiFile()

for country_code in top_30_worst_hit_countries:
    country_df = df.loc[df['countryterritoryCode'] == country_code]
    lon = country_df['Longitude'].values[0]
    continent = country_df['continentExp'].values[0]
    channel_no = channel_numbers[continent]
    cases_data = country_df['cumulative_cases'].values
    deaths_data = country_df['cumulative_deaths'].values

    # Front-pad short histories with zeros so every track has the same length.
    # NOTE(review): this assumes the missing days all precede the country's
    # first report; gaps in the middle of a series would shift later days.
    missing_days = total_days - len(deaths_data)
    if missing_days > 0:
        cases_data = np.pad(cases_data, [missing_days, 0], mode = 'constant')
        deaths_data = np.pad(deaths_data, [missing_days, 0], mode = 'constant')

    # Min-max scale longitude from [-150, 150] onto the 0-127 pan range,
    # clamping so a centroid outside that window cannot yield an invalid value.
    lon_std = (lon - -150) / (150 - -150)
    pan = int(min(max(lon_std, 0.0), 1.0) * 127)

    track = MidiTrack()
    mid.tracks.append(track)
    track.append(MetaMessage('track_name', name = country_code, time = 0))
    instrument = instrumentation[continent]
    track.append(Message('program_change', channel=channel_no, program=instrument, time=0))
    track.append(Message('control_change', channel=channel_no, control=10, value=pan, time=0))

    # Chord thresholds are relative to this country's own final death toll.
    max_death_value = deaths_data.max()
    threshold_width = max_death_value / 5
    root = root_notes[continent]

    for cases, deaths in zip(cases_data, deaths_data):
        # Velocity grows with the logarithm of cumulative cases, scaled so the
        # largest count among the selected countries maps to 127.
        if cases > 0:
            velocity = np.log10(cases) / np.log10(max_cases_value) * 127
            velocity = min(max(velocity, 0), 127)
        else:
            velocity = 0

        # First band whose upper bound covers today's deaths. Falling back to
        # the last chord guarantees a pulse every day even if floating-point
        # rounding makes threshold_width * 5 land just below the maximum.
        chord = next(
            (name for band, name in enumerate(chord_progression, start = 1)
             if deaths <= threshold_width * band),
            chord_progression[-1],
        )
        pulse = Pulse(chord = chord, duration = duration, velocity = velocity, root = root)
        # Send the notes on the same channel as the program and pan messages.
        pulse.add_to_track(track, channel = channel_no)

# Write the file once, after every track has been built.
mid.save('./output/test_020.mid')

## Version two: chord stays on maj7 but descends octaves, weekly pulse, velocity stays same but volume changes with cases


In [55]:
# Highest cumulative case count reached by any of the 30 worst-hit countries;
# it is the ceiling of the logarithmic volume scale used further down.
# (Death thresholds are not global: they are derived per country in the
# track-building loop.)
in_top_30 = df['countryterritoryCode'].isin(top_30_worst_hit_countries)
max_cases_value = df.loc[in_top_30, 'cumulative_cases'].max()

# MIDI program number per continent.
# NOTE(review): the other three labels match General MIDI programs counted
# from zero; on that numbering 74 is Recorder and Flute is 73 - confirm which
# instrument was intended for Europe.
instrumentation = {
    'Asia': 68,     # oboe
    'Europe': 74,   # labelled "flute" by the author, see note above
    'America': 70,  # bassoon
    'Africa': 12,   # marimba
}

# One MIDI channel per continent, numbered in the order listed above.
channel_numbers = dict(zip(instrumentation, range(len(instrumentation))))

# Length of one pulse; Pulse multiplies this by 120 to get MIDI ticks.
duration = 12

class Pulse():
    """A four-note chord held on one MIDI channel while its volume is stepped.

    Parameters
    ----------
    chord : str
        Chord quality; one of the keys of ``CHORD_INTERVALS``.
    duration : int
        Length of the pulse; multiplied by 120 to obtain MIDI ticks.
    velocity : number
        Note-on velocity; truncated to ``int``.
    root : int, default 55
        MIDI note number of the chord root.
    volume_changes : iterable of int, optional
        Values for controller 7, sent one after another while the chord
        sounds. Each one is spaced a seventh of the pulse length apart.
        Defaults to no volume changes.

    Raises
    ------
    ValueError
        If ``chord`` is not a known chord quality.
    """

    # Semitone offsets above the root for every supported chord quality.
    CHORD_INTERVALS = {
        'maj7': (0, 4, 7, 11),
        'min/maj7': (0, 3, 7, 11),
        'dim/maj7': (0, 3, 6, 11),
        'ø7': (0, 3, 6, 10),
        'dim': (0, 3, 6, 9),
    }

    def __init__(self, chord, duration, velocity, root = 55, volume_changes = None):
        self.chord = chord
        self.root = root
        self.notes = self.define_notes()
        self.duration = duration
        self.velocity = int(velocity)
        # A fresh list per instance: a literal [] default would be shared by
        # every Pulse created without an explicit argument.
        self.volume_changes = [] if volume_changes is None else list(volume_changes)

    def define_notes(self):
        """Return the chord's MIDI note numbers, lowest first."""
        try:
            offsets = self.CHORD_INTERVALS[self.chord]
        except KeyError:
            # Previously an unknown chord produced None and only failed later,
            # when the notes were iterated.
            raise ValueError(
                f"Unknown chord {self.chord!r}; expected one of {sorted(self.CHORD_INTERVALS)}"
            ) from None
        return [self.root + offset for offset in offsets]

    def add_to_track(self, track, channel):
        """Append note-ons, volume steps and note-offs to ``track``.

        Every message is sent on ``channel``. The notes start together, the
        volume changes follow at equal delays, and the first note-off waits
        for whatever remains of the pulse length. Note-offs are written as
        ``note_on`` messages with velocity 0.
        """
        for note in self.notes:
            track.append(Message('note_on', channel = channel, note = note,
                                 velocity = self.velocity, time = 0))

        # One seventh of the pulse per volume step; constant, so computed once.
        delay = int(120 * self.duration / 7)
        elapsed_time = 0
        for volume in self.volume_changes:
            elapsed_time += delay
            # Uses the `channel` argument; the global `channel_no` was read
            # here before, which only worked while the two happened to agree.
            track.append(Message('control_change', channel = channel, control = 7,
                                 value = volume, time = delay))

        for idx, note in enumerate(self.notes):
            time = 120 * self.duration - elapsed_time if idx == 0 else 0
            track.append(Message('note_on', channel = channel, note = note,
                                 velocity = 0, time = time))

def convert_cases_to_volume(cases):
    """Map a cumulative case count onto a 0-127 controller value.

    The scale is logarithmic and relative to ``max_cases_value``; counts of
    zero or below map to 0.
    """
    if cases > 0:
        return int(np.log10(cases) / np.log10(max_cases_value) * 127)
    return 0


# One chord per week; each day of that week contributes one volume step.
days_per_pulse = 7
# The root starts from 44 and rises by `interval` semitones for each fifth of
# the country's final death toll that has been reached.
# NOTE(review): the section heading says the chord "descends octaves", while
# this rises by 7 semitones per band - confirm which is intended.
base_root = 44
interval = 7
band_count = 5
velocity = 100

# `diff` runs from the first to the last report date, so the number of
# calendar days covered is one more than diff.days. Using diff.days directly
# left padded countries one day shorter than countries with a full history.
total_days = diff.days + 1

mid = MidiFile()

for country_code in top_30_worst_hit_countries:
    country_df = df.loc[df['countryterritoryCode'] == country_code]
    lon = country_df['Longitude'].values[0]
    continent = country_df['continentExp'].values[0]
    channel_no = channel_numbers[continent]
    cases_data = country_df['cumulative_cases'].values
    deaths_data = country_df['cumulative_deaths'].values

    # Front-pad short histories with zeros so every track has the same length.
    # NOTE(review): this assumes the missing days all precede the country's
    # first report; gaps in the middle of a series would shift later days.
    missing_days = total_days - len(deaths_data)
    if missing_days > 0:
        cases_data = np.pad(cases_data, [missing_days, 0], mode = 'constant')
        deaths_data = np.pad(deaths_data, [missing_days, 0], mode = 'constant')

    # Min-max scale longitude from [-150, 150] onto the 0-127 pan range,
    # clamping so a centroid outside that window cannot yield an invalid value.
    lon_std = (lon - -150) / (150 - -150)
    pan = int(min(max(lon_std, 0.0), 1.0) * 127)

    track = MidiTrack()
    mid.tracks.append(track)
    track.append(MetaMessage('track_name', name = country_code, time = 0))
    instrument = instrumentation[continent]
    track.append(Message('program_change', channel=channel_no, program=instrument, time=0))
    track.append(Message('control_change', channel=channel_no, control=10, value=pan, time=0))
    # Start silent; the per-day volume steps inside each pulse raise the level.
    track.append(Message('control_change', channel=channel_no, control=7, value=0, time=0))

    # Pitch thresholds are relative to this country's own final death toll.
    max_death_value = deaths_data.max()
    threshold_width = max_death_value / 5

    # Visit the first day of every week instead of testing each day in turn.
    for week_start in range(0, len(deaths_data), days_per_pulse):
        deaths = deaths_data[week_start]
        week_of_cases = cases_data[week_start:week_start + days_per_pulse]
        volume_changes = [convert_cases_to_volume(cases) for cases in week_of_cases]

        # First band whose upper bound covers the deaths so far. Falling back
        # to the top band guarantees a pulse every week even if floating-point
        # rounding makes threshold_width * 5 land just below the maximum.
        band = next(
            (candidate for candidate in range(1, band_count + 1)
             if deaths <= threshold_width * candidate),
            band_count,
        )
        root = base_root + interval * band
        pulse = Pulse(chord = 'maj7', duration = duration, velocity = velocity,
                      root = root, volume_changes = volume_changes)
        pulse.add_to_track(track, channel=channel_no)

# Write the file once, after every track has been built.
mid.save('./output/v2_test_004.mid')

In [284]:
# List each selected country's final cumulative case count.
for country_code in top_30_worst_hit_countries:
    is_country = df['countryterritoryCode'].eq(country_code)
    country_df = df[is_country]
    cases_data, deaths_data = country_df['cumulative_cases'], country_df['cumulative_deaths']
    print(country_code, cases_data.max())

USA 1961185
GBR 287399
BRA 691758
ITA 235278
FRA 154188
ESP 241717
MEX 120102
BEL 59348
DEU 184543
IRN 173832
CAN 96233
IND 266598
NLD 47739
RUS 476658
PER 199696
TUR 171121
SWE 45133
CHN 84194
ECU 43378
CHL 138846
PAK 108317
IDN 32033
IRL 25207
CHE 30889
PRT 34885
ROU 20604
COL 40719
EGY 35444
POL 27160
ZAF 50879


Useful for debugging:

In [45]:
# Dump every message of the first track, one per line, in file order.
first_track = mid.tracks[0]
for msg in first_track:
    print(msg)

<meta message track_name name='USA' time=0>
program_change channel=2 program=70 time=0
control_change channel=2 control=10 value=15 time=0
note_on channel=2 note=51 velocity=100 time=0
note_on channel=2 note=55 velocity=100 time=0
note_on channel=2 note=58 velocity=100 time=0
note_on channel=2 note=62 velocity=100 time=0
control_change channel=2 control=7 value=0 time=4
control_change channel=2 control=7 value=0 time=8
control_change channel=2 control=7 value=0 time=12
control_change channel=2 control=7 value=0 time=16
control_change channel=2 control=7 value=0 time=20
control_change channel=2 control=7 value=0 time=24
control_change channel=2 control=7 value=0 time=28
note_on channel=2 note=51 velocity=0 time=480
note_on channel=2 note=55 velocity=0 time=0
note_on channel=2 note=58 velocity=0 time=0
note_on channel=2 note=62 velocity=0 time=0
note_on channel=2 note=51 velocity=100 time=0
note_on channel=2 note=55 velocity=100 time=0
note_on channel=2 note=58 velocity=100 time=0
note_o