In [1]:
import requests

# Composer names to search Mutopia for; only edit this list to add more composers.
composer_names = [
  'Bach',
  'Mozart',
  'Chopin',
  'Beethoven',
  'Schubert',
  'Schumann',
  'Rachmaninoff',
  'Satie',
  'Czerny',
]

In [4]:
import pandas as pd
import numpy as np
import re

#from a requests Response object, obtain the midi file links on the page
#only works with Mutopia
def get_midis_from_page(page_html):
  """Collect the MIDI file links found in a results page.

  Args:
    page_html: a requests Response, or any object whose ``iter_lines()``
      yields ``bytes`` or ``str`` lines.

  Returns:
    List of every double-quoted value ending in ``.mid``, in page order.
    Empty list when the page has no such value.
  """
  # The dot is escaped so only a literal ".mid" suffix qualifies, and [^"]+ keeps
  # each match inside a single quoted value so several links on one line are all found.
  mid_pattern = re.compile(r'"([^"]+\.mid)"')
  mid_links = []
  for line in page_html.iter_lines():
    # iter_lines() yields bytes by default; decode them instead of matching
    # against the "b'...'" repr that str(bytes) produces.
    if isinstance(line, bytes):
      line = line.decode('utf-8', errors='replace')
    mid_links.extend(mid_pattern.findall(line))
  return mid_links

#from a requests Response object, get the link attached to "Next 10"
def get_next_page(page_html):
  """Fetch the page behind the "Next 10" link of a results page.

  Every line of ``page_html`` (a requests Response) is scanned; when more than
  one line matches, the link taken from the last matching line is used.

  Returns:
    The Response for the linked page, or None when no line has a "Next 10" link.
  """
  next_link = re.compile('.+\"(.+)\">Next 10.+')
  candidates = (next_link.match(str(row)) for row in page_html.iter_lines())
  found = [hit.group(1) for hit in candidates if hit]
  if not found:
    return None
  # The captured link is relative, so prepend the site's cgibin location.
  return requests.get('https://www.mutopiaproject.org/cgibin/' + found[-1])

#get composer midis by looping through pages until none exist
#might miss a few
def get_composer_midis(composer):
  """Gather the MIDI links from every results page of a composer's piano search.

  Starts from the first results page and keeps moving to whatever
  get_next_page returns until that is None.

  Returns:
    One flat list with the links of all visited pages, in visiting order.
  """
  query = composer + '+' + 'piano'
  page = requests.get('https://www.mutopiaproject.org/cgibin/make-table.cgi?searchingfor=' + query)
  collected = []
  while page is not None:
    collected += get_midis_from_page(page)
    page = get_next_page(page)
  return collected

In [5]:
from tqdm import tqdm

# Scrape the MIDI links of each composer, keyed by the name that was searched.
all_composer_midis = {}
composer_progress = tqdm(composer_names, leave = True, position = 0)
for c in composer_progress:
  all_composer_midis[c] = get_composer_midis(c)
# Bare last expression so the notebook renders the collected mapping.
all_composer_midis

100%|██████████| 9/9 [00:21<00:00,  2.34s/it]


{'Bach': ['https://www.mutopiaproject.org/ftp/BachCPE/cpe-bach-rondo/cpe-bach-rondo.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV117a/BWV-117a/BWV-117a.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV259/bwv-259/bwv-259.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV269/bwv_269/bwv_269.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV347/bwv347/bwv347.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV454/bwv_454/bwv_454.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV462/bwv_462/bwv_462.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV510/BWV-510/BWV-510.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV511/BWV-511/BWV-511.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV512/BWV-512/BWV-512.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV515/anna-magdalena-20a/anna-magdalena-20a.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV516/BWV-516/BWV-516.mid',
  'https://www.mutopiaproject.org/ftp/BachJS/BWV772/bach-invention-01/bac

In [8]:
import os

def save_midi(link, directory):
  """Download ``link`` and store it in ``directory``.

  The file is named after the last '/'-separated component of ``link``.

  Args:
    link: URL of the file to download.
    directory: destination folder; created, with any missing parents, if absent.
  """
  # makedirs also creates missing parents (e.g. 'Data' for 'Data/BachKeyboard'),
  # and exist_ok avoids the check-then-create race of exists() followed by mkdir().
  os.makedirs(directory, exist_ok = True)
  filename = link.split('/')[-1]
  response = requests.get(link, allow_redirects = True)
  # NOTE(review): the HTTP status is not checked, so the body of an error
  # response would be written under the same file name.
  # The with-block closes the file handle even if the write fails.
  with open(os.path.join(directory, filename), 'wb') as midi_file:
    midi_file.write(response.content)

# Download each composer's files into a per-composer folder, one progress bar per composer.
for composer, midi_links in all_composer_midis.items():
  target_dir = 'Data/' + composer + 'Keyboard'
  for midi in tqdm(midi_links, leave = True, position = 0):
    save_midi(midi, target_dir)

100%|██████████| 125/125 [00:46<00:00,  2.68it/s]
100%|██████████| 35/35 [00:12<00:00,  2.79it/s]
100%|██████████| 47/47 [00:15<00:00,  2.97it/s]
100%|██████████| 42/42 [00:17<00:00,  2.35it/s]
100%|██████████| 50/50 [00:18<00:00,  2.71it/s]
100%|██████████| 34/34 [00:10<00:00,  3.21it/s]
100%|██████████| 10/10 [00:03<00:00,  2.87it/s]
100%|██████████| 16/16 [00:04<00:00,  3.27it/s]
100%|██████████| 31/31 [00:09<00:00,  3.19it/s]
