## MuseScore Scraper

Tool: https://github.com/frankye8998/MusicalMusic  

[Reference 2](https://github.com/codeandproduce/music_research_dataset_midi/blob/9a5afe86f1f2d3fdbfa24de99122140e7a7cd60c/MuseScore/MuseScore%20Scraping%20Main.ipynb)

In [1]:
from lxml import html
import urllib
import requests
import shutil

In [2]:
from random import randint
from time import sleep

In [3]:
import json
from pathlib import Path
from tqdm import tqdm

In [4]:
# links = []
# url = "https://musescore.com/hub/piano/solo-piano?sort=view_count"
# # url = "https://musescore.com/hub/piano/piano-duet?sort=view_count"
# page = requests.get(url)
# tree = html.fromstring(page.content)
# print(tree.xpath("//title/text()"))

In [5]:
# articles = tree.xpath("//article[@role='article']")

In [6]:
def get_data(a):
    """Extract one score's listing details from a search-result article node.

    Args:
        a: element exposing ``find()``; the nodes it returns must provide
            ``attrib`` and ``text_content()`` (the caller passes lxml
            ``<article>`` elements).

    Returns:
        dict with the keys ``link``, ``score_id``, ``title``, ``author``,
        ``parts``, ``pages``, ``duration`` and ``views`` (all strings).

    Raises:
        ValueError: if the meta text does not split into exactly five fields.
    """
    anchor = a.find("h2//a[@rel='bookmark']")
    href = anchor.attrib['href']
    record = {
        'link': "https://musescore.com" + href,
        # The last path component of the score URL is its numeric id.
        'score_id': Path(href).name,
        'title': anchor.text_content().strip(),
        'author': a.find("div[@class='user']//a").text_content(),
    }

    raw_meta = a.find("div[@class='meta']").text_content()
    # Five bullet-separated fields are expected; the fourth is parsed but
    # intentionally left out of the result.
    parts, pages, duration, _time, views = map(str.strip, raw_meta.split('•\n'))
    record.update(parts=parts, pages=pages, duration=duration, views=views)
    return record

## That only covers page 1 — the listing continues on pages 2, 3, 4, 5, … up to 100
### So let's build the URL in a general form that takes the page number:

In [7]:
# Listing URL prefix; the scraping loop appends the page number to it.
# The commented-out lines are alternative hubs — keep exactly one assignment active.
# url_default = "https://musescore.com/hub/video_games/movie?sort=view_count&page=" # + the page number
url_default = 'https://musescore.com/hub/video_games?instruments=0&sort=view_count&page='
# url_default = "https://musescore.com/hub/piano/solo-piano?sort=view_count&page=" # + the page number
# url_default = "https://musescore.com/hub/piano/voice-piano?sort=view_count&page=" # + the page number
# url_default = "https://musescore.com/hub/piano?sort=view_count&page=" # + the page number

In [8]:
# On-disk cache of the scraped listing metadata: loaded when it exists,
# otherwise written after the listing pages have been scraped.
json_file = Path('musescore_video_all.json')

In [9]:
# Reuse an earlier scrape when its JSON cache is already on disk.
if json_file.exists():
    with json_file.open('r') as fp:
        links = json.load(fp)
        

In [10]:
# Scrape the listing pages only when no cached JSON exists, then cache the result.
if not json_file.exists():
    links = []
    # Pages are 1-based. range(1, 101) covers pages 1..100 inclusive, so the
    # number of requests matches the progress-bar total (range(1, 100) stopped
    # at page 99).
    for page_number in tqdm(range(1, 101), total=100):
        # A separate name for the response keeps the loop counter intact.
        response = requests.get(url_default + str(page_number))
        tree = html.fromstring(response.content)

        # Each score on a listing page is rendered as <article role="article">.
        articles = tree.xpath("//article[@role='article']")

        links.extend(get_data(a) for a in articles)

        # Throttling between pages is currently disabled:
        # sleep(randint(1,4))
    with open(json_file, 'w') as fp:
        json.dump(links, fp)
# Preview the first 100 records (freshly scraped or previously loaded).
print(links[:100])

[{'link': 'https://musescore.com/user/5489651/scores/1379956', 'score_id': '1379956', 'title': 'Undertale - 100 "MEGALOVANIA"', 'author': 'Jester Musician', 'parts': '1 part', 'pages': '6 pages', 'duration': '02:44', 'views': '972,868 views'}, {'link': 'https://musescore.com/user/73972/scores/1352796', 'score_id': '1352796', 'title': 'Undertale - Megalovania (Piano) [Added guitar, fixed tonality]', 'author': 'Manel Návola', 'parts': '2 parts', 'pages': '5 pages', 'duration': '02:30', 'views': '520,439 views'}, {'link': 'https://musescore.com/user/2466621/scores/1463381', 'score_id': '1463381', 'title': "One Summer's Day (Spirited Away)", 'author': 'Torby Brand', 'parts': '1 part', 'pages': '3 pages', 'duration': '03:34', 'views': '388,744 views'}, {'link': 'https://musescore.com/user/44653/scores/1291731', 'score_id': '1291731', 'title': 'Undertale OST - Hopes and Dreams/Save the World', 'author': 'jozy101', 'parts': '1 part', 'pages': '7 pages', 'duration': '04:44', 'views': '339,806 

# Scrape links found

### Musical Music Lib

In [11]:

class MuseScoreException(Exception):
    """Common base class for the MuseScore errors defined in this notebook."""


class InvalidFileExtension(MuseScoreException):
    """Raised when a requested download format is not one of the supported ones."""


class InvalidScoreID(MuseScoreException):
    """Raised when a score request does not come back with HTTP status 200."""


class InvalidCredentials(MuseScoreException):
    """Raised when logging in does not yield a session cookie."""


class InvalidSearchSort(MuseScoreException):
    """Not raised by the visible code; presumably signals an unsupported search sort order."""

In [12]:
import urllib
import urllib.request


import bs4
import requests

class MusicalMusic:
    """Musescore actions requiring an account.

    Logs in once at construction time and keeps the two cookies
    (``mu_browser_uni`` and ``mu_user_new``) that are sent with every
    download request.
    """

    # Formats accepted by retrieve() and download().
    _FORMATS = ("mp3", "pdf", "mid", "mxl", "mscz")

    def __init__(self, username, password):
        """Log in to musescore.com and remember the session cookies.

        Args:
            username: account user name.
            password: account password.

        Raises:
            InvalidCredentials: if the login response carries no
                ``mu_user_new`` cookie.
        """
        self.username = username
        # The login page provides the CSRF token (meta tag) and the initial cookies.
        login_page = requests.get("https://musescore.com/user/login")
        soup = bs4.BeautifulSoup(login_page.text, "html.parser")
        csrf = soup.find("meta", {"name": "csrf-token"})["content"]
        mu_browser_uni = login_page.cookies['mu_browser_uni']
        cookies = {
            "mu_browser_uni": mu_browser_uni,
            "_csrf": login_page.cookies["_csrf"],
        }
        data = {
            "username": username,
            "password": password,
            "_csrf": csrf,
            "op": "Log in",
        }
        try:
            # Redirects are not followed so the cookies set by the login
            # response itself can be read.
            mu_user = requests.post(
                "https://musescore.com/user/auth/login/process",
                data=data,
                cookies=cookies,
                allow_redirects=False,
            ).cookies["mu_user_new"]
        except KeyError as e:
            raise InvalidCredentials(
                "Please check your username and password!") from e

        self.mu_browser_uni = mu_browser_uni
        self.mu_user = mu_user

    @classmethod
    def _download_url(cls, id, format):
        """Validate *format* and return the download URL for score *id*."""
        if format not in cls._FORMATS:
            raise InvalidFileExtension("Must be mp3, pdf, mid, mxl, or mscz.")
        return f"https://musescore.com/score/{id}/download/{format}"

    def retrieve(self, id, format="pdf"):
        """Retrieves Musescore data in bytes.

        Args:
            id: score id.
            format: one of mp3, pdf, mid, mxl, mscz.

        Returns:
            The response body as ``bytes``.

        Raises:
            InvalidFileExtension: if *format* is not supported.
            InvalidScoreID: if the response status is not 200 (the status
                code is the exception message).
        """
        newlink = self._download_url(id, format)
        cookies = {"mu_browser_uni": self.mu_browser_uni,
                   "mu_user_new": self.mu_user}
        # TLS certificate verification stays enabled (it used to be switched
        # off with verify=False, exposing the session cookies to interception).
        response = requests.get(newlink, cookies=cookies)
        if response.status_code != 200:
            raise InvalidScoreID(str(response.status_code))
        return response.content

    def download(self, id, filename, format="mp3", proxy=None):
        """Download score *id* in *format* and store it at *filename*.

        Args:
            id: score id.
            filename: destination path (``str`` or path-like).
            format: one of mp3, pdf, mid, mxl, mscz.
            proxy: optional ``host:port`` used as proxy for https requests.

        Raises:
            InvalidFileExtension: if *format* is not supported.
            urllib.error.HTTPError: if the server answers with an error status.
            urllib.error.URLError: if the connection (or proxy) fails.
            urllib.error.ContentTooShortError: if fewer bytes arrive than the
                ``Content-Length`` header announced.
        """
        import os
        import shutil
        from urllib.error import ContentTooShortError

        newlink = self._download_url(id, format)

        handlers = []
        if proxy:
            handlers.append(urllib.request.ProxyHandler({'https': proxy}))
        opener = urllib.request.build_opener(*handlers)
        cookie_string = (f"mu_browser_uni={self.mu_browser_uni};"
                         f"mu_user_new={self.mu_user}")
        opener.addheaders = [("cookie", cookie_string)]

        # The opener is used directly rather than installed globally with
        # install_opener(): a global opener would attach the session cookie
        # and this proxy to every later urllib.request.urlopen() call in the
        # process, including requests to unrelated hosts.

        # Data goes to a sibling ".part" file first and is moved into place
        # only after a complete transfer, so an interrupted download never
        # leaves a truncated file under the final name.
        partial = f"{filename}.part"
        completed = False
        try:
            with opener.open(newlink) as response, open(partial, "wb") as out:
                shutil.copyfileobj(response, out)
                headers = response.headers
                written = out.tell()
            expected = headers.get("Content-Length")
            if expected is not None and written < int(expected):
                raise ContentTooShortError(
                    f"retrieval incomplete: got only {written} out of "
                    f"{expected} bytes", (None, headers))
            os.replace(partial, filename)
            completed = True
        finally:
            if not completed and os.path.exists(partial):
                os.remove(partial)


### Trying proxy

https://codelike.pro/create-a-crawler-with-rotating-ip-proxy-in-python/

In [13]:
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import random

ua = UserAgent() # Source of random User-Agent strings (read via ua.random)
proxies = [] # Filled with {'ip': ..., 'port': ...} dicts scraped from the proxy list page

In [14]:
# Fetch the proxy list page, sending a random User-Agent header.
proxies_req = Request('https://www.sslproxies.org/',
                      headers={'User-Agent': ua.random})
proxies_doc = urlopen(proxies_req).read().decode('utf8')

# Locate the table that holds the proxies.
soup = BeautifulSoup(proxies_doc, 'html.parser')
proxies_table = soup.find(id='proxylisttable')

# The first two cells of every table row hold the IP address and the port.
for row in proxies_table.tbody.find_all('tr'):
    cells = row.find_all('td')
    proxies.append({'ip': cells[0].string, 'port': cells[1].string})

### More proxies

https://github.com/constverum/ProxyBroker

In [15]:
import asyncio
from proxybroker import Broker

# Proxies reported through the queue, stored as {'ip': ..., 'port': ...} dicts.
more_proxies = []


async def show(proxy_queue):
    """Consume *proxy_queue* until a ``None`` sentinel is read.

    Every proxy taken from the queue is printed and its ``host`` / ``port``
    attributes are appended to ``more_proxies``.
    """
    proxy = await proxy_queue.get()
    while proxy is not None:
        print('Found proxy: %s' % proxy)
        more_proxies.append({'ip': proxy.host, 'port': proxy.port})
        proxy = await proxy_queue.get()

# Queue handed to the broker and read by show().
proxy_queue = asyncio.Queue()
broker = Broker(proxy_queue)
# Top-level ``await`` is not valid in a plain Python module; this cell relies
# on the notebook's running event loop.
# NOTE(review): show() only returns after reading None from the queue —
# presumably the broker enqueues None once find() is done; confirm against
# the ProxyBroker documentation.
tasks = await asyncio.gather(
    broker.find(types=['HTTPS'], limit=400),
    show(proxy_queue))

Found proxy: <Proxy FR 0.15s [HTTPS] 54.39.97.250:3128>
Found proxy: <Proxy US 0.22s [HTTPS] 50.253.229.189:45725>
Found proxy: <Proxy JP 0.34s [HTTPS] 160.16.52.185:3128>
Found proxy: <Proxy US 0.35s [HTTPS] 173.249.0.209:3128>
Found proxy: <Proxy DE 0.36s [HTTPS] 94.130.126.94:8008>
Found proxy: <Proxy RU 0.37s [HTTPS] 94.242.58.14:1448>
Found proxy: <Proxy BR 0.40s [HTTPS] 200.255.122.170:8080>
Found proxy: <Proxy RU 0.46s [HTTPS] 77.232.153.248:60950>
Found proxy: <Proxy SE 0.46s [HTTPS] 46.246.38.90:3128>
Found proxy: <Proxy PL 0.48s [HTTPS] 78.11.118.157:3128>
Found proxy: <Proxy US 0.48s [HTTPS] 68.183.180.184:8080>
Found proxy: <Proxy US 0.49s [HTTPS] 157.230.33.37:1111>
Found proxy: <Proxy AE 0.50s [HTTPS] 185.132.179.109:8080>
Found proxy: <Proxy DE 0.51s [HTTPS] 95.88.12.230:3128>
Found proxy: <Proxy ID 0.52s [HTTPS] 180.178.98.149:35871>
Found proxy: <Proxy -- 0.56s [HTTPS] 85.209.163.68:8080>
Found proxy: <Proxy BR 0.61s [HTTPS] 186.249.213.95:37960>
Found proxy: <Proxy UA

Found proxy: <Proxy RU 2.24s [HTTPS] 95.140.19.34:40434>
Found proxy: <Proxy VN 0.46s [HTTPS] 42.115.221.58:3128>
Found proxy: <Proxy ID 0.76s [HTTPS] 114.57.33.214:8080>
Found proxy: <Proxy US 0.18s [HTTPS] 157.230.137.96:3128>
Found proxy: <Proxy GR 0.19s [HTTPS] 178.128.151.123:8080>
Found proxy: <Proxy BR 0.97s [HTTPS] 187.95.225.97:8080>
Found proxy: <Proxy US 0.41s [HTTPS] 198.11.178.14:8080>
Found proxy: <Proxy BR 1.41s [HTTPS] 179.96.17.77:8080>
Found proxy: <Proxy ID 0.74s [HTTPS] 103.9.124.210:8080>
Found proxy: <Proxy US 2.38s [HTTPS] 75.151.213.85:8080>
Found proxy: <Proxy BR 1.41s [HTTPS] 177.87.63.20:8080>
Found proxy: <Proxy BR 1.41s [HTTPS] 200.233.136.177:20183>
Found proxy: <Proxy BR 0.87s [HTTPS] 177.104.123.218:8181>
Found proxy: <Proxy EC 2.48s [HTTPS] 181.112.57.34:46757>
Found proxy: <Proxy CZ 0.33s [HTTPS] 46.33.98.94:8080>
Found proxy: <Proxy CZ 2.46s [HTTPS] 193.86.229.230:8080>
Found proxy: <Proxy ES 1.39s [HTTPS] 2.139.187.123:3128>
Found proxy: <Proxy BR 1.

Found proxy: <Proxy IN 0.63s [HTTPS] 43.241.28.55:8080>
Found proxy: <Proxy PL 1.83s [HTTPS] 46.227.244.144:8080>
Found proxy: <Proxy BG 2.82s [HTTPS] 78.90.204.39:80>
Found proxy: <Proxy CA 0.21s [HTTPS] 158.69.59.171:3128>
Found proxy: <Proxy SY 0.61s [HTTPS] 185.151.151.166:3128>
Found proxy: <Proxy TH 1.66s [HTTPS] 203.113.103.54:8080>
Found proxy: <Proxy US 0.30s [HTTPS] 47.254.23.63:3128>
Found proxy: <Proxy UA 1.51s [HTTPS] 178.210.213.77:3128>
Found proxy: <Proxy BR 2.82s [HTTPS] 138.99.90.113:8080>
Found proxy: <Proxy ID 1.53s [HTTPS] 115.85.83.197:8080>
Found proxy: <Proxy BR 0.43s [HTTPS] 200.195.28.21:3128>
Found proxy: <Proxy ID 1.09s [HTTPS] 103.248.25.99:53281>
Found proxy: <Proxy UA 1.87s [HTTPS] 195.242.179.23:8080>
Found proxy: <Proxy PH 1.14s [HTTPS] 122.53.62.139:8080>
Found proxy: <Proxy DE 0.32s [HTTPS] 88.99.242.130:3128>
Found proxy: <Proxy IR 1.43s [HTTPS] 46.225.128.250:8080>
Found proxy: <Proxy BR 1.67s [HTTPS] 186.237.221.33:8080>
Found proxy: <Proxy CZ 2.66

### Random proxy

In [16]:
# all_proxies = proxies + more_proxies
# NOTE: this is an alias, not a copy — deleting an entry from all_proxies
# also removes it from more_proxies.
all_proxies = more_proxies
def random_proxy():
    """Return a random valid index into ``all_proxies``.

    The index rather than the proxy itself is returned so that a caller can
    delete the entry when the proxy turns out not to work.
    """
    last_index = len(all_proxies) - 1
    return random.randint(0, last_index)

# No proxy selected yet; the proxy download loop picks one when it sees None.
proxy = None
# # Choose a random proxy
# proxy_index = random_proxy()
# proxy = all_proxies[proxy_index]
# proxy_url = proxy['ip'] + ':' + str(proxy['port']); proxy_url

## Actual scraping

In [17]:
# Score ids recorded after a failed download; the download loops skip them.
invalid_ids = []

In [18]:
# Proxies removed from all_proxies after a download through them failed.
deleted_proxies = []

In [19]:
# accounts.json holds a JSON array of [username, password] pairs.
with open('accounts.json', 'r') as fp:
    accounts = json.load(fp)

# Log every account in up front; the download loops pick one of these at random.
instances = [
    MusicalMusic(user_name, pass_word)
    for (user_name, pass_word) in accounts
]
# Account currently in use; None makes the download loop choose one.
instance = None

In [None]:
# Download every score that is not on disk yet, without a proxy.
for idx, link in enumerate(tqdm(links, total=len(links))):
    score_id = link['score_id']
    out_file = f"data/{score_id}.mxl"
    # Skip scores already downloaded or already known to fail.
    if Path(out_file).exists() or score_id in invalid_ids:
        continue
    # Switch to a randomly chosen account whenever the list index is a
    # multiple of 10, and after any failed download (instance is None).
    if idx % 10 == 0 or instance is None:
        instance_index = random.randint(0, len(instances) - 1)
        instance = instances[instance_index]
    try:
        print('Downloading score id:', score_id)
        instance.download(score_id, out_file, format='mxl')
    except Exception as e:
        print('Could not download id:', score_id)
        # Report the cause instead of discarding it, so that failures can be diagnosed.
        print('Error:', e)
        instance = None
    # Short random pause between requests.
    sleep(randint(1,2))
    

  0%|          | 0/1980 [00:00<?, ?it/s]

Downloading score id: 2483766


 18%|█▊        | 354/1980 [00:01<00:08, 190.71it/s]

Downloading score id: 4995819
Downloading score id: 845701
Downloading score id: 1062771


 18%|█▊        | 360/1980 [00:08<09:05,  2.97it/s] 

Downloading score id: 2983111


 18%|█▊        | 365/1980 [00:11<11:19,  2.38it/s]

Downloading score id: 2017516


 19%|█▊        | 371/1980 [00:15<13:04,  2.05it/s]

Downloading score id: 317706
Downloading score id: 2328941


 19%|█▉        | 374/1980 [00:20<22:00,  1.22it/s]

Downloading score id: 4811856


 19%|█▉        | 377/1980 [00:23<24:16,  1.10it/s]

Downloading score id: 3162316
Downloading score id: 4014446


 19%|█▉        | 379/1980 [00:28<35:10,  1.32s/it]

Downloading score id: 1836371


 19%|█▉        | 380/1980 [00:29<39:09,  1.47s/it]

Downloading score id: 1513551


 19%|█▉        | 381/1980 [00:34<1:03:37,  2.39s/it]

Downloading score id: 1822911


 19%|█▉        | 382/1980 [00:37<1:07:02,  2.52s/it]

Downloading score id: 1591631


 19%|█▉        | 383/1980 [00:40<1:15:21,  2.83s/it]

Downloading score id: 2649031


 19%|█▉        | 384/1980 [00:44<1:24:41,  3.18s/it]

Downloading score id: 5167634


 19%|█▉        | 385/1980 [00:47<1:20:09,  3.02s/it]

Downloading score id: 1939736


 19%|█▉        | 386/1980 [00:50<1:18:36,  2.96s/it]

Downloading score id: 5058536


 20%|█▉        | 387/1980 [00:54<1:24:15,  3.17s/it]

Downloading score id: 5275014


 20%|█▉        | 388/1980 [00:56<1:21:26,  3.07s/it]

Downloading score id: 620611


 20%|█▉        | 389/1980 [00:58<1:11:23,  2.69s/it]

Downloading score id: 2360031


 20%|█▉        | 390/1980 [01:01<1:14:54,  2.83s/it]

Downloading score id: 1678996


 20%|█▉        | 391/1980 [01:04<1:14:46,  2.82s/it]

Downloading score id: 1145646


 20%|█▉        | 392/1980 [01:07<1:14:02,  2.80s/it]

Downloading score id: 5301193


 20%|█▉        | 393/1980 [01:09<1:06:55,  2.53s/it]

Downloading score id: 4997446


In [38]:
# Number of proxies currently available to the download loop.
len(all_proxies)

500

In [39]:
import urllib.error

# Download every score that is not on disk yet, through rotating proxies.
for idx, link in enumerate(tqdm(links, total=len(links))):
    score_id = link['score_id']
    out_file = f"data/{score_id}.mxl"
    # Skip scores already downloaded or already known to fail.
    if Path(out_file).exists() or score_id in invalid_ids:
        continue
    # Stop cleanly once every proxy has been discarded (random.randint would
    # otherwise raise ValueError on the empty list).
    if not all_proxies:
        print('No working proxies left; stopping.')
        break
    # Pick a new random proxy and account whenever the list index is a
    # multiple of 10, and after any failure.
    if idx % 10 == 0 or proxy is None or instance is None:
        proxy_index = random.randint(0, len(all_proxies) - 1)
        proxy = all_proxies[proxy_index]
        proxy_url = proxy['ip'] + ':' + str(proxy['port'])
        instance_index = random.randint(0, len(instances) - 1)
        instance = instances[instance_index]
    try:
        print('Downloading score id:', score_id)
        instance.download(score_id, out_file, format='mxl', proxy=proxy_url)
    except Exception as e:
        print('Could not download id:', score_id)
        print('Error:', e)
        # out_file did not exist before this attempt, so anything present now
        # is a truncated download; remove it or later runs would skip the score.
        if Path(out_file).exists():
            Path(out_file).unlink()
        if isinstance(e, urllib.error.HTTPError):
            # The site answered with an error status: the proxy works, the
            # score (or account) is the problem.
            invalid_ids.append(score_id)
        else:
            # Transport-level failure (reset, TLS, tunnel): blame the proxy
            # and leave the score eligible for a later retry.
            deleted_proxies.append(all_proxies[proxy_index])
            del all_proxies[proxy_index]
            print('Proxy ' + proxy['ip'] + ':' + str(proxy['port']) + ' deleted.')
            proxy = None
        print(instance.username)
        instance = None
    


  0%|          | 0/1980 [00:00<?, ?it/s][A

Downloading score id: 622771



 17%|█▋        | 344/1980 [00:03<00:17, 93.87it/s][A

Downloading score id: 1506676
Downloading score id: 1014291
Downloading score id: 207721



 18%|█▊        | 347/1980 [00:15<31:38,  1.16s/it][A

Downloading score id: 4702361



 18%|█▊        | 348/1980 [00:18<50:22,  1.85s/it][A

Downloading score id: 2839501



 18%|█▊        | 349/1980 [00:21<1:02:08,  2.29s/it][A

Downloading score id: 4245846



 18%|█▊        | 350/1980 [00:28<1:32:39,  3.41s/it][A

Downloading score id: 3349681



 18%|█▊        | 351/1980 [00:39<2:40:14,  5.90s/it][A

Downloading score id: 1913131



 18%|█▊        | 352/1980 [02:10<14:11:50, 31.39s/it][A

Downloading score id: 3043386



 18%|█▊        | 353/1980 [02:17<10:50:26, 23.99s/it][A

Downloading score id: 2483766



 18%|█▊        | 354/1980 [02:24<8:32:24, 18.91s/it] [A

Could not download id: 2483766
Error: <urlopen error [Errno 104] Connection reset by peer>
Proxy 188.73.8.12:48353 deleted.
jurassictech
Downloading score id: 4995819



 18%|█▊        | 355/1980 [02:25<6:09:02, 13.63s/it][A

Could not download id: 4995819
Error: <urlopen error [Errno 104] Connection reset by peer>
Proxy 185.70.184.249:3128 deleted.
cwkeam
Downloading score id: 4801654



 18%|█▊        | 356/1980 [02:41<6:23:33, 14.17s/it][A

Downloading score id: 4800200



 18%|█▊        | 357/1980 [02:49<5:39:57, 12.57s/it][A

Downloading score id: 845701



 18%|█▊        | 358/1980 [02:54<4:34:39, 10.16s/it][A

Could not download id: 845701
Error: <urlopen error [Errno 104] Connection reset by peer>
Proxy 82.177.38.187:8080 deleted.
tulebo
Downloading score id: 1062771



 18%|█▊        | 359/1980 [02:55<3:18:49,  7.36s/it][A

Could not download id: 1062771
Error: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1051)>
Proxy 91.105.173.121:8080 deleted.
awesomesheets
Downloading score id: 3902706



 18%|█▊        | 360/1980 [02:57<2:39:50,  5.92s/it][A

Downloading score id: 2582191



 18%|█▊        | 361/1980 [03:06<2:57:52,  6.59s/it][A

Downloading score id: 3670316



 18%|█▊        | 362/1980 [03:13<3:05:58,  6.90s/it][A

Downloading score id: 2983111



 18%|█▊        | 363/1980 [03:19<2:56:44,  6.56s/it][A

Could not download id: 2983111
Error: <urlopen error Remote end closed connection without response>
Proxy 46.167.198.74:53281 deleted.
cwkeam
Downloading score id: 1532561



 18%|█▊        | 364/1980 [03:22<2:26:34,  5.44s/it][A

Downloading score id: 1125831



 18%|█▊        | 365/1980 [03:25<2:05:31,  4.66s/it][A

Downloading score id: 1077136



 18%|█▊        | 366/1980 [03:27<1:46:42,  3.97s/it][A

Downloading score id: 2865091



 19%|█▊        | 367/1980 [03:30<1:36:20,  3.58s/it][A

Downloading score id: 1820281



 19%|█▊        | 368/1980 [03:32<1:26:48,  3.23s/it][A

Downloading score id: 4303041



 19%|█▊        | 369/1980 [03:34<1:20:04,  2.98s/it][A

Downloading score id: 3955816



 19%|█▊        | 370/1980 [03:37<1:15:06,  2.80s/it][A

Downloading score id: 2017516
Could not download id: 2017516
Error: <urlopen error Remote end closed connection without response>
Proxy 68.107.176.159:80 deleted.
jurassictech
Downloading score id: 317706



 19%|█▉        | 372/1980 [03:37<54:21,  2.03s/it]  [A

Could not download id: 317706
Error: <urlopen error Tunnel connection failed: 400 Bad Request>
Proxy 61.219.134.55:8080 deleted.
kate.zuo
Downloading score id: 3049921



 19%|█▉        | 373/1980 [04:47<9:58:02, 22.33s/it][A

Downloading score id: 2328941



 19%|█▉        | 374/1980 [06:00<16:47:12, 37.63s/it][A

Could not download id: 2328941
Error: <urlopen error [Errno 104] Connection reset by peer>
Proxy 187.32.4.66:8080 deleted.
tulebo
Downloading score id: 1389546



 19%|█▉        | 375/1980 [06:13<13:27:08, 30.17s/it][A

Downloading score id: 1042331



 19%|█▉        | 376/1980 [06:19<10:15:39, 23.03s/it][A

Downloading score id: 4811856


KeyboardInterrupt: 

### Second run

In [23]:
# Fetch the proxy list page, sending a random User-Agent header.
proxies_req = Request('https://www.sslproxies.org/',
                      headers={'User-Agent': ua.random})
proxies_doc = urlopen(proxies_req).read().decode('utf8')

# Locate the table that holds the proxies.
soup = BeautifulSoup(proxies_doc, 'html.parser')
proxies_table = soup.find(id='proxylisttable')

# The first two cells of every table row hold the IP address and the port.
for row in proxies_table.tbody.find_all('tr'):
    cells = row.find_all('td')
    proxies.append({'ip': cells[0].string, 'port': cells[1].string})

HTTPError: HTTP Error 403: Forbidden

In [None]:
import asyncio
from proxybroker import Broker

# Proxies reported through the queue, stored as {'ip': ..., 'port': ...} dicts.
more_proxies = []


async def show(proxy_queue):
    """Consume *proxy_queue* until a ``None`` sentinel is read.

    Every proxy taken from the queue is printed and its ``host`` / ``port``
    attributes are appended to ``more_proxies``.
    """
    proxy = await proxy_queue.get()
    while proxy is not None:
        print('Found proxy: %s' % proxy)
        more_proxies.append({'ip': proxy.host, 'port': proxy.port})
        proxy = await proxy_queue.get()

# Queue handed to the broker and read by show().
proxy_queue = asyncio.Queue()
broker = Broker(proxy_queue)
# Top-level ``await`` is not valid in a plain Python module; this cell relies
# on the notebook's running event loop.
# This run asks for both 'HTTP' and 'HTTPS' proxy types.
# NOTE(review): show() only returns after reading None from the queue —
# presumably the broker enqueues None once find() is done; confirm against
# the ProxyBroker documentation.
tasks = await asyncio.gather(
    broker.find(types=['HTTP', 'HTTPS'], limit=400),
    show(proxy_queue))

In [None]:
# Concatenation builds a new list, so deleting entries from all_proxies does
# not modify proxies or more_proxies.
all_proxies = proxies + more_proxies
def random_proxy():
    """Return a random valid index into ``all_proxies``.

    The index rather than the proxy itself is returned so that a caller can
    delete the entry when the proxy turns out not to work.
    """
    last_index = len(all_proxies) - 1
    return random.randint(0, last_index)

# No proxy selected yet; the proxy download loop picks one when it sees None.
proxy = None
# # Choose a random proxy
# proxy_index = random_proxy()
# proxy = all_proxies[proxy_index]
# proxy_url = proxy['ip'] + ':' + str(proxy['port']); proxy_url

In [None]:
# Switch to the cached listing metadata stored in musescore_movie.json.
json_file = Path('musescore_movie.json')

In [None]:
# Load the cached listing metadata when the JSON file is present.
if json_file.exists():
    with json_file.open('r') as fp:
        links = json.load(fp)
        

In [None]:
import urllib.error

# Download every score that is not on disk yet, through rotating proxies.
for idx, link in enumerate(tqdm(links, total=len(links))):
    score_id = link['score_id']
    out_file = f"data/{score_id}.mxl"
    # Skip scores already downloaded or already known to fail.
    if Path(out_file).exists() or score_id in invalid_ids:
        continue
    # Stop cleanly once every proxy has been discarded (random.randint would
    # otherwise raise ValueError on the empty list).
    if not all_proxies:
        print('No working proxies left; stopping.')
        break
    # Pick a new random proxy and account whenever the list index is a
    # multiple of 10, and after any failure.
    if idx % 10 == 0 or proxy is None or instance is None:
        proxy_index = random.randint(0, len(all_proxies) - 1)
        proxy = all_proxies[proxy_index]
        proxy_url = proxy['ip'] + ':' + str(proxy['port'])
        instance_index = random.randint(0, len(instances) - 1)
        instance = instances[instance_index]
    try:
        print('Downloading score id:', score_id)
        instance.download(score_id, out_file, format='mxl', proxy=proxy_url)
    except Exception as e:
        print('Could not download id:', score_id)
        print('Error:', e)
        # out_file did not exist before this attempt, so anything present now
        # is a truncated download; remove it or later runs would skip the score.
        if Path(out_file).exists():
            Path(out_file).unlink()
        if isinstance(e, urllib.error.HTTPError):
            # The site answered with an error status: the proxy works, the
            # score (or account) is the problem.
            invalid_ids.append(score_id)
        else:
            # Transport-level failure (reset, TLS, tunnel): blame the proxy
            # and leave the score eligible for a later retry.
            deleted_proxies.append(all_proxies[proxy_index])
            del all_proxies[proxy_index]
            print('Proxy ' + proxy['ip'] + ':' + str(proxy['port']) + ' deleted.')
            proxy = None
        print(instance.username)
        instance = None
    # Short random pause between requests.
    sleep(randint(1,2))
    