In [14]:
import torch, torchvision, PIL, numpy as np
import pathlib
import PIL
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from tqdm.auto import tqdm
import pandas as pd, csv
import os

In [2]:
# You can get more datasets here https://repository.cloudlab.zhaw.ch/artifactory/deepscores/archives/2017/
!wget https://tuggeluk.github.io/class_names/class_names.csv
!wget https://repository.cloudlab.zhaw.ch/artifactory/deepscores/classification/DeepScores2017_classification.zip


--2021-03-26 23:55:47--  https://tuggeluk.github.io/class_names/class_names.csv
Resolving tuggeluk.github.io (tuggeluk.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to tuggeluk.github.io (tuggeluk.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2079 (2.0K) [text/csv]
Saving to: ‘class_names.csv’


2021-03-26 23:55:47 (35.1 MB/s) - ‘class_names.csv’ saved [2079/2079]

--2021-03-26 23:55:47--  https://repository.cloudlab.zhaw.ch/artifactory/deepscores/classification/DeepScores2017_classification.zip
Resolving repository.cloudlab.zhaw.ch (repository.cloudlab.zhaw.ch)... 160.85.254.254
Connecting to repository.cloudlab.zhaw.ch (repository.cloudlab.zhaw.ch)|160.85.254.254|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 422633859 (403M) [application/zip]
Saving to: ‘DeepScores2017_classification.zip’


2021-03-26 23:56:07 (20.9 MB/s) - ‘DeepScores2017_classification.zip’ saved [42

In [3]:
!unzip DeepScores2017_classification.zip -d music_dataset/

Archive:  DeepScores2017_classification.zip
   creating: music_dataset/accidentalDoubleFlat/
   creating: music_dataset/accidentalDoubleSharp/
   creating: music_dataset/accidentalFlat/
   creating: music_dataset/accidentalFlatSmall/
   creating: music_dataset/accidentalNatural/
   creating: music_dataset/accidentalNaturalSmall/
   creating: music_dataset/accidentalSharp/
   creating: music_dataset/accidentalSharpSmall/
   creating: music_dataset/arpeggiato/
   creating: music_dataset/articAccentAbove/
   creating: music_dataset/articAccentBelow/
   creating: music_dataset/articMarcatoAbove/
   creating: music_dataset/articMarcatoBelow/
   creating: music_dataset/articStaccatissimoAbove/
   creating: music_dataset/articStaccatissimoBelow/
   creating: music_dataset/articStaccatoAbove/
   creating: music_dataset/articStaccatoBelow/
   creating: music_dataset/articTenutoAbove/
   creating: music_dataset/articTenutoBelow/
   creating: music_dataset/augmentationDot/
   creating: music_data

In [4]:
class ObjectDetectionDataset:
    def __init__(self, root_dir, transform=None, transform_label=None):
        root_dir = pathlib.Path(root_dir).resolve()
        self.root_dir = root_dir
        self.transform = transform
        self.transform_label = transform_label
        with open(root_dir / 'classes.txt') as f:
          # for every line in the data set and strip removes any spaces and leading character
            self.classes = [w.strip() for w in f] 
        # Now split all the dimensions and get their floating point numbers from the file. 
        def parse_box(line):
            kls, cx, cy, sx, sy = line.split()
            return int(kls), float(cx), float(cy), float(sx), float(sy)
        # opens the file and for each line in the file, it calls parse_box. 
        def parse_boxes(fn):
            with open(fn) as f:
                return [parse_box(l) for l in f]
        # parse the image file name and the bounding box 
        self.fns_labels = [(imgfn, parse_boxes(imgfn.with_suffix('.txt')))
                           for imgfn in sorted(root_dir.glob('*.jpg'))]

    def __len__(self):
        # return the length of the dataset
        return len(self.fns_labels)

    def __getitem__(self, i):
        if torch.is_tensor(i):
            i = i.item()
        imgfn, label = self.fns_labels[i]
        # read the image from PILLOW library
        img = PIL.Image.open(imgfn)
        if self.transform:
            # apply the transform to the image if it exists!
            img = self.transform(img)
        if self.transform_label:
            # same with the label, transform it! 
            label = self.transform_label(label)
        return (img, label)

In [8]:
classNames = pd.read_csv('/content/class_names.csv')

In [9]:
classNames

Unnamed: 0,0,brace
0,1,repeatDot
1,2,segno
2,3,coda
3,4,gClef
4,5,cClefAlto
...,...,...
118,119,fingering1
119,120,fingering2
120,121,fingering3
121,122,fingering4


In [10]:
!wget https://repository.cloudlab.zhaw.ch/artifactory/deepscores/ds_extended.zip

--2021-03-27 00:06:44--  https://repository.cloudlab.zhaw.ch/artifactory/deepscores/ds_extended.zip
Resolving repository.cloudlab.zhaw.ch (repository.cloudlab.zhaw.ch)... 160.85.254.254
Connecting to repository.cloudlab.zhaw.ch (repository.cloudlab.zhaw.ch)|160.85.254.254|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 860045718 (820M) [application/zip]
Saving to: ‘ds_extended.zip’


2021-03-27 00:07:38 (15.5 MB/s) - ‘ds_extended.zip’ saved [860045718/860045718]



In [11]:
!unzip '/content/ds_extended.zip'

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  inflating: deep_scores_dense_extended/images_png/lg-143361038-aug-gutenberg1939--page-3.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-beethoven--page-13.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-beethoven--page-59.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-emmentaler--page-13.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-emmentaler--page-59.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-gutenberg1939--page-13.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-gutenberg1939--page-59.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-lilyjazz--page-13.png  
  inflating: deep_scores_dense_extended/images_png/lg-144423495-aug-lilyjazz--page-59.png  
  inflating: deep_scores_dense_extended/images_png/lg-144604277-aug-beethoven--page-3.png  
  inflating: deep_s

In [13]:
## dataset is done here

In [None]:
x = 200
y = 200 #new size of the image

In [25]:
HashTableTrue = {}
datasetHash = pd.read_csv('/content/class_names.csv')

In [26]:
datasetHash

Unnamed: 0,0,brace
0,1,repeatDot
1,2,segno
2,3,coda
3,4,gClef
4,5,cClefAlto
...,...,...
118,119,fingering1
119,120,fingering2
120,121,fingering3
121,122,fingering4


In [27]:
for i in range(len(datasetHash)):
  HashTableTrue[datasetHash['brace'][i]] = datasetHash['0'][i]

In [28]:
HashTableTrue

{'accidentalDoubleFlat': 57,
 'accidentalDoubleSharp': 56,
 'accidentalFlat': 50,
 'accidentalFlatSmall': 51,
 'accidentalNatural': 52,
 'accidentalNaturalSmall': 53,
 'accidentalSharp': 54,
 'accidentalSharpSmall': 55,
 'arpeggiato': 113,
 'articAccentAbove': 61,
 'articAccentBelow': 62,
 'articMarcatoAbove': 69,
 'articMarcatoBelow': 70,
 'articStaccatissimoAbove': 67,
 'articStaccatissimoBelow': 68,
 'articStaccatoAbove': 63,
 'articStaccatoBelow': 64,
 'articTenutoAbove': 65,
 'articTenutoBelow': 66,
 'augmentationDot': 37,
 'cClefAlto': 5,
 'cClefAltoChange': 10,
 'cClefTenor': 6,
 'cClefTenorChange': 11,
 'caesura': 73,
 'clef15': 14,
 'clef8': 13,
 'coda': 3,
 'dynamicFF': 95,
 'dynamicFFF': 96,
 'dynamicFFFF': 97,
 'dynamicFFFFF': 98,
 'dynamicForte': 88,
 'dynamicFortePiano': 99,
 'dynamicMF': 94,
 'dynamicMP': 93,
 'dynamicMezzo': 87,
 'dynamicPP': 92,
 'dynamicPPP': 91,
 'dynamicPPPP': 90,
 'dynamicPPPPP': 89,
 'dynamicPiano': 86,
 'dynamicRinforzando2': 102,
 'dynamicSforza

In [15]:
os.mkdir('/content/User') #where users have their image uploaded
xform = torchvision.transforms.Compose([torchvision.transforms.Resize((x, y)), torchvision.transforms.ToTensor()])

for dataset_i in namefordatasetSet:
  somename = ObjectDetectionDataset(dataset_i, transform = xform)

In [None]:
n_all_det = len(SOMEDATASET)
n_used_det = int(1 * n_all_det)
n_train_det = int(0.8 * n_used_det)
n_test_det = n_used_det - n_train_det
rng_det = torch.Generator().manual_seed(291)
dataset_train_det, dataset_test_det, _ = torch.utils.data.random_split(SOMEDATASET, [n_train_det, n_test_det, n_all_det-n_train_det-n_test_det], rng_det)