In [4]:
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

In [2]:
# Mount Google Drive at /content/drive (Colab-only import; the paths used
# in the following cells live under this mount point).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Shared-drive locations; edit both if the project folder lives elsewhere.
base_dir = '/content/drive/Shareddrives/dh401_digital_musicology/'
root_dir = '/content/drive/Shareddrives/dh401_digital_musicology/data/'

# Make the data folder the working directory, so relative paths used later
# in the notebook resolve against it.
os.chdir(root_dir)

In [6]:
# One container per sonata. Each maps the remainder of a CSV file name
# (everything after the "<sonata>" prefix and the single character that
# follows it, presumably a separator) to the DataFrame loaded from it.
s_11_3 = {}
s_12_1 = {}
s_12_2 = {}
s_12_3 = {}
s_8_1 = {}

# File-name prefix -> container that collects the matching files.
sonata_tables = {
  "11-3": s_11_3,
  "12-1": s_12_1,
  "12-2": s_12_2,
  "12-3": s_12_3,
  "8-1": s_8_1,
}

# os.listdir() returns entries in arbitrary order; sorting makes the insertion
# order of the dictionaries (and anything that depends on it) reproducible.
for filename in sorted(os.listdir(root_dir)):
  for prefix, table in sonata_tables.items():
    if filename.startswith(prefix):
      file_path = os.path.join(root_dir, filename)
      df = pd.read_csv(file_path)
      # Drop the prefix plus the one character right after it from the key.
      table[filename[len(prefix) + 1:]] = df
      break  # the prefixes are mutually exclusive, no need to test the rest

In [15]:
def normalize_durations(dictt):
  """Centre the 'relative duration' column of every DataFrame on zero.

  Each frame in ``dictt`` is updated IN PLACE: its 'relative duration'
  column is replaced by the column minus its own mean.

  Parameters
  ----------
  dictt : dict
      Maps any key to a DataFrame that has a 'relative duration' column.

  Returns
  -------
  list
      One centred 'relative duration' Series per frame, in the iteration
      order of ``dictt``. An empty dict yields an empty list.
  """
  relative_duration = []
  for frame in dictt.values():
    centered = frame['relative duration'] - frame['relative duration'].mean()
    frame['relative duration'] = centered
    # Collect the result so the returned list is actually populated.
    relative_duration.append(centered)
  return relative_duration


In [16]:
# Centre every sonata's durations (each call also updates the frames in place).
(
  relative_duration_11_3,
  relative_duration_8_1,
  relative_duration_12_1,
  relative_duration_12_2,
  relative_duration_12_3,
) = (
  normalize_durations(table)
  for table in (s_11_3, s_8_1, s_12_1, s_12_2, s_12_3)
)

In [17]:
def extract_cdf(data):
  """Build an empirical CDF of 'relative duration' for every beat number.

  Parameters
  ----------
  data : dict
      Maps any key to a DataFrame with 'beat_number' and
      'relative duration' columns. Beat numbers are treated as 1-based.

  Returns
  -------
  (cdfs, sorted_values) : tuple of dict
      Both are keyed by the 0-based beat index ``i`` (beat number ``i + 1``).
      ``sorted_values[i]`` is the ascending list of all durations observed on
      that beat across every frame; ``cdfs[i][k]`` is the fraction of those
      values that are <= ``sorted_values[i][k]``.
      Empty ``data`` yields two empty dicts.
  """
  cdfs = {}
  sorted_values = {}
  if not data:
    return cdfs, sorted_values

  frames = [pd.DataFrame(value) for value in data.values()]
  # Take the largest beat number over ALL frames so the result does not
  # depend on which frame happens to come first in the dict.
  n_beats = int(max(frame['beat_number'].max() for frame in frames))

  for i in range(n_beats):
    values = []
    for frame in frames:
      on_beat = frame.loc[frame['beat_number'] == i + 1, 'relative duration']
      values.extend(on_beat.tolist())
    sorted_values[i] = sorted(values)
    # side='right' counts ties, so equal values share the same CDF height.
    cdfs[i] = (np.searchsorted(sorted_values[i], sorted_values[i], side='right') / len(sorted_values[i]))

  return cdfs, sorted_values

In [18]:
# Per-beat empirical CDFs and their sorted sample values, one pair per sonata.
(
  (cdfs_11_3, sorted_11_3),
  (cdfs_12_1, sorted_12_1),
  (cdfs_12_2, sorted_12_2),
  (cdfs_12_3, sorted_12_3),
  (cdfs_8_1, sorted_8_1),
) = map(extract_cdf, (s_11_3, s_12_1, s_12_2, s_12_3, s_8_1))

In [19]:
def _timing_profile(time_sig, sonata):
  """Pick the sample tables used for a time signature.

  Returns ``(sorted_values, cdfs, cycle)`` where the first two are the
  module-level per-beat tables and ``cycle`` is the modulus applied to the
  running beat index, or ``None`` when the time signature is not handled.
  """
  if time_sig == '4/4':
    if sonata == '12-2':
      return sorted_12_2, cdfs_12_2, 4
    return sorted_8_1, cdfs_8_1, 4
  if time_sig == '3/4':
    return sorted_12_1, cdfs_12_1, 3
  if time_sig == '2/4':
    return sorted_11_3, cdfs_11_3, 2
  if time_sig == '6/8':
    return sorted_12_3, cdfs_12_3, 2
  return None


def _sample_deviation(values, cdf, u):
  """Inverse-CDF lookup: the smallest value whose CDF height is >= ``u``.

  ``np.searchsorted(cdf, u)`` already is the index of that value, so no
  offset is applied; subtracting one would wrap to index -1 for small ``u``
  and give tied values the wrong probability.
  """
  position = np.searchsorted(cdf, u)
  # Clamp defensively in case ``u`` exceeds the last CDF entry.
  return values[min(position, len(values) - 1)]


def timing_function(midi_annotations_path, filename, sonata = '12-1', seed = None):
  """Re-time an annotation file with sampled per-beat duration deviations.

  The annotations are loaded with ``preprocessing``. Row by row, the interval
  to the next row is scaled by ``1 + weight``, where ``weight`` is drawn from
  the empirical distribution matching the current time signature and beat
  position, and the accumulated time replaces the row's 'time'. The first row
  is placed at time 0. The result is written with ``postprocessing``.

  Parameters
  ----------
  midi_annotations_path : str
      Path of the annotation file handed to ``preprocessing``.
  filename : str
      Output path without extension, handed to ``postprocessing``.
  sonata : str
      Only consulted for '4/4': '12-2' selects the 12-2 tables, any other
      value selects the 8-1 tables.
  seed : int or None
      When given, the random draws come from ``np.random.default_rng(seed)``
      and are reproducible. ``None`` keeps using NumPy's global random state.

  Returns
  -------
  Whatever ``postprocessing`` returns (the path of the written file).
  """
  midi_annotations = preprocessing(midi_annotations_path)
  n_rows = len(midi_annotations)
  # Accumulated times are fractional; make sure the column can hold them.
  midi_annotations['time'] = midi_annotations['time'].astype(float)

  if seed is None:
    rdm = np.random.uniform(size = n_rows)
  else:
    rdm = np.random.default_rng(seed).uniform(size = n_rows)

  time_sig = None
  elapsed = 0
  index = 0
  weight = 0
  # NOTE: ``i`` is used both as a label and as a position, which relies on the
  # default 0..n-1 index produced by ``preprocessing``.
  for i, row in midi_annotations.iterrows():
    # pd.notna treats both None and NaN as "no time signature on this row".
    if pd.notna(row['metrics']):
      time_sig = row['metrics']
      index = 0  # a new time signature restarts the beat count

    profile = _timing_profile(time_sig, sonata)
    if profile is not None:
      values, cdfs, cycle = profile
      beat = index % cycle
      weight = _sample_deviation(values[beat], cdfs[beat], rdm[i])
    # Otherwise the previous weight (initially 0) is kept, as before.

    midi_annotations.loc[i, 'time'] = elapsed
    if i < n_rows - 1:
      # Row i+1 has not been overwritten yet and ``row`` is a snapshot, so
      # this difference is the ORIGINAL interval between the two rows.
      elapsed += (midi_annotations.loc[i+1, 'time']-row['time'])*(1+weight)
    index += 1

  return postprocessing(midi_annotations, filename)

In [20]:
def preprocessing(path):
  """Load a tab-separated annotation file and split its info field.

  The file is read without a header as three columns (time, time1, info).
  'time1' is dropped and 'info' is split on commas into 'info', 'metrics'
  and 'number'.

  Parameters
  ----------
  path : str
      Path of the annotation file.

  Returns
  -------
  pandas.DataFrame
      Columns 'time', 'info', 'metrics', 'number'. Rows whose info field has
      fewer than three comma-separated parts hold missing values in the
      trailing columns.
  """
  data = pd.read_csv(path, sep='\t', header=None, names=['time', 'time1', 'info'])
  data = data.drop(columns=['time1'])
  # n=2 caps the split at three parts and reindex pads up to three columns, so
  # the assignment works even when no row (or every row) has all three fields.
  parts = data['info'].str.split(',', n=2, expand=True).reindex(columns=range(3))
  data = data.assign(info=parts[0], metrics=parts[1], number=parts[2])
  return data

In [21]:
def postprocessing(data, filename):
    """Write annotations back to a tab-separated text file.

    The 'info', 'metrics' and 'number' columns are re-joined with commas
    (missing parts are skipped) and the 'time' column is written twice, giving
    the three output columns time, time1, info without header or index.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns 'time', 'info', 'metrics' and 'number'.
        It is not modified.
    filename : str
        Output path without extension; '.txt' is appended. A missing parent
        directory is created.

    Returns
    -------
    str
        The path that was written.
    """
    info_parts = data[['info', 'metrics', 'number']]
    merged_info = [','.join(parts.dropna()) for _, parts in info_parts.iterrows()]

    # Build a fresh frame so the caller's 'info' column is left untouched;
    # otherwise a second call would append the metrics/number fields again.
    out = data[['time']].copy()
    out['time1'] = out['time']
    out['info'] = merged_info

    path_to_save = filename + '.txt'
    parent = os.path.dirname(path_to_save)
    if parent:
        os.makedirs(parent, exist_ok=True)
    out.to_csv(path_to_save, sep='\t', index=False, header=False)
    return path_to_save

In [22]:
# Produce the re-timed annotation file for sonata 12-1.
annotations_12_1 = base_dir+'Mozart_Piano_Sonatas/12-1/midi_score_annotations.txt'
timing_function(annotations_12_1, filename='generated/12-1', sonata='12-1')