In [3]:
from magenta.music import musicxml_reader
from magenta.music import note_sequence_io
from magenta.music import sequences_lib
from magenta.music import melodies_lib
from magenta.music import Melody
from magenta.music import musicxml_parser

from magenta.music import lead_sheets_lib, chords_lib

In [4]:
import os
import copy
import sys

In [5]:
from magenta.protobuf import music_pb2

# Shortcut to CHORD_SYMBOL annotation type.
CHORD_SYMBOL = music_pb2.NoteSequence.TextAnnotation.CHORD_SYMBOL


class MusicXMLConversionError(Exception):
  """MusicXML conversion error handler."""
  pass


def my_musicxml_to_sequence_proto(musicxml_document):
  """Convert MusicXML file contents to a tensorflow.magenta.NoteSequence proto.

  Converts a MusicXML file encoded as a string into a
  tensorflow.magenta.NoteSequence proto.

  Args:
    musicxml_document: A parsed MusicXML file.
        This file has been parsed by class MusicXMLDocument

  Returns:
    A tensorflow.magenta.NoteSequence proto.

  Raises:
    MusicXMLConversionError: An error occurred when parsing the MusicXML file.
  """
  sequence = music_pb2.NoteSequence()

  # Standard MusicXML fields.
  sequence.source_info.source_type = (
      music_pb2.NoteSequence.SourceInfo.SCORE_BASED)
  sequence.source_info.encoding_type = (
      music_pb2.NoteSequence.SourceInfo.MUSIC_XML)
  sequence.source_info.parser = (
      music_pb2.NoteSequence.SourceInfo.MAGENTA_MUSIC_XML)

  # Populate header.
  sequence.ticks_per_quarter = musicxml_document.midi_resolution

  # Populate time signatures.
  musicxml_time_signatures = musicxml_document.get_time_signatures()
  for musicxml_time_signature in musicxml_time_signatures:
    time_signature = sequence.time_signatures.add()
    time_signature.time = musicxml_time_signature.time_position
    time_signature.numerator = musicxml_time_signature.numerator
    time_signature.denominator = musicxml_time_signature.denominator

  # Populate key signatures.
  musicxml_key_signatures = musicxml_document.get_key_signatures()
  for musicxml_key in musicxml_key_signatures:
    key_signature = sequence.key_signatures.add()
    key_signature.time = musicxml_key.time_position
    # The Key enum in music.proto does NOT follow MIDI / MusicXML specs
    # Convert from MIDI / MusicXML key to music.proto key
    music_proto_keys = [11, 6, 1, 8, 3, 10, 5, 0, 7, 2, 9, 4, 11, 6, 1]
    key_signature.key = music_proto_keys[musicxml_key.key + 7]
    if musicxml_key.mode == "major":
      key_signature.mode = key_signature.MAJOR
    elif musicxml_key.mode == "minor":
      key_signature.mode = key_signature.MINOR

  # Populate tempo changes.
  musicxml_tempos = musicxml_document.get_tempos()
  for musicxml_tempo in musicxml_tempos:
    tempo = sequence.tempos.add()
    tempo.time = musicxml_tempo.time_position
    tempo.qpm = musicxml_tempo.qpm

  # Populate notes from each MusicXML part across all voices
  # Unlike MIDI import, notes are not sorted
  sequence.total_time = musicxml_document.total_time_secs
  for part_index, musicxml_part in enumerate(musicxml_document.parts):
    part_info = sequence.part_infos.add()
    part_info.part = part_index
    part_info.name = musicxml_part.score_part.part_name

    for musicxml_measure in musicxml_part.measures:
      for musicxml_note in musicxml_measure.notes:
        if not musicxml_note.is_rest:
          note = sequence.notes.add()
          note.part = part_index
          note.voice = musicxml_note.voice
          note.instrument = musicxml_note.midi_channel
          note.program = musicxml_note.midi_program
          note.start_time = musicxml_note.note_duration.time_position

          # Fix negative time errors from incorrect MusicXML
          if note.start_time < 0:
            note.start_time = 0

          note.end_time = note.start_time + musicxml_note.note_duration.seconds
          note.pitch = musicxml_note.pitch[1]  # Index 1 = MIDI pitch number
          note.velocity = musicxml_note.velocity

          durationratio = musicxml_note.note_duration.duration_ratio()
          note.numerator = durationratio.numerator
          note.denominator = durationratio.denominator
        else:
          note = sequence.notes.add()
          note.part = part_index
          note.voice = musicxml_note.voice
          note.instrument = musicxml_note.midi_channel
          note.program = musicxml_note.midi_program
          note.start_time = musicxml_note.note_duration.time_position

          # Fix negative time errors from incorrect MusicXML
          if note.start_time < 0:
            note.start_time = 0

          note.end_time = note.start_time + musicxml_note.note_duration.seconds
          note.pitch = 0  # Index 1 = MIDI pitch number
          note.velocity = musicxml_note.velocity

          durationratio = musicxml_note.note_duration.duration_ratio()
          note.numerator = durationratio.numerator
          note.denominator = durationratio.denominator

  musicxml_chord_symbols = musicxml_document.get_chord_symbols()
  for musicxml_chord_symbol in musicxml_chord_symbols:
    text_annotation = sequence.text_annotations.add()
    text_annotation.time = musicxml_chord_symbol.time_position
    text_annotation.text = musicxml_chord_symbol.get_figure_string()
    text_annotation.annotation_type = CHORD_SYMBOL

  return sequence


def my_musicxml_file_to_sequence_proto(musicxml_file):
  """Converts a MusicXML file to a tensorflow.magenta.NoteSequence proto.

  Args:
    musicxml_file: A string path to a MusicXML file.

  Returns:
    A tensorflow.magenta.Sequence proto.

  Raises:
    MusicXMLConversionError: Invalid musicxml_file.
  """
  try:
    musicxml_document = musicxml_parser.MusicXMLDocument(musicxml_file)
  except musicxml_parser.MusicXMLParseException as e:
    raise MusicXMLConversionError(e)
  return my_musicxml_to_sequence_proto(musicxml_document)


In [6]:
def get_files(folder):
    file_list = []
    for filename in os.listdir(folder):
        if filename.startswith('.'):
            continue
        if filename.endswith('.mxl') or filename.endswith('.xml'):
            file_list.append(os.path.join(folder, filename))
        elif os.path.isdir(os.path.join(folder, filename)):
            file_list.extend(get_files(os.path.join(folder, filename)))
    return file_list

def mxl2xml(mxl_files):
    for filename in xml_files:
        mxlzip = zipfile.ZipFile(filename)


def std_mxl2ns(full_file_path):
    sequence = musicxml_reader.musicxml_file_to_sequence_proto(full_file_path)
    sequence.filename = os.path.basename(full_file_path)
    sequence.id = note_sequence_io.generate_note_sequence_id(
      sequence.filename, sequence.collection_name, 'musicxml')
    return sequence

def my_mxl2ns(full_file_path):
    sequence = my_musicxml_file_to_sequence_proto(full_file_path)
    sequence.filename = os.path.basename(full_file_path)
    sequence.id = note_sequence_io.generate_note_sequence_id(
      sequence.filename, sequence.collection_name, 'musicxml')
    return sequence

In [7]:
mxl_files = get_files('/Users/dramatic/0524')
print('Collect {} mxl files'.format(len(mxl_files)))

Collect 2404 mxl files


In [8]:
note_sequence_list = []
for mxl_file in mxl_files:
    try:
        note_sequence_list.append(my_mxl2ns(mxl_file))
    except:
        pass
print('Collect {} ns files'.format(len(note_sequence_list)))

std_note_sequence_list = []
for mxl_file in mxl_files:
    try:
        std_note_sequence_list.append(std_mxl2ns(mxl_file))
    except:
        pass
print('Collect {} std_ns files'.format(len(std_note_sequence_list)))

Collect 2404 ns files
Collect 2404 std_ns files


In [9]:
for my_ns, std_ns in zip(note_sequence_list, std_note_sequence_list):
    for my_note, std_note in zip(list(filter(lambda x: x.pitch > 0, my_ns.notes)), std_ns.notes):
        if not (my_note.pitch == std_note.pitch and my_note.numerator == std_note.numerator and my_note.denominator == std_note.denominator):
            print(my_note)
            print(std_note)

In [62]:
cmp = 1e-3
for my_ns in note_sequence_list:
    for my_note in my_ns.notes:
        a = my_note.numerator * 1.0 / my_note.denominator
        if(my_note.numerator == 0):
            print(my_ns.filename)
            print(my_note)
        b = (my_note.end_time - my_note.start_time) / 2
        if(abs(a - b) > cmp):
            flag = 0
            for i in range(1, 8):
                for j in range(1, 33):
                    aa = float(i) / j
                    if(abs(aa - b) < cmp):
                        my_note.numerator = i
                        my_note.denominator = j
                        flag = 1
                        break
                if(flag == 1):
                    break
            print(my_note)

500 - 汪苏泷,By2 - 有点甜.xml
pitch: 67
velocity: 64
denominator: 1
voice: 1



In [11]:
ns_ts1 = list(filter(lambda x: len(x.time_signatures) == 1, note_sequence_list))
ns_ts44 = list(filter(lambda x: x.time_signatures[0].numerator == 4 and x.time_signatures[0].denominator == 4, ns_ts1))
ns_ks1_ts44 = list(filter(lambda x: len(x.key_signatures) == 1, ns_ts44))
print(len(ns_ks1_ts44))
print(ns_ks1_ts44[2135])

2136
id: "/id/musicxml//7933028801133736991550aa997cf64ec3cbb8a1"
filename: "1336 - fripSide - sister\'s noise.xml"
ticks_per_quarter: 220
time_signatures {
  numerator: 4
  denominator: 4
}
key_signatures {
  key: G_SHARP
}
tempos {
  qpm: 120.0
}
notes {
  pitch: 68
  velocity: 64
  end_time: 0.5
  numerator: 1
  denominator: 4
  voice: 1
}
notes {
  pitch: 68
  velocity: 64
  start_time: 0.5
  end_time: 0.75
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 67
  velocity: 64
  start_time: 0.75
  end_time: 0.875
  numerator: 1
  denominator: 16
  voice: 1
}
notes {
  pitch: 68
  velocity: 64
  start_time: 0.875
  end_time: 1.0
  numerator: 1
  denominator: 16
  voice: 1
}
notes {
  pitch: 68
  velocity: 64
  start_time: 1.0
  end_time: 1.25
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 70
  velocity: 64
  start_time: 1.25
  end_time: 1.5
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 72
  velocity: 64
  start_time: 1.5
  end_time: 1.75
  num

In [12]:
for my_ns in ns_ks1_ts44:
    transpose = my_ns.key_signatures[0].key
    for my_note in my_ns.notes:
        if(my_note.pitch > 0):
            my_note.pitch -= transpose
print(ns_ks1_ts44[2135])

id: "/id/musicxml//7933028801133736991550aa997cf64ec3cbb8a1"
filename: "1336 - fripSide - sister\'s noise.xml"
ticks_per_quarter: 220
time_signatures {
  numerator: 4
  denominator: 4
}
key_signatures {
  key: G_SHARP
}
tempos {
  qpm: 120.0
}
notes {
  pitch: 60
  velocity: 64
  end_time: 0.5
  numerator: 1
  denominator: 4
  voice: 1
}
notes {
  pitch: 60
  velocity: 64
  start_time: 0.5
  end_time: 0.75
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 59
  velocity: 64
  start_time: 0.75
  end_time: 0.875
  numerator: 1
  denominator: 16
  voice: 1
}
notes {
  pitch: 60
  velocity: 64
  start_time: 0.875
  end_time: 1.0
  numerator: 1
  denominator: 16
  voice: 1
}
notes {
  pitch: 60
  velocity: 64
  start_time: 1.0
  end_time: 1.25
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 62
  velocity: 64
  start_time: 1.25
  end_time: 1.5
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 64
  velocity: 64
  start_time: 1.5
  end_time: 1.75
  numerato

In [13]:
from fractions import Fraction
new_ns_ks1_ts44 = []

for my_ns in ns_ks1_ts44:
    new_notes = []
    origin_note = my_ns.notes[0]
    new_notes.append([origin_note.pitch, Fraction(origin_note.numerator, origin_note.denominator)])
    cur = new_notes[0]
    for lastnote, note in zip(my_ns.notes[:-1], my_ns.notes[1:]):
        if(lastnote.pitch == note.pitch):
            cur[1] += Fraction(note.numerator, note.denominator)
        else:z
            new_notes.append([note.pitch, Fraction(note.numerator, note.denominator)])
            cur = new_notes[-1]
    for new_note in new_notes:
        while(new_note[1] > Fraction(1, 1)):
            new_note[1] -= Fraction(1, 1)
    new_new_notes = [tuple(x) for x in new_notes]
    new_ns_ks1_ts44.append(new_new_notes)
print(new_ns_ks1_ts44[-1])

[(60, Fraction(3, 8)), (59, Fraction(1, 16)), (60, Fraction(3, 16)), (62, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(3, 8)), (59, Fraction(5, 8)), (57, Fraction(3, 8)), (55, Fraction(1, 16)), (57, Fraction(3, 16)), (65, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(5, 16)), (60, Fraction(3, 16)), (62, Fraction(1, 8)), (64, Fraction(7, 16)), (62, Fraction(1, 16)), (60, Fraction(1, 4)), (57, Fraction(3, 16)), (59, Fraction(3, 16)), (60, Fraction(1, 8)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (55, Fraction(3, 16)), (59, Fraction(3, 16)), (60, Fraction(1, 4)), (57, Fraction(1, 8)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (57, Fraction(7, 16)), (59, Fraction(1, 16)), (60, Fraction(1, 8)), (57, Fraction(1, 8)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (64, Fraction(3, 8)), (65, Fraction(1, 16)), (64, Fraction(1, 16)), (62, Fraction(1, 2)), (60, Fraction(3, 8)), (59, Fraction(1, 16)), (60, Fraction(3, 16)), (62, Fraction(1, 8)), (64, Fraction

In [64]:
new3_ns_ts44 = []
print(len(ns_ts44))

for my_ns in ns_ts44:
    new_notes = []
    
    for lastnote, note in zip(my_ns.notes[:-1], my_ns.notes[1:]):
        if(lastnote.numerator == 0 or note.numerator == 0):
            continue
        new_notes.append((note.pitch-lastnote.pitch, Fraction(note.numerator, note.denominator) / Fraction(lastnote.numerator, lastnote.denominator)))

    new3_ns_ts44.append(new_notes)
print(new3_ns_ts44[-1])

2320
[(0, Fraction(1, 2)), (-1, Fraction(1, 2)), (1, Fraction(1, 1)), (0, Fraction(2, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (-1, Fraction(2, 1)), (0, Fraction(1, 2)), (-5, Fraction(1, 2)), (0, Fraction(1, 1)), (0, Fraction(8, 1)), (-2, Fraction(1, 2)), (0, Fraction(1, 2)), (-2, Fraction(1, 2)), (2, Fraction(1, 1)), (0, Fraction(2, 1)), (8, Fraction(1, 1)), (-1, Fraction(1, 1)), (-2, Fraction(1, 1)), (0, Fraction(3, 2)), (-2, Fraction(1, 3)), (0, Fraction(2, 1)), (2, Fraction(1, 1)), (2, Fraction(3, 1)), (0, Fraction(1, 6)), (-2, Fraction(1, 1)), (-2, Fraction(4, 1)), (-3, Fraction(3, 4)), (2, Fraction(1, 3)), (0, Fraction(2, 1)), (1, Fraction(1, 1)), (-1, Fraction(1, 1)), (1, Fraction(1, 1)), (-5, Fraction(3, 2)), (4, Fraction(1, 3)), (0, Fraction(2, 1)), (1, Fraction(1, 1)), (0, Fraction(1, 1)), (-3, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (-3, Fraction(3, 1)), (0, Fraction(1, 6)), (2, Fraction(1, 1)), (1, Fraction(2, 1)), (-3, Fraction

In [65]:
print(ns_ts44[-1])

id: "/id/musicxml//7933028801133736991550aa997cf64ec3cbb8a1"
filename: "1336 - fripSide - sister\'s noise.xml"
ticks_per_quarter: 220
time_signatures {
  numerator: 4
  denominator: 4
}
key_signatures {
  key: G_SHARP
}
tempos {
  qpm: 120.0
}
notes {
  pitch: 60
  velocity: 64
  end_time: 0.5
  numerator: 1
  denominator: 4
  voice: 1
}
notes {
  pitch: 60
  velocity: 64
  start_time: 0.5
  end_time: 0.75
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 59
  velocity: 64
  start_time: 0.75
  end_time: 0.875
  numerator: 1
  denominator: 16
  voice: 1
}
notes {
  pitch: 60
  velocity: 64
  start_time: 0.875
  end_time: 1.0
  numerator: 1
  denominator: 16
  voice: 1
}
notes {
  pitch: 60
  velocity: 64
  start_time: 1.0
  end_time: 1.25
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 62
  velocity: 64
  start_time: 1.25
  end_time: 1.5
  numerator: 1
  denominator: 8
  voice: 1
}
notes {
  pitch: 64
  velocity: 64
  start_time: 1.5
  end_time: 1.75
  numerato

In [49]:

new2_ns_ks1_ts44 = []

for my_ns in ns_ks1_ts44:
    new_notes = []
    for note in my_ns.notes:
        new_notes.append((note.pitch, Fraction(note.numerator, note.denominator)))

    new2_ns_ks1_ts44.append(new_notes)
print(new2_ns_ks1_ts44[-1])

[(60, Fraction(1, 4)), (60, Fraction(1, 8)), (59, Fraction(1, 16)), (60, Fraction(1, 16)), (60, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 4)), (64, Fraction(1, 8)), (59, Fraction(1, 16)), (59, Fraction(1, 16)), (59, Fraction(1, 2)), (57, Fraction(1, 4)), (57, Fraction(1, 8)), (55, Fraction(1, 16)), (57, Fraction(1, 16)), (57, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (62, Fraction(3, 16)), (60, Fraction(1, 16)), (60, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(3, 8)), (64, Fraction(1, 16)), (62, Fraction(1, 16)), (60, Fraction(1, 4)), (57, Fraction(3, 16)), (59, Fraction(1, 16)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (55, Fraction(3, 16)), (59, Fraction(1, 16)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (60, Fraction(1, 8)), (57, Fraction(1, 8)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (57, Fraction(3, 8)), (57, Fraction(1, 

In [231]:
print(new2_ns_ks1_ts44[-1])

[(60, Fraction(1, 4)), (60, Fraction(1, 8)), (59, Fraction(1, 16)), (60, Fraction(1, 16)), (60, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 4)), (64, Fraction(1, 8)), (59, Fraction(1, 16)), (59, Fraction(1, 16)), (59, Fraction(1, 2)), (57, Fraction(1, 4)), (57, Fraction(1, 8)), (55, Fraction(1, 16)), (57, Fraction(1, 16)), (57, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (62, Fraction(3, 16)), (60, Fraction(1, 16)), (60, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(3, 8)), (64, Fraction(1, 16)), (62, Fraction(1, 16)), (60, Fraction(1, 4)), (57, Fraction(3, 16)), (59, Fraction(1, 16)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (55, Fraction(3, 16)), (59, Fraction(1, 16)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (60, Fraction(1, 8)), (57, Fraction(1, 8)), (59, Fraction(1, 8)), (60, Fraction(1, 8)), (57, Fraction(3, 8)), (57, Fraction(1, 

In [249]:
import collections
import math


def get_ngrams(segment, minn, maxn):
    """Extracts all n-grams upto a given maximum order from an input segment.
    Args:
    segment: text segment from which n-grams will be extracted.
    max_order: maximum length in tokens of the n-grams returned by this
        methods.
    Returns:
    The Counter containing all n-grams upto max_order in segment
    with a count of how many times each n-gram occurred.
    """
    
    ngram_counts = collections.Counter()
    for order in range(minn, maxn + 1):
        cur = 0
        for i in range(0, len(segment) - order + 1):
            if(cur < i):
                cur += 1
            while(cur < i + order and segment[cur][0] <= 0):
                cur += 1
            
            modify = 0
            if(cur < i + order):
                while(segment[cur][0] + modify < 60):
                    modify += 12
                while(segment[cur][0] + modify >= 72):
                    modify -= 12
            ngram = tuple((x[0] if x[0] <= 0 else x[0] + modify, x[1]) for x in segment[i:i+order])
            ngram_counts[ngram] += 1
    return ngram_counts


In [251]:
print(get_ngrams(new_ns_ks1_ts44[-1], 2, 5))

Counter({((64, Fraction(3, 8)), (57, Fraction(3, 8))): 6, ((62, Fraction(1, 8)), (64, Fraction(1, 8))): 4, ((71, Fraction(1, 8)), (72, Fraction(1, 8))): 4, ((69, Fraction(1, 8)), (68, Fraction(3, 8))): 4, ((61, Fraction(5, 8)), (64, Fraction(3, 8))): 4, ((61, Fraction(5, 8)), (64, Fraction(3, 8)), (57, Fraction(3, 8))): 4, ((60, Fraction(3, 16)), (62, Fraction(1, 8))): 3, ((64, Fraction(3, 8)), (59, Fraction(5, 8))): 3, ((60, Fraction(1, 8)), (59, Fraction(1, 8))): 3, ((62, Fraction(1, 8)), (60, Fraction(1, 8))): 3, ((60, Fraction(1, 8)), (62, Fraction(1, 8))): 3, ((69, Fraction(3, 8)), (71, Fraction(1, 8))): 3, ((71, Fraction(1, 8)), (69, Fraction(1, 8))): 3, ((68, Fraction(1, 8)), (69, Fraction(1, 8))): 3, ((64, Fraction(3, 8)), (57, Fraction(3, 8)), (59, Fraction(1, 8))): 3, ((69, Fraction(3, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8))): 3, ((64, Fraction(3, 8)), (57, Fraction(3, 8)), (59, Fraction(1, 8)), (57, Fraction(1, 8))): 3, ((60, Fraction(3, 8)), (59, Fraction(1, 16))): 

In [105]:
def std_compute_bleu(candidate, merged_ref_ngram_counts, min_ref_len, minn, maxn, smooth, advanced = False):
    can_lent = len(candidate)
    if(maxn > can_lent):
        print("too short input!")
        return (0.0, [])
    
    
    matches_by_order = [0] * (maxn+1)
    possible_matches_by_order = [0] * (maxn+1)
 

        
    candidate_ngram_counts = get_ngrams(candidate, minn, maxn)
    
    overlap = merged_ref_ngram_counts & candidate_ngram_counts
    for ngram in overlap:
        matches_by_order[len(ngram)] += overlap[ngram]
        
    
    for n in range(minn, maxn+1):
        possible_matches_by_order[n] = can_lent - n + 1
    
    precisions = [0] * (maxn+1)
    
    for n in range(minn, maxn+1):
        if smooth:
            precisions[n] = (matches_by_order[n]+1.) / (possible_matches_by_order[n] + 1.)
        else:
            precisions[n] = float(matches_by_order[n]) / possible_matches_by_order[n]
    
    precisions = precisions[minn:]
 
    if min(precisions) > 0:
        p_log_sum = sum((1. / maxn) * math.log(p) for p in precisions)
        geo_mean = math.exp(p_log_sum)
    else:
        geo_mean = 0

    ratio = float(can_lent if not advanced else len(candidate_ngram_counts)) / min_ref_len

    if ratio > 1.0:
        bp = 1.
    else:
        bp = math.exp(1 - 1. / ratio)

    bleu = geo_mean * bp

    return (-math.log(bleu) if bleu > 0 else -1, precisions)

In [106]:
def ave_compute_bleu(candidate, ref_ngram_counts_list, min_ref_len, minn, maxn, smooth, advanced = False):
    can_lent = len(candidate)
    if(maxn > can_lent):
        print("too short input!")
        return (0.0, [])
    
    possible_matches_by_order = [0] * (maxn+1)
    for n in range(minn, maxn+1):
        possible_matches_by_order[n] = can_lent - n + 1
    
    candidate_ngram_counts = get_ngrams(candidate, minn, maxn)
    
    precisions = [0] * (maxn+1)
    
    for ref_ngram_counts in ref_ngram_counts_list:
        overlap = candidate_ngram_counts & ref_ngram_counts
        
        matches_by_order = [0] * (maxn+1)
        for ngram in overlap:
            matches_by_order[len(ngram)] += overlap[ngram]
        
        for n in range(minn, maxn+1):
            if smooth:
                precisions[n] += (matches_by_order[n]+1.) / (possible_matches_by_order[n] + 1.)
            else:
                precisions[n] += float(matches_by_order[n]) / possible_matches_by_order[n]
    
    for n in range(minn, maxn + 1):
        precisions[n] /= len(ref_ngram_counts_list)
    
    precisions = precisions[minn:]
    
    if min(precisions) > 0:
        p_log_sum = sum((1. / maxn) * math.log(p) for p in precisions)
        geo_mean = math.exp(p_log_sum)
    else:
        geo_mean = 0

    
    ratio = float(can_lent if not advanced else len(candidate_ngram_counts)) / min_ref_len

    if ratio > 1.0:
        bp = 1.
    else:
        bp = math.exp(1 - 1. / ratio)

    bleu = geo_mean * bp

    return (-math.log(bleu) if bleu > 0 else -1, precisions)

In [205]:
def compute_multi_bleu(candidate_list, ref_list, minn = 2, maxn = 5, smooth = True, advanced = False):
    ref_ngram_counts_list = []
    ave_melody_variety = 0.
    for ref in ref_list:
        ref_ngram_counts_list.append(get_ngrams(ref, minn, maxn))
        ave_melody_variety += len(ref_ngram_counts_list[-1])
    
    ave_melody_variety /= len(ref_list)
    
    merged_ref_ngram_counts = collections.Counter()
    for i in range(0, len(ref_ngram_counts_list)):
        merged_ref_ngram_counts |= ref_ngram_counts_list[i]
        
    min_ref_len = min([len(x) for x in ref_list]) 
    
    my_result = []
    for candidate in candidate_list:
        print(candidate[0])
        std = std_compute_bleu(candidate[1], merged_ref_ngram_counts, min_ref_len if not advanced else ave_melody_variety, minn, maxn, smooth, advanced)
        print(std)
        ave = ave_compute_bleu(candidate[1], ref_ngram_counts_list, min_ref_len if not advanced else ave_melody_variety, minn, maxn, smooth, advanced)
        print(ave)
        my_result.append((candidate[0], std, ave))
     
    return my_result
    

In [18]:
# for my_ns in ns_ks1_ts44:
#     if "1046" in my_ns.filename:
#         print(my_ns.filename)

# for i in range(0, len(ns_ks1_ts44)):
#     my_ns = ns_ks1_ts44[i]
#     if "1046" in my_ns.filename:
#         print(my_ns.filename)
#         print(i)
print(new_ns_ks1_ts44[1265])

[(60, Fraction(3, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (60, Fraction(1, 8)), (59, Fraction(1, 4)), (57, Fraction(1, 1)), (62, Fraction(1, 8)), (57, Fraction(1, 8)), (62, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (60, Fraction(1, 4)), (59, Fraction(1, 1)), (64, Fraction(3, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 4)), (63, Fraction(1, 8)), (62, Fraction(3, 4)), (60, Fraction(1, 4)), (59, Fraction(1, 16)), (57, Fraction(1, 16)), (55, Fraction(1, 16)), (57, Fraction(13, 16)), (52, Fraction(1, 1)), (60, Fraction(1, 8)), (55, Fraction(1, 8)), (60, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (60, Fraction(1, 8)), (59, Fraction(1, 4)), (57, Fraction(1, 1)), (62, Fraction(3, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (60, Fraction(1, 4)), (59, Fraction(1, 1)), (64, Fraction(3, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 4)), (63, Fraction(1, 8)), (62,

In [21]:
import random
sample_list = random.sample(new_ns_ks1_ts44, 500)
print(ave_compute_bleu(new_ns_ks1_ts44[1265], sample_list))
print(std_compute_bleu(new_ns_ks1_ts44[1265], sample_list))

(3.157255573696915, [0.05698181818181822, 0.018385321100917354, 0.011685185185185198, 0.011383177570093389])
(1.0, [1.0, 1.0, 1.0, 1.0])


In [23]:
zjl = []
non_zjl = []
for i in range(0, len(ns_ks1_ts44)):
    if "周杰伦" in ns_ks1_ts44[i].filename:
        zjl.append(i)
    else:
        non_zjl.append(i)
        
print(len(zjl))
print(len(non_zjl))
        

87
2049


In [69]:
zjl3 = []
non_zjl3 = []
for i in range(0, len(ns_ts44)):
    if "周杰伦" in ns_ts44[i].filename:
        zjl3.append(i)
    else:
        non_zjl3.append(i)
        
print(len(zjl3))
print(len(non_zjl3))

92
2228


In [26]:
scale = [(60, Fraction(1, 8)),(62, Fraction(1, 8)),(64, Fraction(1, 8)),(65, Fraction(1, 8)),(67, Fraction(1, 8)),(69, Fraction(1, 8)),(71, Fraction(1, 8)),(72, Fraction(1, 8))]

silly_eighth = ([scale[i] for i in range(0, 8)] + [i for i in reversed(scale)]) * 16
silly_fourth = ([(scale[i][0], scale[i][1]*2) for i in range(0, 8)] + [(i[0], i[1]*2) for i in reversed(scale)]) * 8

print(silly_eighth)
print(silly_fourth)

[(60, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (71, Fraction(1, 8)), (72, Fraction(1, 8)), (72, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (60, Fraction(1, 8)), (60, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (71, Fraction(1, 8)), (72, Fraction(1, 8)), (72, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (60, Fraction(1, 8)), (60, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (71, Fraction(1, 8)), (72, Fraction(1, 8)), (72, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (64, Frac

In [70]:
silly3 = []
for lastnote, note in zip(silly_eighth[:-1], silly_eighth[1:]):
    silly3.append((note[0]-lastnote[0], note[1]/lastnote[1]))
print(silly3)

[(2, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (0, Fraction(1, 1)), (-1, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (-1, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (0, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (0, Fraction(1, 1)), (-1, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (-1, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (0, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (0, Fraction(1, 1)), (-1, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction(1, 1)), (-1, Fraction(1, 1)), (-2, Fraction(1, 1)), (-2, Fraction

In [32]:
sample_list_num = random.sample(non_zjl, 500)
sample_list = [new_ns_ks1_ts44[i] for i in sample_list_num]

print(std_compute_bleu(silly_eighth, sample_list))
print(ave_compute_bleu(silly_eighth, sample_list))
print(std_compute_bleu(silly_fourth, sample_list))
print(ave_compute_bleu(silly_fourth, sample_list))

res = []
for i in zjl:
    print(ns_ks1_ts44[i].filename)
    std = std_compute_bleu(new_ns_ks1_ts44[i], sample_list)
    print(std)
    ave = ave_compute_bleu(new_ns_ks1_ts44[i], sample_list)
    print(ave)
    res.append((ns_ks1_ts44[i].filename, std, ave))

(1.3564178210052689, [0.6875, 0.24705882352941178, 0.12992125984251968, 0.05138339920948617])
(3.7476122947607093, [0.035125, 0.00994509803921566, 0.005031496062992118, 0.004142292490118557])
(2.038258923435212, [0.5, 0.1968503937007874, 0.047619047619047616, 0.008])
(3.6521317795655612, [0.019, 0.009574803149606284, 0.008063492063492038, 0.008000000000000005])
260 - 周杰伦 - 七里香.xml
(1.731916786155157, [0.8467153284671532, 0.4117647058823529, 0.06666666666666667, 0.007462686567164179])
(3.4719802864281126, [0.04988321167883205, 0.01025000000000004, 0.007570370370370401, 0.007462686567164221])
1058 - 周杰伦 - 半岛铁盒.xml
(1.5039167151879098, [0.8548387096774194, 0.3902439024390244, 0.06557377049180328, 0.024793388429752067])
(3.4617456610915127, [0.043145161290322524, 0.010097560975609755, 0.008377049180327795, 0.008330578512396766])
874 - 周杰伦 - 爱在西元前.xml
(1.0833864467335772, [0.8373983739837398, 0.5, 0.18181818181818182, 0.058333333333333334])
(3.2387396377617264, [0.0685040650406502, 0.016950

(0.9082374559397537, [0.7896995708154506, 0.5517241379310345, 0.22510822510822512, 0.10869565217391304])
(3.5493908141364425, [0.048635193133047226, 0.01334482758620695, 0.006251082251082277, 0.004834782608695667])
82 - 周杰伦 - 红尘客栈.xml
(0.9856871846812311, [0.8959537572254336, 0.5930232558139535, 0.2573099415204678, 0.052941176470588235])
(3.474152645757962, [0.052624277456647474, 0.013430232558139438, 0.006725146198830442, 0.0060117647058823345])
830 - 周杰伦 - 甜甜的.xml
(1.3375113848678752, [0.8647058823529412, 0.39644970414201186, 0.10119047619047619, 0.03592814371257485])
(3.5744053530162416, [0.04536470588235306, 0.00994082840236684, 0.006321428571428552, 0.006071856287425115])
1153 - 周杰伦 - 心雨.xml
(1.3088670262477498, [0.875968992248062, 0.4609375, 0.14960629921259844, 0.023809523809523808])
(3.3746280968312554, [0.05027906976744145, 0.013765625, 0.00848818897637794, 0.007999999999999974])
1020 - 周杰伦 - 兰亭序.xml
(0.6867430659816551, [0.9917355371900827, 0.7416666666666667, 0.3697478991596

(3.346938110030049, [0.04867826086956525, 0.013543859649122804, 0.009115044247787555, 0.008982142857142904])
421 - 周杰伦 - 止战之殇.xml
(0.9774521732453274, [0.8636363636363636, 0.45871559633027525, 0.2037037037037037, 0.09345794392523364])
(3.279240177394204, [0.04954545454545461, 0.015266055045871526, 0.010481481481481475, 0.009551401869158852])
1072 - 周杰伦 - 你好吗.xml
(1.1375010205230591, [0.8865979381443299, 0.4375, 0.1368421052631579, 0.06382978723404255])
(3.1273557874140576, [0.06610309278350501, 0.01900000000000002, 0.01187368421052621, 0.01085106382978715])
906 - 周杰伦 - 雨下一整晚.xml
(2.26007496331582, [0.6631578947368421, 0.1595744680851064, 0.010752688172043012, 0.010869565217391304])
(3.4594615667048076, [0.023599999999999805, 0.011148936170212684, 0.010752688172043003, 0.010869565217391325])
1180 - 周杰伦 - 天涯过客.xml
(1.3062626096842875, [0.8527131782945736, 0.546875, 0.1968503937007874, 0.015873015873015872])
(3.272052099994349, [0.06114728682170509, 0.017703125, 0.009118110236220459, 0.00

In [50]:

sample_list2 = [new2_ns_ks1_ts44[i] for i in sample_list_num]

std_eighth2 = std_compute_bleu(silly_eighth, sample_list2)
ave_eighth2 = ave_compute_bleu(silly_eighth, sample_list2)
std_fourth2 = std_compute_bleu(silly_fourth, sample_list2)
ave_fourth2 = ave_compute_bleu(silly_fourth, sample_list2)

print(std_eighth2)
print(ave_eighth2)
print(std_fourth2)
print(ave_fourth2)



for i in zjl:
    print(ns_ks1_ts44[i].filename)
    std = std_compute_bleu(new2_ns_ks1_ts44[i], sample_list2)
    print(std)
    ave = ave_compute_bleu(new2_ns_ks1_ts44[i], sample_list2)
    print(ave)
    res2.append((ns_ks1_ts44[i].filename, std, ave))

(0.8913130128703427, [0.8671875, 0.4549019607843137, 0.24803149606299213, 0.11857707509881422])
(3.4049776836552033, [0.0686328125, 0.01785882352941172, 0.007173228346456682, 0.004592885375494051])
(1.7544824132926038, [0.59375, 0.2283464566929134, 0.07142857142857142, 0.016])
(3.6713370256633113, [0.01740625, 0.009401574803149591, 0.008126984126984102, 0.008016000000000006])
260 - 周杰伦 - 七里香.xml
(0.7312832917403622, [0.9482758620689655, 0.7662337662337663, 0.3391304347826087, 0.10480349344978165])
(3.306696645074969, [0.09591379310344822, 0.021419913419913443, 0.006800000000000042, 0.004724890829694271])
1058 - 周杰伦 - 半岛铁盒.xml
(0.6659193026886749, [0.9815668202764977, 0.7685185185185185, 0.39069767441860465, 0.12149532710280374])
(3.1888556160253385, [0.10927188940092142, 0.025351851851851914, 0.00832558139534888, 0.005158878504672872])
874 - 周杰伦 - 爱在西元前.xml
(0.3676330097954712, [0.948, 0.8313253012048193, 0.6008064516129032, 0.3360323886639676])
(3.149397834533251, [0.09121599999999984

(0.5346884221742856, [0.8394648829431438, 0.6677852348993288, 0.4612794612794613, 0.2668918918918919])
(3.192249775549037, [0.08513712374581937, 0.027134228187919423, 0.009952861952862001, 0.00508783783783783])
82 - 周杰伦 - 红尘客栈.xml
(0.7244062634968036, [0.9077669902912622, 0.697560975609756, 0.37254901960784315, 0.11330049261083744])
(3.290818923797466, [0.0818543689320389, 0.021756097560975494, 0.007627450980392134, 0.005261083743842424])
830 - 周杰伦 - 甜甜的.xml
(0.7188095711217155, [0.9352226720647774, 0.7479674796747967, 0.3306122448979592, 0.11885245901639344])
(3.339235968922823, [0.08652631578947358, 0.020934959349593413, 0.00673469387755106, 0.004598360655737668])
1153 - 周杰伦 - 心雨.xml
(0.7426114099319553, [0.9408602150537635, 0.7351351351351352, 0.358695652173913, 0.09836065573770492])
(3.3171028227712682, [0.0766881720430108, 0.019145945945945958, 0.007423913043478287, 0.005748633879781363])
1020 - 周杰伦 - 兰亭序.xml
(0.5460352961792383, [0.9793103448275862, 0.8194444444444444, 0.46153846

(0.4609256107691038, [0.8235294117647058, 0.7527675276752768, 0.5481481481481482, 0.2936802973977695])
(3.3748982635698934, [0.06767647058823531, 0.01899630996309965, 0.0075777777777777285, 0.004817843866170954])
1072 - 周杰伦 - 你好吗.xml
(0.5072646555782976, [0.9712230215827338, 0.7536231884057971, 0.4744525547445255, 0.22794117647058823])
(2.914837941992878, [0.12182733812949642, 0.03556521739130421, 0.012627737226277433, 0.008558823529411763])
906 - 周杰伦 - 雨下一整晚.xml
(1.1583059369020812, [0.8313253012048193, 0.45454545454545453, 0.16463414634146342, 0.049079754601226995])
(3.6162250642916924, [0.03178313253012034, 0.010169696969696893, 0.006902439024390158, 0.006294478527607334])
1180 - 周杰伦 - 天涯过客.xml
(0.5655417004968382, [0.9639175257731959, 0.8082901554404145, 0.453125, 0.16753926701570682])
(3.15144741711602, [0.09322680412371166, 0.026746113989637377, 0.009656249999999993, 0.005958115183246102])
508 - 周杰伦 - 完美主义.xml
(0.9406528697830703, [0.8617021276595744, 0.6268656716417911, 0.279914

In [254]:
res2 = compute_multi_bleu([("ruanruan", random_ns),("eighth", silly_eighth), ("fourth",silly_fourth)] + [(ns_ks1_ts44[i].filename, new2_ns_ks1_ts44[i]) for i in zjl], sample_list2)

ruanruan
(0.23804918481384385, [0.9696969696969697, 0.9739130434782609, 0.7729257641921398, 0.4166666666666667])
(2.652181669855677, [0.1832813852813854, 0.07146956521739119, 0.019353711790392938, 0.006868421052631585])
eighth
(0.8177021131691821, [0.890625, 0.49411764705882355, 0.28346456692913385, 0.13438735177865613])
(3.1330917780160137, [0.1147421875, 0.02978039215686266, 0.009511811023622033, 0.0048379446640316])
fourth
(1.528907721135146, [0.7421875, 0.33858267716535434, 0.11904761904761904, 0.016])
(3.5612428266504317, [0.02546875, 0.010866141732283447, 0.008333333333333307, 0.008016000000000006])
260 - 周杰伦 - 七里香.xml
(0.4997550727648705, [0.9741379310344828, 0.8441558441558441, 0.48695652173913045, 0.2052401746724891])
(2.968104623290147, [0.181758620689655, 0.038978354978354966, 0.00971304347826091, 0.005213973799126584])
1058 - 周杰伦 - 半岛铁盒.xml
(0.5765785772144965, [0.9815668202764977, 0.8379629629629629, 0.4697674418604651, 0.14485981308411214])
(2.9318264556296842, [0.1750783

(0.2998265666809436, [0.9963636363636363, 0.8905109489051095, 0.652014652014652, 0.3860294117647059])
(2.9684209421795797, [0.14893818181818194, 0.03748175182481745, 0.011589743589743559, 0.005536764705882392])
599 - 周杰伦 - 自导自演.xml
(0.4076900389659633, [0.8862876254180602, 0.7449664429530202, 0.5723905723905723, 0.34459459459459457])
(2.8533652798188975, [0.1401137123745819, 0.046046979865771775, 0.015043771043771095, 0.0065608108108108154])
82 - 周杰伦 - 红尘客栈.xml
(0.4821656723883344, [0.9514563106796117, 0.824390243902439, 0.5049019607843137, 0.22660098522167488])
(2.933316647940253, [0.15310679611650505, 0.042204878048780196, 0.011196078431372516, 0.0059014778325123755])
830 - 周杰伦 - 甜甜的.xml
(0.5960586912152475, [0.9635627530364372, 0.8048780487804879, 0.4204081632653061, 0.1557377049180328])
(3.0609203454000307, [0.14876113360323878, 0.034837398373983655, 0.008865306122448988, 0.004909836065573733])
1153 - 周杰伦 - 心雨.xml
(0.5021519059025911, [0.989247311827957, 0.8648648648648649, 0.51086

(0.5430603339900184, [0.9712643678160919, 0.7687861271676301, 0.45930232558139533, 0.19298245614035087])
(2.880389565070545, [0.16518390804597702, 0.038971098265896044, 0.012523255813953398, 0.006900584795321678])
421 - 周杰伦 - 止战之殇.xml
(0.39839425898367936, [0.8419117647058824, 0.7896678966789668, 0.6, 0.3420074349442379])
(3.0593031831910684, [0.11945588235294129, 0.032199261992619965, 0.010518518518518503, 0.005620817843866119])
1072 - 周杰伦 - 你好吗.xml
(0.4007533834312207, [0.9784172661870504, 0.8333333333333334, 0.5766423357664233, 0.2867647058823529])
(2.66881679703805, [0.20241726618705044, 0.055289855072463624, 0.01557664233576651, 0.009191176470588263])
906 - 周杰伦 - 雨下一整晚.xml
(0.5149371103522498, [0.9578313253012049, 0.8363636363636363, 0.5, 0.1901840490797546])
(3.0068721885554304, [0.13778313253012056, 0.030872727272727198, 0.009865853658536483, 0.007042944785276043])
1180 - 周杰伦 - 天涯过客.xml
(0.4235210355851551, [0.9845360824742269, 0.8808290155440415, 0.5520833333333334, 0.251308900

In [273]:
res2 += compute_multi_bleu([("ruanruan2", random_ns[:231])], sample_list2)
get_brief_ranks(res2)

ruanruan2
(0.38814762808154935, [0.8744588744588745, 0.8695652173913043, 0.6331877729257642, 0.2982456140350877])
(2.7767097664892892, [0.17010389610389615, 0.059608695652173846, 0.015423580786026141, 0.005973684210526314])
1: 622 - 周杰伦 - 开不了口.xml
2: 38 - 周杰伦 - 浪漫手机.xml
3: ruanruan
4: 1207 - 周杰伦 - 星晴.xml
5: 569 - 梦想启动 - 周杰伦.xml
6: 167 - 周杰伦 - 简单爱.xml
7: 150 - 周杰伦 - 回到过去.xml
8: 402 - 周杰伦 - 大笨钟.xml
9: 874 - 周杰伦 - 爱在西元前.xml
10: 477 - 周杰伦 - 说了再见.xml
11: 1083 - 周杰伦 - 蒲公英的约定.xml
12: 1019 - 周杰伦 - 我落泪 情绪零碎.xml
13: 625 - 周杰伦 - 暗号.xml
14: 44 - 周杰伦 - 说好的幸福呢.xml
15: 1159 - 周杰伦 - 彩虹.xml
16: 367 - 周杰伦 - 夜曲.xml
17: 1252 - 周杰伦 - 分裂.xml
18: 22 - 周杰伦 - 晴天.xml
19: 733 - 周杰伦 - 双刀.xml
20: 108 - 周杰伦 - 超人不会飞.xml
21: 768 - 周杰伦 - 我不配.xml
22: 222 - 周杰伦 - 时光机.xml
23: 1174 - 周杰伦 - 世界末日.xml
24: 1165 - 周杰伦 - 超跑女神.xml
25: 181 - 周杰伦,费玉清 - 千里之外.xml
26: 945 - 周杰伦 - 一点点.xml
27: 1020 - 周杰伦 - 兰亭序.xml
28: 200 - 周杰伦 - 算什么男人.xml
29: 1119 - 周杰伦 - 四面楚歌.xml
30: ruanruan2
31: 385 - 周杰伦 - 麦芽糖.xml
32: 421 - 周杰伦 - 止战之殇.xml
33: 1072

In [270]:
import copy
for i in range(0, len(ns_ks1_ts44)):
    if('烟花易冷' in ns_ks1_ts44[i].filename):
        #print(ns_ks1_ts44[i])
        new_test_candidate_sjmr = copy.deepcopy(new2_ns_ks1_ts44[i])
        break
        
print(new_test_candidate_sjmr)

[(0, Fraction(1, 2)), (0, Fraction(1, 8)), (64, Fraction(1, 8)), (64, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (72, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 4)), (69, Fraction(1, 8)), (64, Fraction(3, 8)), (64, Fraction(1, 8)), (64, Fraction(1, 8)), (64, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (72, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (69, Fraction(1, 8)), (64, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(3, 8)), (62, Fraction(1, 8)), (69, Fraction(1, 8)), (74, Fraction(1, 4)), (69, Fraction(1, 8)), (74, Fraction(3, 8)), (74, Fraction(1, 8)), (69, Fraction(1, 8)), (74, Fraction(3, 16)), (69, Fraction(1, 16)), (71, Fraction(1, 4)), (64, Fraction(1, 4)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (64, Fraction(1, 8)), (62, Fraction(1, 8)), (67, Fraction(3, 8)), (62, Fraction(1, 8)), (67, Frac

In [271]:
print(len(new_test_candidate_sjmr))
cur = 0
for i in range(0, len(new_test_candidate_sjmr)):
    x = new_test_candidate_sjmr[i]
    if(x[0] > 0):
        new_test_candidate_sjmr[i] = (random_ns[cur][0], x[1])
        cur += 1

print(new_test_candidate_sjmr)

184
[(0, Fraction(1, 2)), (0, Fraction(1, 8)), (60, Fraction(1, 8)), (64, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (69, Fraction(1, 8)), (65, Fraction(1, 8)), (67, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (62, Fraction(1, 4)), (65, Fraction(1, 8)), (62, Fraction(3, 8)), (65, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (71, Fraction(1, 8)), (71, Fraction(1, 8)), (71, Fraction(1, 8)), (74, Fraction(1, 8)), (72, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(3, 8)), (62, Fraction(1, 8)), (65, Fraction(1, 8)), (69, Fraction(1, 4)), (69, Fraction(1, 8)), (67, Fraction(3, 8)), (69, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(3, 16)), (65, Fraction(1, 16)), (69, Fraction(1, 4)), (71, Fraction(1, 4)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (65, Fraction(1, 8)), (65, Fraction(3, 8)), (69, Fraction(1, 8)), (67, 

In [247]:
for i in range(0, len(ns_ks1_ts44)):
    if((ns_ks1_ts44[i].notes[0].pitch < 60 or ns_ks1_ts44[i].notes[0].pitch >= 72) and ns_ks1_ts44[i].notes[0].pitch > 0):
        print(ns_ks1_ts44[i].filename)
        modify = 0
        while(ns_ks1_ts44[i].notes[0].pitch + modify < 60):
                modify += 12
        while(ns_ks1_ts44[i].notes[0].pitch + modify >= 72):
                modify -= 12

        print(ns_ks1_ts44[i].notes[0].pitch + modify)

1809 - V.A.xml
67
933 - 林俊杰 - 2infinity And Beyond.xml
69
1550 - 坂本真綾 - やさしさに包まれたなら.xml
60
1642 - 桃井はるこ - ライブのあとはさみしいな.xml
71
954 - 张惠妹 - 我要飞.xml
67
117 - 范玮琪,郭静,张韶涵 - 微笑的起点.xml
64
1777 - 田村ゆかり - 宵待ちの花.xml
71
2253 - 莫文蔚 - 看看.xml
64
264 - 范玮琪,刘若英 - 不能跟情人说的话.xml
60
1795 - THE ROLLING GIRLS - 夕暮れ.xml
60
925 - 范玮琪 - 可不可以不勇敢.xml
62
1283 - 高鈴 - 爱してる.xml
64
2326 - Tank - 蟑螂小强.xml
69
521 - 萧煌奇 - 下个街角.xml
64
1513 - AKB48 - 桜の栞.xml
67
1679 - 茶太 - 水平线まで何マイル.xml
60
2367 - 莫文蔚 - 午夜前的十分钟.xml
62
58 - 孙燕姿 - Honey Honey.xml
69
2017 - 戸松遥,榎本夏樹 - 病名恋ワズライ.xml
60
852 - 林俊杰 - 就是我.xml
60
1368 - AKB48 - 桜の花びらたち.xml
60
731 - 李圣杰 - 祝你幸福.xml
67
195 - 五月天 - 轻功.xml
60
2203 - 金志文 - SuperBoy.xml
60
942 - 张靓颖 - Bazaar.xml
62
2054 - 森永真由美 - 色は匂へど散りぬるを.xml
69
1493 - AKB48 - LOVE TRIP.xml
60
936 - 萧煌奇 - 大富翁.xml
60
713 - 范玮琪 - 诚实.xml
67
2728 - 杨丞琳 - 下一次微笑.xml
64
1815 - 川嶋あい - 雪に咲く花.xml
64
1783 - Akie秋绘 - なんでもないや 没什么大不了的（Cover：Radwimps）.xml
64
940 - 林俊杰 - Always Online.xml
60
808 - 张杰 - 何必在一起.xml
64
1851 - 田村麻奈実 - あ

In [235]:
print(random_ns)

[(2, Fraction(1, 1)), (-2, Fraction(1, 1)), (4, Fraction(1, 1)), (0, Fraction(1, 1)), (0, Fraction(1, 1)), (0, Fraction(1, 1)), (1, Fraction(1, 1)), (0, Fraction(1, 1)), (-3, Fraction(1, 1)), (3, Fraction(1, 1)), (-3, Fraction(1, 1)), (-2, Fraction(1, 1)), (-3, Fraction(1, 1)), (0, Fraction(1, 1)), (2, Fraction(1, 1)), (3, Fraction(1, 1)), (-2, Fraction(1, 1)), (2, Fraction(1, 1)), (3, Fraction(1, 1)), (2, Fraction(1, 1)), (0, Fraction(1, 1)), (-3, Fraction(1, 1)), (-2, Fraction(1, 1)), (0, Fraction(1, 1)), (0, Fraction(1, 1)), (2, Fraction(1, 1)), (1, Fraction(1, 1)), (4, Fraction(1, 1)), (0, Fraction(1, 1)), (-4, Fraction(1, 1)), (0, Fraction(1, 1)), (0, Fraction(1, 1)), (-3, Fraction(1, 1)), (0, Fraction(1, 1)), (3, Fraction(1, 1)), (0, Fraction(1, 1)), (4, Fraction(1, 1)), (0, Fraction(1, 1)), (-2, Fraction(1, 1)), (0, Fraction(1, 1)), (0, Fraction(1, 1)), (2, Fraction(1, 1)), (3, Fraction(1, 1)), (2, Fraction(1, 1)), (0, Fraction(1, 1)), (-3, Fraction(1, 1)), (-4, Fraction(1, 1)),

In [88]:
res2 = compute_multi_bleu([("scale in eighth note", silly_eighth),("scale in fourth note", silly_fourth)] + [(ns_ks1_ts44[i].filename, new2_ns_ks1_ts44[i]) for i in zjl], sample_list2)

scale in eighth note
(0.8913130128703427, [0.8671875, 0.4549019607843137, 0.24803149606299213, 0.11857707509881422])
(3.4049776836552033, [0.0686328125, 0.01785882352941172, 0.007173228346456682, 0.004592885375494051])
scale in fourth note
(1.7544824132926038, [0.59375, 0.2283464566929134, 0.07142857142857142, 0.016])
(3.6713370256633113, [0.01740625, 0.009401574803149591, 0.008126984126984102, 0.008016000000000006])
260 - 周杰伦 - 七里香.xml
(0.7312832917403622, [0.9482758620689655, 0.7662337662337663, 0.3391304347826087, 0.10480349344978165])
(3.306696645074969, [0.09591379310344822, 0.021419913419913443, 0.006800000000000042, 0.004724890829694271])
1058 - 周杰伦 - 半岛铁盒.xml
(0.6659193026886749, [0.9815668202764977, 0.7685185185185185, 0.39069767441860465, 0.12149532710280374])
(3.1888556160253385, [0.10927188940092142, 0.025351851851851914, 0.00832558139534888, 0.005158878504672872])
874 - 周杰伦 - 爱在西元前.xml
(0.3676330097954712, [0.948, 0.8313253012048193, 0.6008064516129032, 0.3360323886639676]

KeyboardInterrupt: 

In [144]:
sample_list_num3 = random.sample(non_zjl3, 500)
sample_list3 = [new3_ns_ts44[i] for i in sample_list_num3]



In [150]:
sample_num3_set = set(sample_list_num3)
print(len(sample_num3_set))

500


In [151]:
candidates_non_sample3 = []
for i in non_zjl3:
    if not i in sample_num3_set:
        candidates_non_sample3.append(i)
print(len(candidates_non_sample3))

1728


In [152]:
candidates_non_sample3 = random.sample(candidates_non_sample3, 200)
print(len(candidates_non_sample3))

200


In [206]:
res3_expand = compute_multi_bleu([("silly scale", silly3), ("random", random_ns)]+[(ns_ts44[i].filename, new3_ns_ts44[i]) for i in candidates_non_sample3] + [(ns_ts44[i].filename, new3_ns_ts44[i]) for i in zjl3], sample_list3, minn = 2, maxn = 4, smooth = False, advanced = False)

silly scale
(0.9022879460124946, [0.43700787401574803, 0.31225296442687744, 0.1984126984126984])
(3.388155017191732, [0.05559055118110253, 0.01120948616600792, 0.0020873015873015843])
random
(0.667903585909204, [0.7816593886462883, 0.4780701754385965, 0.18502202643171806])
(2.9925593403852537, [0.12602620087336247, 0.021350877192982513, 0.002352422907488992])
1468 - AKB48 - あの日の自分.xml
(0.2399322702429099, [0.990909090909091, 0.8348623853211009, 0.46296296296296297])
(2.612074130905289, [0.17492727272727251, 0.03174311926605496, 0.005222222222222216])
904 - 梁静茹 - 无解.xml
(0.48533074626210176, [0.8636363636363636, 0.6285714285714286, 0.26436781609195403])
(3.2264040998610675, [0.10205681818181817, 0.013931428571428577, 0.001747126436781611])
1720 - MAKE-UP,山田信夫 - ペガサス幻想.xml
(0.2779927695457582, [0.9502762430939227, 0.7555555555555555, 0.4581005586592179])
(2.744044758061823, [0.15662983425414362, 0.02857777777777791, 0.0038212290502793386])
2083 - 茶太 - 夕日.xml
(0.3230854886940066, [0.985

(2.930357950356253, [0.13922881355932218, 0.020672340425531892, 0.0028205128205128164])
63 - 范玮琪 - 你在哪里.xml
(0.49623949901079994, [0.8552036199095022, 0.5863636363636363, 0.273972602739726])
(3.0388441909665165, [0.12820814479638054, 0.01782727272727275, 0.0023013698630136967])
1220 - 陈绮贞,五月天 - 私奔到月球.xml
(0.4189392481678907, [0.9634146341463414, 0.6993865030674846, 0.2777777777777778])
(3.1475975526898976, [0.12498780487804871, 0.014907975460122742, 0.001827160493827162])
2700 - 张韶涵 - 我恋爱了.xml
(0.7107069775972694, [0.8217821782178217, 0.472636815920398, 0.15])
(3.655233805048111, [0.07577227722772259, 0.007970149253731362, 0.0007400000000000004])
1943 - 東山奈央 - fragile.xml
(0.4363235498430208, [0.8811881188118812, 0.6716417910447762, 0.295])
(2.814669892502236, [0.1459801980198021, 0.025383084577114565, 0.003479999999999989])
2723 - 胡彦斌Tiger Hu - Waiting For You.xml
(0.22372679698583964, [0.9565217391304348, 0.8360655737704918, 0.510989010989011])
(2.557442700846104, [0.1697391304347827

(0.43371793414622567, [0.9395973154362416, 0.6418918918918919, 0.2925170068027211])
(3.151914257324027, [0.10492617449664421, 0.013243243243243231, 0.0024081632653061243])
1033 - 李健 - 我愿人长久.xml
(0.4026307013518347, [0.864516129032258, 0.6428571428571429, 0.35947712418300654])
(3.0501428504409125, [0.1220129032258065, 0.019701298701298695, 0.0020915032679738542])
933 - 林俊杰 - 2infinity And Beyond.xml
(0.34780488560073747, [0.9679144385026738, 0.7204301075268817, 0.3567567567567568])
(2.8129439229011393, [0.1604064171122994, 0.022967741935484016, 0.0035243243243243136])
650 - 王力宏 - 爱的就是你.xml
(0.2868523112155865, [0.9490740740740741, 0.7534883720930232, 0.4439252336448598])
(2.6340536075612757, [0.15733333333333321, 0.03295813953488374, 0.005121495327102795])
1696 - JUJU - どんなに遠くても... ~Game Ending ver.xml
(0.5209784261032321, [0.8285714285714286, 0.5614754098360656, 0.2674897119341564])
(2.916853257520511, [0.11946938775510217, 0.02125409836065578, 0.0033744855967078238])
1523 - 久石譲 - 崖の上

(0.29702291174909606, [0.8956043956043956, 0.7292817679558011, 0.4666666666666667])
(2.7186395158172836, [0.14798901098901132, 0.02767955801104977, 0.004622222222222213])
946 - 范玮琪 - 我们之间的事.xml
(0.5603981140855617, [0.7823834196891192, 0.4895833333333333, 0.2774869109947644])
(2.704611637101433, [0.11913989637305654, 0.02618750000000001, 0.006418848167539267])
841 - 孙燕姿 - 全心全意.xml
(0.46791401854778164, [0.940677966101695, 0.6324786324786325, 0.25862068965517243])
(3.119557962656418, [0.13199999999999992, 0.01673504273504269, 0.0017241379310344806])
1682 - つじあやの - 月が泣いてる.xml
(0.18352369323250162, [0.9746835443037974, 0.8535031847133758, 0.5769230769230769])
(2.2618596517330247, [0.19086075949367065, 0.04766878980891724, 0.012935897435897402])
1950 - 田村ゆかり-日高里菜-内田真礼 - あなただけのメゾンドクチュール.xml
(0.44195956550514154, [0.8741935483870967, 0.6537216828478964, 0.2987012987012987])
(2.9631055994758855, [0.13538064516129028, 0.020932038834951476, 0.002512987012987014])
1908 - 美郷あき - Unusual Days

(2.144837486391671, [0.20715199999999992, 0.060322580645161133, 0.015040650406504057])
1147 - 范玮琪 - 我们的纪念日.xml
(0.40844983946524993, [0.9458128078817734, 0.6584158415841584, 0.31343283582089554])
(3.0716974745553562, [0.13691625615763586, 0.019019801980197898, 0.0017711442786069623])
1798 - azusa - 真夏のフォトグラフ.xml
(0.4643757940166616, [0.9430051813471503, 0.6197916666666666, 0.2670157068062827])
(3.071641660518045, [0.12641450777202023, 0.017781250000000005, 0.002052356020942404])
2242 - 汪峰 - 青春.xml
(0.5030000652210759, [0.8023255813953488, 0.5555555555555556, 0.3])
(2.63020187699972, [0.10390697674418603, 0.028140350877193066, 0.009223529411764695])
2386 - 王心凌 - BFF.xml
(0.20050068235667828, [0.966183574879227, 0.8495145631067961, 0.5463414634146342])
(2.3528844981757855, [0.18070531400966186, 0.04597087378640785, 0.009843902439024397])
439 - 张靓颖 - 转眼一生转身一世.xml
(0.6795617169280646, [0.75, 0.43349753694581283, 0.20297029702970298])
(3.381421172562979, [0.09271568627450984, 0.01193103448

(0.27871815553746754, [0.9547325102880658, 0.78099173553719, 0.43983402489626555])
(2.5333277899994466, [0.15988477366255147, 0.035355371900826445, 0.007029045643153511])
1083 - 周杰伦 - 蒲公英的约定.xml
(0.24982884057509724, [0.9716312056737588, 0.7214285714285714, 0.5251798561151079])
(2.331704017985696, [0.17502127659574357, 0.044014285714285804, 0.011553956834532348])
254 - 周杰伦,杨瑞代 - 我要夏天.xml
(0.267044444801118, [0.9431818181818182, 0.7371428571428571, 0.4942528735632184])
(2.405030847009968, [0.15782954545454542, 0.038925714285714244, 0.010804597701149412])
132 - 周杰伦 - 乌克丽丽.xml
(0.39436473059518545, [0.88, 0.6733668341708543, 0.3484848484848485])
(2.686696090287521, [0.15521999999999989, 0.033386934673366744, 0.004151515151515148])
335 - 周杰伦,张惠妹 - 不该(with aMEI).xml
(0.31126037748138596, [0.8978102189781022, 0.6923076923076923, 0.4632352941176471])
(2.557240891192946, [0.14384671532846705, 0.03465934065934056, 0.007242647058823546])
929 - 周杰伦 - 迷迭香.xml
(0.438122576872328, [0.93717277486911,

(2.5380486491402343, [0.16308965517241375, 0.03750173010380623, 0.006375000000000021])
22 - 周杰伦 - 晴天.xml
(0.2690969472674643, [0.946969696969697, 0.7604562737642585, 0.4732824427480916])
(2.3827603345045, [0.16206818181818183, 0.041422053231939135, 0.010809160305343528])
345 - 周杰伦 - 最长的电影.xml
(0.370591035924961, [0.9010989010989011, 0.6574585635359116, 0.38333333333333336])
(2.855480823366122, [0.13250549450549473, 0.022475138121546973, 0.003677777777777771])
220 - 周杰伦 - 反方向的钟.xml
(0.46558302996863127, [0.8846153846153846, 0.574585635359116, 0.3055555555555556])
(3.0584159881290764, [0.12481318681318708, 0.01608839779005528, 0.002422222222222218])
285 - 周杰伦 - 蜗牛.xml
(0.39314816904286115, [0.9347826086956522, 0.6861313868613139, 0.3235294117647059])
(3.031634870292107, [0.128072463768116, 0.019036496350365, 0.0022205882352941135])
811 - 周杰伦 - 告白气球.xml
(0.3043716308731647, [0.9333333333333333, 0.7449664429530202, 0.42567567567567566])
(2.679161522225925, [0.17213333333333328, 0.028201342

In [211]:
get_brief_ranks(res3_expand)

1: 1125 - 梁静茹 - 美丽人生.xml
2: 622 - 周杰伦 - 开不了口.xml
3: 95 - 徐良,吴昕 - 星座恋人.xml
4: 169 - 梁静茹 - 丝路.xml
5: 1207 - 周杰伦 - 星晴.xml
6: 421 - 周杰伦 - 止战之殇.xml
7: 874 - 周杰伦 - 爱在西元前.xml
8: 108 - 周杰伦 - 超人不会飞.xml
9: 768 - 周杰伦 - 我不配.xml
10: 1602 - HIRO-X - future.xml
11: 2058 - 7!! - スタートライン (Acoustic Ver.xml
12: 167 - 周杰伦 - 简单爱.xml
13: 1682 - つじあやの - 月が泣いてる.xml
14: 625 - 周杰伦 - 暗号.xml
15: 441 - 周杰伦 - 最后的战役.xml
16: 150 - 周杰伦 - 回到过去.xml
17: 1565 - AKB48 - Better.xml
18: 2362 - 金莎 - 我知道我们不会有结果.xml
19: 2204 - 林宥嘉 - Runaway Mama.xml
20: 797 - 张杰 - 我想.xml
21: 2386 - 王心凌 - BFF.xml
22: 1228 - 王力宏 - 大城小爱.xml
23: 44 - 周杰伦 - 说好的幸福呢.xml
24: 1159 - 周杰伦 - 彩虹.xml
25: 1165 - 周杰伦 - 超跑女神.xml
26: 938 - 周杰伦,袁咏琳 - 傻笑.xml
27: 569 - 梦想启动 - 周杰伦.xml
28: 402 - 周杰伦 - 大笨钟.xml
29: 299 - 陶喆 - 二十二.xml
30: 177 - 许巍 - 温暖.xml
31: 634 - 周杰伦 - 枫.xml
32: 1740 - 石田耀子 - Otome no Policy.xml
33: 2723 - 胡彦斌Tiger Hu - Waiting For You.xml
34: 715 - 王力宏 - 柴米油盐酱醋茶.xml
35: 778 - 孙燕姿 - 180度.xml
36: 1262 - 许嵩-何曼婷 - 素颜.xml
37: 1694 - 戸松遥 - あなたの幸せに私がなれる

In [208]:
def get_full_ranks(myseq):

    myseq.sort(key = std_bleu)

    for i in range(0, len(myseq)):
        print(i+1)
        for m in myseq[i]:
            print(m)
        #print(str(i+1) + ': ' + myseq[i][0])
        print('--------')

    print('\n\n\n')

    myseq.sort(key = ave_bleu)
    for i in range(0, len(myseq)):
        print(i+1)
        for m in myseq[i]:
            print(m)
       # print(str(i+1)+ ': ' + myseq[i][0])
        print('========')



In [209]:
def get_brief_ranks(myseq):

    myseq.sort(key = std_bleu)

    for i in range(0, len(myseq)):
#         print(i+1)
#         for m in myseq[i]:
#             print(m)
         print(str(i+1) + ': ' + myseq[i][0])
#         print('--------')

    print('\n\n\n')

    myseq.sort(key = ave_bleu)
    for i in range(0, len(myseq)):
#         print(i+1)
#         for m in myseq[i]:
#             print(m)
         print(str(i+1)+ ': ' + myseq[i][0])
#         print('========')



In [155]:
res3 += res3_expand

In [100]:

res_silly3 = compute_multi_bleu([("silly scale", silly3)], sample_list3, minn = 1, maxn = 5)

silly scale
(1.1997198323744027, [0.703125, 0.4588235294117647, 0.35039370078740156, 0.1976284584980237, 0.1111111111111111])
(3.7679554507862694, [0.260859375, 0.06051764705882335, 0.015401574803149583, 0.00611857707509879, 0.004420634920634907])


In [146]:

res3 = compute_multi_bleu([(ns_ts44[i].filename, new3_ns_ts44[i]) for i in zjl3], sample_list3, minn = 1, maxn = 4, smooth = False, advanced = True)


260 - 周杰伦 - 七里香.xml
(0.7714295208233903, [1.0, 0.908695652173913, 0.6593886462882096, 0.3201754385964912, 0.11013215859030837])
(4.144426851374844, [0.45115151515151547, 0.13090434782608704, 0.019668122270742282, 0.0023859649122807015, 0.00036123348017621165])
1058 - 周杰伦 - 半岛铁盒.xml
(1.029501246519754, [1.0, 0.9116279069767442, 0.6635514018691588, 0.29107981220657275, 0.0330188679245283])
(4.06572173583115, [0.47600925925925947, 0.15075348837209307, 0.027093457943925144, 0.003370892018779346, 0.00022641509433962261])
874 - 周杰伦 - 爱在西元前.xml
(0.31594805882527766, [0.9839357429718876, 0.9314516129032258, 0.8502024291497976, 0.6747967479674797, 0.39183673469387753])
(3.1458161698434397, [0.4194618473895588, 0.16223387096774186, 0.04692307692307681, 0.012894308943089404, 0.003583673469387758])
1159 - 周杰伦 - 彩虹.xml
(0.46973799266299054, [1.0, 0.9817518248175182, 0.8644688644688645, 0.5257352941176471, 0.2140221402214022])
(3.3417304675460375, [0.4205963636363634, 0.16267883211678835, 0.04326739

(0.4857465041328645, [1.0, 0.9516908212560387, 0.6941747572815534, 0.45365853658536587, 0.29411764705882354])
(3.4552021470592313, [0.47554807692307716, 0.14806763285024144, 0.03268932038834956, 0.00710243902439025, 0.0019215686274509775])
200 - 周杰伦 - 算什么男人.xml
(0.5637423247583206, [0.9629629629629629, 0.9202127659574468, 0.732620320855615, 0.4731182795698925, 0.2810810810810811])
(3.5651531341151186, [0.4601904761904759, 0.13062765957446812, 0.027925133689839506, 0.007118279569892467, 0.002194594594594593])
477 - 周杰伦 - 说了再见.xml
(0.5215406803863806, [1.0, 0.9230769230769231, 0.7389705882352942, 0.4354243542435424, 0.24814814814814815])
(3.7458609629057635, [0.38795620437956146, 0.10272527472527507, 0.02105882352941182, 0.005114391143911439, 0.0017111111111111092])
599 - 周杰伦 - 自导自演.xml
(0.4503778165107334, [0.9865771812080537, 0.8787878787878788, 0.6858108108108109, 0.5254237288135594, 0.336734693877551])
(3.343267072956946, [0.3773892617449663, 0.13307070707070737, 0.03799324324324327,

(5.23440090267612, [0.4452044198895026, 0.08479999999999999, 0.009162011173184392, 0.0011123595505617989, 0.0002033898305084746])
938 - 周杰伦,袁咏琳 - 傻笑.xml
(0.601136920822019, [1.0, 0.9908256880733946, 0.8663594470046083, 0.5138888888888888, 0.1813953488372093])
(3.7841007516888623, [0.5004292237442924, 0.1701192660550457, 0.03136405529953916, 0.005194444444444441, 0.000706976744186046])
399 - 周杰伦 - 退后.xml
(0.7409236469746163, [0.994535519125683, 0.9010989010989011, 0.6795580110497238, 0.34444444444444444, 0.11731843575418995])
(4.0420438419022995, [0.5359781420765027, 0.1606703296703299, 0.0223425414364641, 0.002877777777777774, 0.0003016759776536312])
1244 - 周杰伦 - 搁浅.xml
(1.1940141818984584, [0.9871794871794872, 0.9096774193548387, 0.6298701298701299, 0.27450980392156865, 0.02631578947368421])
(4.616393802518009, [0.5316282051282053, 0.146374193548387, 0.018961038961038963, 0.00194771241830065, 5.263157894736842e-05])
707 - 周杰伦 - 伊斯坦堡.xml
(0.8790341124653384, [0.9947089947089947, 0.9095

In [108]:
res3 = compute_multi_bleu([("silly scale", silly3)]+[(ns_ts44[i].filename, new3_ns_ts44[i]) for i in zjl3], sample_list3, minn = 1, maxn = 5, smooth = False, advanced = True)

silly scale
(9.736212657286728, [0.7019607843137254, 0.4566929133858268, 0.34782608695652173, 0.19444444444444445, 0.10756972111553785])
(13.027093230036012, [0.257960784313726, 0.05681889763779551, 0.011509881422924922, 0.002174603174603171, 0.00045418326693227087])
260 - 周杰伦 - 七里香.xml
(0.7258737312629924, [1.0, 0.9391304347826087, 0.6943231441048034, 0.34210526315789475, 0.11894273127753303])
(4.114637295388938, [0.4543376623376624, 0.13339999999999994, 0.01971179039301304, 0.002508771929824561, 0.0003876651982378858])
1058 - 周杰伦 - 半岛铁盒.xml
(0.9335832950704541, [0.9907407407407407, 0.9116279069767442, 0.677570093457944, 0.29577464788732394, 0.05188679245283019])
(4.140575656417682, [0.4765648148148145, 0.15115348837209308, 0.02727102803738309, 0.0032394366197183097, 0.00016037735849056602])
874 - 周杰伦 - 爱在西元前.xml
(0.3046411315122958, [0.9799196787148594, 0.9314516129032258, 0.8259109311740891, 0.6747967479674797, 0.42857142857142855])
(3.1471262680925944, [0.4223453815261044, 0.162370

(3.629020636892884, [0.4617989417989421, 0.13184042553191494, 0.02796791443850258, 0.006344086021505372, 0.0017297297297297286])
477 - 周杰伦 - 说了再见.xml
(0.5007788384010079, [0.9963503649635036, 0.9120879120879121, 0.7683823529411765, 0.46494464944649444, 0.2518518518518518])
(3.7445833318664232, [0.389737226277372, 0.10263003663003697, 0.021588235294117707, 0.005276752767527668, 0.0016222222222222215])
599 - 周杰伦 - 自导自演.xml
(0.47113121352028303, [0.9865771812080537, 0.8383838383838383, 0.6959459459459459, 0.5152542372881356, 0.3197278911564626])
(3.3273428034015593, [0.38230872483221473, 0.13494276094276114, 0.038256756756756716, 0.01125423728813559, 0.0026802721088435353])
82 - 周杰伦 - 红尘客栈.xml
(0.8770876509995239, [0.9804878048780488, 0.8627450980392157, 0.5862068965517241, 0.297029702970297, 0.0845771144278607])
(4.401105241486638, [0.39750243902439025, 0.10703921568627459, 0.015832512315270993, 0.0018811881188118824, 0.00021890547263681595])
830 - 周杰伦 - 甜甜的.xml
(0.9025582103104124, [0.9

(3.9908845958613965, [0.5282116402116404, 0.15268085106382961, 0.027272727272727223, 0.003999999999999997, 0.0006594594594594595])
943 - 周杰伦 - 爱情悬崖.xml
(0.9254414593189082, [0.990990990990991, 0.8778280542986425, 0.6454545454545455, 0.3835616438356164, 0.14220183486238533])
(4.119135693608883, [0.44768468468468464, 0.13563800904977433, 0.023554545454545488, 0.0040456621004566155, 0.0006146788990825683])
181 - 周杰伦,费玉清 - 千里之外.xml
(0.7413023132598138, [0.992, 0.8835341365461847, 0.6451612903225806, 0.38461538461538464, 0.13008130081300814])
(4.065524314308093, [0.3675040000000006, 0.11803212851405621, 0.02274999999999995, 0.003384615384615382, 0.0005121951219512193])
733 - 周杰伦 - 双刀.xml
(0.5129322916666668, [1.0, 1.0, 1.0, 1.0, 1.0])
(3.527969483133579, [0.53748, 0.17358389261744953, 0.046472972972972905, 0.013088435374149634, 0.004999999999999995])
546 - 周杰伦 - 园游会.xml
(0.06599266055045855, [1.0, 1.0, 1.0, 1.0, 1.0])
(3.46224305783539, [0.4092796934865903, 0.14013076923076936, 0.0321158301

In [147]:
INF = 2147483647.0
std_min = ("none", INF)
ave_min = ("none", INF)
sum_std = 0.
sum_ave = 0.
sum_2_std = 0.
sum_2_ave = 0.
for (name, std, ave) in res3:
    if(std[0] < std_min[1]):
        std_min = (name, std[0])
    if(ave[0] < ave_min[1]):
        ave_min = (name, ave[0])
    sum_std += std[0]
    sum_ave += ave[0]
    sum_2_std += std[0] * std[0]
    sum_2_ave += ave[0] * ave[0]

sum_std /= len(res3)
sum_ave /= len(res3)
sum_2_std /= len(res3)
sum_2_ave /= len(res3)
var_std = sum_2_std - sum_std * sum_std
var_ave = sum_2_ave - sum_ave * sum_ave
print(std_min)
print(ave_min)
print("Average: "+" ".join([str(sum_std), str(sum_ave)]))
print("Variation:" + " ".join([str(var_std), str(var_ave)]))

('276 - 周杰伦 - 听妈妈的话.xml', -1)
('276 - 周杰伦 - 听妈妈的话.xml', -1)
Average: 0.8257320785293658 3.8686138375488412
Variation:0.460885910468819 0.7310581793846715


In [102]:
res3 += res_silly3

In [157]:

def std_bleu(elem):
    return elem[1][0]
res3.sort(key = std_bleu)

for i in range(0, len(res3)):
    print(i+1)
    for m in res3[i]:
        print(m)
    #print(str(i+1) + ': ' + res3[i][0])
    print('--------')

print('\n\n\n')
def ave_bleu(elem):
    return elem[2][0]

res3.sort(key = ave_bleu)
for i in range(0, len(res3)):
    print(i+1)
    for m in res3[i]:
        print(m)
   # print(str(i+1)+ ': ' + res3[i][0])
    print('========')



1
276 - 周杰伦 - 听妈妈的话.xml
(-1, [1.0, 0.9473684210526315, 0.4864864864864865, 0.1388888888888889, 0.0])
(-1, [0.5932820512820508, 0.10484210526315813, 0.008108108108108115, 0.0005555555555555556, 0.0])
--------
2
586 - 许巍 - 灿烂.xml
(-1, [0.9719101123595506, 0.6384180790960452, 0.2897727272727273, 0.05142857142857143, 0.0])
(-1, [0.39093258426966465, 0.06201129943502821, 0.004977272727272736, 0.00017142857142857143, 0.0])
--------
3
1125 - 梁静茹 - 美丽人生.xml
(0.28492393465091614, [1.0, 0.9893617021276596, 0.9359430604982206, 0.7107142857142857, 0.3655913978494624])
(3.2431481401772047, [0.38679858657243743, 0.16407801418439683, 0.04881850533807827, 0.011535714285714304, 0.002537634408602149])
--------
4
108 - 周杰伦 - 超人不会飞.xml
(0.29284228596120726, [0.941952506596306, 0.9232804232804233, 0.843501326259947, 0.675531914893617, 0.4666666666666667])
(3.2551942663692395, [0.310411609498681, 0.12020634920634915, 0.038408488063660526, 0.012553191489361654, 0.00474666666666667])
--------
5
421 - 周杰伦 - 止战

1003 - 杨宗纬 - 横扫天下之笑天下.xml
(1.7967771885867068, [0.9918032786885246, 0.8677685950413223, 0.49166666666666664, 0.15126050420168066, 0.00847457627118644])
(5.416014739485834, [0.5320000000000003, 0.10484297520661161, 0.010266666666666677, 0.0007731092436974794, 1.694915254237288e-05])
--------
286
2218 - 李健 - 舍得.xml
(1.8655464102384198, [1.0, 0.7966101694915254, 0.5042735042735043, 0.13793103448275862, 0.034782608695652174])
(5.285504676452747, [0.44810084033613407, 0.11922033898305079, 0.018940170940170892, 0.0005862068965517241, 0.0001217391304347826])
--------
287
1826 - 高橋李依 - ぼうやの夢よ.xml
(1.9740664300590813, [1.0, 0.8404255319148937, 0.4946236559139785, 0.16304347826086957, 0.04395604395604396])
(5.580484244117623, [0.5179157894736843, 0.11238297872340357, 0.01410752688172042, 0.0006086956521739129, 8.791208791208792e-05])
--------
288
2305 - 胡彦斌Tiger Hu - 如果.xml
(2.0095360413180035, [0.9813084112149533, 0.8773584905660378, 0.3904761904761905, 0.10576923076923077, 0.00970873786407766

In [160]:
print(res3[:3])
res3 = res3[2:]

[('276 - 周杰伦 - 听妈妈的话.xml', (-1, [1.0, 0.9473684210526315, 0.4864864864864865, 0.1388888888888889, 0.0]), (-1, [0.5932820512820508, 0.10484210526315813, 0.008108108108108115, 0.0005555555555555556, 0.0])), ('586 - 许巍 - 灿烂.xml', (-1, [0.9719101123595506, 0.6384180790960452, 0.2897727272727273, 0.05142857142857143, 0.0]), (-1, [0.39093258426966465, 0.06201129943502821, 0.004977272727272736, 0.00017142857142857143, 0.0])), ('1125 - 梁静茹 - 美丽人生.xml', (0.28492393465091614, [1.0, 0.9893617021276596, 0.9359430604982206, 0.7107142857142857, 0.3655913978494624]), (3.2431481401772047, [0.38679858657243743, 0.16407801418439683, 0.04881850533807827, 0.011535714285714304, 0.002537634408602149]))]


In [184]:
final_rank = {}

res3.sort(key = ave_bleu)
for i in range(0, len(res3)):
    final_rank[res3[i][0]] = (0, i)
res3.sort(key = std_bleu)

for i in range(0, len(res3)):
    final_rank[res3[i][0]] = (i, final_rank[res3[i][0]][1])
    tmpppp = final_rank[res3[i][0]]
    if(tmpppp[0]-tmpppp[1] > 50):
        print(res3[i][0].split('.')[0][:20] + str(tmpppp))
    
print(final_rank)

1663 - 堀江由衣 - アンダンテ(70, 6)
1085 - 张杰 - 火鸟(90, 31)
1467 - 下川みくに - 南风(94, 40)
906 - 周杰伦 - 雨下一整晚(96, 39)
177 - 许巍 - 温暖(97, 42)
1682 - つじあやの - 月が泣(100, 35)
1724 - 東山奈央 - 想い出がい(116, 36)
205 - 萧煌奇 - 不唱骊歌(118, 56)
2099 - 胡彦斌Tiger Hu -(126, 43)
2594 - 莫文蔚 - Snacks(132, 23)
301 - 林俊杰 - 无聊(146, 77)
1165 - 周杰伦 - 超跑女神(155, 38)
593 - 范玮琪 - 我们是朋友(169, 107)
56 - 周杰伦 - 公主病(176, 125)
946 - 范玮琪 - 我们之间的事(178, 126)
2703 - 林宥嘉 - 感同身受(180, 67)
1180 - 周杰伦 - 天涯过客(181, 94)
830 - 周杰伦 - 甜甜的(182, 80)
1083 - 周杰伦 - 蒲公英的约定(193, 65)
1740 - 石田耀子 - Otome (198, 140)
1424 - 水樹奈々 - 梦幻(200, 104)
1795 - THE ROLLING G(201, 74)
558 - 范玮琪 - 我就是这样(205, 98)
733 - 周杰伦 - 双刀(211, 72)
461 - 周杰伦 - 黄金甲(221, 157)
660 - 张惠妹 - 给我感觉(231, 170)
254 - 周杰伦,杨瑞代 - 我要夏天(233, 106)
1072 - 周杰伦 - 你好吗(249, 101)
2242 - 汪峰 - 青春(250, 109)
494 - 陈粒 - 易燃易爆炸(261, 135)
346 - 许巍 - 爱情(271, 182)
{'1602 - HIRO-X - future.xml': (7, 0), '44 - 周杰伦 - 说好的幸福呢.xml': (6, 1), '2362 - 金莎 - 我知道我们不会有结果.xml': (37, 2), '797 - 张杰 - 我想.xml': (12, 3), '95 - 徐良,吴昕 - 星座恋人.xml

In [170]:
final_output = [(i, j) for (i, j) in zip(final_rank.keys(), final_rank.values())]
print(final_output)

[('1602 - HIRO-X - future.xml', (7, 0)), ('44 - 周杰伦 - 说好的幸福呢.xml', (6, 1)), ('2362 - 金莎 - 我知道我们不会有结果.xml', (37, 2)), ('797 - 张杰 - 我想.xml', (12, 3)), ('95 - 徐良,吴昕 - 星座恋人.xml', (24, 4)), ('874 - 周杰伦 - 爱在西元前.xml', (4, 5)), ('1663 - 堀江由衣 - アンダンテ.xml', (70, 6)), ('622 - 周杰伦 - 开不了口.xml', (3, 7)), ('421 - 周杰伦 - 止战之殇.xml', (2, 8)), ('1294 - ghostnote - I、爱、会い.xml', (19, 9)), ('569 - 梦想启动 - 周杰伦.xml', (27, 10)), ('2204 - 林宥嘉 - Runaway Mama.xml', (11, 11)), ('2386 - 王心凌 - BFF.xml', (14, 12)), ('169 - 梁静茹 - 丝路.xml', (5, 13)), ('22 - 周杰伦 - 晴天.xml', (35, 14)), ('1125 - 梁静茹 - 美丽人生.xml', (0, 15)), ('715 - 王力宏 - 柴米油盐酱醋茶.xml', (52, 16)), ('892 - 张杰 - 逆战.xml', (55, 17)), ('108 - 周杰伦 - 超人不会飞.xml', (1, 18)), ('441 - 周杰伦 - 最后的战役.xml', (8, 19)), ('38 - 周杰伦 - 浪漫手机.xml', (39, 20)), ('530 - 薛之谦 - Memory.xml', (63, 21)), ('1228 - 王力宏 - 大城小爱.xml', (21, 22)), ('2594 - 莫文蔚 - Snacks.xml', (132, 23)), ('1030 - 杨宗纬 - 幸福的风.xml', (25, 24)), ('1565 - AKB48 - Better.xml', (13, 25)), ('1159 - 周杰伦 - 彩虹.xml', (26, 26)), ('5

In [172]:
def mysecond(elem):
    return elem[1][0] * 0.6 + elem[1][1] * 0.4
final_output.sort(key = mysecond)
print(final_output)

[('44 - 周杰伦 - 说好的幸福呢.xml', (6, 1)), ('1602 - HIRO-X - future.xml', (7, 0)), ('421 - 周杰伦 - 止战之殇.xml', (2, 8)), ('874 - 周杰伦 - 爱在西元前.xml', (4, 5)), ('622 - 周杰伦 - 开不了口.xml', (3, 7)), ('1125 - 梁静茹 - 美丽人生.xml', (0, 15)), ('108 - 周杰伦 - 超人不会飞.xml', (1, 18)), ('169 - 梁静茹 - 丝路.xml', (5, 13)), ('797 - 张杰 - 我想.xml', (12, 3)), ('2204 - 林宥嘉 - Runaway Mama.xml', (11, 11)), ('441 - 周杰伦 - 最后的战役.xml', (8, 19)), ('2386 - 王心凌 - BFF.xml', (14, 12)), ('1294 - ghostnote - I、爱、会い.xml', (19, 9)), ('95 - 徐良,吴昕 - 星座恋人.xml', (24, 4)), ('625 - 周杰伦 - 暗号.xml', (9, 30)), ('1565 - AKB48 - Better.xml', (13, 25)), ('569 - 梦想启动 - 周杰伦.xml', (27, 10)), ('1228 - 王力宏 - 大城小爱.xml', (21, 22)), ('599 - 周杰伦 - 自导自演.xml', (18, 27)), ('2362 - 金莎 - 我知道我们不会有结果.xml', (37, 2)), ('1262 - 许嵩-何曼婷 - 素颜.xml', (22, 28)), ('1030 - 杨宗纬 - 幸福的风.xml', (25, 24)), ('1207 - 周杰伦 - 星晴.xml', (10, 48)), ('1159 - 周杰伦 - 彩虹.xml', (26, 26)), ('22 - 周杰伦 - 晴天.xml', (35, 14)), ('778 - 孙燕姿 - 180度.xml', (16, 50)), ('150 - 周杰伦 - 回到过去.xml', (20, 44)), ('38 - 周杰伦 - 

In [173]:
for i in range(0, len(final_output)):
    print(str(i+1)+ ': ' + final_output[i][0])

1: 44 - 周杰伦 - 说好的幸福呢.xml
2: 1602 - HIRO-X - future.xml
3: 421 - 周杰伦 - 止战之殇.xml
4: 874 - 周杰伦 - 爱在西元前.xml
5: 622 - 周杰伦 - 开不了口.xml
6: 1125 - 梁静茹 - 美丽人生.xml
7: 108 - 周杰伦 - 超人不会飞.xml
8: 169 - 梁静茹 - 丝路.xml
9: 797 - 张杰 - 我想.xml
10: 2204 - 林宥嘉 - Runaway Mama.xml
11: 441 - 周杰伦 - 最后的战役.xml
12: 2386 - 王心凌 - BFF.xml
13: 1294 - ghostnote - I、爱、会い.xml
14: 95 - 徐良,吴昕 - 星座恋人.xml
15: 625 - 周杰伦 - 暗号.xml
16: 1565 - AKB48 - Better.xml
17: 569 - 梦想启动 - 周杰伦.xml
18: 1228 - 王力宏 - 大城小爱.xml
19: 599 - 周杰伦 - 自导自演.xml
20: 2362 - 金莎 - 我知道我们不会有结果.xml
21: 1262 - 许嵩-何曼婷 - 素颜.xml
22: 1030 - 杨宗纬 - 幸福的风.xml
23: 1207 - 周杰伦 - 星晴.xml
24: 1159 - 周杰伦 - 彩虹.xml
25: 22 - 周杰伦 - 晴天.xml
26: 778 - 孙燕姿 - 180度.xml
27: 150 - 周杰伦 - 回到过去.xml
28: 38 - 周杰伦 - 浪漫手机.xml
29: 1694 - 戸松遥 - あなたの幸せに私がなれるなら.xml
30: 1316 - AKB48 - 恋するフォーチュンクッキー.xml
31: 485 - 周杰伦 - 轨迹.xml
32: 2058 - 7!! - スタートライン (Acoustic Ver.xml
33: 2302 - Tank - 生还者.xml
34: 715 - 王力宏 - 柴米油盐酱醋茶.xml
35: 25 - 周杰伦,杨瑞代 - 爱的飞行日记.xml
36: 634 - 周杰伦 - 枫.xml
37: 385 - 周杰伦 - 麦芽糖.xml
38: 892

In [56]:
res2.append((ns_ks1_ts44[1265].filename, std_compute_bleu(new2_ns_ks1_ts44[1265], sample_list2), ave_compute_bleu(new2_ns_ks1_ts44[1265], sample_list2)))

In [186]:
for i in sample_list_num3:
    print(ns_ts44[i].filename)

2310 - 周传雄 - 离我远一点.xml
2205 - 张韶涵 - 把你信仰.xml
433 - 王力宏 - 你和我.xml
126 - 萧煌奇 - 逆风飞翔.xml
1133 - 萧煌奇 - 一定要成功.xml
481 - 徐良 - 一公里的幸福.xml
114 - 许嵩 - 我们的恋爱是对生命的严重浪费.xml
2365 - 胡彦斌 - 沉默的大多数.xml
61 - 李健 - 一往情深的恋人.xml
666 - 许嵩 - 城府.xml
18 - 林俊杰 - 你，有没有过.xml
131 - 孙燕姿 - 遇见.xml
2100 - 张韶涵 - 谁爱谁.xml
1010 - 梁静茹 - 我喜欢.xml
837 - 梁静茹 - 在晴朗的一天出发.xml
1281 - AKB48 - ヘビーローテーション.xml
2031 - ハンバート ハンバート - 日が落ちるまで.xml
415 - 陈粒 - 走马.xml
531 - 杨宗纬 - 空白格.xml
1392 - I've - L'oiseau Bleu.xml
2048 - 南壽あさ子 - やり過ごされた時間たち.xml
1284 - ZARD - Don't you see!.xml
1956 - Alex-宋智恩 - 연애세포.xml
102 - 张惠妹 - 别在伤口洒盐.xml
2356 - 张韶涵 - 第一页.xml
886 - 林俊杰 - 子弹列车.xml
947 - 张靓颖 - 他们说.xml
831 - G.E.M.xml
1426 - FictionJunction - Parallel Hearts.xml
119 - 林俊杰 - Our Singapore.xml
2602 - 林宥嘉 - 晚安.xml
1079 - 梁静茹 - 爱情之所以为爱情.xml
1878 - 张根硕 - 사랑비.xml
2036 - 鈴湯 - Love you more.xml
2368 - 胡彦斌 - 在一起.xml
920 - 萧敬腾 - Kelly.xml
930 - 梁静茹 - 转圈圈.xml
66 - 林志炫 - 进退两难.xml
1245 - 张杰 - 浩瀚.xml
1569 - 彩音 - その先にある、誰かの笑顔の為に.xml
2010 - Dali - ムーンライト伝

In [269]:
random_ns = []
now_index = 5
random_list = [52,53,55,57,59,60, 62, 64, 65, 67, 69, 71, 72, 74, 76, 77, 79, 81]


for i in range(0, 1000):
    choice = random.randint(0, 6)
    if(choice < 2):
        step = 0
    elif(choice < 5):
        step = 1
    else:
        step = 2
    if(now_index < 2):
        random_ns.append((random_list[now_index], Fraction(1, 8)))
        now_index += step
    elif(now_index > 15):
        random_ns.append((random_list[now_index], Fraction(1, 8)))
        now_index -= step
    else: # index is in [2, 15]
        if(random.randint(2, 17) > now_index):
            random_ns.append((random_list[now_index], Fraction(1, 8)))
            now_index += step
        else:
            random_ns.append((random_list[now_index], Fraction(1, 8)))
            now_index -= step
        
random_ns.append((random_list[now_index], Fraction(1, 8)))
    
#print(ns_ts44[zjl3[0]].filename, new3_ns_ts44[zjl3[0]])
# for i in range(0, 32):
#     random_ns.append()

print(random_ns)




[(60, Fraction(1, 8)), (64, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (69, Fraction(1, 8)), (65, Fraction(1, 8)), (67, Fraction(1, 8)), (64, Fraction(1, 8)), (65, Fraction(1, 8)), (62, Fraction(1, 8)), (65, Fraction(1, 8)), (62, Fraction(1, 8)), (65, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (71, Fraction(1, 8)), (71, Fraction(1, 8)), (71, Fraction(1, 8)), (74, Fraction(1, 8)), (72, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (62, Fraction(1, 8)), (65, Fraction(1, 8)), (69, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (69, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (69, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (65, Fraction(1, 8)), (65, Fraction(1, 8)), (65, Fraction(1, 8)), (69, Fraction(1, 8)), (67, Fraction(1, 8)), (71, Fraction(1, 8)), (69, Frac

In [213]:
pitchh = 0
pitchh_list = []
for i in range(0, 230):
    print(pitchh)
    pitchh_list.append(pitchh)
    pitchh += random_ns[i][0]

print(pitchh)
pitchh_list.append(pitchh)

0
2
0
4
4
4
4
5
5
2
5
2
0
-3
-3
-1
2
0
2
5
7
7
4
2
2
2
4
5
9
9
5
5
5
2
2
5
5
9
9
7
7
7
9
12
14
14
11
7
5
9
12
11
12
16
19
17
14
17
14
16
17
14
12
11
14
14
14
12
12
11
9
7
7
4
7
9
9
9
7
9
11
11
11
12
11
12
11
9
9
11
9
11
9
12
14
14
17
16
14
11
14
12
14
16
16
16
19
16
16
14
12
11
12
11
7
11
11
14
16
12
12
11
14
14
12
12
11
9
9
9
5
2
5
5
9
7
9
7
9
7
5
7
5
9
11
12
11
9
7
11
7
7
9
12
9
9
9
7
5
7
11
14
11
11
11
9
9
5
7
5
5
5
9
9
11
9
5
7
5
7
9
11
11
9
9
5
7
9
5
5
9
11
9
9
7
11
9
7
9
11
11
11
11
9
5
5
5
4
4
0
2
5
5
5
2
2
5
4
5
4
7
9
5
5
9
7
7
7
4
2
2


In [239]:
change_list = ['C', '#C', 'D', '#D', 'E', 'F', '#F', 'G', '#G', 'A', 'bB', 'B']
for pitchh, lengthh in random_ns:
    while(pitchh < 0):
        pitchh += 12
    pitchh %= 12
    print(change_list[pitchh])

C
D
F
D
D
E
F
F
G
G
E
C
A
A
B
B
B
C
C
D
E
E
F
A
G
E
F
D
C
B
D
B
C
E
G
A
F
F
F
G
G
G
G
B
C
C
A
A
G
B
A
G
E
E
F
F
E
E
E
G
G
E
F
G
E
E
D
D
F
E
F
F
F
D
F
E
G
A
G
B
A
G
A
C
C
D
E
D
D
E
E
D
B
C
D
D
D
C
C
A
C
E
D
C
B
A
G
G
F
F
A
A
A
A
A
G
G
A
F
A
A
F
F
G
G
F
F
G
A
A
A
F
G
G
B
G
F
E
F
G
G
B
B
C
B
A
G
E
E
E
D
D
F
F
G
G
G
G
F
A
A
C
B
G
E
E
E
D
E
F
E
G
G
G
G
E
E
C
A
C
A
B
D
F
G
A
F
D
D
C
C
C
B
C
C
C
D
E
D
E
E
G
A
G
B
B
G
G
F
F
D
D
D
B
C
C
E
C
C
C
E
G
F
G
G
G
F
G
F
F
A


In [215]:
randomres = compute_multi_bleu([("random", random_ns)], sample_list3, minn = 2, maxn = 5, smooth = False, advanced = False)

random
(0.1380138550472244, [0.9956331877729258, 0.8991228070175439, 0.6431718061674009])
(2.305269950140883, [0.16592139737991268, 0.05324561403508786, 0.011198237885462535])


In [216]:
get_full_ranks(res3_expand + randomres)

1
1125 - 梁静茹 - 美丽人生.xml
(0.10459517617937156, [0.9893617021276596, 0.9359430604982206, 0.7107142857142857])
(2.3223416410526267, [0.16407801418439683, 0.04881850533807827, 0.011535714285714304])
--------
2
622 - 周杰伦 - 开不了口.xml
(0.13430419367113425, [0.9803921568627451, 0.8976377952755905, 0.6640316205533597])
(2.2921960360853957, [0.16410196078431444, 0.05082677165354352, 0.012498023715415033])
--------
3
random
(0.1380138550472244, [0.9956331877729258, 0.8991228070175439, 0.6431718061674009])
(2.305269950140883, [0.16592139737991268, 0.05324561403508786, 0.011198237885462535])
--------
4
95 - 徐良,吴昕 - 星座恋人.xml
(0.14408882056156455, [0.9900990099009901, 0.9154228855721394, 0.62])
(2.2095908080986826, [0.19409900990099016, 0.053611940298507764, 0.013939999999999942])
--------
5
169 - 梁静茹 - 丝路.xml
(0.1501393746630053, [0.9675925925925926, 0.8790697674418605, 0.6448598130841121])
(2.2942441634766952, [0.18515740740740744, 0.04983255813953486, 0.011205607476635477])
--------
6
1207 - 周杰伦 - 

(0.5047732273558551, [0.8854625550660793, 0.5530973451327433, 0.27111111111111114])
(3.1746406564698977, [0.08715418502202645, 0.014716814159292052, 0.0023822222222222235])
--------
245
1583 - Supercell - 君の知らない物語.xml
(0.5077809675828098, [0.829004329004329, 0.5162689804772235, 0.3065217391304348])
(2.7408097284725113, [0.1025238095238096, 0.025175704989153894, 0.006713043478260873])
--------
246
714 - 范玮琪 - 陪你.xml
(0.5183029667001968, [0.8630952380952381, 0.6047904191616766, 0.24096385542168675])
(3.013818926732381, [0.13852380952380944, 0.018431137724550822, 0.00227710843373494])
--------
247
1993 - 北乃きい - 風華恋.xml
(0.5199201011732586, [0.875, 0.5786163522012578, 0.2468354430379747])
(2.9029805038814174, [0.16056249999999989, 0.024352201257861722, 0.0023164556962025334])
--------
248
830 - 周杰伦 - 甜甜的.xml
(0.5205963294309622, [0.8653061224489796, 0.5737704918032787, 0.25102880658436216])
(2.773402037455066, [0.12800000000000006, 0.023360655737704933, 0.005086419753086425])
--------
249
