In [13]:
import os

# Make the magenta checkout the working directory so that the relative
# "tmp/..." paths used by later cells resolve inside it.
magenta_root = r"C:\Users\arkw\GitHub\magenta"
os.chdir(magenta_root)
print(os.getcwd())

C:\Users\arkw\GitHub\magenta


In [14]:
import numpy as np

def calculate_c_diatonic(notes):
    """Return the fraction of notes whose pitch class lies in the C diatonic scale.

    Args:
        notes: List of objects exposing an integer ``pitch`` attribute
            (the original author passes ``pretty_midi.Note`` objects).

    Returns:
        Number of in-scale notes divided by ``len(notes)``, or ``0`` when
        ``notes`` is empty.
    """
    if not notes:
        return 0
    # Pitch classes (pitch mod 12) of C, D, E, F, G, A and B.
    in_scale = (0, 2, 4, 5, 7, 9, 11)
    hits = 0
    for note in notes:
        if note.pitch % 12 in in_scale:
            hits += 1
    return hits / len(notes)

def calculate_note_density(notes, tempo):
    """Return the number of notes per 16th-note step.

    Args:
        notes: List of objects with an ``end`` time in seconds.
        tempo: Tempo in quarter notes per minute.

    Returns:
        ``len(notes)`` divided by the passage length in 16th-note steps,
        measured from time 0 to the latest note end. ``0`` when there are
        no notes or the passage has zero length.
    """
    if not notes:
        # Indexing notes[-1] on an empty list used to raise IndexError.
        return 0
    second_per_beat = 60 / tempo  # seconds per quarter note
    # Take the latest end time instead of notes[-1].end: the last list entry
    # is not necessarily the note that finishes last (overlaps, unsorted input).
    last_end = max(note.end for note in notes)
    note_steps = last_end / second_per_beat * 4  # four 16th steps per beat
    return len(notes) / note_steps if note_steps else 0

def calculate_average_interval(notes):
    """Return the mean absolute pitch distance between consecutive notes.

    Notes are compared in list order. With fewer than two notes there is no
    interval and the result is ``0``.
    """
    total = 0
    pair_count = 0
    for previous, current in zip(notes, notes[1:]):
        total += abs(current.pitch - previous.pitch)
        pair_count += 1
    if pair_count == 0:
        return 0
    return total / pair_count

def calculate_average_pitch(notes):
    """Return the arithmetic mean of the notes' ``pitch`` values.

    The mean is computed with ``np.mean``, so an empty list is passed to
    NumPy unchanged rather than being special-cased here.
    """
    return np.mean([note.pitch for note in notes])

def calculate_syncopation(notes, tempo, syncopation_type):
    """Return the fraction of notes that are syncopated at a given subdivision.

    A note counts as syncopated when its onset falls on an off-beat position
    of the chosen subdivision (an odd 8th or an odd 16th of the grid) and more
    than one subdivision has elapsed since the previous onset. The first note
    is compared against time 0.

    Args:
        notes: List of objects with a ``start`` time in seconds, in playing
            order (the original author passes ``pretty_midi.Note`` objects).
        tempo: Tempo in quarter notes per minute (qpm).
        syncopation_type: ``"8th"`` or ``"16th"``.

    Returns:
        Number of syncopated notes divided by ``len(notes)``, or ``0`` when
        ``notes`` is empty.

    Raises:
        ValueError: If ``syncopation_type`` is neither ``"8th"`` nor ``"16th"``.
    """
    # Length of one subdivision, in beats.
    subdivisions = {"8th": 0.5, "16th": 0.25}
    if syncopation_type not in subdivisions:
        # Previously this surfaced later as an UnboundLocalError.
        raise ValueError(
            f"syncopation_type must be '8th' or '16th', got {syncopation_type!r}"
        )
    if not notes:
        return 0

    step = subdivisions[syncopation_type]
    # Off-beat positions of a subdivision repeat every two subdivisions.
    # The modulus is expressed in beats; the old code used a value in seconds
    # (second_per_beat * 2), which made the result depend on the tempo.
    period = step * 2
    # Onsets come from floating-point time conversion, so compare with a
    # small tolerance (in beats) instead of exact equality.
    tolerance = 1e-6
    second_per_beat = 60 / tempo

    syncopated_count = 0
    prev_note_start = 0
    for note in notes:
        position = note.start / second_per_beat  # onset in beats
        on_off_beat = abs(position % period - step) < tolerance
        gap = (note.start - prev_note_start) / second_per_beat
        if on_off_beat and gap > step + tolerance:
            syncopated_count += 1
        prev_note_start = note.start
    return syncopated_count / len(notes)

In [3]:
import numpy as np

def analyze_tonality(notes):
    """Score the notes' pitch-class distribution against major and minor profiles.

    The profiles are applied without transposition: index 0 of each weight
    list is matched against pitch class 0.

    Args:
        notes: Iterable of objects exposing an integer ``pitch`` attribute.

    Returns:
        ``(major_likelihood, minor_likelihood)``: the two scores divided by
        their sum, so the pair adds up to 1.
    """
    # Per-pitch-class weights for major and minor.
    # NOTE(review): these look like the Krumhansl-Kessler key profiles - confirm.
    major_profile = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    minor_profile = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    # Relative frequency of each of the 12 pitch classes.
    histogram = np.zeros(12)
    for note in notes:
        histogram[note.pitch % 12] += 1
    histogram = histogram / np.sum(histogram)

    # With two length-12 inputs np.correlate returns a single element.
    major_score = np.correlate(histogram, major_profile)[0]
    minor_score = np.correlate(histogram, minor_profile)[0]

    # Normalise so the two values sum to 1.
    score_sum = major_score + minor_score
    return major_score / score_sum, minor_score / score_sum

In [4]:
import pretty_midi
import numpy as np

def analyze_tonality_all_keys(notes, return_key=False):
    """Score major/minor likelihood for the best-matching tonic among all 12.

    The pitch-class histogram is rotated so that each candidate tonic in turn
    lines up with index 0 of the weight profiles. The tonic whose larger score
    (major or minor) is highest is selected, and its two scores are normalised
    to sum to 1.

    Args:
        notes: List of objects exposing an integer ``pitch`` attribute.
        return_key: When true, also return the name of the selected tonic.
            Defaults to ``False`` so existing two-value callers keep working.

    Returns:
        ``(major_likelihood, minor_likelihood)``, or
        ``(major_likelihood, minor_likelihood, key_name)`` when ``return_key``
        is true. For an empty ``notes`` list both likelihoods are ``nan`` and
        ``key_name`` is ``None``.
    """
    # Per-scale-degree weights for major and minor.
    # NOTE(review): these look like the Krumhansl-Kessler key profiles - confirm.
    major_weights = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    minor_weights = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]
    key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    if not notes:
        # An empty histogram would be divided by zero; return the same nan
        # values explicitly instead of letting a key be picked from nan scores.
        nan = float("nan")
        return (nan, nan, None) if return_key else (nan, nan)

    # Relative frequency of each pitch class.
    pitch_hist = np.zeros(12)
    for note in notes:
        pitch_hist[note.pitch % 12] += 1
    pitch_hist = pitch_hist / np.sum(pitch_hist)

    # Score every candidate tonic. np.roll(hist, -i) moves pitch class i to index 0.
    correlations = []
    for tonic in range(12):
        rotated = np.roll(pitch_hist, -tonic)
        major_corr = np.correlate(rotated, major_weights)[0]
        minor_corr = np.correlate(rotated, minor_weights)[0]
        correlations.append((major_corr, minor_corr))

    # Pick the tonic with the highest score in either mode (first one wins ties).
    best_key = max(range(12), key=lambda i: max(correlations[i]))
    best_major_corr, best_minor_corr = correlations[best_key]

    # Normalise so the two likelihoods sum to 1.
    total = best_major_corr + best_minor_corr
    major_likelihood = best_major_corr / total
    minor_likelihood = best_minor_corr / total

    if return_key:
        return major_likelihood, minor_likelihood, key_names[best_key]
    return major_likelihood, minor_likelihood

# Usage example
# midi_data = pretty_midi.PrettyMIDI('path_to_your_midi_file.mid')
# notes = midi_data.instruments[0].notes
# major_likelihood, minor_likelihood, key = analyze_tonality_all_keys(notes, return_key=True)
# print(f"Most likely key: {key}")
# print(f"Major likelihood: {major_likelihood:.2f}")
# print(f"Minor likelihood: {minor_likelihood:.2f}")

In [15]:
import music21

def detect_key(melody_notes):
    """Estimate the key of a melody using music21's key analysis.

    Each input note's ``pitch`` is wrapped in a ``music21.note.Note`` and
    appended to a stream, which is then analysed with ``analyze('key')``.
    Only the pitches are carried over from the input notes.
    The original author describes the analysis as the Krumhansl-Schmuckler
    algorithm.

    Args:
        melody_notes: Iterable of objects exposing a ``pitch`` attribute
            (the original author passes ``pretty_midi.Note`` objects).

    Returns:
        ``(tonic_name, mode, correlation_coefficient)`` of the detected key.
    """
    melody = music21.stream.Stream()
    for source_note in melody_notes:
        # Convert the numeric pitch into a music21 Note.
        converted = music21.note.Note(music21.pitch.Pitch(source_note.pitch))
        melody.append(converted)

    detected = melody.analyze('key')
    correlation = detected.correlationCoefficient
    return detected.tonic.name, detected.mode, correlation

In [16]:
import pretty_midi

# Custom exception type, intended to be raised when a loaded MIDI file
# does not meet the expected conditions.
# NOTE(review): nothing in the visible cells raises it - confirm it is still needed.
class UnsupportedMidiFileException(Exception):
  "Unsupported MIDI File"

def analyze_midi(file_path):
    """Compute one row of musical features for a MIDI file.

    Only the first instrument track of the file is analysed.

    Args:
        file_path: Path of the MIDI file, loaded with ``pretty_midi.PrettyMIDI``.

    Returns:
        A 15-tuple ``(file_path, tempo, time_signature, c_diatonic, note_num,
        note_density, average_interval, syncopation_16th, syncopation_8th,
        major_likelihood, minor_likelihood, tonic, mode, key_correlation,
        average_pitch)``. ``time_signature`` is the file's first time-signature
        event, or ``None`` if it has none. When the file has no instrument, or
        its first instrument has no notes, the twelve feature slots are all
        ``0.0``.
    """
    midi_data = pretty_midi.PrettyMIDI(file_path)
    _, tempi = midi_data.get_tempo_changes()
    # Round instead of truncating so a tempo that comes back as e.g.
    # 119.999... from floating-point conversion is recorded as 120.
    tempo = int(round(tempi[0]))
    # A file without any time-signature event has an empty list here;
    # indexing [0] unconditionally raised IndexError.
    signatures = midi_data.time_signature_changes
    time_signature = signatures[0] if signatures else None

    notes = midi_data.instruments[0].notes if midi_data.instruments else []
    if not notes:
        # Nothing to analyse: emit the placeholder row.
        return (file_path, tempo, time_signature,
                0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    # Feature computation
    c_diatonic = calculate_c_diatonic(notes)  # share of notes in the C diatonic scale
    note_num = len(notes)  # number of notes
    note_density = calculate_note_density(notes, tempo)  # notes per 16th step
    average_interval = calculate_average_interval(notes)  # mean pitch interval
    syncopation_16th = calculate_syncopation(notes, tempo, "16th")  # share of 16th-syncopated notes
    syncopation_8th = calculate_syncopation(notes, tempo, "8th")  # share of 8th-syncopated notes
    major_likelihood, minor_likelihood = analyze_tonality_all_keys(notes)
    tonic, mode, key_correlation = detect_key(notes)
    average_pitch = calculate_average_pitch(notes)

    return (file_path, tempo, time_signature,
            c_diatonic, note_num, note_density, average_interval, syncopation_16th, syncopation_8th,
            major_likelihood, minor_likelihood, tonic, mode, key_correlation, average_pitch)

In [17]:
import glob
import os

# Directory holding the generated MIDI files to analyse.
midi_path = r"C:\Users\arkw\GitHub\magenta\tmp\music_vae\generated\16bar_8192"
# glob returns matches in arbitrary order; sort so the rows of the result
# CSV come out in the same order on every run.
files = sorted(glob.glob(os.path.join(midi_path, '*.mid')))

In [18]:
import csv

def write_to_csv(results, csv_path):
    """Write rows to a CSV file without a header, creating the parent directory.

    NOTE: a later cell defines a second ``write_to_csv`` (pandas-based, with a
    header row). Whichever definition was executed most recently is the one
    in effect.

    Args:
        results: Iterable of rows, each an iterable of field values.
        csv_path: Destination file path; an existing file is overwritten.
    """
    # A bare file name has an empty dirname, and os.makedirs('') raises,
    # so only create the directory when there is one.
    parent_dir = os.path.dirname(csv_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # newline='' lets the csv module control line endings itself.
    with open(csv_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerows(results)  # one CSV line per element of results

In [19]:
import pandas as pd

def write_to_csv(results, csv_path):
    """Write analysis rows to a CSV file with a header row.

    Args:
        results: Sequence of 15-field rows in the column order listed below
            (the order produced by ``analyze_midi``).
        csv_path: Destination file path; an existing file is overwritten.
            The parent directory is created when it does not exist.
    """
    columns = [
        'file_path',
        'tempo',
        'time_signature',
        'c_diatonic',
        'note_num',
        'note_density',
        'average_interval',
        'syncopation_16th',
        'syncopation_8th',
        'major_likelihood',
        'minor_likelihood',
        'tonic',
        'mode',
        'key_correlation',
        'average_pitch',
    ]
    # Create the target directory first, as the csv-based variant does.
    # A bare file name has no directory part, so there is nothing to create.
    parent_dir = os.path.dirname(csv_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df = pd.DataFrame(results, columns=columns)
    df.to_csv(csv_path, index=False)

In [20]:
import datetime

# Analyse every collected MIDI file; each call yields one CSV row.
results = [analyze_midi(file) for file in files]

# Write the rows to a timestamped CSV under tmp/.
now = datetime.datetime.now()
filename = f"tmp/result_{now.strftime('%Y%m%d_%H%M%S')}.csv"
write_to_csv(results, filename)

指定された条件を満たすMIDIファイルを絞り込み、別の場所に保存する（必要に応じて）

In [46]:
import pandas as pd
import shutil
import os

# Load the feature table produced by the analysis step.
csv_file_path = 'tmp/result_20240529_150740.csv'
df = pd.read_csv(csv_file_path)

# Keep rows with at least 4 notes and an average interval below 8.
has_enough_notes = df['note_num'] >= 4
has_small_intervals = df['average_interval'] < 8
filtered_df = df[has_enough_notes & has_small_intervals]

# Destination directory (created if missing).
destination_dir = r'tmp\music_vae\generated\adjective01\filtered'
os.makedirs(destination_dir, exist_ok=True)

# Copy each MIDI file that passed the filter into the destination.
for file_path in filtered_df['file_path']:
    shutil.copy(file_path, destination_dir)

print(f"Filtered MIDI files have been saved to {destination_dir}")

Filtered MIDI files have been saved to tmp\music_vae\generated\adjective01\filtered


In [23]:
import pandas as pd
import numpy as np

# Attribute whose direction vector is extracted in this cell.
attribute = 'average_pitch'

csv_path = "tmp/result_20240906_183929.csv"
df = pd.read_csv(csv_path)

# File paths whose attribute value lies in the top / bottom quartile.
upper_threshold = df[attribute].quantile(0.75)
lower_threshold = df[attribute].quantile(0.25)
top_25_percent = df[df[attribute] >= upper_threshold]['file_path'].tolist()
bottom_25_percent = df[df[attribute] <= lower_threshold]['file_path'].tolist()

# Each MIDI file is expected to have a sibling .npy file with the same stem
# (presumably its latent vector - confirm). Swap only the extension:
# str.replace(".mid", ".npy") would also rewrite a ".mid" that occurs
# elsewhere in the path, e.g. in a directory name.
top_npy_files = [os.path.splitext(path)[0] + ".npy" for path in top_25_percent]
top_npy_arrays = [np.load(npy_file) for npy_file in top_npy_files]

bottom_npy_files = [os.path.splitext(path)[0] + ".npy" for path in bottom_25_percent]
bottom_npy_arrays = [np.load(npy_file) for npy_file in bottom_npy_files]

# Attribute vector: mean of the top group minus mean of the bottom group.
attribute_npy = np.mean(top_npy_arrays, axis=0) - np.mean(bottom_npy_arrays, axis=0)

print(attribute_npy)
# np.save appends ".npy", so this writes "<attribute>.npy" in the working directory.
np.save(attribute, attribute_npy)

[-8.19235481e-03 -2.01471411e-02  3.73014174e-02 -4.76929471e-02
 -1.88039616e-02  2.93059275e-02 -2.59748306e-02 -1.69186469e-03
 -9.79284290e-03 -1.20954048e-02  6.51223678e-03  1.55179519e-02
  3.39272711e-03  2.79504489e-02 -2.28778675e-01 -1.49427429e-02
  3.67398281e-03  1.05738295e-02 -1.65991653e-02  1.50819179e-02
  2.47900933e-03 -6.83678407e-03 -5.55382110e-02  1.47343967e-02
 -1.01564955e-02 -2.44649053e-02 -6.55720942e-03  3.47167365e-02
 -3.15086655e-02  1.94165520e-02 -3.83579060e-02 -1.66418552e-02
 -3.34817767e-02 -1.18854535e+00 -5.88875860e-02  6.02558441e-03
  4.16984595e-02  1.59276780e-02  1.46953631e-02 -9.63864010e-03
 -2.73697823e-03  4.68681306e-02  8.07550736e-03 -5.08248061e-02
 -4.39475290e-02  4.64636348e-02 -5.86091466e-02  2.01657647e-03
  1.33858053e-02 -5.69843873e-03 -3.24281827e-02 -1.97142605e-02
 -6.24588355e-02 -6.28510937e-02  2.18287650e-02  5.97167239e-02
  1.78572461e-02 -3.54564935e-03 -2.94218259e-03 -2.84113325e-02
 -2.75070779e-04 -3.02204

In [24]:
# 調については特殊な過程で算出

import pandas as pd
import numpy as np

# Attribute whose direction vector is extracted in this cell.
attribute = 'mode'

csv_path = "tmp/result_20240906_183929.csv"
df = pd.read_csv(csv_path)

# Number of files kept per mode, taken from the highest key_correlation down.
TOP_N_PER_MODE = 2048

# Major files with the highest key correlation.
major_df = df[df['mode'] == 'major']
sorted_major_df = major_df.sort_values(by='key_correlation', ascending=False)
top_2048_major = sorted_major_df.head(TOP_N_PER_MODE)
# Despite its name this holds the selected major files, not a quartile.
top_25_percent = top_2048_major['file_path'].tolist()

# Minor files with the highest key correlation.
minor_df = df[df['mode'] == 'minor']
sorted_minor_df = minor_df.sort_values(by='key_correlation', ascending=False)
top_2048_minor = sorted_minor_df.head(TOP_N_PER_MODE)
# Despite its name this holds the selected minor files, not a quartile.
bottom_25_percent = top_2048_minor['file_path'].tolist()

# Each MIDI file is expected to have a sibling .npy file with the same stem
# (presumably its latent vector - confirm). Swap only the extension:
# str.replace(".mid", ".npy") would also rewrite a ".mid" that occurs
# elsewhere in the path, e.g. in a directory name.
top_npy_files = [os.path.splitext(path)[0] + ".npy" for path in top_25_percent]
top_npy_arrays = [np.load(npy_file) for npy_file in top_npy_files]

bottom_npy_files = [os.path.splitext(path)[0] + ".npy" for path in bottom_25_percent]
bottom_npy_arrays = [np.load(npy_file) for npy_file in bottom_npy_files]

# Attribute vector: mean of the major group minus mean of the minor group.
attribute_npy = np.mean(top_npy_arrays, axis=0) - np.mean(bottom_npy_arrays, axis=0)

print(attribute_npy)
# np.save appends ".npy", so this writes "<attribute>.npy" in the working directory.
np.save(attribute, attribute_npy)

[ 0.01474035  0.0443825   0.0090474  -0.01179035 -0.02069221  0.03332651
 -0.01247204  0.0046586  -0.00359238 -0.00771028 -0.02242981  0.0397466
 -0.00108416 -0.08108912  0.01771081  0.0298912   0.00884682  0.00916148
  0.03080139  0.0078221  -0.02497126  0.01428537  0.04769609 -0.11744177
  0.02083792 -0.02838334  0.04806837 -0.02045621  0.04282219  0.02714619
 -0.01395194 -0.02552052 -0.00975432 -0.00794076 -0.02170661 -0.011802
  0.04693358 -0.01979561 -0.03500682  0.02379969 -0.00710932  0.00963761
 -0.01285954 -0.01696977  0.04732827  0.02680675  0.08137599 -0.03160985
 -0.06524526 -0.00956668  0.0352366   0.00366865  0.07766031 -0.03111038
  0.0259076   0.01511439 -0.04501702  0.03978179 -0.00409384  0.02586513
  0.03929128 -0.02384474 -0.02450006  0.02328835  0.00084272 -0.00185494
 -0.02542683 -0.04757576  0.01524579  0.02923343 -0.05113153 -0.02980876
 -0.00483553 -0.00362603  0.0019257   0.05866157  0.02099564 -0.01157455
 -0.03354371 -0.02419335 -0.02294672 -0.05543972  0.03

In [None]:
import pandas as pd
import numpy as np

# Attribute whose direction vector is extracted in this cell.
attribute = 'c_diatonic'

csv_path = "magenta/tmp/result_20240708_145215.csv"
df = pd.read_csv(csv_path)

# File paths whose attribute value lies in the top / bottom quartile.
top_25_percent = df[df[attribute] >= df[attribute].quantile(0.75)]['file_path'].tolist()
bottom_25_percent = df[df[attribute] <= df[attribute].quantile(0.25)]['file_path'].tolist()

# Prefix prepended to every file_path before loading.
# Named vector_dir rather than dir so the builtin dir() is not shadowed for
# the rest of the notebook session.
vector_dir = r"tmp\music_vae_16bar\generated\iec0007\vector\gen_001\\"

# NOTE(review): unlike the other attribute cells, the paths are used as-is
# (no ".npy" extension swap) and simply concatenated to the prefix - confirm
# what file_path holds in this CSV.
top_npy_files = [vector_dir + path for path in top_25_percent]
top_npy_arrays = [np.load(npy_file) for npy_file in top_npy_files]

bottom_npy_files = [vector_dir + path for path in bottom_25_percent]
bottom_npy_arrays = [np.load(npy_file) for npy_file in bottom_npy_files]

# Attribute vector: mean of the top group minus mean of the bottom group.
attribute_npy = np.mean(top_npy_arrays, axis=0) - np.mean(bottom_npy_arrays, axis=0)

print(attribute_npy)
# np.save appends ".npy", so this writes "<attribute>.npy" in the working directory.
np.save(attribute, attribute_npy)

In [None]:
import pandas as pd
import numpy as np

filtered_csv_path = "tmp/filtered20240610.csv"
df = pd.read_csv(filtered_csv_path)

c_diatonic_values = df['c_diatonic']

# File paths in the top quartile of c_diatonic.
in_top_quartile = c_diatonic_values >= c_diatonic_values.quantile(0.75)
top_25_percent = df[in_top_quartile]['file_path'].tolist()

# File paths in the bottom quartile of c_diatonic.
in_bottom_quartile = c_diatonic_values <= c_diatonic_values.quantile(0.25)
bottom_25_percent = df[in_bottom_quartile]['file_path'].tolist()

# The arrays live next to each file under "<file_path>.npy".
top_npy_files = [f"{path}.npy" for path in top_25_percent]
top_npy_arrays = list(map(np.load, top_npy_files))

# The bottom-quartile arrays are loaded as well, although only the
# top-quartile average is computed below.
bottom_npy_files = [f"{path}.npy" for path in bottom_25_percent]
bottom_npy_arrays = list(map(np.load, bottom_npy_files))

# Element-wise mean over the top-quartile arrays.
average_npy = np.mean(top_npy_arrays, axis=0)

print(average_npy)
