In [19]:
"""Util functions"""

import re
import json

def write_json_to_file(file_path, data):
    """Write ``data`` to ``file_path`` as pretty-printed JSON.

    The target is opened in text write mode, so existing content is
    replaced. Output uses a four-space indent and sorted object keys.
    """
    dump_options = {'indent': 4, 'sort_keys': True}
    with open(file_path, 'w') as out_file:
        json.dump(data, out_file, **dump_options)
        
def load_json_from_file(file_path):
    """Open ``file_path`` for reading and return its parsed JSON content."""
    with open(file_path) as source:
        parsed = json.load(source)
    return parsed

def first_matching(iterable, predicate):
    """Return the first item of ``iterable`` for which ``predicate`` is truthy.

    Raises:
        StopIteration: if no item satisfies ``predicate``.
    """
    for candidate in iterable:
        if predicate(candidate):
            return candidate
    # Same failure mode as calling next() on an exhausted generator.
    raise StopIteration

def get_file_ext(file_name):
    """Return the text after the last ``.`` in ``file_name``.

    Args:
        file_name: File name to inspect.

    Returns:
        The extension without the leading dot, or ``''`` when the name
        contains no dot at all (previously the whole name was returned in
        that case, which made an extension-less file look like it had its
        own name as the extension).
    """
    _stem, dot, extension = file_name.rpartition('.')
    # rpartition yields an empty separator when no '.' was found.
    return extension if dot else ''

def filter_dict_keys(orig_dict, keys_to_keep):
    """Build a new dict containing only ``keys_to_keep`` from ``orig_dict``.

    Raises:
        KeyError: if a requested key is not present in ``orig_dict``.
    """
    subset = {}
    for wanted_key in keys_to_keep:
        subset[wanted_key] = orig_dict[wanted_key]
    return subset

def formatted_ms(total_ms, format_str='{min}:{sec}:{ms}'):
    """Format a millisecond count as zero-padded minutes, seconds and ms.

    Args:
        total_ms: Duration in milliseconds. Must be an ``int`` (``bool`` is
            rejected, as before).
        format_str: Template with the named fields ``{min}``, ``{sec}`` and
            ``{ms}``. ``min`` and ``sec`` are padded to two digits, ``ms``
            to three.

    Returns:
        The formatted string, or ``None`` when ``total_ms`` is not an int
        (callers rely on this to fall back to a default).
    """
    if isinstance(total_ms, bool) or not isinstance(total_ms, int):
        return None
    # Pure integer arithmetic: the previous true-division version went
    # through floats and could lose precision for very large values.
    total_seconds, ms = divmod(total_ms, 1000)
    minutes, seconds = divmod(total_seconds, 60)
    return format_str.format(
        min='{0:02d}'.format(minutes),
        sec='{0:02d}'.format(seconds),
        ms='{0:03d}'.format(ms),
    )

def dot_formatted_ms(total_ms):
    """Format ``total_ms`` via ``formatted_ms`` using dots as separators."""
    dot_template = '{min}.{sec}.{ms}'
    return formatted_ms(total_ms, dot_template)

def colon_formatted_ms(total_ms):
    """Format ``total_ms`` via ``formatted_ms`` using colons as separators."""
    colon_template = '{min}:{sec}:{ms}'
    return formatted_ms(total_ms, colon_template)

def multi_replacer(*key_values):
    """Build a function that applies several literal replacements in one pass.

    Args:
        *key_values: ``(search, replacement)`` pairs. Search strings are
            matched literally (they are regex-escaped).

    Returns:
        A callable taking a string and returning it with every occurrence
        of a search string replaced. With no pairs the callable returns its
        input unchanged (previously an empty pattern matched everywhere and
        raised ``KeyError``).
    """
    replace_dict = dict(key_values)
    if not replace_dict:
        return lambda string: string

    # Try longer keys first so that a key which is a prefix of another key
    # cannot shadow it in the alternation.
    ordered_keys = sorted(replace_dict, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in ordered_keys))

    def replace_all(string):
        return pattern.sub(lambda match: replace_dict[match.group(0)], string)

    return replace_all

def multi_replace(string, *key_values):
    """Apply the ``(search, replacement)`` pairs in ``key_values`` to ``string``.

    Convenience wrapper: builds a replacer with ``multi_replacer`` and
    immediately applies it.
    """
    replacer = multi_replacer(*key_values)
    return replacer(string)

def create_file_path(*file_path_parts):
    """Join path parts with single ``/`` separators.

    Args:
        *file_path_parts: Path fragments, in order. Leading/trailing slashes
            on every part except the last are ignored; the last part keeps
            its trailing content as given.

    Returns:
        The joined path. It starts with ``/`` when the first part does.
        With no parts the result is ``''``; a single part is returned
        unchanged (previously a single absolute part came back as ``'//a'``
        and empty input raised ``IndexError``).
    """
    if not file_path_parts:
        return ''
    if len(file_path_parts) == 1:
        return file_path_parts[0]

    *leading_parts, last_part = file_path_parts
    prefix = '/' if file_path_parts[0].startswith('/') else ''
    # Drop parts that are empty once their slashes are removed so they
    # cannot introduce doubled separators.
    segments = [part.strip('/') for part in leading_parts]
    segments = [segment for segment in segments if segment]
    segments.append(last_part.lstrip('/'))
    return prefix + '/'.join(segments)
   
# replacements = ('[', '[[]'), (']', '[]]')
# multi_replace('Runaway [Explicit]', *replacements)
# SHELL_ESCAPE_CHARS = ['[', ']', '?', '!']
# multi_replace('Runaway [Explicit]', *tuple((ch, '[{}]'.format(ch)) for ch in SHELL_ESCAPE_CHARS))

In [26]:
"""SQLite helper functions"""

import sqlite3

def dict_factory(cursor, row):
    """Row factory that maps each column name to its value in ``row``.

    Column names are taken from the first element of each entry in
    ``cursor.description``.
    """
    return {
        column[0]: row[position]
        for position, column in enumerate(cursor.description)
    }

def get_cursor(db_file):
    """Open ``db_file`` with sqlite3 and return a cursor yielding dict rows.

    The connection's ``row_factory`` is set to ``dict_factory`` before the
    cursor is created.
    """
    connection = sqlite3.connect(db_file)
    connection.row_factory = dict_factory
    dict_cursor = connection.cursor()
    return dict_cursor

def get_itunes_cursor():
    """Return a dict-row cursor for ``itunes.db`` (relative path)."""
    itunes_db_file = 'itunes.db'
    return get_cursor(itunes_db_file)

def get_mb_cursor():
    """Return a dict-row cursor for ``musicbee.db`` (relative path)."""
    musicbee_db_file = 'musicbee.db'
    return get_cursor(musicbee_db_file)

def db_info(db_file):
    """List the tables of ``db_file`` as rows read from ``sqlite_master``."""
    table_listing = get_cursor(db_file).execute(
        "SELECT name FROM sqlite_master WHERE type='table';"
    )
    return table_listing.fetchall()

def column_names(cursor, table_name):
    """Return the sorted column names of ``table_name``.

    Args:
        cursor: sqlite3 cursor used to run the query.
        table_name: Table to inspect. It is interpolated into the SQL text
            as-is, so it must come from a trusted source.

    Returns:
        Sorted list of column names.

    The names are read from ``cursor.description`` after a ``LIMIT 0``
    query, so this works on empty tables (the previous ``fetchone().keys()``
    raised ``AttributeError`` there), does not depend on the row factory
    and does not run an unbounded ``SELECT *``.
    """
    cursor.execute("SELECT * FROM {} LIMIT 0".format(table_name))
    return sorted({column[0] for column in cursor.description})

def escape_query_value(value):
    """Double every single quote in ``value`` for use inside a SQL string literal."""
    pieces = value.split("'")
    return "''".join(pieces)


In [3]:
"""
Utils to work with example tracks
"""

# Example tracks to look up, keyed by artist. get_example_tracks matches the
# artist exactly and the value as a substring of the track name (SQL LIKE),
# so values may be partial titles.
track_search_map = {
    'Beck': 'Debra',
    'Kanye West': 'Runaway',
    'Outkast': 'The Way You Move',
    # NOTE(review): this title looks cut off; presumably deliberate since it
    # is only used for substring matching -- confirm.
    'Chromeo': 'Opening Up (- Ce soir on danse',
}

def get_example_tracks():
    """Fetch the iTunes rows for every entry in ``track_search_map``.

    Each entry is matched on an exact artist and a track name containing
    the mapped value. Values are passed as bound parameters rather than
    interpolated into the SQL text, so quotes in artist or track names can
    no longer break the query.

    Returns:
        The matching rows from the ``tracks`` table of ``itunes.db``.

    Raises:
        Exception: if the number of rows found differs from the number of
            entries in ``track_search_map``.
    """
    cursor = get_cursor('itunes.db')
    conditions = []
    params = []
    for artist, track_name in track_search_map.items():
        conditions.append("(artist=? AND name LIKE ?)")
        params.extend([artist, '%{}%'.format(track_name)])
    query = "SELECT * FROM tracks WHERE {}".format(" OR ".join(conditions))
    tracks = cursor.execute(query, params).fetchall()
    if len(tracks) != len(track_search_map):
        raise Exception('Found too many or too few tracks')
    return tracks

# [(track['artist'], track['name'], ms_to_min_sec_ms(track['stop_time']))
#     for track in tracks]
# track_debra = first_matching(tracks, lambda t: t['artist'] == 'Beck')
# (ms_to_min_sec_ms(track_debra['start_time']), ms_to_min_sec_ms(track_debra['stop_time']))

In [4]:
"""File searching utility functions"""

import os
from fnmatch import fnmatch
import subprocess

# Directory that find_matching_file_name lists when looking for source files.
ORIG_FILE_PATH = '/home/gavin/Music/chop'
# Sub-directory name joined onto ORIG_FILE_PATH to build output file paths.
CHOPPED_REL_FILE_PATH = 'chopped/'
# Characters fnmatch treats as wildcards outside a bracket expression.
# ']' and '!' are only special *inside* brackets, and wrapping '!' as '[!]'
# does not produce a literal '!' (fnmatch reads '[!' as the start of a
# negated set), so they must not be escaped. '*' was previously missing.
SHELL_ESCAPE_CHARS = ['*', '?', '[']

def escape_shell_string(string):
    """Escape ``string`` so fnmatch matches it literally.

    Args:
        string: Text that will be embedded in an fnmatch pattern.

    Returns:
        ``string`` with every character listed in ``SHELL_ESCAPE_CHARS``
        wrapped in square brackets (e.g. ``'['`` becomes ``'[[]'``); all
        other characters are left untouched.
    """
    return ''.join(
        '[{}]'.format(ch) if ch in SHELL_ESCAPE_CHARS else ch
        for ch in string
    )

def find_matching_file_name(partial_track_name):
    """Return the first entry of ``ORIG_FILE_PATH`` whose name contains the track name.

    The track name is escaped with ``escape_shell_string`` and matched with
    ``fnmatch`` as ``*<name>*``. Entries are examined in ``os.listdir``
    order.

    Raises:
        Exception: if no directory entry matches.
    """
    escaped_track_name = escape_shell_string(partial_track_name)
    glob_pattern = '*{}*'.format(escaped_track_name)
    candidates = (
        entry for entry in os.listdir(ORIG_FILE_PATH)
        if fnmatch(entry, glob_pattern)
    )
    first_hit = next(candidates, None)
    if first_hit is None:
        raise Exception("'{}' not found in {}".format(escaped_track_name, ORIG_FILE_PATH))
    return first_hit


In [6]:
"""
ffmpeg splitter working example
"""

import subprocess

def ffmpeg_formatted_ms(total_ms):
    """Format ``total_ms`` via ``formatted_ms`` as ``00:MM:SS.mmm``.

    The hours field is the fixed text ``00``.
    """
    ffmpeg_template = '00:{min}:{sec}.{ms}'
    return formatted_ms(total_ms, ffmpeg_template)

def ffmpeg_output_file_name(file_name):
    """Insert ``' chopped'`` before the extension of ``file_name``.

    Args:
        file_name: Name of the input file.

    Returns:
        ``'<stem> chopped.<ext>'``; when ``file_name`` has no ``.`` the
        suffix is simply appended (previously this raised ``IndexError``).
    """
    stem, dot, extension = file_name.rpartition('.')
    if not dot:
        # No extension to preserve.
        return file_name + ' chopped'
    return ''.join([stem, ' chopped', dot, extension])

def ffmpeg_time_params(start_ms=None, stop_ms=None):
    """Turn start/stop offsets in ms into a formatted (start, duration) pair.

    A falsy ``start_ms`` is treated as 0. The duration is
    ``stop_ms - start`` formatted with ``ffmpeg_formatted_ms``, or ``None``
    when ``stop_ms`` is falsy.
    """
    effective_start = start_ms or 0
    start_text = ffmpeg_formatted_ms(effective_start)
    duration_text = (
        ffmpeg_formatted_ms(stop_ms - effective_start) if stop_ms else None
    )
    return start_text, duration_text

def ffmpeg_param_list(in_file, out_file, formatted_start, formatted_duration=None):
    """Build the ffmpeg argument list for copying a section of ``in_file``.

    The list is ``ffmpeg -i <in_file> -vn -acodec copy -ss <start>``,
    followed by ``-t <duration>`` only when ``formatted_duration`` is
    truthy, and finally ``out_file``.
    """
    base_args = ['ffmpeg', '-i', in_file, '-vn', '-acodec', 'copy', '-ss', formatted_start]
    duration_args = ['-t', formatted_duration] if formatted_duration else []
    return base_args + duration_args + [out_file]

# Driver: run ffmpeg once per example track and collect the results.
tracks = get_example_tracks()
process_results = []
for track in tracks:
    # Translate the track's start/stop offsets into ffmpeg -ss / -t values.
    formatted_start, formatted_duration = ffmpeg_time_params(track['start_time'], track['stop_time'])
    # Locate the source file by track name inside ORIG_FILE_PATH.
    file_name = find_matching_file_name(track['name'])
    full_file_path = '{}/{}'.format(ORIG_FILE_PATH, file_name)
    output_file_path = create_file_path(ORIG_FILE_PATH, CHOPPED_REL_FILE_PATH, ffmpeg_output_file_name(file_name))
    param_list = ffmpeg_param_list(full_file_path, output_file_path, formatted_start, formatted_duration)
    #print(' '.join(param_list))
    # The argument list is passed directly (no shell); only stdout is
    # captured, stderr is not redirected.
    process_results.append(subprocess.run(param_list, stdout=subprocess.PIPE))

# Debug aid: list the attributes available on a result object.
print(dir(process_results[0]))
print('')
# Summarise each invocation: command line, exit status and captured stdout.
for res in process_results:
    print(' '.join(res.args))
    print('    returncode: {}'.format(res.returncode))
    print('    stdout: {}'.format(res.stdout))

# ffmpeg -i '.mp3' -acodec copy -ss 00:00:08 -t 00:04:04 '.mp3'

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'args', 'check_returncode', 'returncode', 'stderr', 'stdout']

ffmpeg -i /home/gavin/Music/chop/9 Runaway [Explicit].mp3 -vn -acodec copy -ss 00:00:00.000 -t 00:05:47.000 /home/gavin/Music/chop/chopped/9 Runaway [Explicit] chopped.mp3
    returncode: 0
    stdout: b''
ffmpeg -i /home/gavin/Music/chop/5 The Way You Move (feat. Sleepy Brown).mp3 -vn -acodec copy -ss 00:00:00.000 -t 00:03:50.000 /home/gavin/Music/chop/chopped/5 The Way You Move (feat. Sleepy Brown) chopped.mp3
    returncode: 0
    stdout: b''
ffmpeg -i /home/gavin/Music/chop/13 Debra.mp3 -vn -acodec copy -ss 00:00:00.000 -t 00:05:32.000 /home/gavin/Music/chop/chopped/13 Debra chopped.mp3
    returncode:

In [39]:
"""
Serialize tracks with start/stop times to JSON
"""

# JSON file that serialize_start_stop_tracks writes and the MusicBee matching
# cell later reads back.
SERIALIZED_TRACK_FILE_PATH = './data/start_stop_track_info.json'
# Track fields kept by serialize_track; every other column is dropped.
SERIALIZE_KEYS = ['artist', 'album_artist', 'album', 'name', 'start_time', 'stop_time', 'total_time']

# Module-level cursor on itunes.db, read as a global by
# serialize_start_stop_tracks. NOTE: later cells rebind `c` to a MusicBee
# cursor, so the function uses whichever database `c` names when it is called.
c = get_cursor('itunes.db')

def serialize_track(track):
    """Reduce ``track`` to the fields listed in ``SERIALIZE_KEYS``."""
    kept_fields = SERIALIZE_KEYS
    return filter_dict_keys(track, kept_fields)

def serialize_start_stop_tracks():
    """Write every track that has a start time to ``SERIALIZED_TRACK_FILE_PATH``.

    Rows are read through the module-level cursor ``c`` and reduced with
    ``serialize_track`` before being written as JSON.
    """
    query = "SELECT * FROM tracks WHERE start_time IS NOT NULL;"
    serialized_rows = []
    for row in c.execute(query).fetchall():
        serialized_rows.append(serialize_track(row))
    write_json_to_file(SERIALIZED_TRACK_FILE_PATH, serialized_rows)

# serialize_start_stop_tracks()

In [50]:
"""
Find tracks in MusicBee based on serialized start/stop track data
"""

# Minimum shorter/longer ratio of two tracks' total_time values for
# find_missing_track_by_duration to accept them as the same track.
TRACK_LENGTH_COMPARE_PRECISION = 0.98

def track_query(track):
    """Build the SQL that looks a track up by artist (or album artist) and name.

    Both values are passed through ``escape_query_value`` before being
    placed in the query text.
    """
    query_template = """
        SELECT * FROM tracks WHERE (artist='{artist}' OR album_artist='{artist}')
        AND name='{name}' COLLATE NOCASE;
        """
    escaped_artist = escape_query_value(track['artist'])
    escaped_name = escape_query_value(track['name'])
    return query_template.format(artist=escaped_artist, name=escaped_name)

def artist_all_track_query(track):
    """Build the SQL selecting every track by ``track``'s artist or album artist.

    The artist is passed through ``escape_query_value`` first.
    """
    escaped_artist = escape_query_value(track['artist'])
    query_template = "SELECT * FROM tracks WHERE (artist='{artist}' OR album_artist='{artist}')"
    return query_template.format(artist=escaped_artist)

def find_missing_track_by_duration(cursor, track_to_find):
    """Find a track by the same artist whose length is close to the target's.

    Args:
        cursor: Cursor on the database to search; rows must be dict-like.
        track_to_find: Dict with at least ``artist`` and ``total_time``.

    Returns:
        The first artist track whose shorter/longer ``total_time`` ratio is
        at least ``TRACK_LENGTH_COMPARE_PRECISION``, or ``None`` when there
        is no such track.

    Tracks with a missing or zero ``total_time`` cannot be compared and are
    skipped; previously they raised ``TypeError`` / ``ZeroDivisionError``.
    """
    target_length = track_to_find['total_time']
    if not target_length:
        return None
    artist_tracks = cursor.execute(artist_all_track_query(track_to_find)).fetchall()
    for artist_track in artist_tracks:
        candidate_length = artist_track['total_time']
        if not candidate_length:
            continue
        shorter, longer = sorted((candidate_length, target_length))
        if shorter / longer >= TRACK_LENGTH_COMPARE_PRECISION:
            return artist_track
    return None


# Match every serialized start/stop track against the MusicBee database.
c = get_mb_cursor()
start_stop_tracks = load_json_from_file(SERIALIZED_TRACK_FILE_PATH)
start_stop_track_count = len(start_stop_tracks)

# First pass: exact artist + name lookup.
missing_tracks = []
found_tracks = []
for track in start_stop_tracks:
    matching_tracks = c.execute(track_query(track)).fetchall()
    if len(matching_tracks) > 1:
        raise Exception("Track [{} - {}] was found {} times.".format(track['artist'], track['name'], len(matching_tracks)))
    elif len(matching_tracks) == 0:
        missing_tracks.append(track)
    else:
        found_tracks.append(matching_tracks[0])

# Second pass: for tracks not found by name, fall back to a same-artist
# track of nearly the same length. Iterate over a copy because
# missing_tracks is rebound inside the loop.
for missing_track in missing_tracks[:]:
    found_track = find_missing_track_by_duration(c, missing_track)
    if found_track:
        found_tracks.append(found_track)
        # Remove exactly the track that was just resolved. The previous
        # filter(found_track.__eq__, ...) kept only entries equal to the
        # MusicBee row, which discarded the still-missing tracks instead.
        missing_tracks = [
            remaining for remaining in missing_tracks
            if remaining is not missing_track
        ]

found_tracks

[{'album': 'Enter the Wu-Tang - 36 Chambers',
  'album_artist': None,
  'artist': 'Wu-Tang Clan',
  'artist1': None,
  'artist2': None,
  'bit_rate': 192,
  'bpm': None,
  'comments': 'www.legalsounds.com',
  'composer': None,
  'conductor': None,
  'date_added': '2015-10-21 02:01:26',
  'date_modified': '2013-03-03 22:27:13',
  'disc_count': None,
  'disc_number': None,
  'encoder': None,
  'episode_date': None,
  'episode_description': None,
  'genre': 'Hip-Hop',
  'genre1': None,
  'genre2': None,
  'grouping': None,
  'itunes_compilation': None,
  'keywords': None,
  'kind': 'MPEG audio file',
  'location': 'file://localhost/D:/Users/Gavin/Music/Clip%20MicroSD/Wu-Tang%20Clan/Enter%20the%20Wu-Tang%20-%2036%20Chambers/03%20Clan%20In%20Da%20Front.mp3',
  'lyricist': None,
  'mood': None,
  'name': 'Clan In Da Front',
  'origin': None,
  'persistent_id': 'E014F6114D7ED6F',
  'play_count': None,
  'play_date_utc': None,
  'publisher': None,
  'rating': None,
  'relative_volume_adjustmen

In [None]:
##################################

In [41]:
# Ad-hoc inspection: rebinds the global cursor `c` to musicbee.db and lists
# every track whose artist or album artist is 'Dead Or Alive'.
c = get_cursor('musicbee.db')
# column_names(c, 'tracks')
# NOTE(review): COLLATE NOCASE follows the parenthesised OR expression, so it
# presumably does not make the two '=' comparisons case-insensitive -- confirm
# against the SQLite collation rules.
c.execute("SELECT * FROM tracks WHERE (artist='Dead Or Alive' OR album_artist='Dead Or Alive') COLLATE NOCASE;").fetchall()

# c = get_cursor('itunes.db')
# column_names(c, 'tracks')

# c = get_cursor('musicbee.db')
# # column_names(c, 'tracks')
# c.execute("SELECT * FROM tracks WHERE name LIKE '%The Way You Move%';").fetchall()

# start_time_tracks = c.execute("SELECT * FROM tracks WHERE start_time IS NOT NULL;")\
#         .fetchall()
# [(track['artist'], track['name'], track['start_time'])
#     for track in start_stop_tracks[10:20]]

[{'album': 'Evolution: The Hits',
  'album_artist': 'Dead Or Alive',
  'artist': "80's One Hit Wonders",
  'artist1': None,
  'artist2': None,
  'bit_rate': 244,
  'bpm': None,
  'comments': 'Amazon.com Song ID: 203904628',
  'composer': 'Michael Percy',
  'conductor': None,
  'date_added': '2015-10-21 02:01:26',
  'date_modified': '2016-05-10 03:32:53',
  'disc_count': 1,
  'disc_number': 1,
  'encoder': 'Lame3.97',
  'episode_date': None,
  'episode_description': None,
  'genre': 'Pop',
  'genre1': None,
  'genre2': None,
  'grouping': None,
  'itunes_compilation': None,
  'keywords': None,
  'kind': 'MPEG audio file',
  'location': 'file://localhost/D:/Users/Gavin/Music/Clip%20MicroSD/Dead%20Or%20Alive/Evolution-%20The%20Hits/1%20You%20Spin%20Me%20Round%20(Like%20A%20Record).MP3',
  'lyricist': None,
  'mood': None,
  'name': 'You Spin Me Round (Like A Record) (Dead Or Alive)',
  'origin': None,
  'persistent_id': '24714036D9D4F04B',
  'play_count': None,
  'play_date_utc': None,
  

In [43]:
# import urllib
# urllib.parse.unquote('07%20Opening%20Up%20(-%20Ce%20soir%20on%20danse%20In.m4a') 

# time=332000
# ms = time % 1000
# time -= ms
# time /= 1000
# seconds = time % 60
# time -= seconds
# time /= 60
# minutes = time
# (minutes, seconds, ms)

# import os, fnmatch
# for file in os.listdir(ORIG_FILE_PATH):
#     #print(file)
#     if fnmatch.fnmatch(file, '*Runaway [[]Ex*'):
#         print(file)

# SHELL_ESCAPE_CHARS = ['[', ']', '?', '!']
# def escape_shell_string(string):
#     for c in SHELL_ESCAPE_CHARS:
#         string = string.replace(c, '\{}'.format(c))
#     return string
# escape_shell_string('Runaway [Explicit]')

# import re
# def multi_replacer(*key_values):
#     replace_dict = dict(key_values)
#     replacement_function = lambda match: replace_dict[match.group(0)]
#     pattern = re.compile("|".join([re.escape(k) for k, v in key_values]), re.M)
#     return lambda string: pattern.sub(replacement_function, string)
# def multi_replace(string, *key_values):
#     return multi_replacer(*key_values)(string)
# replacements = ('[', '[[]'), (']', '[]]')
# multi_replace('Runaway [Explicit]', *replacements)


4

In [None]:
"""
mp3splt working example
Fails on m4a
"""

# import subprocess

# MP3SPLT_DEFAULT_START = '0.0.0'
# MP3SPLT_DEFAULT_STOP = 'EOF'

# def mp3splt_output_file_name(file_name):
#     file_name_parts = file_name.rsplit('.', 1)
#     return ''.join([file_name_parts[0], ' chopped'])

# tracks = get_example_tracks()
# process_results = []
# for track in tracks:
#     start_arg, stop_arg = \
#         dot_formatted_ms(track['start_time']), dot_formatted_ms(track['stop_time'])
#     start_arg = start_arg or MP3SPLT_DEFAULT_START
#     stop_arg = stop_arg or MP3SPLT_DEFAULT_STOP
#     file_name = find_matching_file_name(track['name'])
#     full_file_path = '{}/{}'.format(ORIG_FILE_PATH, file_name)
#     mp3splt_param_list = [
#         'mp3splt',
#         full_file_path,
#         start_arg,
#         stop_arg,
#         '-o',
#         CHOPPED_REL_FILE_PATH + mp3splt_output_file_name(file_name)
#     ]
#     process_results.append(subprocess.run(mp3splt_param_list, stdout=subprocess.PIPE))

# process_results