## DJMix Python Package and Dataset

#### *Source:*

[The DJ Mix Dataset](https://github.com/mir-aidj/djmix-dataset)

[A Computational Analysis of Real-World DJ Mixes using Mix-To-Track Subsequence Alignment](https://github.com/mir-aidj/djmix-analysis)

In [None]:
import djmix as dj
import pandas as pd

In [None]:
# Load every mix exposed by the djmix package into a DataFrame, one row per mix.
# NOTE(review): a later cell keeps only element [1] of every cell, so each cell
# is presumably a (field_name, value) pair at this point — confirm against djmix.
mixes = pd.DataFrame(dj.mixes)

In [None]:
# Column labels for the mixes DataFrame, in positional order.
# They are assigned wholesale to `mixes.columns`, so the order here must match
# the order of the columns produced when the frame is built.
mixes_columns = [
    "mix_id",
    "mix_title",
    "url",
    "audio_source",
    "audio_url",
    "identified_tracks",
    "tracks",
    "transitions",
    "timestamps",
    "tracklist",
    "genre",
]

In [None]:
# Column labels for the tracks DataFrame, in positional order.
tracks_columns = ["track_id", "track_title"]

In [None]:
# Replace the default column labels with the readable names from mixes_columns.
# The assignment is positional and raises if the number of labels differs from
# the number of columns.
mixes.columns = mixes_columns

In [None]:
# Reduce every cell to its element at index 1.
# NOTE(review): presumably each cell is a (field_name, value) pair and this
# keeps the value — confirm against the djmix data model.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old name
# is deprecated, so use `map` when this pandas has it and fall back otherwise.
mixes = (mixes.map if hasattr(pd.DataFrame, 'map') else mixes.applymap)(
    lambda cell: cell[1]
)

In [None]:
# Collapse the genre cell to a single label: take the 'key' of the last entry
# in the cell and strip the 'Category:' marker from it.
mixes['genre'] = mixes['genre'].apply(
    lambda entries: entries[-1]['key'].replace('Category:', '')
)

In [None]:
# Number of mixes per genre, ordered from the most to the least frequent genre.
genre_counts = (
    mixes
    .groupby('genre')['mix_id']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# Genre labels whose mixes are filtered out of the dataset in a later cell.
genres_to_drop = [
    "Dekmantel Festival",
    "Juno Plus Podcast",
]

# Second list of genre labels; a later cell also excludes mixes carrying one of
# these labels.
genres_to_normalize = [
    "Disco",
    "Minimal",
    "Dub Techno",
    "Vocal House",
    "Techno",
    "Progressive",
    "Tech House",
    "House",
    "Deep House",
    "Drum & Bass",
    "Progressive House",
    "Progressive Trance",
    "Trance",
    "Deep Tech House",
    "Psytrance",
]

In [None]:
# Keep only the mixes whose identified_tracks / tracks ratio exceeds 0.5.
mixes = mixes.loc[(mixes['identified_tracks'] / mixes['tracks']) > 0.5]
# Number of mixes left after the filter.
len(mixes)

In [None]:
# Inspect the genre label of each remaining mix.
mixes['genre']

In [None]:
# Remove every mix whose genre label appears in genres_to_drop.
mixes = mixes.loc[~mixes['genre'].isin(genres_to_drop)]
# Number of mixes left after the filter.
len(mixes)

In [None]:
# Remove every mix whose genre label appears in genres_to_normalize.
# NOTE(review): the list name suggests these labels were meant to be remapped,
# but the rows are excluded here — confirm this is intended.
mixes = mixes.loc[~mixes['genre'].isin(genres_to_normalize)]
# Number of mixes left after the filter.
len(mixes)

In [None]:
# Add a 'year' column holding the first four characters of each mix title
# (kept as a string).
# NOTE(review): presumably titles start with a YYYY date prefix — confirm.
# `mixes` is the result of boolean filtering at this point, so writing a column
# into it in place raises pandas' SettingWithCopyWarning; assign() builds a new
# frame with the extra column instead.
mixes = mixes.assign(year=mixes.mix_title.str[:4])

In [None]:
# Inspect the column labels now present on the frame.
mixes.columns

In [None]:
# Keep only the columns needed downstream, in this order.
mixes_clean = mixes.loc[
    :,
    ['mix_id', 'year', 'identified_tracks', 'tracks', 'genre', 'tracklist'],
]

In [None]:
# Renumber the rows 0..n-1; drop=True discards the old index instead of keeping
# it as a column (the earlier filters left gaps in the index).
mixes_clean = mixes_clean.reset_index(drop=True)

In [None]:
# Inspect the cleaned frame.
mixes_clean

In [None]:
# Subset of the cleaned mixes with fewer than 11 identified tracks.
mixes_limit = mixes_clean.loc[mixes_clean['identified_tracks'] < 11]

In [None]:
# Number of mixes per genre within the limited subset.
mixes_limit.genre.value_counts()

In [None]:
# Inspect the limited subset.
mixes_limit

In [None]:
# Flatten the tracklists of the limited subset into one list holding the `id`
# of every tracklist entry (duplicates are kept; order follows the mixes).
track_ids = [
    entry.id
    for tracklist in mixes_limit.tracklist
    for entry in tracklist
]

In [None]:
# Number of collected track ids; duplicates are still counted here because the
# collection is only turned into a set in a later cell.
len(track_ids)

In [None]:
# Deduplicate the collected track ids.
track_ids = {*track_ids}

In [None]:
import os

# Directory that is scanned for track files.
path = '/Users/bjornbraun/djmix/tracks/'
files = os.listdir(path)

# Derive an id from every file name by cutting off its last 8 characters and
# collect the distinct ids.
# NOTE(review): presumably the 8 characters are a fixed-length suffix/extension
# — confirm against the naming scheme of the downloaded files.
file_ids = {name[:-8] for name in files}

In [None]:
# Track ids from the limited subset that also have a file in the scanned
# directory.
track_file_ids = track_ids & file_ids

In [None]:
# Number of distinct track ids (track_ids is a set by now).
len(track_ids)

In [None]:
# Number of distinct track ids that have a matching file id.
len(track_file_ids)

In [None]:
# Number of distinct track ids without a matching file id (track_file_ids is a
# subset of track_ids, so the difference counts the unmatched ones).
len(track_ids) - len(track_file_ids)

In [None]:
# Ids of the mixes in the limited subset, as a plain list in row order.
mix_ids = [*mixes_limit['mix_id']]

In [None]:
# Number of mixes in the limited subset.
len(mix_ids)

In [None]:
# i = 1
# j = len(mix_ids)
# for mix in dj.mixes:
#     if mix.id in mix_ids:
#         print('----------------------------------------')
#         print(f'D O W N L O A D I N G  M I X:   {mix.id}')
#         print(f'MIX NO {str(i)} of {str(j)} MIXES TOTAL')
#         print('----------------------------------------')
#         mix.download()
#         i += 1

In [None]:
# All tracks of the djmix package, one row per entry of dj.tracks.values().
tracks = pd.DataFrame(dj.tracks.values())
# Positional rename to the readable labels from tracks_columns.
tracks.columns = tracks_columns
# Reduce every cell to its element at index 1.
# NOTE(review): presumably each cell is a (field_name, value) pair and this
# keeps the value — confirm against the djmix data model.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old name
# is deprecated, so use `map` when this pandas has it and fall back otherwise.
tracks = (tracks.map if hasattr(pd.DataFrame, 'map') else tracks.applymap)(
    lambda cell: cell[1]
)
# Preview the first rows.
tracks.head()