In [2]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd

import json
import pickle

import streamlit as st

from tqdm import tqdm

from itertools import chain

In [7]:
# Project root: the parent of the current working directory, made absolute.
WORKING_DIR = Path(os.pardir).resolve()

# Sub-folders of the project root used by the cells below.
DATA_DIR = WORKING_DIR / 'data'
EXAMPLES_PATH = WORKING_DIR / 'examples'
AUDIO_PATH = WORKING_DIR / 'audio'

# Echo the resolved locations so a wrong launch directory is spotted early.
print(f'WORKING_DIR: {WORKING_DIR}')
print(f'DATA_DIR: {DATA_DIR}')
print(f'EXAMPLES_PATH: {EXAMPLES_PATH}')
print(f'AUDIO_PATH: {AUDIO_PATH}')

WORKING_DIR: /Users/francescopapaleo/Dropbox/Mac/Documents/git-box/streamlit
DATA_DIR: /Users/francescopapaleo/Dropbox/Mac/Documents/git-box/streamlit/data
EXAMPLES_PATH: /Users/francescopapaleo/Dropbox/Mac/Documents/git-box/streamlit/examples
AUDIO_PATH: /Users/francescopapaleo/Dropbox/Mac/Documents/git-box/streamlit/audio


In [None]:
#### FIX FILE PATHS IN JSON ####

def fix_file_path(d):
    """Normalise the path stored under ``d['file']`` to forward slashes.

    The dictionary is updated in place and nothing is returned.

    Every backslash in the path is converted, whichever segment it sits in.
    Splitting the path on forward slashes first (as an earlier version did)
    left backslashes in the final segment untouched and prepended a stray
    leading slash to any path that contained no forward slash at all, such
    as a bare filename or a purely backslash-separated path.

    Parameters
    ----------
    d : dict
        Mapping holding a string under the ``'file'`` key.

    Raises
    ------
    KeyError
        If ``d`` has no ``'file'`` key.
    """
    d['file'] = d['file'].replace('\\', '/')

In [None]:
# Load the descriptor records; each item is handed to fix_file_path below, so
# the file is expected to hold a list of dicts that carry a 'file' entry.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open(DATA_DIR / 'descriptors_rel_path.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Correct the file path of every record in place.
for d in data:
    fix_file_path(d)

# Write the corrected records to a separate file; the input is left untouched.
with open(DATA_DIR / 'descriptors_dict_fix.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=2)

In [None]:
# Load the pickled example table from the project's data folder and preview it.
# The location is built from DATA_DIR rather than a machine-specific absolute
# path, so the cell also works from another checkout of the project.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df_example_pickle = pd.read_pickle(DATA_DIR / 'files_essentia_effnet-discogs.jsonl.pickle')
df_example_pickle.head()

In [None]:
### CONVERT JSON TO PICKLE ###

# Input: the JSON file holding the corrected descriptors; output: its pickled copy.
source = DATA_DIR / 'descriptors_dict_fix.json'
destination = DATA_DIR / 'converted_pickle.pickle'

# Decode explicitly as UTF-8 so the read does not depend on the platform's
# default locale encoding.
with open(source, 'r', encoding='utf-8') as f:
    tmp_data = json.load(f)

# Pickle the parsed object as-is (binary mode is required by pickle).
with open(destination, 'wb') as f:
    pickle.dump(tmp_data, f)

In [None]:
# Load the example pickle shipped in EXAMPLES_PATH and display its head.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
read_pickle = pd.read_pickle(EXAMPLES_PATH / 'files_essentia_effnet-discogs.jsonl.pickle')
read_pickle.head()

Unnamed: 0,Blues---Boogie Woogie,Blues---Chicago Blues,Blues---Country Blues,Blues---Delta Blues,Blues---Electric Blues,Blues---Harmonica Blues,Blues---Jump Blues,Blues---Louisiana Blues,Blues---Modern Electric Blues,Blues---Piano Blues,...,Rock---Symphonic Rock,Rock---Technical Death Metal,Rock---Thrash,Rock---Twist,Rock---Viking Metal,Rock---Yé-Yé,Stage & Screen---Musical,Stage & Screen---Score,Stage & Screen---Soundtrack,Stage & Screen---Theme
audio/audio.005/6x/6xsW2a8hcDGVuU4wjNVtg3.mp3,1.921259e-08,1.2e-05,8.927022e-06,1.066856e-06,2.96021e-06,5.494672e-07,5.312226e-08,1.424986e-06,4e-06,2.3e-05,...,1.2e-05,3.55262e-08,6.897279e-07,9.82045e-07,2e-06,3.149176e-07,0.000271,1.7e-05,0.003258,0.000103
audio/audio.005/0O/0OyG3pHJLDlhAZnBPjfa99.mp3,3.531321e-07,6e-06,8.706954e-07,1.626603e-06,7.460184e-06,4.044373e-07,4.234967e-07,1.323576e-05,4e-06,1e-06,...,4.1e-05,2.385509e-05,6.520665e-05,8.265182e-07,4e-06,3.734779e-06,0.000363,0.000182,0.006023,0.000445
audio/audio.004/1B/1BAwmNAkNPc3rG6rwcRHJW.mp3,0.0001711634,0.000292,0.0006476456,0.0001398081,0.0006854185,0.0003262258,7.37677e-05,0.001549567,0.000556,0.000277,...,0.013644,5.917072e-06,0.0001625572,0.0004813167,9.3e-05,0.0006072484,0.01263,0.002725,0.02591,0.009837
audio/audio.003/7q/7q2kHP1fs5FMJgzVjSRf0q.mp3,0.0159042,0.021643,0.006631631,0.01463922,0.006033014,0.008941036,0.009978976,0.001977531,0.009115,0.043897,...,0.000198,6.137202e-06,1.269844e-05,0.001586579,1.1e-05,2.722128e-05,0.011233,0.011296,0.01927,0.012415
audio/audio.005/59/59ABTmSBYoGKlNbRf9FbeV.mp3,2.059015e-08,2e-06,2.260118e-06,1.341552e-07,6.331785e-07,4.42403e-07,7.054783e-08,8.087318e-07,2e-06,4e-06,...,4e-06,3.575041e-08,4.709626e-07,1.85316e-06,2.3e-05,1.019867e-07,9.8e-05,9e-06,0.000916,4.3e-05


In [None]:
#### ABSOLUTE PATHS ####

def get_files_absolute(path):
    """Recursively list every file found below ``path``.

    Each entry is the walked directory joined with the file name, so the
    entries are absolute only when ``path`` itself is absolute.

    Parameters
    ----------
    path : str or os.PathLike
        Directory to walk.

    Returns
    -------
    list of str
        One joined path per file, in ``os.walk`` order.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
    ]

# Collect every file found under AUDIO_PATH and show how many there are.
audio_files_absolute = get_files_absolute(AUDIO_PATH)
len(audio_files_absolute)

2102

In [None]:
#### RELATIVE PATHS ####

def get_files_relative(path, start=None):
    """Recursively list every file below ``path``, relative to ``start``.

    Parameters
    ----------
    path : str or os.PathLike
        Directory to walk.
    start : str or os.PathLike, optional
        Directory the returned paths are expressed relative to. When omitted,
        the notebook-level ``WORKING_DIR`` is used, which matches the
        behaviour of calling the function with a single argument.

    Returns
    -------
    list of str
        One relative path per file, in ``os.walk`` order.
    """
    if start is None:
        # Resolved at call time so the notebook's current WORKING_DIR applies.
        start = WORKING_DIR
    return [
        os.path.relpath(os.path.join(root, file), start=start)
        for root, _dirs, files in os.walk(path)
        for file in files
    ]

# Collect every file under AUDIO_PATH as a path relative to WORKING_DIR.
# This must call get_files_relative: calling get_files_absolute here would
# fill the "relative" list with the same joined paths as the absolute one.
audio_files_relative = get_files_relative(AUDIO_PATH)
len(audio_files_relative)

2102

In [None]:
# Disabled one-off preprocessing, kept for reference only: everything between
# the triple quotes is a plain string literal, so none of these statements run.
# The text reads two JSON files from a data_test folder, replaces the
# '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/' prefix of the 'file'
# column with 'audio/', drops the 'relative_path' column and writes the result
# to descriptors_rel_path.json.
# NOTE(review): the paths inside are absolute and machine-specific; adapt them
# before re-enabling any of this.
'''
audio_files_read = pd.read_json('/Users/francescopapaleo/Dropbox/Mac/Documents/git-box/streamlit/data_test/audio_files_streamlit.json')
descriptors_read = pd.read_json('/Users/francescopapaleo/Dropbox/Mac/Documents/git-box/streamlit/data_test/descriptors_output.json')

descriptors_read['file'] = descriptors_read['file'].str.replace('/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/', 'audio/')
descriptors_read.head()

descriptors_read = descriptors_read.drop('relative_path', axis=1)
descriptors_read.head()

descriptors_read.to_json('/Users/francescopapaleo/Dropbox/Mac/Documents/git-box/streamlit/data_test/descriptors_rel_path.json', orient='records', indent=1)
'''