In [18]:
import json
import logging
import os
from collections import defaultdict
from typing import Optional, Dict

import numpy as np
import pandas as pd
# Show every column when a DataFrame is rendered instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# Root of the mashup_eval output tree. The default is the original local checkout;
# set the MASHUP_EVAL_OUT environment variable to run the notebook against a copy
# of the outputs that lives somewhere else.
OUT_ROOT = os.environ.get(
    "MASHUP_EVAL_OUT",
    "/Users/abcheng/Documents/workspace/mashup_eval/out",
)
EVAL_OUT_DIR = os.path.join(OUT_ROOT, "eval_out")

# evaluation filepaths (CSV, one row per evaluated file)
naive_eval_path = os.path.join(EVAL_OUT_DIR, "naive_eval_output.csv")
auto_eval_path = os.path.join(EVAL_OUT_DIR, "auto_eval_output.csv")

# match filepaths (JSON lines, one match record per line)
naive_match_path = os.path.join(EVAL_OUT_DIR, "psc_naive_match.jsonl")
auto_match_path = os.path.join(OUT_ROOT, "cocola_out", "full_match_out.jsonl")

def extract_id(df: pd.DataFrame) -> pd.DataFrame:
    """
    Swaps the file_path column for an id derived from each path.\\
    The id is the last "/"-separated component of the path, cut at its first dot.\\
    The dataframe is modified in place and is also returned.\\
    @param df: the dataframe to operate on; it must have a file_path column.\\
    @returns the same dataframe, now with an id column and no file_path column.
    """
    def _path_to_id(path):
        # keep only the file name, then drop everything from the first "." onwards
        file_name = path.rsplit("/", 1)[-1]
        return file_name.split(".", 1)[0]

    paths = df['file_path']
    df['id'] = paths.apply(_path_to_id)
    # the raw path is not needed once the id exists; dropping it saves memory
    del df['file_path']
    return df


def load_df(eval_filepath: str, match_filepath: Optional[str]) -> pd.DataFrame:
    """
    Loads in an evaluation dataframe and merges it with its match scores.\\
    The evaluation CSV is read, its file_path column is turned into an id
    (see extract_id), and, when match information is given, every match record
    is left-joined onto the evaluation rows by id.\\
    @param eval_filepath: path to the evaluation dataframe (CSV).\\
    @param match_filepath: path to the match information (JSON lines), if available.\\
    @returns dataframe with necessary information.\\
    @raises AssertionError: if an evaluation row has no match record with the same id.
    """
    df = extract_id(pd.read_csv(eval_filepath))
    # no match information to attach: return the evaluation rows as they are
    if not match_filepath:
        return df

    with open(match_filepath, 'r') as f:
        # blank lines (e.g. a trailing newline) would make json.loads fail
        matches = [json.loads(line) for line in f if line.strip()]
    # Order-independent key for the songs of a match: the smallest entry followed
    # by the largest, so [A, B] and [B, A] produce the same string.
    for match in matches:
        songs = match['songs']
        match['songs_str'] = min(songs) + max(songs)
    match_df = pd.DataFrame(matches)
    logging.debug("match df dtypes:\n%s", match_df.dtypes)
    logging.debug("eval df dtypes:\n%s", df.dtypes)
    logging.debug("match df head is %s", match_df.head())

    result = df.merge(match_df, on="id", how="left")
    # A left merge keeps every evaluation row and leaves the match columns empty
    # when no record shares the id. Columns of the left frame (such as C_MU) are
    # never altered by the merge, so the check has to look at the ids themselves.
    unmatched = ~result['id'].isin(match_df['id'])
    if unmatched.any():
        raise AssertionError(
            f"{int(unmatched.sum())} evaluation rows have no match record, "
            f"e.g. id {result.loc[unmatched, 'id'].iloc[0]!r}"
        )
    logging.debug("resulthead is %s", result.head())
    return result


def get_composite_scores(match_filepath: str) -> Dict[int, float]:
    """
    Combines the scores of all match records that share the same songs.\\
    Records are grouped by hash(songs_str). When a record has no songs_str, the
    key is derived from its songs list as min(songs) + max(songs), so records
    listing the same two songs in either order land in the same group.\\
    The first score of a group is stored as is; every later score replaces the
    stored value with the average of the stored value and the new score.\\
    @param match_filepath: path to the match information (JSON lines).\\
    @returns a defaultdict mapping hash(songs_str) to the combined score; looking
    up a key that is not present yields 0.
    """
    # NOTE(review): str hashes are salted per interpreter process unless
    # PYTHONHASHSEED is fixed, so the keys are only comparable within one session.
    result = defaultdict(lambda: 0)
    with open(match_filepath, 'r') as f:
        # blank lines (e.g. a trailing newline) would make json.loads fail
        matches = [json.loads(line) for line in f if line.strip()]
    for match in matches:
        pair_key = match.get('songs_str')
        if pair_key is None:
            # raw match files carry only the songs list; build the key here
            songs = match['songs']
            pair_key = min(songs) + max(songs)
        hash_value = hash(pair_key)
        if hash_value in result:
            # NOTE(review): this is the mean only for groups of two records; with
            # more, later records weigh more than earlier ones - confirm intent.
            result[hash_value] = (result[hash_value] + match['score']) / 2
        else:
            result[hash_value] = match['score']
    logging.debug("composite scores computed for %d song groups", len(result))
    return result

In [19]:
# Evaluation metrics for the auto-matched run, keyed by id instead of file path.
df = extract_id(pd.read_csv(auto_eval_path))

# Match records for the same run: one JSON object per line.
with open(auto_match_path, 'r') as f:
    matches = list(map(json.loads, f))

# Order-independent key for the songs of each record: the smallest entry followed
# by the largest, so the same songs listed in either order give the same string.
for match in matches:
    songs = match['songs']
    match['songs_str'] = min(songs) + max(songs)
match_df = pd.DataFrame(matches)

In [8]:
# Preview the first five evaluation rows (metrics plus the extracted id).
df.head(n=5)

Unnamed: 0,is_centroid_distance,d_HO,NLL,C_MU,id
0,True,517.314636,8.683788,59.572461,80c696e3-0371-42d8-ae6f-bce0d8b1a8a7
1,True,551.847229,9.041912,61.032138,698c2410-affc-4d80-a6e7-9585201b2e9e
2,True,542.197083,8.8857,61.019061,625b8f51-87fe-4938-afc0-a35cb68e84cb
3,True,511.693146,9.16011,55.861023,7849a4d5-d040-4f03-a97c-1a0f52ba54c3
4,True,489.054962,9.002111,54.326695,2be4a231-53cc-4903-99f8-d341d334e194


In [9]:
# Preview the first five match records, including the derived songs_str key.
match_df.head(n=5)

Unnamed: 0,id,directory,songs,score,layers,songs_str
0,599c63d0-1c16-4869-a3c6-a256cab76048,/Users/abcheng/Documents/workspace/mashup_eval...,"[deef - Ein sonniger Tag mit dir.mp3, The Oran...",0.0,,The Oranges Band - Ride the Nuclear Wave.mp3de...
1,a46657e6-67ce-40ad-bc27-cd99a2ee346c,/Users/abcheng/Documents/workspace/mashup_eval...,"[deef - Ein sonniger Tag mit dir.mp3, Mia Doi ...",0.0,,Mia Doi Todd - I gave you my home.mp3deef - Ei...
2,3a2595d6-228c-49aa-8cca-66b97cb64f18,/Users/abcheng/Documents/workspace/mashup_eval...,"[deef - Ein sonniger Tag mit dir.mp3, Los Stea...",0.0,,Los Steaks - Sunday Girls.mp3deef - Ein sonnig...
3,126d1495-07b0-4ee3-81f7-90424a5a0cc0,/Users/abcheng/Documents/workspace/mashup_eval...,"[deef - Ein sonniger Tag mit dir.mp3, David Ro...",0.0,,David Rovics - We Just Want The World.mp3deef ...
4,0c97777f-a7a3-45ed-98aa-ef8a26d2da36,/Users/abcheng/Documents/workspace/mashup_eval...,"[deef - Ein sonniger Tag mit dir.mp3, Bessie S...",0.0,,Bessie Smith - My Sweetie Went Away.mp3deef - ...


In [20]:
# Inner join on id: only evaluation rows that have a match record are shown.
pd.merge(df, match_df, on="id")

Unnamed: 0,is_centroid_distance,d_HO,NLL,C_MU,id,directory,songs,score,layers,songs_str
0,True,543.205566,9.165122,59.268776,bcec5ad0-1e62-4908-a2a9-d235e75fd48a,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[Orange Peels - Grey Holiday.mp3, deef - Ein s...",40.485481,"{'vocals': 'Orange Peels - Grey Holiday.mp3', ...",Orange Peels - Grey Holiday.mp3deef - Ein sonn...
1,True,535.271362,7.809341,68.542450,912ea1cf-7c92-4a8f-84e3-45bb0d17aaa6,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[The Oranges Band - Ride the Nuclear Wave.mp3,...",30.839315,{'vocals': 'The Oranges Band - Ride the Nuclea...,Bessie Smith - My Sweetie Went Away.mp3The Ora...
2,True,545.862976,8.865593,61.570949,d2243c2c-af5c-4ad6-9c88-20e30a0ab433,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[Los Steaks - Sunday Girls.mp3, Wann - Happy B...",31.721567,"{'vocals': 'Los Steaks - Sunday Girls.mp3', 'b...",Los Steaks - Sunday Girls.mp3Wann - Happy Birt...
3,True,532.818298,9.213314,57.831340,9cbf5a76-eef4-4e5a-8ebc-860be1282bf7,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[Orange Peels - Grey Holiday.mp3, Bessie Smith...",42.118645,"{'vocals': 'Orange Peels - Grey Holiday.mp3', ...",Bessie Smith - My Sweetie Went Away.mp3Orange ...
4,True,537.380615,8.598825,62.494660,071d1b2e-0a39-48fa-b22f-aff5fd738fdd,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[Mia Doi Todd - I gave you my home.mp3, Wann -...",35.401386,{'vocals': 'Mia Doi Todd - I gave you my home....,Mia Doi Todd - I gave you my home.mp3Wann - Ha...
...,...,...,...,...,...,...,...,...,...,...
415,True,519.182495,8.639333,60.095207,d2cbad27-559a-43e6-838c-04e163ed993c,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[Grand Mal - Children of Light.mp3, Dragon Or ...",40.910202,{'vocals': 'Grand Mal - Children of Light.mp3'...,Dragon Or Emperor - Part of Me Says.mp3Grand M...
416,True,528.529114,8.783486,60.173044,65a4f0f9-d847-4478-9027-44125a088617,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[ZOE.LEELA - Jewel.mp3, Los Steaks - Sunday Gi...",40.068680,"{'vocals': 'ZOE.LEELA - Jewel.mp3', 'bass': 'L...",Los Steaks - Sunday Girls.mp3ZOE.LEELA - Jewel...
417,True,493.928314,8.452973,58.432494,4d28d08a-ea37-4c75-bd40-d9cbf338d0e0,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[Shearer - Itch.mp3, Bam Bam - Hi-Q.mp3]",42.090725,"{'vocals': 'Shearer - Itch.mp3', 'bass': 'Bam ...",Bam Bam - Hi-Q.mp3Shearer - Itch.mp3
418,True,547.553955,8.364971,65.457961,986cce48-f2b5-4262-944a-b98f51da3c2b,/ocean/projects/cis250057p/acheng8/mashup_eval...,"[Poland - Lying Machine.mp3, Bam Bam - Hi-Q.mp3]",35.947594,"{'vocals': 'Poland - Lying Machine.mp3', 'bass...",Bam Bam - Hi-Q.mp3Poland - Lying Machine.mp3
