# Code for Testing how to recognize songs

Notes: moviepy requires ffmpeg to be installed; you can download it from [this link](https://ffmpeg.org/download.html). You will also need ffprobe to run ShazamAPI, which can be downloaded from the same link. Move both executables into your `/usr/local/bin` directory.

* We use moviepy to convert the file to mp3
* We pass the mp3 to ShazamAPI to get the relevant information

Author: Audrey Yip and Jyontika Kapoor

In [1]:
# !pip3 install moviepy
# !pip3 install ShazamAPI

In [34]:
import os
import json
from moviepy.editor import *
from ShazamAPI import Shazam
import pandas as pd
# Current working directory at the time this cell runs.
# NOTE(review): not referenced by any cell visible in this notebook export — confirm it is still needed.
cwd = os.getcwd()

#### First, we need to convert the .mp4 files to .mp3
##### This is so it is compatible with the Shazam API 

In [3]:
# Root directory holding the collected videos (machine-specific absolute path).
folder_path = "/Users/jyontika/Documents/GitHub/CS315-Final-Project/data-collection/videos"

# The source .mp4 files and the converted .mp3 files live in sibling sub-folders.
input_path, output_folder = (
    os.path.join(folder_path, subdir) for subdir in ("videos_mp4", "videos_mp3")
)

# Make sure the .mp3 destination exists; an already-present folder is fine.
os.makedirs(output_folder, exist_ok=True)

# Initialize the bookkeeping variables: both counters at zero, no missing files yet.
num_original_files = num_converted_files = 0
missing_files = []

In [7]:
# # no need to run this again - we have all the .mp3 now

# #iterate through each .mp4
# for filename in os.listdir(input_path):
#     if filename.endswith(".mp4"):
#         num_original_files += 1
        
#         video_path = os.path.join(input_path, filename)
#         output_path = os.path.join(output_folder, filename.replace(".mp4", ".mp3"))
        
#         # load the video
#         video = VideoFileClip(video_path)
        
#         # convert video to audio and save as .mp3
#         video.audio.write_audiofile(output_path)
        
#         num_converted_files += 1

# print("Conversion complete!")

In [6]:
# #Double check the numbers of files are the same

# num_files_original = len([name for name in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, name))])

# # Count the number of files in the new folder
# num_files_converted = len([name for name in os.listdir(output_folder) if os.path.isfile(os.path.join(output_folder, name))])


### Now we can use Shazam API on these .mp3 files

In [35]:
def extract_song_info(mp3_file_content):
    """Run Shazam recognition on raw MP3 data and return the first result.

    Args:
        mp3_file_content: Contents of an .mp3 file, as read in binary mode.

    Returns:
        The first item produced by ``Shazam.recognizeSong()``. The cells in
        this notebook index it as ``result[1]`` to reach the response dict
        (keys such as ``'matches'`` and ``'track'``). Returns ``None`` when
        the response could not be decoded as JSON, or when the recognizer
        produced no result at all.
    """
    try:
        recognize_generator = Shazam(mp3_file_content).recognizeSong()
        # Supplying a default keeps StopIteration from escaping if the
        # generator finishes without yielding anything (presumably possible
        # for clips too short to fingerprint -- TODO confirm against ShazamAPI).
        return next(recognize_generator, None)
    except json.JSONDecodeError:
        # The service answered with something that is not valid JSON;
        # callers treat None as "skip this file".
        return None

In [17]:
## testing this function out
# want to compare the structure of the data when there is a Shazam API match vs. no match

# Context managers close each file handle as soon as its bytes have been read.
with open('/Users/jyontika/Documents/GitHub/CS315-Final-Project/data-collection/videos/videos_mp3/share_video_6958280269531057413_.mp3', 'rb') as mp3_file:
    mp3_test = mp3_file.read()

no_match = extract_song_info(mp3_test)
#no_match

with open('test_sound_.mp3', 'rb') as mp3_file:
    mp3_test2 = mp3_file.read()
match_found = extract_song_info(mp3_test2)
#match_found


In [33]:

# Read the clip through a context manager so the file handle is closed
# before the (slow) recognition call starts.
with open('/Users/jyontika/Documents/GitHub/CS315-Final-Project/data-collection/videos/videos_mp3/share_video_7319250533057875232_.mp3', 'rb') as mp3_file:
    mp3_test = mp3_file.read()

no_match = extract_song_info(mp3_test)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [32]:
# Accumulators filled by the loop below:
#   no_song_found -- one {'File_Name': ...} dict per file without a Shazam match
#   songs_info    -- one metadata dict per file with a match
no_song_found = []
songs_info = []

mp3_folder = output_folder


def _pick_preview_link(track):
    """Return the preview link for a Shazam ``track`` dict, or None.

    If the track's ``hub`` has an ``actions`` list, the ``uri`` of its second
    entry is used (None when the list has fewer than two entries or the entry
    has no ``uri``). Without an ``actions`` key, the track's ``share`` ``href``
    is used instead when present.
    """
    hub = track.get('hub') or {}
    if 'actions' in hub:
        actions = hub['actions']
        return actions[1].get('uri') if len(actions) > 1 else None
    return (track.get('share') or {}).get('href')


# loop through each file in the folder
for filename in os.listdir(mp3_folder):
    if not filename.endswith(".mp3"):
        continue

    mp3_path = os.path.join(mp3_folder, filename)
    # Close the handle right after reading instead of leaving it to the GC.
    with open(mp3_path, 'rb') as mp3_file:
        mp3_file_content = mp3_file.read()

    print("Processing file:", filename)

    # use function to extract song data
    song_info = extract_song_info(mp3_file_content)
    if song_info is None:
        print("Error: Failed to decode JSON response for file:", filename)
        continue

    # The second element of the result is the response dict.
    response = song_info[1]
    track = response.get('track')

    # No matches (or no track payload to read from): record the file and move on,
    # rather than letting a missing key abort the whole batch.
    if not response.get('matches') or track is None:
        no_song_found.append({'File_Name': filename})
        continue

    # Matches found: collect the relevant fields. Fields absent from the
    # response are stored as None.
    share = track.get('share') or {}
    songs_info.append({
        'Title': track.get('title'),
        'Artist': track.get('subtitle'),
        'Subject': share.get('subject'),
        'Preview_Link': _pick_preview_link(track),
    })

print("Song information extraction complete!")


Processing file: share_video_7309247847898090794_.mp3
Processing file: share_video_7333834299415760171_.mp3
Processing file: share_video_7329274198096973099_.mp3
Processing file: share_video_7330250329839160607_.mp3
Processing file: share_video_7339685917759769899_.mp3
Processing file: share_video_7329990500528753953_.mp3
Processing file: share_video_7325577933299485958_.mp3
Processing file: share_video_7324083003284573482_.mp3
Processing file: share_video_7306802296640326945_.mp3
Processing file: share_video_7325879255898934561_.mp3
Processing file: share_video_7303373812387679490_.mp3
Processing file: share_video_7306351228974288159_.mp3
Processing file: share_video_7339675920799730987_.mp3
Processing file: share_video_7334356059106053409_.mp3
Processing file: share_video_7341080338803821866_.mp3
Processing file: share_video_7340493999309933867_.mp3
Processing file: share_video_7339691972225010975_.mp3
Processing file: share_video_7326288796163607841_.mp3
Processing file: share_video

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Tabulate the two result lists: matched-song metadata first, then the
# files for which no song was found.
song_df, no_songs_df = map(pd.DataFrame, (songs_info, no_song_found))

#### Below is Audrey's code, testing Shazam

In [13]:
# testing ShazamAPI

from ShazamAPI import Shazam

# Read the sample clip; the context manager closes the file once its bytes
# are in memory.
with open('test_sound_.mp3', 'rb') as sample_file:
    mp3_file_content_to_recognize = sample_file.read()

shazam = Shazam(
    mp3_file_content_to_recognize,
    #lang='en',
    #time_zone='Europe/Paris'
)
recognize_generator = shazam.recognizeSong()

# this gets all recognized songs
# while True:
#	print(next(recognize_generator))

# get the first recognized song
first_song = next(recognize_generator)

print(first_song)

(8.0, {'matches': [{'id': '213384455', 'offset': -4.565788574, 'timeskew': 0.0004633665, 'frequencyskew': -0.000120818615}, {'id': '687594120', 'offset': -4.081568847, 'timeskew': 0.00014019012, 'frequencyskew': 0.0}, {'id': '472505492', 'offset': -3.623958496, 'timeskew': 0.0013375282, 'frequencyskew': -0.00016111135}], 'location': {'accuracy': 0.01}, 'timestamp': 1713574365441, 'timezone': 'Europe/Moscow', 'track': {'layout': '5', 'type': 'MUSIC', 'key': '245559138', 'title': 'Whistle (Edit)', 'subtitle': 'Joel Merry', 'images': {'background': 'https://is1-ssl.mzstatic.com/image/thumb/Music116/v4/1d/95/d8/1d95d899-ca27-20e9-dd25-edb0a2127b21/5037300064451.jpg/400x400cc.jpg', 'coverart': 'https://is1-ssl.mzstatic.com/image/thumb/Music116/v4/1d/95/d8/1d95d899-ca27-20e9-dd25-edb0a2127b21/5037300064451.jpg/400x400cc.jpg', 'coverarthq': 'https://is1-ssl.mzstatic.com/image/thumb/Music116/v4/1d/95/d8/1d95d899-ca27-20e9-dd25-edb0a2127b21/5037300064451.jpg/400x400cc.jpg', 'joecolor': 'b:e9f1f

In [4]:
# All fields read below sit under the "track" entry of the response dict,
# which is the second element of the recognition result.
track_payload = first_song[1]["track"]

# song title
song = track_payload["title"]

# "subtitle" appears to hold the artist name (?)
artist = track_payload["subtitle"]

# subject appears to give both track name and artist
track_subject = track_payload["share"]["subject"]

# URI of the second hub action; for the sample clip this is an audio preview link
song_preview = track_payload["hub"]["actions"][1]["uri"]

print("Song name: ", song)
print("Artist name: ", artist)
print("Song subject: ", track_subject)
print("Link to song preview: ", song_preview)



Song name:  Whistle (Edit)
Artist name:  Joel Merry
Song subject:  Whistle (Edit) - Joel Merry
Link to song preview:  https://audio-ssl.itunes.apple.com/itunes-assets/AudioPreview126/v4/af/98/a6/af98a6d6-9b48-bdd7-4646-793974e66e23/mzaf_15997244971500515573.plus.aac.ep.m4a


This will require further testing to see which snippets of data we will need.