In [4]:
import os

import pandas as pd
import requests

In [48]:
# Read the YouTube Data API key from the environment so the real credential
# never has to be stored in the notebook. When YOUTUBE_API_KEY is not set the
# original placeholder value is kept, so existing behavior is unchanged.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "ENV_")

In [25]:
def search_youtube_covers(song_title, api_key, max_pages=None, timeout=10):
    """Search YouTube for cover videos of a song and return their video IDs.

    Queries the YouTube Data API v3 ``search`` endpoint with the query
    ``"<song_title> cover"`` and follows ``nextPageToken`` until the API stops
    returning one, a response without ``'items'`` comes back, or ``max_pages``
    pages have been requested.

    Args:
        song_title: Song title used to build the search query.
        api_key: YouTube Data API key.
        max_pages: Optional cap on the number of result pages to request
            (each request asks for up to 50 results). ``None`` means no cap.
        timeout: Seconds passed to ``requests.get`` as the request timeout.

    Returns:
        list: Video IDs in the order the API returned them. Empty when the
        first response does not contain ``'items'``.
    """
    base_url = "https://www.googleapis.com/youtube/v3/search"
    search_query = f"{song_title} cover"
    video_ids = []
    page_token = None
    pages_fetched = 0

    while max_pages is None or pages_fetched < max_pages:
        params = {
            'part': 'snippet',
            'q': search_query,
            'type': 'video',
            'maxResults': 50,
            'key': api_key,
        }
        # Only send a page token once the API has handed one back.
        if page_token:
            params['pageToken'] = page_token

        response = requests.get(base_url, params=params, timeout=timeout).json()
        pages_fetched += 1

        if 'items' not in response:
            # If 'items' key is missing, print the response for debugging
            print("API Response does not contain 'items':", response)
            break

        for item in response['items']:
            # Skip results that carry no video ID instead of raising KeyError.
            item_id = item.get('id')
            video_id = item_id.get('videoId') if isinstance(item_id, dict) else None
            if video_id:
                video_ids.append(video_id)

        page_token = response.get('nextPageToken')
        if not page_token:
            break

    return video_ids


In [26]:
import requests
import pandas as pd

def get_video_details(video_id, api_key, timeout=10):
    """Fetch upload date, engagement counts and channel subscribers for a video.

    Calls the YouTube Data API v3 ``videos`` endpoint with
    ``part=snippet,statistics`` and then looks up the owning channel through
    ``get_channel_details``.

    Args:
        video_id: ID of the video to look up.
        api_key: YouTube Data API key.
        timeout: Seconds passed to ``requests.get`` as the request timeout.

    Returns:
        dict: Keys ``video_id``, ``upload_date``, ``views``, ``likes``,
        ``dislikes``, ``comments`` and ``subscribers``. Any value the API did
        not supply is the string ``'N/A'``. When the response has no items
        (error payload, or no video returned for the ID) every field except
        ``video_id`` is ``'N/A'``, so a single bad ID does not abort a batch.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        'part': 'snippet,statistics',
        'id': video_id,
        'key': api_key
    }
    response = requests.get(url, params=params, timeout=timeout).json()

    items = response.get('items') or []
    if not items:
        # Mirror the channel lookup: degrade to placeholders instead of raising.
        print(f"API Response does not contain video data for {video_id}:", response)
        return {
            'video_id': video_id,
            'upload_date': 'N/A',
            'views': 'N/A',
            'likes': 'N/A',
            'dislikes': 'N/A',
            'comments': 'N/A',
            'subscribers': 'N/A'
        }

    video_data = items[0]
    snippet = video_data.get('snippet', {})
    statistics = video_data.get('statistics', {})

    # Without a channel ID there is nothing to look up; skip the extra request.
    channel_id = snippet.get('channelId')
    channel_data = get_channel_details(channel_id, api_key) if channel_id else {}

    return {
        'video_id': video_id,
        'upload_date': snippet.get('publishedAt', 'N/A'),
        'views': statistics.get('viewCount', 'N/A'),
        'likes': statistics.get('likeCount', 'N/A'),
        # NOTE(review): the saved outputs in this notebook show this column
        # empty; presumably the API no longer returns dislikeCount - confirm.
        'dislikes': statistics.get('dislikeCount', 'N/A'),
        'comments': statistics.get('commentCount', 'N/A'),
        'subscribers': channel_data.get('subscriberCount', 'N/A')
    }


def get_channel_details(channel_id, api_key, timeout=10):
    """Fetch the statistics object of a YouTube channel.

    Calls the YouTube Data API v3 ``channels`` endpoint with
    ``part=statistics``.

    Args:
        channel_id: ID of the channel to look up.
        api_key: YouTube Data API key.
        timeout: Seconds passed to ``requests.get`` as the request timeout.

    Returns:
        dict: The ``statistics`` object of the first returned item, or
        ``{'subscriberCount': 'N/A'}`` when the response has no items or the
        first item carries no ``statistics`` entry.
    """
    url = "https://www.googleapis.com/youtube/v3/channels"
    params = {
        'part': 'statistics',
        'id': channel_id,
        'key': api_key
    }
    response = requests.get(url, params=params, timeout=timeout).json()

    items = response.get('items') or []
    if items:
        # Guard the nested lookup so an item without statistics cannot raise.
        statistics = items[0].get('statistics')
        if statistics is not None:
            return statistics

    # Default used whenever no statistics could be read from the response.
    return {'subscriberCount': 'N/A'}


In [50]:
import pandas as pd

# Path of the CSV that presumably lists the song titles to process (TODO confirm).
file_name = "song_titles.csv"
# Loading is disabled for now; nothing in this cell reads the file.
#song_titles = pd.read_csv(file_name)

In [51]:
# Usage: collect metadata for every cover found for one song.
song_title = "If I Ain't Got You - Alicia Keys"
video_ids = search_youtube_covers(song_title, API_KEY)

data = []
for index, video_id in enumerate(video_ids):
    print(f"Processing video {index + 1} / {len(video_ids)}: {video_id}")

    # Tag the record with the song it belongs to; the title serves as 'song_id'.
    video_data = {**get_video_details(video_id, API_KEY), 'song_id': song_title}
    data.append(video_data)

# One row per video, built in a single step from the collected records.
df = pd.DataFrame(data)

# Preview the result, including the 'song_id' column.
df.head()


API Response does not contain 'items': {'error': {'code': 400, 'message': 'API key not valid. Please pass a valid API key.', 'errors': [{'message': 'API key not valid. Please pass a valid API key.', 'domain': 'global', 'reason': 'badRequest'}], 'status': 'INVALID_ARGUMENT', 'details': [{'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'API_KEY_INVALID', 'domain': 'googleapis.com', 'metadata': {'service': 'youtube.googleapis.com'}}]}}


# 2. Save output

In [43]:
# Show the first ten rows of the covers DataFrame.
df.head(n=10)

Unnamed: 0,video_id,upload_date,views,likes,dislikes,comments,subscribers,song_id
0,Duaxp1nc5po,2016-11-03T18:30:00Z,6342336,113146,,1551,2310000,If I Ain't Got You - Alicia Keys
1,TnjS5kqyu3E,2022-11-02T01:05:46Z,1129167,22417,,313,3870,If I Ain't Got You - Alicia Keys
2,jw3MqySX9qw,2021-10-01T10:00:23Z,1603208,48048,,974,244000,If I Ain't Got You - Alicia Keys
3,vFny_UvyT0s,2023-05-24T14:00:40Z,409868,15201,,516,681000,If I Ain't Got You - Alicia Keys
4,tq9nmRqNJZM,2015-02-05T11:01:15Z,6921650,77987,,1218,25100,If I Ain't Got You - Alicia Keys
5,TOgOhMWA6oM,2023-02-02T18:38:39Z,181418,3090,,90,39100,If I Ain't Got You - Alicia Keys
6,YJHs_P4voBM,2023-09-27T13:00:33Z,90855,5427,,251,99600,If I Ain't Got You - Alicia Keys
7,jNV60kqZGNY,2019-03-27T14:11:30Z,197171,2811,,102,53500,If I Ain't Got You - Alicia Keys
8,ddSoLkxzxyQ,2018-07-16T14:00:06Z,1304509,21579,,527,1570000,If I Ain't Got You - Alicia Keys
9,g_xcxu0TTvo,2017-10-02T17:27:18Z,8357621,150908,,3417,1140000,If I Ain't Got You - Alicia Keys


In [46]:
# output to CSV
# NOTE(review): only the first three rows are written (df.head(n=3)), not the
# whole DataFrame - confirm this is meant to be a small sample for the report.
file_name = "./report in R/covers.csv"
df.head(n=3).to_csv(file_name, index=False)

# 3. Populate the DataFrame for the original song

In [39]:
# Usage: collect metadata for the original recording(s) of the song.

# TODO: refactor this out, and append this code to run right after the youtube_covers dataframe above
song_title = "If I Ain't Got You - Alicia Keys"  # Song title as a unique identifier

# Hand-picked video IDs of the original song.
video_ids_of_original_songs = ['Ju8Hr50Ckwk']

data_original_songs = []
for index, video_id in enumerate(video_ids_of_original_songs):
    print(f"Processing video {index + 1} / {len(video_ids_of_original_songs)}: {video_id}")

    # Tag the record with the song it belongs to; the title serves as 'song_id'.
    video_data = {**get_video_details(video_id, API_KEY), 'song_id': song_title}
    data_original_songs.append(video_data)

# One row per original video.
df_original_songs = pd.DataFrame(data_original_songs)

# Show the result, including the 'song_id' column.
df_original_songs

Processing video 1 / 1: Ju8Hr50Ckwk


Unnamed: 0,video_id,upload_date,views,likes,dislikes,comments,subscribers,song_id
0,Ju8Hr50Ckwk,2009-10-25T08:39:48Z,319491772,2303455,,33686,2370000,If I Ain't Got You - Alicia Keys


In [44]:
# output to CSV
# Writes every row of the original-songs DataFrame, without the index column.
file_name = "./data/metadata/original_songs.csv"
df_original_songs.to_csv(file_name, index=False)

In [45]:
# Display the original-songs DataFrame.
df_original_songs

Unnamed: 0,video_id,upload_date,views,likes,dislikes,comments,subscribers,song_id
0,Ju8Hr50Ckwk,2009-10-25T08:39:48Z,319491772,2303455,,33686,2370000,If I Ain't Got You - Alicia Keys
