<a href="https://colab.research.google.com/github/giaranjan/spotify_languagesorting/blob/main/Spotify_playlist_sorting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
# Only spotipy and boto3 are installed here; pandas and requests are imported
# below as well -- presumably the runtime already provides them (TODO confirm
# when running outside Colab).
!pip install spotipy boto3

In [None]:
# Import necessary libraries
import os
import pandas as pd
import requests
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import boto3
import time

In [None]:
# Spotify credentials and setup
# Placeholder values: replace them with the credentials of your own Spotify
# application before running the notebook.
spotify_client_id = 'YOUR_CLIENT_ID'
spotify_client_secret = 'YOUR_CLIENT_SECRET'
redirect_uri = 'http://localhost:8888/callback'

# Publish the values through the process environment under the SPOTIPY_* names.
# The authentication cell constructs SpotifyOAuth without explicit credentials,
# so it relies on these variables being set.
os.environ.update({
    "SPOTIPY_CLIENT_ID": spotify_client_id,
    "SPOTIPY_CLIENT_SECRET": spotify_client_secret,
    "SPOTIPY_REDIRECT_URI": redirect_uri,
})

In [None]:
# Musixmatch API key
# get_lyrics() sends this value as the "apikey" query parameter, so replace the
# placeholder with a real key before running the lyrics cells.
musixmatch_api_key = "YOUR_MUSIXMATCH_API_KEY"

In [None]:
# AWS credentials
# Placeholder AWS settings exported through the process environment; replace
# the two key values with real credentials before running.
os.environ.update({
    'AWS_ACCESS_KEY_ID': 'YOUR_AWS_ACCESS_KEY_ID',
    'AWS_SECRET_ACCESS_KEY': 'YOUR_AWS_SECRET_ACCESS_KEY',
    'AWS_DEFAULT_REGION': 'us-east-1',
})

In [None]:
# Spotify authentication
# Space-separated list of the permissions requested from the user: reading the
# saved-tracks library and modifying public and private playlists.
scope = " ".join((
    "user-library-read",
    "playlist-modify-public",
    "playlist-modify-private",
))

# No client ID/secret/redirect URI is passed here; they come from the
# SPOTIPY_* environment variables set in the credentials cell.
auth_manager = SpotifyOAuth(show_dialog=True, scope=scope)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
# Function to get lyrics from Musixmatch
def get_lyrics(track_name, artist_name, timeout=10):
    """Fetch the lyrics body for a track from the Musixmatch matcher endpoint.

    Args:
        track_name: Track title, sent as the ``q_track`` query parameter.
        artist_name: Artist name, sent as the ``q_artist`` query parameter.
        timeout: Seconds to wait for Musixmatch before giving up on the
            request.

    Returns:
        The ``lyrics_body`` string when the response contains one;
        ``"Lyrics not found"`` when a 200 response carries no usable lyrics
        payload; ``"Error: Unable to retrieve lyrics"`` when the request fails
        or the HTTP status is not 200.
    """
    # HTTPS keeps the API key in the query string from travelling in clear text.
    url = "https://api.musixmatch.com/ws/1.1/matcher.lyrics.get"
    params = {
        "q_track": track_name,
        "q_artist": artist_name,
        "apikey": musixmatch_api_key,
    }

    try:
        # Without a timeout a stalled connection would block the notebook
        # indefinitely.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # One unreachable lookup must not abort the whole per-song loop.
        return "Error: Unable to retrieve lyrics"

    if response.status_code != 200:
        return "Error: Unable to retrieve lyrics"

    try:
        return response.json()['message']['body']['lyrics']['lyrics_body']
    except (KeyError, TypeError, ValueError):
        # KeyError/TypeError: a level of the expected nesting is missing or is
        # not a mapping. ValueError: the body was not valid JSON.
        return "Lyrics not found"

In [None]:
# Function to detect language using AWS Comprehend
def detect_language(text):
    """Return the dominant language code of ``text`` using AWS Comprehend.

    Args:
        text: Lyrics text, or one of the placeholder strings that
            ``get_lyrics`` returns when no lyrics are available.

    Returns:
        The ``LanguageCode`` of the first entry in Comprehend's ``Languages``
        list, or ``"UNCERTAIN"`` when the input is not usable text, Comprehend
        reports no language, or the call fails.
    """
    # Non-string values (e.g. None or NaN from a DataFrame) and blank strings
    # carry nothing to analyse.
    if not isinstance(text, str) or not text.strip():
        return "UNCERTAIN"
    # Both get_lyrics() placeholders are English sentences, not lyrics; sending
    # them to Comprehend would mislabel the song.
    if text in ("Lyrics not found", "Error: Unable to retrieve lyrics"):
        return "UNCERTAIN"

    try:
        response = _get_comprehend_client().detect_dominant_language(Text=text)
        if response['Languages']:
            return response['Languages'][0]['LanguageCode']
        return "UNCERTAIN"
    except Exception as e:
        # Deliberately broad: detection is best-effort and one failed song
        # should not stop the rest of the DataFrame from being processed.
        print(f"An error occurred: {e}")
        return "UNCERTAIN"


def _get_comprehend_client():
    """Return the module-level ``comprehend`` client, creating it on first use."""
    global comprehend
    try:
        return comprehend
    except NameError:
        # No cell defines `comprehend`; build it lazily so detect_language()
        # works without extra setup. boto3 is imported at the top of the file.
        comprehend = boto3.client('comprehend')
        return comprehend

In [None]:
# Function to handle API requests robustly
def robust_spotify_request(call, *args, **kwargs):
    """Invoke ``call(*args, **kwargs)``, retrying while Spotify rate-limits it.

    Args:
        call: The callable to invoke (typically a bound spotipy client method).
        *args: Positional arguments forwarded to ``call``.
        **kwargs: Keyword arguments forwarded to ``call``.

    Returns:
        Whatever ``call`` returns once it succeeds.

    Raises:
        spotipy.SpotifyException: For any Spotify error whose HTTP status is
            not 429; those are re-raised unchanged.
    """
    while True:
        try:
            return call(*args, **kwargs)
        except spotipy.SpotifyException as e:
            if e.http_status != 429:
                raise
            wait_time = _retry_after_seconds(getattr(e, 'headers', None))
            print(f"Rate limited. Waiting for {wait_time} seconds before retrying...")
            time.sleep(wait_time)


def _retry_after_seconds(headers, default=1):
    """Read the Retry-After delay in seconds, or ``default`` if it is unusable."""
    try:
        seconds = int((headers or {}).get('Retry-After', default))
    except (TypeError, ValueError):
        # A missing or non-numeric header must not raise from inside the
        # exception handler and abort the retry.
        return default
    # time.sleep() rejects negative values.
    return seconds if seconds >= 0 else default

In [None]:
# Function to create a playlist and return its ID
def create_playlist_for_language(language):
    """Create a playlist for one language group and return the new playlist's ID.

    The ``"UNCERTAIN"`` marker gets its own fixed name and description; any
    other value is interpolated into the name and description as-is. The
    playlist is created for the currently authenticated user.
    """
    if language == "UNCERTAIN":
        playlist_name = "Songs with Uncertain Language"
        playlist_description = "Songs with language detection uncertain."
    else:
        playlist_name = f"Songs in {language}"
        playlist_description = f"Collection of songs in {language} language."

    current_user_id = sp.current_user()['id']
    created = robust_spotify_request(
        sp.user_playlist_create,
        user=current_user_id,
        name=playlist_name,
        description=playlist_description,
    )
    return created['id']

In [None]:
# Function to batch add tracks to a playlist
def batch_add_tracks_to_playlist(playlist_id, track_ids):
    """Add ``track_ids`` to the playlist in consecutive slices of up to 100.

    Each slice is sent through ``robust_spotify_request`` so a rate-limited
    call is retried. An empty ``track_ids`` results in no requests.
    """
    batch_size = 100
    offset = 0
    while offset < len(track_ids):
        current_batch = track_ids[offset:offset + batch_size]
        robust_spotify_request(sp.playlist_add_items, playlist_id, current_batch)
        offset += batch_size

In [None]:
# Fetch the first 50 liked songs from Spotify
# Only this single page is requested (limit=50) and no further pages are
# fetched, so the cells below work on at most 50 saved tracks.
results = sp.current_user_saved_tracks(limit=50)

In [None]:
# List to store song details: the loop in the next cell appends one dict per
# track with the keys 'song_name', 'singer' and 'lyrics'.
songs_details = []

In [None]:
# Fetch and store the details for each song
# One row is appended per liked-song item, in the order Spotify returned them.
for item in results['items']:
    track = item['track']
    # Only the first listed artist is used for the lyrics lookup.
    artist_name = track['artists'][0]['name']
    track_name = track['name']
    songs_details.append({
        'song_name': track_name,
        'singer': artist_name,
        'lyrics': get_lyrics(track_name, artist_name),
    })

In [None]:
# Convert the list to a pandas DataFrame
# Naming the columns keeps them present even when no songs were fetched, so the
# column lookups below do not fail on an empty DataFrame.
songs_df = pd.DataFrame(songs_details, columns=['song_name', 'singer', 'lyrics'])

# Apply language detection to the lyrics column
songs_df['language'] = songs_df['lyrics'].apply(detect_language)

# Add track IDs to the DataFrame
# The liked-songs response already carries every track's Spotify ID, so map
# each row back to it by (song name, first artist) -- the same pair the rows
# were built from -- instead of calling a separate lookup helper.
# NOTE(review): two liked tracks sharing both title and first artist would map
# to the same ID (the later one in the response).
track_id_by_song = {
    (item['track']['name'], item['track']['artists'][0]['name']): item['track']['id']
    for item in results['items']
}
# Rows with no match get None, which the playlist cell drops via dropna().
songs_df['track_id'] = [
    track_id_by_song.get(song_key)
    for song_key in zip(songs_df['song_name'], songs_df['singer'])
]

In [None]:
# Main process to create playlists for each language
# One playlist per distinct value in the 'language' column.
language_groups = songs_df.groupby('language')
for language, language_rows in language_groups:
    print(f"Creating playlist for language: {language}")
    # The playlist is created before checking whether the group has any usable
    # track IDs, so a group without IDs still yields an (empty) playlist.
    playlist_id = create_playlist_for_language(language)
    print(f"Playlist created with ID: {playlist_id}")

    # Rows whose track ID is missing are left out.
    usable_ids = language_rows['track_id'].dropna()
    track_ids = usable_ids.tolist()

    if not track_ids:
        print(f"No tracks to add for language: {language}")
    else:
        print(f"Adding {len(track_ids)} tracks to playlist for language: {language}")
        batch_add_tracks_to_playlist(playlist_id, track_ids)
        print(f"Tracks added to playlist for language: {language}")

    print(f"Finished processing for language: {language}\n")