## Step 1: Install Required Packages
The following cell installs necessary Python packages for transliteration and text processing.

### Step 1: Install Required Packages
This step installs the necessary libraries like `indic-transliteration` for text processing in various Indian scripts.

In [1]:
!pip install indic-transliteration

Collecting indic-transliteration
  Downloading indic_transliteration-2.3.68-py3-none-any.whl.metadata (1.4 kB)
Collecting backports.functools-lru-cache (from indic-transliteration)
  Downloading backports.functools_lru_cache-2.0.0-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting roman (from indic-transliteration)
  Downloading roman-4.2-py3-none-any.whl.metadata (3.6 kB)
Downloading indic_transliteration-2.3.68-py3-none-any.whl (155 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading backports.functools_lru_cache-2.0.0-py2.py3-none-any.whl (6.7 kB)
Downloading roman-4.2-py3-none-any.whl (5.5 kB)
Installing collected packages: roman, backports.functools-lru-cache, indic-transliteration
Successfully installed backports.functools-lru-cache-2.0.0 indic-transliteration-2.3.68 roman-4.2


## Step 2: Import Libraries
In this step, we import essential libraries, including modules for transliteration, text processing, and data handling.

In [2]:
import json
import os
import requests
import re
import nltk

from itertools import islice
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from nltk.corpus import words

## Step 3: Download NLTK Resources
We download the NLTK words dataset, which will be useful for text processing tasks.

In [3]:
# Download nltk words list if not already downloaded
nltk.download('words')

[nltk_data] Downloading package words to /usr/share/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [4]:
# Dictionary to hold the song data
lib_songs = {}

# Loop through the files in the directory
for dirname, _, filenames in os.walk('/kaggle/input/my-apple-music-library'):
    for filename in filenames:
        if filename.startswith('lib_songs_v') and filename.endswith('.json'):
            # Extract version number
            version = filename.split('_')[-1].split('.')[0]
            # Load the JSON file
            with open(os.path.join(dirname, filename), 'r') as f:
                lib_songs[f'lib_songs_{version}'] = json.load(f)

# Now lib_songs['lib_songs_v1'], lib_songs['lib_songs_v2'], etc. will contain the respective data

In [5]:
print("lib_songs: ",lib_songs.keys())
print("lib_songs_v2: ",lib_songs['lib_songs_v2'].keys())
print("resources: ",lib_songs['lib_songs_v2']['resources'].keys())
print("songs: ",list(islice(lib_songs['lib_songs_v2']['resources']['library-songs'].keys(), 5)))
print("song detail: ", lib_songs['lib_songs_v2']['resources']['library-songs']['i.V7Bv4lehZzk93Xq'].keys())
print("song attribute: ", lib_songs['lib_songs_v2']['resources']['library-songs']['i.V7Bv4lehZzk93Xq']['attributes'].keys())

lib_songs:  dict_keys(['lib_songs_v2', 'lib_songs_v1', 'lib_songs_v5', 'lib_songs_v3', 'lib_songs_v4'])
lib_songs_v2:  dict_keys(['next', 'data', 'resources', 'meta'])
resources:  dict_keys(['library-songs', 'songs'])
songs:  ['i.V7Bv4lehZzk93Xq', 'i.LVk62JgSlGqNpYM', 'i.O1RGKbrHVxWpzrY', 'i.LVk6oXLtlGqNpYM', 'i.6xpNNGNtva0B5dE']
song detail:  dict_keys(['id', 'type', 'href', 'attributes', 'relationships'])
song attribute:  dict_keys(['discNumber', 'albumName', 'genreNames', 'hasLyrics', 'trackNumber', 'releaseDate', 'durationInMillis', 'name', 'artistName', 'artwork', 'playParams'])


In [6]:
# List of attributes to extract
extracted_attributes = ["albumName", "genreNames", "hasLyrics", "releaseDate", "durationInMillis", "name", "artistName"]

# Dictionary to store extracted attributes for each lib_songs_v file
extracted_data = {}

# Loop through all the files in the lib_songs dictionary
for lib_song_version, data in lib_songs.items():
    # Initialize a list to store the extracted attributes for the current version
    extracted_data[lib_song_version] = []
    
    # Access the 'library-songs' dictionary in each version
    library_songs = data['resources']['library-songs']
    
    # Loop through each song in the 'library-songs' dictionary
    for song_id, song_data in library_songs.items():
        # Access the 'attributes' for the current song
        attributes = song_data.get('attributes', {})
        
        # Extract only the required attributes
        extracted_song = {attr: attributes.get(attr) for attr in extracted_attributes}
        
        # Add the extracted song data to the list
        extracted_data[lib_song_version].append(extracted_song)

In [7]:
print(extracted_data.keys())

dict_keys(['lib_songs_v2', 'lib_songs_v1', 'lib_songs_v5', 'lib_songs_v3', 'lib_songs_v4'])


In [8]:
# Initialize a counter for the total number of songs
total_songs = 0

# Loop through the extracted data to count songs
for lib_song_version, songs in extracted_data.items():
    total_songs += len(songs)

# Display the total count of songs
print(f"Total number of songs: {total_songs}")


Total number of songs: 441


In [9]:
# Initialize a list to collect all genre names
all_genres = []

# Loop through the extracted data to collect genre names
for lib_song_version, songs in extracted_data.items():
    for song in songs:
        genres = song.get("genreNames", [])
        if genres:
            # Add genres to the list (some songs may have multiple genres)
            all_genres.extend(genres)

# Count the occurrence of each genre
genre_counts = Counter(all_genres)

# Display the top 10 most common genres
print("Top 10 most common genres:")
print(genre_counts.most_common(10))


Top 10 most common genres:
[('Indian Pop', 80), ('Hip-Hop/Rap', 78), ('Bollywood', 65), ('Regional Indian', 46), ('Pop', 36), ('Worldwide', 29), ('Punjabi Pop', 29), ('Dance', 11), ('Indie Pop', 11), ('Punjabi', 10)]


In [10]:
unique_genres = set(all_genres)
print(f"Number of unique genres: {len(unique_genres)}")
print("Unique genres:", unique_genres)

Number of unique genres: 24
Unique genres: {'', 'Electronic', 'Indian Pop', 'New Age', 'R&B/Soul', 'Dance', 'Hip-Hop', 'Haryanvi', 'Indie Pop', 'Soundtrack', 'Regional Indian', 'Asia', 'Comedy', 'Bollywood', 'Punjabi', 'House', 'Indian', 'Afrobeats', 'Worldwide', 'Hip-Hop/Rap', 'Pop', 'Rap', 'Singer/Songwriter', 'Punjabi Pop'}


In [11]:
# Dictionary to hold songs by genre
songs_by_genre = defaultdict(list)

# Loop through the extracted data to collect songs by genre
for lib_song_version, songs in extracted_data.items():
    for song in songs:
        genres = song.get("genreNames", [])
        song_name = song.get("name", "Unknown")
        artist_name = song.get("artistName", "Unknown Artist")
        release_date = song.get("releaseDate", "0000-00-00")  # Set a default for missing dates
        
        # Add each song to its respective genres
        for genre in genres:
            songs_by_genre[genre].append({
                'song_name': song_name,
                'artist_name': artist_name,
                'release_date': release_date
            })

# Function to display the top 10 songs in each genre
def display_top_songs_by_genre(songs_by_genre):
    for genre, songs in songs_by_genre.items():
        # Sort songs by release date, but handle cases where the date might be invalid
#         sorted_songs = sorted(songs, key=lambda x: x['release_date'] if x['release_date'] != "Unknown" else "0000-00-00", reverse=True)
        
        print(f"\nTop 10 songs in genre: {genre}")
        for i, song in enumerate(songs[:10], start=1):
            print(f"{i}. {song['song_name']} by {song['artist_name']} (Released: {song['release_date']})")

# Display top 10 songs for each genre
display_top_songs_by_genre(songs_by_genre)


Top 10 songs in genre: Indian Pop
1. Dj Waley Babu (feat. Aastha Gill) by Badshah (Released: 2015-07-17)
2. House of Lies by Ikka, Karan Aujla & Sanjoy (Released: 2024-05-18)
3. ILL.I.Am by Badshah (Released: 2018-08-17)
4. Driving Slow by Badshah (Released: 2016-08-17)
5. Ghar Jaane De by Mandys (Released: 2024-01-09)
6. Dream Boy by Babbal Rai (Released: 2017-06-29)
7. Hot Launde (feat. Fotty Seven & Bali) by Badshah (Released: 2020-08-07)
8. Gall Goriye by Raftaar & Maninder Buttar (Released: 2017-07-25)
9. Cyclone by UpsideDown & Jaz Dhami (Released: 2018-08-02)
10. Coco by Sukh-E Muzical Doctorz (Released: 2021-12-09)

Top 10 songs in genre: Pop
1. Familiar by Liam Payne & J Balvin (Released: 2018-04-20)
2. Delilah by MIKOLAS & Mark Neve (Released: 2023-06-09)
3. Feel Hai by Bali & Badshah (Released: 2021-09-29)
4. Faasla (feat. Hasan Raheem) by Shamoon Ismail (Released: 2021-09-24)
5. God Damn by Badshah, Karan Aujla & Hiten (Released: 2024-03-18)
6. Dead Mangde by Navaan Sandhu

In [12]:
# List to store songs with lyrics
songs_with_lyrics = []

# Loop through the extracted data to find songs with lyrics
for lib_song_version, songs in extracted_data.items():
    for song in songs:
        if song.get("hasLyrics", False):  # Check if the song has lyrics
            song_name = song.get("name", "Unknown")
            artist_name = song.get("artistName", "Unknown Artist")
            
            # Add the song to the list
            songs_with_lyrics.append({
                'song_name': song_name,
                'artist_name': artist_name,
                'lyrics': ""
            })

# Display the songs that have lyrics
print(f"Total songs with lyrics: {len(songs_with_lyrics)}")
for song in songs_with_lyrics:
    print(f"{song['song_name']} by {song['artist_name']}")
    
# # Define the path where you want to save the JSON file
# output_file_path = 'songs_with_lyrics.json'

# # Save the songs with lyrics to a JSON file
# with open(output_file_path, 'w', encoding='utf-8') as json_file:
#     json.dump(songs_with_lyrics, json_file, ensure_ascii=False, indent=4)

# print(f"Songs with lyrics saved to {output_file_path}.")

Total songs with lyrics: 311
Dj Waley Babu (feat. Aastha Gill) by Badshah
Familiar by Liam Payne & J Balvin
Hattrick (feat. Yaygo Musalini) by Imran Khan
Do It To It (feat. Cherish) by Acraze
ILL.I.Am by Badshah
Driving Slow by Badshah
Delilah by MIKOLAS & Mark Neve
Feel Hai by Bali & Badshah
Enjaay (feat. Sminil Agale, K. Shah & Kya.scene) by Chaar Diwaari
Crazy n Love by Mickey Singh & Jay Skilly
Impossible by Zack Knight
Drinks On Me by Badshah & MC Stan
Ichiban by Badshah
Double Addi (feat. Dj Ice & 2 Nyce) by Mickey Singh & Amar Sandhu
Faasla (feat. Hasan Raheem) by Shamoon Ismail
Ghar Jaane De by Mandys
I'm Outside by Mickey Singh & Jay Skilly
Galtiyan by Zack Knight
Dekho Nashe Mein by Shaan, KK, Sunidhi Chauhan & Pritam
Imaginary by Imran Khan
God Damn by Badshah, Karan Aujla & Hiten
Funk Song by Kidjaywest, Talwiinder & Ikath
Got It All by UpsideDown & The PropheC
Dream Boy by Babbal Rai
Hot Launde (feat. Fotty Seven & Bali) by Badshah
Gall Goriye by Raftaar & Maninder Buttar


In [13]:
# Musixmatch API key
MUSIXMATCH_API_KEY = 'your_api_key'

# Function to fetch lyrics using Musixmatch API
def fetch_lyrics_musixmatch(song):
    song_name = song['song_name']
    artist_name = song['artist_name']

    # Search for the song using Musixmatch API
    search_url = 'https://api.musixmatch.com/ws/1.1/track.search'
    params = {
        'q_track': song_name,
        'q_artist': artist_name,
        'page_size': 1,
        's_track_rating': 'desc',
        'apikey': MUSIXMATCH_API_KEY
    }

    response = requests.get(search_url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        if data['message']['header']['status_code'] == 200 and data['message']['body']['track_list']:
            track_id = data['message']['body']['track_list'][0]['track']['track_id']

            # Get the lyrics for the found track
            lyrics_url = 'https://api.musixmatch.com/ws/1.1/track.lyrics.get'
            lyrics_params = {
                'track_id': track_id,
                'apikey': MUSIXMATCH_API_KEY
            }
            lyrics_response = requests.get(lyrics_url, params=lyrics_params)

            if lyrics_response.status_code == 200:
                lyrics_data = lyrics_response.json()
                if lyrics_data['message']['header']['status_code'] == 200:
                    lyrics = lyrics_data['message']['body']['lyrics']['lyrics_body']
                    lyrics = lyrics.replace("******* This Lyrics is NOT for Commercial use *******", "").strip()
                    return {
                        'song_name': song_name,
                        'artist_name': artist_name,
                        'lyrics': lyrics
                    }
    
    return {
        'song_name': song_name,
        'artist_name': artist_name,
        'lyrics': None
    }

# # Load the songs from the JSON file
# input_file_path = 'songs_with_lyrics.json'

# with open(input_file_path, 'r', encoding='utf-8') as json_file:
#     songs_with_lyrics = json.load(json_file)

# Filter for songs without lyrics
songs_without_lyrics = [song for song in songs_with_lyrics if song['lyrics'] == ""]

# Use ThreadPoolExecutor to fetch lyrics in parallel
with ThreadPoolExecutor(max_workers=10) as executor:
    future_to_song = {executor.submit(fetch_lyrics_musixmatch, song): song for song in songs_without_lyrics}
    
    for future in as_completed(future_to_song):
        song = future_to_song[future]
        try:
            result = future.result()
            if result['lyrics']:
                # Update the song with fetched lyrics
                for song_in_list in songs_with_lyrics:
                    if song_in_list['song_name'] == result['song_name'] and song_in_list['artist_name'] == result['artist_name']:
                        song_in_list['lyrics'] = result['lyrics']
                print(f"Lyrics found for {result['song_name']} by {result['artist_name']}")
                
#                 #Append the lyrics to the JSON file
#                 with open(input_file_path, 'w', encoding='utf-8') as json_file:
#                     json.dump(songs_with_lyrics, json_file, ensure_ascii=False, indent=4)
            else:
                print(f"Lyrics not found for {song['song_name']} by {song['artist_name']}")
        except Exception as e:
            print(f"Error fetching lyrics for {song['song_name']}: {e}")

# print(f"Updated songs with lyrics saved back to {input_file_path}.")

Lyrics not found for Hattrick (feat. Yaygo Musalini) by Imran Khan
Lyrics found for Dj Waley Babu (feat. Aastha Gill) by Badshah
Lyrics not found for Do It To It (feat. Cherish) by Acraze
Lyrics found for Feel Hai by Bali & Badshah
Lyrics found for Familiar by Liam Payne & J Balvin
Lyrics found for Enjaay (feat. Sminil Agale, K. Shah & Kya.scene) by Chaar Diwaari
Lyrics found for ILL.I.Am by Badshah
Lyrics found for Driving Slow by Badshah
Lyrics found for Crazy n Love by Mickey Singh & Jay Skilly
Lyrics found for Delilah by MIKOLAS & Mark Neve
Lyrics found for Impossible by Zack Knight
Lyrics not found for Faasla (feat. Hasan Raheem) by Shamoon Ismail
Lyrics not found for Ichiban by Badshah
Lyrics found for Drinks On Me by Badshah & MC Stan
Lyrics found for Imaginary by Imran Khan
Lyrics found for I'm Outside by Mickey Singh & Jay Skilly
Lyrics found for Double Addi (feat. Dj Ice & 2 Nyce) by Mickey Singh & Amar Sandhu
Lyrics found for Galtiyan by Zack Knight
Lyrics not found for Dekh

In [14]:
# Load the songs from the JSON file
# input_file_path = '/kaggle/working/songs_with_lyrics.json'  # Replace with your file path

# with open(input_file_path, 'r', encoding='utf-8') as json_file:
#     songs_with_lyrics = json.load(json_file)

# Initialize counts
count_with_lyrics = 0
count_without_lyrics = 0

# Count songs with and without lyrics
for song in songs_with_lyrics:
    if song.get('lyrics'):
        count_with_lyrics += 1
    else:
        count_without_lyrics += 1

# Display the counts
print(f"Total songs with lyrics: {count_with_lyrics}")
print(f"Total songs without lyrics: {count_without_lyrics}")

Total songs with lyrics: 255
Total songs without lyrics: 56


In [15]:
# Function to fetch lyrics using Lyrics.ovh API
def fetch_lyrics_ovh(song):
    song_name = song['song_name']
    artist_name = song['artist_name']
    url = f"https://api.lyrics.ovh/v1/{artist_name}/{song_name}"
    response = requests.get(url)
    
    if response.status_code == 200:
        lyrics = response.json().get('lyrics')
        return {
            'song_name': song_name,
            'artist_name': artist_name,
            'lyrics': lyrics
        }
    return {
        'song_name': song_name,
        'artist_name': artist_name,
        'lyrics': None
    }

# Load the songs from the JSON file
# input_file_path = '/kaggle/working/songs_with_lyrics.json'

# with open(input_file_path, 'r', encoding='utf-8') as json_file:
#     songs_with_lyrics = json.load(json_file)

# Filter for songs without lyrics
songs_without_lyrics = [song for song in songs_with_lyrics if song['lyrics'] == ""]

# Use ThreadPoolExecutor to fetch lyrics in parallel
with ThreadPoolExecutor(max_workers=10) as executor:
    # Create a future for each song
    future_to_song = {executor.submit(fetch_lyrics_ovh, song): song for song in songs_without_lyrics}
    
    # Process the results as they complete
    for future in as_completed(future_to_song):
        song = future_to_song[future]
        try:
            result = future.result()
            if result['lyrics']:
                # Update the song with fetched lyrics
                for song_in_list in songs_with_lyrics:
                    if song_in_list['song_name'] == result['song_name'] and song_in_list['artist_name'] == result['artist_name']:
                        song_in_list['lyrics'] = result['lyrics']
                print(f"Lyrics found for {result['song_name']} by {result['artist_name']}")
                
                # Append the lyrics to the JSON file
#                 with open(input_file_path, 'w', encoding='utf-8') as json_file:
#                     json.dump(songs_with_lyrics, json_file, ensure_ascii=False, indent=4)
                    
            else:
                print(f"Lyrics not found for {song['song_name']} by {song['artist_name']}")
        except Exception as e:
            print(f"Error fetching lyrics for {song['song_name']}: {e}")

# print(f"Updated songs with lyrics saved back to {input_file_path}.")

Lyrics found for Do It To It (feat. Cherish) by Acraze
Lyrics not found for Hattrick (feat. Yaygo Musalini) by Imran Khan
Lyrics not found for Ichiban by Badshah
Lyrics not found for Harmonious by Pav Dharia
Lyrics not found for Faasla (feat. Hasan Raheem) by Shamoon Ismail
Lyrics not found for Getup Jawani by Badshah & Yo Yo Honey Singh
Lyrics not found for Galat Hogeya by Rishi Rich, Sukriti Kakar & Mumzy Stranger
Lyrics not found for Fast Cars & Superstars (feat. Reverend Haus) by Cristian Marchi
Lyrics not found for Dekho Nashe Mein by Shaan, KK, Sunidhi Chauhan & Pritam
Lyrics not found for Gangsta (feat. YG) by Karan Aujla
Lyrics not found for Feels Like by Mickey Singh & Jess Loco
Lyrics not found for Garage by Jass Manak & Avvy Sra
Lyrics not found for Body (feat. Sunny Brown & Fateh Doe) by Mickey Singh
Lyrics not found for Bitch by Dhanda Nyoliwala
Lyrics not found for Ay Papi (feat. Rameet Sandhu) by Sama Blake
Lyrics not found for Beat Pe Haley by Addy Nagar & Spoiler
Lyric

In [16]:
# Load the songs from the JSON file
# input_file_path = '/kaggle/input/song-with-lyrics/songs_with_lyrics.json'  # Replace with your file path

# Load the English words dictionary from nltk
english_words_set = set(words.words())

# with open(input_file_path, 'r', encoding='utf-8') as json_file:
#     songs_with_lyrics = json.load(json_file)

# Regex patterns for Hindi, English, and expanded pattern to allow newlines and more punctuation
hindi_pattern = re.compile(r'[\u0900-\u097F]')
english_pattern = re.compile(r'[A-Za-z]')
# Allow Hindi, English, numbers, common punctuation, and newlines
allowed_pattern = re.compile(r'^[\u0900-\u097F A-Za-z0-9,.\'\"\-?!\s\n]+$')

# Function to check if lyrics contain more Hindi than English characters
def is_hindi_majority(lyrics):
    hindi_count = len(hindi_pattern.findall(lyrics))
    english_count = len(english_pattern.findall(lyrics))
    # Return True if there are more Hindi characters, or equal to ensure majority
    return hindi_count > english_count

# Function to check if lyrics contain only allowed characters (Hindi/English)
def contains_only_hindi_and_english(lyrics):
    return True
    return bool(allowed_pattern.match(lyrics))

# Function to convert Hinglish words to Hindi, leaving valid English words intact
def convert_hinglish_to_hindi_preserving_english(lyrics):
    words_in_lyrics = lyrics.split()  # Split lyrics into words
    converted_lyrics = []
    
    for word in words_in_lyrics:
        # Check if the word is a valid English word
        if is_valid_english_word(word):
            converted_lyrics.append(word)  # Keep the English word as it is
        else:
            # Attempt to transliterate the word (treat as Hinglish)
            try:
                converted_word = transliterate(word, sanscript.ITRANS, sanscript.DEVANAGARI)
                converted_lyrics.append(converted_word)
            except Exception as e:
                # If transliteration fails, keep the word as it is
                converted_lyrics.append(word)
    
    return ' '.join(converted_lyrics)  # Return the mixed converted lyrics

# Function to check if lyrics are in Hinglish based on valid English words and Hinglish patterns
def is_hinglish(lyrics):
    words_in_lyrics = lyrics.split()  # Split lyrics into words
    hinglish_count = 0
    english_count = 0
    
    for word in words_in_lyrics:
        # Check if the word is a valid English word
        if is_valid_english_word(word):
            english_count += 1
        elif english_pattern.search(word):
            # If it contains English alphabet characters but isn't a valid English word, count it as Hinglish
            hinglish_count += 1
    
    # If the majority of words are valid English, it's not Hinglish
    if english_count >= hinglish_count:
        return False  # Consider it as English
    else:
        return True  # Consider it as Hinglish


# Function to check if a word is a valid English word
def is_valid_english_word(word):
    return word.lower() in english_words_set

    
# List to store the selected songs
selected_songs = []

non_selected_songs = []

common_hinglish_patterns = [
    r"tum", r"mein", r"hai", r"hoon", r"ke", r"ka", r"ki", r"tha", r"nahi", r"kya", r"haath", r"dekh", r"pyaar", r"jao"
]

# Iterate over the songs and select those with Hindi-majority lyrics
for song in songs_with_lyrics:
    lyrics = song.get('lyrics')
    if lyrics:
        # Remove unwanted characters (optional, clean up if needed)
        clean_lyrics = lyrics.replace("******* This Lyrics is NOT for Commercial use *******", "").strip()
        
        if is_hindi_majority(clean_lyrics) and contains_only_hindi_and_english(clean_lyrics):
            selected_songs.append(song)
        elif is_hinglish(clean_lyrics):
            clean_lyrics = convert_hinglish_to_hindi_preserving_english(clean_lyrics)
            song['lyrics'] = clean_lyrics  # Update the song's lyrics with the converted Hindi text
            selected_songs.append(song)
        else:
            non_selected_songs.append(song)

# Display the count of selected songs and their details
print(f"Total songs with Hindi-majority lyrics: {len(selected_songs)}")
for song in selected_songs:
    print(f"{song['song_name']} by {song['artist_name']}")

# Display the count of non-selected songs and their details
print(f"\nTotal non-selected songs: {len(non_selected_songs)}")
for song in non_selected_songs:
    print(f"{song['song_name']} by {song['artist_name']}")

Total songs with Hindi-majority lyrics: 154
Dj Waley Babu (feat. Aastha Gill) by Badshah
ILL.I.Am by Badshah
Driving Slow by Badshah
Feel Hai by Bali & Badshah
Enjaay (feat. Sminil Agale, K. Shah & Kya.scene) by Chaar Diwaari
Crazy n Love by Mickey Singh & Jay Skilly
Double Addi (feat. Dj Ice & 2 Nyce) by Mickey Singh & Amar Sandhu
Ghar Jaane De by Mandys
I'm Outside by Mickey Singh & Jay Skilly
Funk Song by Kidjaywest, Talwiinder & Ikath
Dream Boy by Babbal Rai
Hot Launde (feat. Fotty Seven & Bali) by Badshah
Gall Goriye by Raftaar & Maninder Buttar
Galat Karam by Panther & Raga
Coco by Sukh-E Muzical Doctorz
Emotional Fool by Sharib Toshi & Toshi Sabri
ICE by Raftaar
Hold Me by Mickey Singh
Doobey (From "Gehraiyaan") by OAFF, Savera, Lothika & Kausar Munir
Famous by Arjun Kanungo
Interstellar by Badshah
Half Window Down by Ikka, Dr Zeus & Neetu Singh
Hold Me by Mickey Singh & UpsideDown
Fall Off (Interlude) by KR$NA
Fast Cars by Badshah
GODSPEED by Tyson Sidhu
F16 by Raftaar
Ishq Ka 

In [17]:
# Append the lyrics to the JSON file
with open('hindi_songs_with_lyrics.json', 'w', encoding='utf-8') as json_file:
    json.dump(selected_songs, json_file, ensure_ascii=False, indent=4)