In [24]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
from bs4 import BeautifulSoup
import numpy as np
import json


track_data = []

def process_track(item,k):
    """Process a single track entry and extract details."""
    try:
        data = item.text.strip()
        print(f"Processing: {data}")

        # Split the data by " ×Share Track" to isolate the track info
        track_info = data.split(" ×Share Track")[0].strip()

        # Split the track info into parts
        parts = track_info.split()

        # Ensure there are enough parts to process
        if len(parts) < 3:
            print(f"Skipping entry due to insufficient data: {track_info}")
            return None

        # Extract Song Name (first two parts)
        song_name = " ".join(parts[:2])

        # Extract Artist (parts after the song name until the duration)
        artist_index = 2
        while artist_index < len(parts) and not parts[artist_index].replace(":", "").isdigit():
            artist_index += 1

        # If no valid duration is found, skip this entry
        if artist_index >= len(parts):
            print(f"Skipping entry due to missing duration: {track_info}")
            return None

        artist = " ".join(parts[2:artist_index])

        # Extract Duration (the next part after the artist)
        duration = parts[artist_index]

        # Extract Genres (remaining parts after the duration)
        genres = " ".join(parts[artist_index + 1:]).split("  ")
        main_genre = genres[0].strip() if genres else "Unknown"

        # Return the track information as a dictionary
        return {
            "Song Name": song_name,
            "Artist": artist,
            "Duration": duration,
            "Main Genre": main_genre,
            "audio-link":str(k)
        }
    except Exception as e:
        print(f"Error processing entry: {item.text}\n{e}")
        return None
# Setup WebDriver
driver = webdriver.Chrome()  # Ensure you have ChromeDriver set up properly
driver.get('https://hoopr.ai/search?q=&t=music')
time.sleep(30)
# Simulate scrolling to load all dynamic content
last_height = driver.execute_script("return document.body.scrollHeight")

# Parameters for iterations
max_iterations = 5  # Set a limit to prevent infinite scrolling
current_iteration = 0

# Scroll until all content is loaded or max iterations reached
while current_iteration < max_iterations:
    print(f"Scrolling iteration: {current_iteration + 1}")
    # Scroll to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Adjust this sleep as needed
    
    # Check if new content is loaded
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        print("No more new content to load.")
        break
    last_height = new_height
    current_iteration += 1

# Now that the page is fully loaded, parse it with BeautifulSoup
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Locate elements dynamically (e.g., a clickable button inside a specific class container)
print("Finding and clicking buttons within containers...")
containers = driver.find_elements(By.CLASS_NAME, 'ng-star-inserted')  # Replace 'x' with the actual container class name

i=0
for container in containers:
         
    
    try:
        container_html = container.get_attribute('outerHTML')
         
    # Parse the container HTML using BeautifulSoup
        soup = BeautifulSoup(container_html, 'html.parser')
        # Find the button within the container
        button = container.find_element(By.CLASS_NAME, 'play-pause-icon')
        
        
        button.click()
        time.sleep(3)  # Allow any resulting dynamic content to load
        
        # Extract updated content from the DOM
        updated_soup = BeautifulSoup(driver.page_source, 'html.parser')
        updated_global_tags = updated_soup.find('audio', {'preload': 'auto'})  # Replace with actual class name
        print("Updated global tags:")
        print(updated_global_tags)
        if i % 2 == 0:
         track = process_track(soup,updated_global_tags['src'])
         if track:
          track_data.append(track)
        i=i+1
         
    except Exception as e:
        print(f"Error while clicking or extracting: {e}")

# Keep the WebDriver open for further actions if needed
print("WebDriver remains open for further actions.")
driver.quit()

Scrolling iteration: 1
Scrolling iteration: 2
Scrolling iteration: 3
Scrolling iteration: 4
Scrolling iteration: 5
Finding and clicking buttons within containers...
Error while clicking or extracting: Message: no such element: Unable to locate element: {"method":"css selector","selector":".play-pause-icon"}
  (Session info: chrome=131.0.6778.204); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF6C589FB05+28789]
	(No symbol) [0x00007FF6C58086E0]
	(No symbol) [0x00007FF6C56A592A]
	(No symbol) [0x00007FF6C56F930E]
	(No symbol) [0x00007FF6C56F95FC]
	(No symbol) [0x00007FF6C56EC6EC]
	(No symbol) [0x00007FF6C571F47F]
	(No symbol) [0x00007FF6C56EC5B6]
	(No symbol) [0x00007FF6C571F650]
	(No symbol) [0x00007FF6C573F654]
	(No symbol) [0x00007FF6C571F1E3]
	(No symbol) [0x00007FF6C56EA938]
	(No symbol) [0x00007FF6C56EBAA1]
	GetHandleVerifier [0x00007FF6C5BD933D+

In [25]:
output_file = "track_data.json"
with open(output_file, "w", encoding="utf-8") as json_file:
    json.dump(track_data, json_file, indent=4, ensure_ascii=False)

print(f"Data saved to '{output_file}'.")

Data saved to 'track_data.json'.
