In [9]:
! pip install selenium 
! pip install webdriver-manager

Collecting webdriver-manager
  Downloading webdriver_manager-3.7.0-py2.py3-none-any.whl (25 kB)
Collecting python-dotenv
  Downloading python_dotenv-0.20.0-py3-none-any.whl (17 kB)
Installing collected packages: python-dotenv, webdriver-manager
Successfully installed python-dotenv-0.20.0 webdriver-manager-3.7.0


In [45]:
import sys
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from time import sleep, ctime
from collections import namedtuple
from threading import Thread
from os.path import isfile
import csv


## Chrome option

In [None]:
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

In [11]:
# Install last webdriver and bypass webdrive PATH install
wd =  webdriver.Chrome(ChromeDriverManager().install(),chrome_options=chrome_options)




[WDM] - Current google-chrome version is 101.0.4951
[WDM] - Get LATEST chromedriver version for 101.0.4951 google-chrome
[WDM] - There is no [win32] chromedriver for browser 101.0.4951 in cache
[WDM] - About to download new driver from https://chromedriver.storage.googleapis.com/101.0.4951.41/chromedriver_win32.zip
[WDM] - Driver has been saved in cache [C:\Users\alexa\.wdm\drivers\chromedriver\win32\101.0.4951.41]
  wd =  webdriver.Chrome(ChromeDriverManager().install(),chrome_options=chrome_options)
  wd =  webdriver.Chrome(ChromeDriverManager().install(),chrome_options=chrome_options)


In [13]:
# Access webpage
URL = 'https://bandcamp.com'
wd.get(URL)

## Launch Track

In [24]:
wd.find_elements(by=By.CLASS_NAME, value='playbutton')[0].click()

## List discover track

In [26]:
tracks = wd.find_elements(by=By.CLASS_NAME, value='discover-item')

In [30]:
tracks[0].text # music name

"God's Country\nChat Pile\nmetal"

## Exploring the catalogue

In [38]:
# get the next item
next_button = [e for e in wd.find_elements(by=By.CLASS_NAME, value='item-page') if e.text.lower().find('next') > -1][0]

In [39]:
next_button.click()

# Building the class

In [59]:
class BandLeader():
    def __init__(self, csvpath = None):
        self.database_path=csvpath # filepath
        self.database = [] # initialize database

        # Load database from disk if exist
        if isfile(self.database_path):
            with open(self.database_path, newline='') as dbfile:
                dbreader = csv.reader(dbfile)
                next(dbreader)   # To ignore the header line
                self.database = [TrackRec._make(rec) for rec in dbreader]
                
                
        # Create a headless browser
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')    
        self.browser = webdriver.Chrome(ChromeDriverManager().install(),chrome_options=chrome_options) # Install last webdriver and bypass webdrive PATH install
        self.browser.get(BANDCAMP_FRONTPAGE)

        # Track list related state
        self._current_track_number = 1
        self.track_list = []
        self.tracks()
        
        # State for the database
        self.database = []
        self._current_track_record = None

        # The database maintenance thread
        self.thread = Thread(target=self._maintain)
        self.thread.daemon = True    # Kills the thread with the main process dies
        self.thread.start()

        self.tracks()
        
        
    def _maintain(self):
        while True:
            self._update_db()
            sleep(15)          # Check every 15 seconds


    def _update_db(self): 
        """
        Append new track to database list
        """
        try:
            check = (self._current_track_record is not None
                     and self._current_track_record is not None
                     and (len(self.database) == 0
                          or self.database[-1] != self._current_track_record)
                     and self.is_playing())
            if check:
                self.database.append(self._current_track_record)
                self.save_db()

        except Exception as e:
            print('error while updating the db: {}'.format(e)) # threw error if update failed
                  
                  
    def save_db(self):
        """
        save database to a specified database_path csv file
        """
        with open(self.database_path,'w',newline='') as dbfile:
            dbwriter = csv.writer(dbfile)
            dbwriter.writerow(list(TrackRec._fields))
            for entry in self.database:
                dbwriter.writerow(list(entry))
                
                
    def tracks(self):
        '''
        Query the page to populate a list of available tracks.
        '''

        # Sleep to give the browser time to render and finish any animations
        sleep(1)

        # Get the container for the visible track list
        discover_section = self.browser.find_element_by_class_name('discover-results')
        left_x = discover_section.location['x']
        right_x = left_x + discover_section.size['width']

        # Filter the items in the list to include only those we can click
        discover_items = self.browser.find_elements_by_class_name('discover-item')
        self.track_list = [t for t in discover_items
                           if t.location['x'] >= left_x and t.location['x'] < right_x]

        # Print the available tracks to the screen
        for (i,track) in enumerate(self.track_list):
            print('[{}]'.format(i+1))
            lines = track.text.split('\n')
            print('Album  : {}'.format(lines[0]))
            print('Artist : {}'.format(lines[1]))
            if len(lines) > 2:
                print('Genre  : {}'.format(lines[2]))

    def catalogue_pages(self):
        '''
        Print the available pages in the catalogue that are presently
        accessible.
        '''
        print('PAGES')
        for e in self.browser.find_elements_by_class_name('item-page'):
            print(e.text)
        print('')


    def more_tracks(self,page='next'):
        '''
        Advances the catalogue and repopulates the track list. We can pass in a number
        to advance any of the available pages.
        '''

        next_btn = [e for e in self.browser.find_elements_by_class_name('item-page')
                    if e.text.lower().strip() == str(page)]

        if next_btn:
            next_btn[0].click()
            self.tracks()

    def play(self, track=None):
        '''
        Play a track. If no track number is supplied, the presently selected track
        will play.
        '''
        if track is None:
            self.browser.find_element_by_class_name('playbutton').click()
        elif type(track) is int and track <= len(self.track_list) and track >= 1:
            self._current_track_number = track
            self.track_list[self._current_track_number - 1].click()

        sleep(0.5)
        if self.is_playing():
            self._current_track_record = self.currently_playing()



    def play_next(self):
        '''
        Plays the next available track
        '''
        if self._current_track_number < len(self.track_list):
            self.play(self._current_track_number+1)
        else:
            self.more_tracks()
            self.play(1)


    def pause(self):
        '''
        Pauses the playback
        '''
        self.play()

        def is_playing(self):
            '''
            Returns `True` if a track is presently playing
            '''
        playbtn = self.browser.find_element_by_class_name('playbutton')
        return playbtn.get_attribute('class').find('playing') > -1


    def currently_playing(self):
        '''
        Returns the record for the currently playing track,
        or None if nothing is playing
        '''
        try:
            if self.is_playing():
                title = self.browser.find_element_by_class_name('title').text
                album_detail = self.browser.find_element_by_css_selector('.detail-album > a')
                album_title = album_detail.text
                album_url = album_detail.get_attribute('href').split('?')[0]
                artist_detail = self.browser.find_element_by_css_selector('.detail-artist > a')
                artist = artist_detail.text
                artist_url = artist_detail.get_attribute('href').split('?')[0]
                return TrackRec(title, artist, artist_url, album_title, album_url, ctime())

        except Exception as e:
            print('there was an error: {}'.format(e))

        return None


In [55]:
TrackRec = namedtuple('TrackRec', [
    'title', 
    'artist',
    'artist_url', 
    'album',
    'album_url', 
    'timestamp'  # When you played it
])