In [None]:
import logging

import pylast
import pymongo
import requests
from pymongo import MongoClient
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

import auth

In [None]:
# Create logger
logger = logging.getLogger('lastfm_logger')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same object on every call, so re-running this
# cell in a live kernel would stack a second FileHandler and write every
# message to debug.log twice. Only attach the handler when none is present.
if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):
    # Create handlers
    f_handler = logging.FileHandler('debug.log')
    f_handler.setLevel(logging.DEBUG)

    # Logging formatter
    f_format = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s')
    f_handler.setFormatter(f_format)

    logger.addHandler(f_handler)

In [None]:
# Credentials live in the local `auth` module so they stay out of the notebook.
# Bind them to notebook-level names used by the cells below.
API_KEY, API_SECRET = auth.API_KEY, auth.API_SECRET
username, password = auth.username, auth.password
password_hash = auth.password_hash

In [None]:
# Build the pylast network handle from the credentials loaded above.
network = pylast.LastFMNetwork(
    api_key=API_KEY,
    api_secret=API_SECRET,
    username=username,
    password_hash=password_hash,
)
logger.info('Network connection established')

In [None]:
# MongoClient() with no arguments uses the driver's default connection settings.
mongo = MongoClient()

# Database and collection handles used by the scraping and insert cells.
db = mongo.get_database('project-05')
db_user = db.get_collection('Usernames')

In [None]:
# URL template for an artist's listener pages. url_list() substitutes the
# literal 'artist' segment with the artist name and appends the page number.
url_base = 'http://last.fm/music/artist/+listeners?page='

# Seed artists per genre. Names must match the artist's page name, because
# they are spliced directly into the URL.
pop_artist_list = ['Taylor Swift', 'Selena Gomez', 'Lorde', 'Halsey', 'Lana Del Rey', 'Marina & The Diamonds',
                   'Tove Lo', 'Charli XCX', 'Lady Gaga', 'Gwen Stefani', 'Justin Bieber', 'Ed Sheeran', 'Dua Lipa',
                   'Ariana Grande', 'Katy Perry', 'Little Mix', 'Shawn Mendes', 'Sam Smith', 'Passenger', 'Adele']
# 'Green Day' appeared twice in this list, which made the scraper fetch the
# same pages twice; it is listed once now.
rock_artist_list = ['The Beatles', 'Muse', 'Coldplay', 'Nirvana', 'Radiohead', 'Red Hot Chili Peppers', 'Queen',
                    'Foo Fighters', 'Linkin Park', 'Led Zeppelin', 'Pink Floyd', 'The Rolling Stones', 'Green Day',
                    'The Killers', 'Oasis', 'The Doors', 'Aerosmith', 'Pearl Jam', 'The Strokes']
rap_artist_list = ['Eminem', 'Kanye West', '2Pac', 'Snoop Dogg', 'Nas', '50 Cent', 'Kendrick Lamar', 'OutKast',
                   'Ludacris', 'Fort Minor', 'The Roots', 'Ice Cube', 'DMX', 'Mos Def', 'Lupe Fiasco', 'Nelly',
                   'Nicki Minaj', 'Drake', 'Coolio', 'Rick Ross', 'Rakim', 'Warren G']



In [None]:
def url_list(artist_list, pages=9, base=None):
    """Build the listener-page URLs for every artist in ``artist_list``.

    Parameters
    ----------
    artist_list : iterable of str
        Artist names. Spaces are turned into ``+`` before the name is spliced
        into the URL; no other escaping is applied.
    pages : int, optional
        Number of pages to generate per artist, numbered from 1. Defaults to
        9, the range the notebook originally hard-coded.
    base : str, optional
        URL template containing the literal segment ``'artist'`` and ending
        where the page number should be appended. Defaults to the
        notebook-level ``url_base``, looked up at call time.

    Returns
    -------
    list of str
        URLs grouped by artist in input order, pages ascending within each
        artist.
    """
    if base is None:
        base = url_base

    return [
        base.replace('artist', artist.replace(' ', '+')) + str(page)
        for artist in artist_list
        for page in range(1, pages + 1)
    ]

In [None]:
def follower_scrape(url):
    """Collect usernames from one listener page that are not yet in MongoDB.

    Loads ``url`` in the notebook-level ``browser`` and probes list positions
    1 to 31 of the listener list. Each username found is checked against the
    ``db_user`` collection and kept only when no document with that
    ``Username`` exists.

    Parameters
    ----------
    url : str
        Address of the listener page to load.

    Returns
    -------
    list of str
        New usernames in page order. The list can contain duplicates across
        calls; the caller is expected to deduplicate.
    """
    username_list = []

    browser.get(url)

    # NOTE(review): 31 positions are probed; presumably the maximum number of
    # listeners shown per page - confirm against the live markup.
    for i in range(1, 32):
        xpath = '//*[@id="mantle_skin"]/div[3]/div[2]/div[1]/div/ol/li[{}]/div/h3/a'.format(i)

        # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which
        # Selenium 4 removed. It returns an empty list when nothing matches.
        matches = browser.find_elements(By.XPATH, xpath)
        if not matches:
            logger.error(f'Index error at {i}, skipping...')
            continue

        temp_username = matches[0].text
        # limit=1 lets the server stop counting at the first hit.
        if db_user.count_documents({'Username': temp_username}, limit=1) == 0:
            username_list.append(temp_username)
        logger.debug(f'{i} completed')

    return username_list

In [None]:
# Start a Firefox session and sign in, so the listener pages are requested as
# a logged-in user.
# NOTE(review): the login URL is plain http; presumably the site redirects to
# HTTPS before the form is submitted - confirm.
browser = webdriver.Firefox()
browser.get('http://secure.last.fm/login')

# find_element(By.…) replaces the find_element_by_* helpers, which Selenium 4
# removed.
username1 = browser.find_element(By.ID, 'id_username')
password1 = browser.find_element(By.ID, 'id_password')
submit = browser.find_element(By.NAME, 'submit')

username1.send_keys(username)
password1.send_keys(password)

submit.click()

In [None]:
# Listener-page URLs per genre.
pop = url_list(pop_artist_list)
rock = url_list(rock_artist_list)
rap = url_list(rap_artist_list)

# Scrape every page in genre order (pop, rock, rap) and pool the usernames.
result_list = []
for url in pop + rock + rap:
    result_list.extend(follower_scrape(url))

In [None]:
# Scraping is finished: end the WebDriver session started in the login cell.
browser.quit()

In [None]:
# Collapse the scraped usernames into a set so each one is processed once.
un_list = {*result_list}

In [None]:
def user_top_tracks(username, limit, playcount):
    """Fetch a user's top tracks whose weight is above ``playcount``.

    Parameters
    ----------
    username : str
        Last.fm username to look up through the notebook-level ``network``.
    limit : int
        Maximum number of top tracks to request and inspect.
    playcount : int
        Threshold. Collection stops at the first track whose weight is less
        than or equal to this value.

    Returns
    -------
    list of tuple or None
        ``(title, artist name, weight)`` for each kept track, in the order
        the API returned them. ``None`` when the lookup fails for any reason,
        so callers can tell a failed fetch from an empty result.
    """
    try:
        user = network.get_user(username)
        top_temp = user.get_top_tracks(limit=limit)

        # The API may return fewer tracks than requested.
        limit = min(limit, len(top_temp))
        top = []

        for i in range(limit):
            entry = top_temp[i]
            # Stop at the first track at or below the threshold.
            # NOTE(review): assumes results are ordered by descending
            # weight - confirm.
            if entry.weight <= playcount:
                logger.debug('Song Range Limit reached; terminating process...')
                break
            top.append((entry.item.title, entry.item.artist.get_name(), entry.weight))
            logger.debug(f'{i + 1} completed, {limit - i - 1} remaining')

        return top
    except Exception as exc:
        # Stay best-effort so one bad user does not abort the whole run, but
        # catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate, and log the real cause.
        logger.error(f'Could not fetch top tracks for {username!r}: {exc!r}')
        return None

In [None]:
# Store every scraped user that is not already in the collection, together
# with their top tracks (up to 750 tracks, weight above 10).
for follower in un_list:
    already_stored = db_user.count_documents({'Username': follower}, limit=1) != 0
    if already_stored:
        print('{} is already listed'.format(follower))
        continue

    tracks = user_top_tracks(follower, 750, 10)
    db_user.insert_one({'Username': follower, 'Tracks': tracks})
    print('{} added.'.format(follower))