# 🎶 Social Media Tracker (v3.4 - Ultra-Robust)
**Efficient Multi-Platform Scraper & Database Sync**

### Resilience Features:
1. **Isolated Scraping**: If one platform fails, other stats are still saved.
2. **MySQL NaN Fix**: Sanitizes `NaN` to `None` for MySQL compatibility.
3. **Automatic Retries**: 3 atomic DB attempts per record.
4. **Smart Logging**: Informative warnings to identify the source of errors.

In [1]:
import os
import re
import json
import time
import requests
import httpx
import pandas as pd
import numpy as np
import mysql.connector
from bs4 import BeautifulSoup
from googlesearch import search
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
try:
    import google.generativeai as genai
    HAS_GEMINI = True
except ImportError:
    HAS_GEMINI = False

print('üì¶ Libraries loaded (v4.0 Robust).')


üì¶ Libraries loaded (v4.0 Robust).


  from .autonotebook import tqdm as notebook_tqdm

All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  import google.generativeai as genai


In [2]:
def load_creds(path):
    """Load a JSON credentials file.

    Args:
        path: Filesystem path of the JSON file.

    Returns:
        The parsed JSON content, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        # EAFP: open directly rather than exists()+open(), which can race with
        # a concurrent delete. The encoding is explicit so parsing does not
        # depend on the platform's default locale.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return {}

# NOTE: despite the file name, these credentials are handed to
# mysql.connector by get_conn().
db_creds = load_creds('postgres_credentials.json')
spotify_creds = load_creds('spotify_credentials.json')

# Authorization header for Spotify Web API calls. It stays empty when no
# credentials are present or the token request fails; the Spotify scraper
# checks `if headers:` before using the API.
headers = {}
if spotify_creds:
    try:
        res = requests.post(
            'https://accounts.spotify.com/api/token',
            data={'grant_type': 'client_credentials',
                  'client_id': spotify_creds['client_id'],
                  'client_secret': spotify_creds['client_secret']},
            timeout=10,  # never hang the notebook on a stalled token request
        )
        if res.status_code == 200:
            headers = {'Authorization': f'Bearer {res.json()["access_token"]}'}
            print('‚úÖ Spotify API Authenticated.')
        else:
            # Previously a non-200 response was silent.
            print(f'‚ö†Ô∏è Spotify API Auth failed. (HTTP {res.status_code})')
    except (requests.RequestException, KeyError, ValueError) as e:
        # Network failure, missing credential key, or malformed token payload.
        print(f'‚ö†Ô∏è Spotify API Auth failed. ({e})')

print('üîê Credentials configured.')

# Gemini Initialization: `model` stays None unless the optional package is
# installed and an API key is available; scrapers check `if not model`.
model = None
if HAS_GEMINI:
    try:
        gemini_creds = load_creds("gemini_credentials.json")
        if gemini_creds:
            genai.configure(api_key=gemini_creds['api_key'])
            model = genai.GenerativeModel('gemini-2.5-flash')
            print('ü§ñ Gemini API configured.')
    except Exception as e:
        print(f'‚ö†Ô∏è Gemini init failed: {e}')



def get_conn():
    """Open a new MySQL connection from the loaded ``db_creds``.

    String values are stripped of surrounding whitespace and any ``sslmode``
    entry is dropped before the remaining settings are passed to
    ``mysql.connector.connect`` as keyword arguments.
    """
    connect_kwargs = {}
    for key, value in db_creds.items():
        if key == 'sslmode':
            continue
        connect_kwargs[key] = value.strip() if isinstance(value, str) else value
    return mysql.connector.connect(**connect_kwargs)


‚úÖ Spotify API Authenticated.
üîê Credentials configured.
ü§ñ Gemini API configured.


In [3]:
# Headless Chrome instance shared by every scraper below.
chrome_options = Options()
for chrome_flag in (
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
):
    chrome_options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=chrome_options)
print('üåê Selenium initialized.')

üåê Selenium initialized.


In [4]:
def clean_for_mysql(v):
    """Convert a value into a plain Python object suitable as a MySQL parameter.

    Args:
        v: Any scalar, typically taken from a pandas row or a scraper attribute.

    Returns:
        ``None`` for ``None``, float NaN (Python or numpy) and the string
        ``'nan'`` in any letter case; the unwrapped Python scalar for any
        numpy scalar (ints, floats, ``np.bool_``, ``np.str_``, ...);
        otherwise ``v`` unchanged.
    """
    if v is None:
        return None
    if isinstance(v, (float, np.floating)) and np.isnan(v):
        return None
    if isinstance(v, np.generic):
        # Unwrap every numpy scalar, not only integers and floats, so that
        # np.bool_ / np.str_ values do not reach the driver as numpy objects.
        v = v.item()
    if isinstance(v, str) and v.lower() == 'nan':
        return None
    return v


In [5]:
# Helper function to convert string numbers (e.g., "1.5M") to integers
def convert_string_to_number(s):
    """Parse a human-formatted count such as ``"1,234"``, ``"62.3K"`` or ``"1.5M"``.

    Thousands separators (commas) are ignored and an optional trailing
    ``K``/``M``/``B`` suffix (any case) scales the value by 1e3/1e6/1e9.
    Commas and a suffix may be combined (``"1,234K"``). Fractions left after
    scaling are truncated toward zero.

    Args:
        s: The text to parse.

    Returns:
        The parsed value as an ``int``.

    Raises:
        ValueError: If the text is empty or is not a number.
    """
    # Local import keeps this helper self-contained.
    from decimal import Decimal, InvalidOperation

    multipliers = {'k': 10 ** 3, 'm': 10 ** 6, 'b': 10 ** 9}

    text = s.lower().strip().replace(',', '')
    multiplier = 1
    if text and text[-1] in multipliers:
        multiplier = multipliers[text[-1]]
        text = text[:-1].strip()

    try:
        # Decimal arithmetic is exact, so scaling can never land just below
        # the intended integer the way binary floating point can.
        return int(Decimal(text) * multiplier)
    except InvalidOperation as exc:
        raise ValueError(f'could not convert {s!r} to a number') from exc

In [6]:
def _first_result_link(url, tag, css_class):
    """Load *url* in the shared Selenium driver and return the first result link.

    The first ``tag`` element whose class matches ``css_class`` is located and
    the ``href`` of its first anchor is returned. Any failure (navigation
    error, missing element, no link) yields ``None`` so the caller can move on
    to the next search engine.
    """
    try:
        driver.get(url)
        time.sleep(2)  # give the results page time to render
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        result = soup.find(tag, class_=css_class)
        anchor = result.find('a', href=True) if result is not None else None
        return anchor['href'] if anchor is not None else None
    except Exception:
        return None


def get_first_search_result(query):
    """
    Tiered Search Strategy: Google -> Bing -> Yahoo -> Python Lib

    Args:
        query: Free-text search query (e.g. ``'instagram <artist> official'``).

    Returns:
        The URL of the first result from the first engine that yields one,
        or ``None`` when every tier fails.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import quote_plus

    # Artist names may contain '&', '+', '$' or '#'; unencoded they would
    # truncate or alter the query string.
    encoded = quote_plus(query)

    engines = [
        # 1. Google (Selenium)
        (f"https://www.google.com/search?q={encoded}", 'div', 'g'),
        # 2. Bing (Selenium)
        (f"https://www.bing.com/search?q={encoded}", 'li', 'b_algo'),
        # 3. Yahoo (Selenium). Uses `\s`, not `\\s`: in a raw string the
        #    doubled backslash would match a literal backslash character.
        (f"https://search.yahoo.com/search?p={encoded}", 'div', re.compile(r'algo-sr|dd\s+algo')),
    ]
    for url, tag, css_class in engines:
        link = _first_result_link(url, tag, css_class)
        if link:
            return link

    # 4. googlesearch package fallback (does its own URL encoding)
    try:
        results = list(search(query, num_results=1))
        if results:
            return results[0]
    except Exception:
        pass
    return None


In [7]:
class InstagramProfile:
    """Resolve an artist's Instagram username and follower count.

    ``get_all`` tries several sources in order (Instagram's web profile
    endpoint, third-party live counters, the rendered profile page, and
    finally Gemini) and stops at the first one that yields a count.

    Attributes are plain and public: ``artist``, ``username`` and
    ``follower_count`` (0 until a source succeeds).
    """

    def __init__(self, artist, username=None):
        self.artist = artist
        self.username = username
        self.follower_count = 0

    def get_username(self):
        """Return the known username, or try to discover it through a web search."""
        if self.username:
            return self.username
        url = get_first_search_result(f'instagram {self.artist} official')
        if url:
            match = re.search(r'instagram\.com/([^/?]+)', url)
            # Post/reel/story links carry a path keyword, not a username.
            if match and match.group(1) not in ['p', 'reels', 'stories']:
                self.username = match.group(1)
        return self.username

    def _try_api(self):
        """Read the follower count from the ``web_profile_info`` endpoint."""
        try:
            url = f'https://i.instagram.com/api/v1/users/web_profile_info/?username={self.username}'
            h = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'x-ig-app-id': '936619743392459'}
            r = requests.get(url, headers=h, timeout=10)
            if r.status_code == 200:
                self.follower_count = r.json()['data']['user']['edge_followed_by']['count']
                return True
        except Exception:
            # Best effort: a network error or unexpected payload simply means
            # the next source is tried.
            pass
        return False

    def _try_specialized(self):
        """Read the animated counter on third-party live-count sites.

        The odometer is sampled up to five times. Two consecutive readings
        within 1% of each other are accepted immediately; otherwise the mean
        of all plausible readings is used.
        """
        sites = [
            f'https://livecounts.nl/instagram-realtime/?u={self.username}',
            f'https://instastatistics.com/{self.username}'
        ]
        for url in sites:
            try:
                driver.get(url)
                time.sleep(7)  # Extended wait for initial settle

                valid_readings = []
                for _ in range(5):
                    try:
                        # Try .odometer-inside first, then .odometer
                        try:
                            el = driver.find_element(By.CSS_SELECTOR, '.odometer-inside')
                        except Exception:
                            el = driver.find_element(By.CSS_SELECTOR, '.odometer')

                        if el:
                            txt = el.text
                            val = convert_string_to_number(re.sub(r'[^0-9KMBkm.]', '', txt))

                            # Filter out animation glitches (usually extremely large or small)
                            if 1000 < val < 1000000000:
                                valid_readings.append(val)

                            # If we have 2 consistent readings, we're done
                            if len(valid_readings) >= 2:
                                if abs(valid_readings[-1] - valid_readings[-2]) < (valid_readings[-1] * 0.01):
                                    self.follower_count = valid_readings[-1]
                                    return True
                    except Exception:
                        # A failed sample is skipped; the next one may succeed.
                        pass
                    time.sleep(2)

                if valid_readings:
                    self.follower_count = int(sum(valid_readings) / len(valid_readings))
                    return True
            except Exception:
                pass
        return False

    def _try_selenium(self):
        """Scrape the rendered profile page for a ``<n> Followers`` figure.

        Returns True only when this call itself found a count; a value already
        stored in ``follower_count`` is never reported as a fresh success.
        """
        try:
            driver.get(f'https://www.instagram.com/{self.username}/')
            time.sleep(5)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            meta = soup.find('meta', attrs={'property': 'og:description'})
            if meta:
                content = meta.get('content', '')
                try:
                    match = re.search(r'([\d,.]+[KMB]?)\s*Followers', content, re.I)
                    if match:
                        val = convert_string_to_number(match.group(1))
                        if 0 < val < 2000000000:
                            self.follower_count = val
                            return True
                except Exception:
                    pass

            # Fall back to any text node mentioning "Followers".
            for t in soup.find_all(string=re.compile(r'Followers', re.I)):
                full_text = t.parent.get_text()
                for m in re.findall(r'([\d,.]+[KMB]?)', full_text):
                    try:
                        v = convert_string_to_number(m)
                    except ValueError:
                        # Tokens such as "." or "," match the pattern but are
                        # not numbers; keep scanning instead of aborting.
                        continue
                    if 1000 < v < 2000000000:
                        self.follower_count = v
                        return True
        except Exception:
            pass
        return False

    def _gemini_prompt(self):
        """Build the Gemini question for this artist.

        The prompt carries no expected-value hint: a fixed "around 280M" note
        would bias the answer for every artist.
        """
        return (f'Current Instagram follower count for {self.artist} (@{self.username})? '
                f'Reply with ONE integer only.')

    def _try_gemini(self):
        """Last resort: ask Gemini for the count (skipped when no model is configured)."""
        if not model:
            return False
        try:
            r = model.generate_content(self._gemini_prompt())
            num = re.sub(r'\D', '', r.text)
            if num:
                self.follower_count = int(num)
                return True
        except Exception:
            pass
        return False

    def get_all(self):
        """Run the lookup chain.

        Returns:
            ``(username, follower_count)``; ``(None, 0)`` when no username
            could be determined.
        """
        if not self.get_username():
            return None, 0
        if self._try_api():
            return self.username, self.follower_count
        if self._try_specialized():
            return self.username, self.follower_count
        if self._try_selenium():
            return self.username, self.follower_count
        self._try_gemini()
        return self.username, self.follower_count

    def __str__(self):
        return f"Artist: {self.artist}\nInstagram Username: {self.username}\nFollowers: {self.follower_count:,}"


In [8]:
class TwitterProfile:
    """Resolve an artist's Twitter/X username and follower count.

    ``get_all`` tries the ``verified_followers`` page, third-party live
    counters, the rendered profile page, a Google results page and finally
    Gemini.
    """

    # A reading from the verified_followers page is accepted on its own only
    # above this value; lower readings are cross-checked against the other
    # sources first.
    # NOTE(review): 50M was tuned on a single very large account; revisit.
    VERIFIED_TRUST_THRESHOLD = 50000000

    def __init__(self, artist, username=None):
        self.artist = artist
        self.username = username
        self.follower_count = 0

    def get_username(self):
        """Return the known username, or try to discover it through a web search."""
        if self.username:
            return self.username
        url = get_first_search_result(f'twitter {self.artist} official')
        if url:
            match = re.search(r'(?:twitter|x)\.com/([^/?]+)', url)
            # These first path segments are site features, not account names.
            if match and match.group(1) not in ['intent', 'share', 'search', 'i', 'x']:
                self.username = match.group(1)
        return self.username

    def _try_verified(self):
        """Read the count from the link ending in ``/verified_followers``."""
        try:
            driver.get(f'https://x.com/{self.username}/verified_followers')
            time.sleep(5)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            els = soup.find_all('a', href=re.compile(r'/verified_followers$'))
            for el in els:
                if 'Follower' in el.get_text():
                    match = re.search(r'([\d,.]+[KMB]?)', el.get_text(), re.I)
                    if match:
                        val = convert_string_to_number(match.group(1))
                        # Sanity Check: Twitter max ~170M
                        if 0 < val < 200000000:
                            self.follower_count = val
                            return True
        except Exception:
            pass
        return False

    def _try_specialized(self):
        """Read the animated counter on third-party live-count sites.

        The odometer is sampled up to five times. Two consecutive readings
        within 1% of each other are accepted immediately; otherwise the mean
        of all plausible readings is used.
        """
        sites = [
            f'https://livecounts.nl/twitter-realtime/?u={self.username}',
            f'https://livecounts.io/twitter-live-follower-counter/{self.username}'
        ]
        for url in sites:
            try:
                driver.get(url)
                time.sleep(7)

                valid_readings = []
                for _ in range(5):
                    try:
                        try:
                            el = driver.find_element(By.CSS_SELECTOR, '.odometer-inside')
                        except Exception:
                            el = driver.find_element(By.CSS_SELECTOR, '.followers-odometer, .odometer')

                        if el:
                            val = convert_string_to_number(re.sub(r'[^0-9KMBkm.]', '', el.text))

                            if 1000 < val < 300000000:
                                valid_readings.append(val)

                            if len(valid_readings) >= 2:
                                if abs(valid_readings[-1] - valid_readings[-2]) < (valid_readings[-1] * 0.01):
                                    self.follower_count = valid_readings[-1]
                                    return True
                    except Exception:
                        # A failed sample is skipped; the next one may succeed.
                        pass
                    time.sleep(2)

                if valid_readings:
                    self.follower_count = int(sum(valid_readings) / len(valid_readings))
                    return True
            except Exception:
                pass
        return False

    def _try_selenium_profile(self):
        """Scrape the profile page text for ``<n> Followers`` figures."""
        try:
            driver.get(f'https://x.com/{self.username}')
            time.sleep(5)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            txt = soup.get_text()
            candidates = []
            for m in re.findall(r'([\d,.]+[KMB]?)\s*Followers', txt, re.I):
                try:
                    val = convert_string_to_number(m)
                except ValueError:
                    # e.g. a lone "." before "Followers"; skip it rather than
                    # abandoning the other candidates.
                    continue
                if 1000 < val < 200000000:
                    candidates.append(val)

            # Pick largest candidate (likely the total followers vs mutuals)
            if candidates:
                self.follower_count = max(candidates)
                return True
        except Exception:
            pass
        return False

    def _try_google_snippet(self):
        """Fallback: look for ``<n> Followers`` in a Google results page."""
        try:
            u = f'https://www.google.com/search?q=twitter+{self.username}+followers'
            driver.get(u)
            time.sleep(2)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            txt = soup.get_text()
            match = re.search(r'([\d,.]+[KMB]?)\s*Followers', txt, re.I)
            if match:
                val = convert_string_to_number(match.group(1))
                if 1000 < val < 200000000:
                    self.follower_count = val
                    return True
        except Exception:
            pass
        return False

    def _gemini_prompt(self):
        """Build the Gemini question for this artist.

        The prompt carries no expected-value hint: a fixed "around 95M" note
        would bias the answer for every artist.
        """
        return (f'Current Twitter follower count for {self.artist} (@{self.username})? '
                f'Reply with ONE integer only.')

    def _try_gemini(self):
        """Last resort: ask Gemini for the count (skipped when no model is configured)."""
        if not model:
            return False
        try:
            r = model.generate_content(self._gemini_prompt())
            num = re.sub(r'\D', '', r.text)
            if num:
                self.follower_count = int(num)
                return True
        except Exception:
            pass
        return False

    def get_all(self):
        """Run the lookup chain.

        A verified-page reading above ``VERIFIED_TRUST_THRESHOLD`` is returned
        immediately. A lower reading is kept as a fallback: the other scrapers
        are still tried, but if they all fail the scraped reading is returned
        instead of being replaced by a Gemini estimate.

        Returns:
            ``(username, follower_count)``; ``(None, 0)`` when no username
            could be determined.
        """
        if not self.get_username():
            return None, 0

        verified_count = 0
        if self._try_verified():
            if self.follower_count > self.VERIFIED_TRUST_THRESHOLD:
                return self.username, self.follower_count
            verified_count = self.follower_count

        if self._try_specialized():
            return self.username, self.follower_count
        if self._try_selenium_profile():
            return self.username, self.follower_count
        if self._try_google_snippet():
            return self.username, self.follower_count

        if verified_count:
            # A real scraped value beats a language-model guess.
            self.follower_count = verified_count
            return self.username, self.follower_count

        self._try_gemini()
        return self.username, self.follower_count

    def __str__(self):
        return f"Artist: {self.artist}\nTwitter Username: {self.username}\nFollowers: {self.follower_count:,}"


In [9]:
class SpotifyProfile:
    """Resolve an artist's Spotify ID, genre, followers, popularity and monthly listeners.

    Followers, popularity and genre come from the Spotify Web API (only when
    the module-level ``headers`` holds a token). Monthly listeners are not
    read from the API here; they are scraped from the public artist page.
    """

    _LISTENERS_RE = re.compile(r'([\d,.]+[KMB]?)\s*monthly listeners', re.I)

    def __init__(self, artist, spotifyID=None, genre=None):
        self.artist = artist
        self.spotifyID = spotifyID
        self.genre = genre
        self.followers = 0
        self.popularity = 0
        self.listens = 0
        self.url = None

    def get_id(self):
        """Populate ``spotifyID`` via the search API, falling back to a web search."""
        if self.spotifyID:
            return
        # Try API if headers available
        if headers:
            try:
                # `params` lets requests URL-encode the artist name; names
                # containing '&', '+' or '#' would otherwise corrupt the query.
                r = requests.get(
                    'https://api.spotify.com/v1/search',
                    params={'q': f'artist:{self.artist}', 'type': 'artist', 'limit': 1},
                    headers=headers,
                    timeout=10,
                )
                if r.status_code == 200:
                    items = r.json()['artists']['items']
                    if items:
                        self.spotifyID = items[0]['id']
            except Exception:
                pass
        # Fallback to search
        if not self.spotifyID:
            u = get_first_search_result(f'spotify artist {self.artist}')
            if u:
                m = re.search(r'artist/([a-zA-Z0-9]+)', u)
                if m:
                    self.spotifyID = m.group(1)

    def _parse_listeners(self, text):
        """Return the monthly-listener count found in *text*, or 0 if none parses."""
        m = self._LISTENERS_RE.search(text or '')
        if not m:
            return 0
        try:
            return convert_string_to_number(m.group(1))
        except ValueError:
            return 0

    def _listeners_from_meta(self, soup):
        """Extract monthly listeners from the page's ``og:description`` meta tag."""
        meta = soup.find('meta', attrs={'property': 'og:description'})
        if not meta:
            return 0
        return self._parse_listeners(meta.get('content'))

    def get_stats(self):
        """Populate followers/popularity/genre (API) and monthly listeners (scrape)."""
        if not self.spotifyID:
            return
        # API (only if headers available)
        if headers:
            try:
                u = f'https://api.spotify.com/v1/artists/{self.spotifyID}'
                r = requests.get(u, headers=headers, timeout=10)
                if r.status_code == 200:
                    res = r.json()
                    self.followers = res['followers']['total']
                    self.popularity = res['popularity']
                    # User preferred logic for genre
                    if res.get('genres'):
                        self.genre = res['genres'][0]
                    elif not self.genre:
                        # NOTE(review): placeholder marked "Default for testing"
                        # by the original author; confirm it should reach the DB.
                        self.genre = 'Pop'
                    # Capture URL for scraping
                    if 'external_urls' in res:
                        self.url = res['external_urls'].get('spotify')
            except Exception:
                pass

        if self.url and not self.url.startswith('http'):
            self.url = 'https://' + self.url

        # Ensure we have a URL to scrape
        if not self.url:
            self.url = f'https://open.spotify.com/artist/{self.spotifyID}'

        # 1. Plain HTTP request with a browser User-Agent.
        try:
            h = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
            r = requests.get(self.url, headers=h, timeout=10)
            soup = BeautifulSoup(r.content, 'html.parser')
            found = self._listeners_from_meta(soup)
            if found:
                self.listens = found
        except Exception:
            pass

        # 2. Selenium fallback if the plain request produced nothing.
        if self.listens == 0:
            try:
                driver.get(self.url)
                time.sleep(5)  # Increased wait
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                # Try meta tag again from rendered source
                found = self._listeners_from_meta(soup)
                if found:
                    self.listens = found

                # Try body text if meta failed
                if self.listens == 0:
                    body = soup.get_text()
                    # More specific search to avoid garbage
                    m = re.search(r'Monthly Listeners\s*:\s*([\d,.]+[KMB]?)', body, re.I)
                    if m:
                        self.listens = convert_string_to_number(m.group(1))
                    else:
                        found = self._parse_listeners(body)
                        if found:
                            self.listens = found
            except Exception:
                pass

    def get_all(self):
        """Run ID lookup then stats collection.

        Returns:
            ``(spotifyID, genre, followers, popularity, listens)``.
        """
        self.get_id()
        self.get_stats()
        return self.spotifyID, self.genre, self.followers, self.popularity, self.listens

    def __str__(self):
        return f"Artist: {self.artist}\nSpotify ID: {self.spotifyID}\nGenre: {self.genre}\nFollowers: {self.followers:,}\nPopularity: {self.popularity}\nMonthly Listeners: {self.listens:,}"



In [10]:
class StubhubProfile:
    """
    Stubhub Profile scraper - Refined Jan 2026
    Prioritizes: SVG Heart -> Text Pattern -> JSON Data

    Each strategy runs independently, so a parse failure in one does not
    prevent the next from being tried.
    """

    def __init__(self, artist, url=None):
        self.artist = artist
        self.url = url
        self.favourites = 0

    def get_url(self):
        """Return the known performer URL, or discover a site-relative one via search."""
        if self.url:
            return self.url
        u = get_first_search_result(f'stubhub {self.artist} tickets performer')
        if u:
            match = re.search(r'stubhub\.(ca|com)/([^?\s]+)', u)
            if match:
                self.url = '/' + match.group(2)
        return self.url

    def _target_urls(self):
        """Return the absolute URLs to scrape.

        An absolute ``self.url`` is used as is; a relative one is tried on
        both stubhub.ca and stubhub.com. With no URL the list is empty.
        """
        if not self.url:
            return []
        if self.url.startswith('http'):
            return [self.url]
        path = self.url if self.url.startswith('/') else '/' + self.url
        return [f'https://www.{d}{path}' for d in ['stubhub.ca', 'stubhub.com']]

    @staticmethod
    def _count_near_heart(soup):
        """Strategy 1: a bare number (e.g. "62.3K") within four ancestors of an SVG icon."""
        candidates = soup.find_all(string=re.compile(r'^\s*\d+(?:\.\d+)?[KMB]?\s*$'))
        for candidate in candidates:
            curr = candidate.parent
            found_heart = False
            for _ in range(4):  # Traverse up
                if curr is None:
                    break
                if curr.find('svg') or curr.find('path'):
                    found_heart = True
                    break
                curr = curr.parent
            if found_heart:
                val = convert_string_to_number(candidate.strip())
                if val > 0:
                    return val
        return 0

    @staticmethod
    def _count_near_label(soup):
        """Strategy 2: the first number next to a "Favorites"/"Favourites" label."""
        tag = soup.find(string=re.compile(r'Favorites|Favourites', re.I))
        if not tag:
            return 0
        container = tag.parent
        text = container.get_text() + ' ' + (container.parent.get_text() if container.parent else '')
        m = re.search(r'([\d,.]+[KMB]?)', text)
        if not m:
            return 0
        return convert_string_to_number(m.group(1))

    @staticmethod
    def _count_from_json(soup):
        """Strategy 3: the ``favorites`` field of the embedded ``index-data`` JSON."""
        script = soup.find('script', {'id': 'index-data', 'type': 'application/json'})
        if not (script and script.string):
            return 0
        data = json.loads(script.string)
        val = data.get('performer', {}).get('favorites', 0) or data.get('performerSummary', {}).get('favorites', 0)
        # Guard against a non-numeric payload before callers compare with 0.
        return val if isinstance(val, (int, float)) else 0

    def _scrape(self):
        """Return the favourites count from the first URL and strategy that yields one, else 0."""
        strategies = (self._count_near_heart, self._count_near_label, self._count_from_json)
        for u in self._target_urls():
            try:
                driver.get(u)
                time.sleep(5)  # Wait for load
                soup = BeautifulSoup(driver.page_source, 'html.parser')
            except Exception:
                # Page could not be loaded or parsed: move on to the next domain.
                continue

            for strategy in strategies:
                try:
                    val = strategy(soup)
                except Exception:
                    # One strategy failing must not block the ones after it.
                    continue
                if val > 0:
                    return val
        return 0

    def get_all(self):
        """Resolve the URL and scrape the favourites count.

        Returns:
            ``(url, favourites)``; ``(None, 0)`` when no URL could be determined.
        """
        if not self.get_url():
            return None, 0
        self.favourites = self._scrape()
        return self.url, self.favourites

    def __str__(self):
        return f"Artist: {self.artist}\nStubhub URL: {self.url}\nFavourites: {self.favourites:,}"


In [11]:
CACHE_HOURS = 1  # Refresh artists every X hours

# Select artists never updated, or last updated more than CACHE_HOURS ago.
query = f'SELECT * FROM ARTISTS WHERE updated_at IS NULL OR updated_at < NOW() - INTERVAL {CACHE_HOURS} HOUR'
conn = get_conn()
try:
    artists_df = pd.read_sql(query, conn)
finally:
    # Release the connection even when the query raises.
    conn.close()
print(f'üìä Loaded {len(artists_df)} artists needing updates.')


üìä Loaded 324 artists needing updates.


  artists_df = pd.read_sql(query, conn)


In [12]:
session_summary = []
error_log = []

# Upsert statement, built once: its text is identical for every artist.
UPSERT_SQL = '''
    INSERT INTO ARTISTS (
        name, instagram_username, instagram_followers,
        spotify_id, spotify_genre, spotify_followers,
        spotify_popularity, spotify_listeners,
        twitter_username, twitter_followers,
        stubhub_url, stubhub_favourites
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
        instagram_username=VALUES(instagram_username),
        instagram_followers=VALUES(instagram_followers),
        spotify_id=VALUES(spotify_id),
        spotify_genre=VALUES(spotify_genre),
        spotify_followers=VALUES(spotify_followers),
        spotify_popularity=VALUES(spotify_popularity),
        spotify_listeners=VALUES(spotify_listeners),
        twitter_username=VALUES(twitter_username),
        twitter_followers=VALUES(twitter_followers),
        stubhub_url=VALUES(stubhub_url),
        stubhub_favourites=VALUES(stubhub_favourites),
        updated_at=CURRENT_TIMESTAMP
'''

# (scraper attribute, DB column, report label) for every metric that must not
# be overwritten with 0 when scraping fails.
METRICS = [
    ('ig', 'follower_count', 'instagram_followers', 'IG'),
    ('tw', 'follower_count', 'twitter_followers', 'Twitter'),
    ('sp', 'followers', 'spotify_followers', 'Spotify Fol'),
    ('sp', 'listens', 'spotify_listeners', 'Spotify Lis'),
    ('sh', 'favourites', 'stubhub_favourites', 'Stubhub'),
]

conn = get_conn()
print(f'üö¢ Processing {len(artists_df)} artists...')

try:
    for idx, row in artists_df.iterrows():
        name = row['name']
        start_time = time.time()

        ig = InstagramProfile(name, clean_for_mysql(row.get('instagram_username')))
        # Seed the stored genre so a failed API call does not write NULL over it.
        sp = SpotifyProfile(name, clean_for_mysql(row.get('spotify_id')),
                            clean_for_mysql(row.get('spotify_genre')))
        tw = TwitterProfile(name, clean_for_mysql(row.get('twitter_username')))
        sh = StubhubProfile(name, clean_for_mysql(row.get('stubhub_url')))
        profiles = {'ig': ig, 'sp': sp, 'tw': tw, 'sh': sh}

        scraper_errors = {}  # Track error message per platform

        # Each platform is isolated: one failing scraper never blocks the others.
        for scraper, label in [(ig, 'IG'), (sp, 'Spotify'), (tw, 'Twitter'), (sh, 'Stubhub')]:
            try:
                scraper.get_all()
            except Exception as e:
                err_msg = str(e)
                scraper_errors[label] = err_msg
                error_log.append({'Timestamp': time.strftime('%H:%M:%S'), 'Artist': name, 'Platform': label, 'Error': err_msg})

        # --- Validation: Track which metrics actually failed (including Scraper 0 returns) ---
        failed_details = []
        for key, attr, db_col, label in METRICS:
            profile = profiles[key]
            scraped_val = getattr(profile, attr, 0) or 0
            if scraped_val != 0:
                continue
            # Keep the previously stored value instead of writing 0.
            curr_val = row.get(db_col, 0) or 0
            setattr(profile, attr, curr_val)
            # Use specialized error if caught, else generic.
            # Extract platform name from label (e.g., 'Spotify Fol' -> 'Spotify')
            platform_key = label.split(' ')[0]
            specific_err = scraper_errors.get(platform_key, "Got 0/Null")
            failed_details.append(f"{label}: {specific_err}")
            if curr_val > 0:
                error_log.append({'Timestamp': time.strftime('%H:%M:%S'), 'Artist': name, 'Platform': label, 'Error': 'Rejected 0 update'})

        # Database Update
        v = (name, ig.username, ig.follower_count, sp.spotifyID, sp.genre,
             sp.followers, sp.popularity, sp.listens,
             tw.username, tw.follower_count, sh.url, sh.favourites)
        v = tuple(clean_for_mysql(x) for x in v)

        success = False
        db_error = None
        for attempt in range(3):
            try:
                with conn.cursor() as cur:
                    cur.execute(UPSERT_SQL, v)
                # Connector/Python does not autocommit by default; without an
                # explicit commit the write is discarded when the connection closes.
                conn.commit()
                success = True
                break
            except Exception as e:
                db_error = e
                time.sleep(2)
                try:
                    # Re-establish a dropped connection before the next attempt.
                    conn.ping(reconnect=True)
                except Exception:
                    pass

        if success:
            elapsed = time.time() - start_time
            fail_str = f" | ‚ö†Ô∏è {', '.join(failed_details)}" if failed_details else ""
            print(f'‚úÖ {name:<25} | {elapsed:.1f}s{fail_str}')
            session_summary.append({"Artist": name, "IG": ig.follower_count, "Spotify": sp.listens, "Time": f'{elapsed:.1f}s'})
        else:
            print(f'‚ùå Final DB Failure for {name}: {db_error}')
finally:
    # Always release the connection, even if the run is interrupted.
    conn.close()

print('üéØ Finished!')

error_df = pd.DataFrame(error_log) if error_log else None
summary_df = pd.DataFrame(session_summary) if session_summary else None

if error_df is not None:
    print("\n--- Detailed Error Log ---")
    display(error_df)
if summary_df is not None:
    print("\n--- Session Summary ---")
    display(summary_df)

# Export results to files
if error_df is not None:
    error_df.to_csv('last_error_log.csv', index=False)
    print('üìÅ Saved error log to last_error_log.csv')

if summary_df is not None:
    summary_df.to_csv('last_session_summary.csv', index=False)
    print('üìÅ Saved session summary to last_session_summary.csv')


üö¢ Processing 324 artists...
‚úÖ $Uicide Boy$              | 26.4s
‚úÖ 49Th & Main               | 62.0s | ‚ö†Ô∏è Stubhub: Got 0/Null
‚úÖ 50 Cent                   | 23.9s
‚úÖ 6Arelyhuman               | 23.6s
‚úÖ Above And Beyond          | 51.2s
‚úÖ Ac Slater                 | 24.9s
‚úÖ Acraze                    | 25.9s
‚úÖ Ado                       | 95.8s
‚úÖ Aespa                     | 32.2s
‚úÖ Alan Walker               | 42.2s
‚úÖ Alex Warren               | 53.1s
‚úÖ Alexandra Kay             | 94.2s
‚úÖ Ali + alan                | 117.4s
‚úÖ Alleycvt                  | 86.7s
‚úÖ Anderson .Paak            | 101.0s
‚úÖ Andy C                    | 102.1s
‚úÖ Angrybaby                 | 84.7s
‚úÖ Armin Van Buuren          | 85.2s
‚úÖ Artemas                   | 84.9s
‚úÖ Atarashi                  | 85.1s
‚úÖ Atliens                   | 86.0s
‚úÖ Audien                    | 107.7s
‚úÖ Aurora                    | 94.4s
‚úÖ Avril Lavigne             | 24.5s
‚úÖ Azzeca              

Unnamed: 0,Timestamp,Artist,Platform,Error
0,16:12:38,Central Cee,Spotify Fol,Rejected 0 update
1,16:15:04,Chase Atlantic,Spotify Fol,Rejected 0 update
2,16:16:45,Chasewest,Spotify Fol,Rejected 0 update
3,16:18:10,Chelsea Cutler And Jeremy Zucker,Spotify Fol,Rejected 0 update
4,16:19:36,Chris Avant Garde,Spotify Fol,Rejected 0 update
...,...,...,...,...
685,17:18:41,Zorza,IG,Rejected 0 update
686,17:18:41,Zorza,Spotify Fol,Rejected 0 update
687,17:18:43,Zulan,IG,Rejected 0 update
688,17:18:43,Zulan,Twitter,Rejected 0 update



--- Session Summary ---


Unnamed: 0,Artist,IG,Spotify,Time
0,$Uicide Boy$,3649969.0,11300000.0,26.4s
1,49Th & Main,45602.0,955200.0,62.0s
2,50 Cent,38700813.0,46700000.0,23.9s
3,6Arelyhuman,759836.0,4000000.0,23.6s
4,Above And Beyond,668249.0,2100000.0,51.2s
...,...,...,...,...
319,Zack Fox,15000.0,458900.0,1.1s
320,Zeds Dead,47000.0,1700000.0,1.0s
321,Zhou Shen,1753.0,815200.0,1.4s
322,Zorza,18000.0,37200.0,1.6s


üõë Browser closed.


In [None]:
# --- SELECTIVE RETRY PASS FOR FAILURES ---
# Re-runs only the scrapers that logged an error during the main pass and
# upserts the resulting values for each affected artist.
#
# Uses names defined in other cells: error_log, artists_df, get_conn,
# clean_for_mysql and the InstagramProfile / SpotifyProfile / TwitterProfile /
# StubhubProfile scraper classes.
if error_log:
    # 1. Group failed platforms by artist
    retry_targets = {}
    for entry in error_log:
        # Map 'Spotify Fol' or 'Spotify Lis' back to the 'Spotify' scraper class
        platform = entry['Platform'].split(' ')[0]
        retry_targets.setdefault(entry['Artist'], set()).add(platform)

    # The upsert statement is identical for every artist, so build it once.
    q = '''
        INSERT INTO ARTISTS (
            name, instagram_username, instagram_followers,
            spotify_id, spotify_genre, spotify_followers,
            spotify_popularity, spotify_listeners,
            twitter_username, twitter_followers,
            stubhub_url, stubhub_favourites
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            instagram_followers=VALUES(instagram_followers),
            spotify_followers=VALUES(spotify_followers),
            spotify_popularity=VALUES(spotify_popularity),
            spotify_listeners=VALUES(spotify_listeners),
            twitter_followers=VALUES(twitter_followers),
            stubhub_favourites=VALUES(stubhub_favourites),
            updated_at=CURRENT_TIMESTAMP
    '''

    print(f'\nüîÑ SELECTIVE RETRY PASS: Retrying specific metrics for {len(retry_targets)} artists...')
    conn = get_conn()
    # NOTE(review): nothing here calls conn.commit(); this presumably relies on
    # get_conn() returning an autocommit connection -- confirm.
    try:
        for name, platforms in retry_targets.items():
            start_time = time.time()

            # Get the current row from original DF to re-init
            match_df = artists_df[artists_df['name'] == name]
            if match_df.empty:
                continue
            row = match_df.iloc[0].to_dict()

            # Initialize scrapers
            ig = InstagramProfile(name, row.get('instagram_username'))
            sp = SpotifyProfile(name, row.get('spotify_id'))
            tw = TwitterProfile(name, row.get('twitter_username'))
            sh = StubhubProfile(name, row.get('stubhub_url'))

            # Load existing known-good values into classes, so metrics that
            # are not retried are written back unchanged by the upsert below.
            ig.follower_count = row.get('instagram_followers', 0) or 0
            sp.followers = row.get('spotify_followers', 0) or 0
            sp.popularity = row.get('spotify_popularity', 0) or 0
            sp.listens = row.get('spotify_listeners', 0) or 0
            tw.follower_count = row.get('twitter_followers', 0) or 0
            sh.favourites = row.get('stubhub_favourites', 0) or 0

            # 2. SELECTIVE SCRAPING: Only run what failed
            scrapers = {'IG': ig, 'Spotify': sp, 'Twitter': tw, 'Stubhub': sh}
            retry_results = []
            # sorted() only makes the scrape order and the log line stable.
            for p in sorted(platforms):
                scraper = scrapers.get(p)
                if scraper is None:
                    # Unknown label in error_log: report it rather than
                    # listing it as retried when nothing was run.
                    print(f'[RETRY] {name}: no scraper for platform {p!r}, skipped')
                    continue
                try:
                    scraper.get_all()
                except Exception as exc:
                    # Best-effort: one platform failing again must not stop
                    # the others. Catching Exception (not a bare except)
                    # lets Ctrl-C / SystemExit propagate.
                    print(f'[RETRY] {name}: {p} scrape failed again: {exc}')
                else:
                    retry_results.append(p)

            # 3. Database Update
            try:
                v = (name, ig.username, ig.follower_count, sp.spotifyID, sp.genre,
                     sp.followers, sp.popularity, sp.listens,
                     tw.username, tw.follower_count, sh.url, sh.favourites)
                v = tuple(clean_for_mysql(x) for x in v)
                with conn.cursor() as cur:
                    cur.execute(q, v)

                elapsed = time.time() - start_time
                print(f'‚úÖ [RETRY] {name:<25} | {elapsed:.1f}s | Retried: {", ".join(retry_results)}')
            except Exception as e:
                print(f'‚ùå [RETRY] DB Failure for {name}: {e}')
    finally:
        # Always release the connection, including when the pass is
        # interrupted part-way through.
        conn.close()

    print('\nüéØ Selective Retry Pass Finished!')
else:
    print('‚ú® No failures to retry.')



üîÑ SELECTIVE RETRY PASS: Retrying specific metrics for 228 artists...
‚úÖ [RETRY] Central Cee               | 0.6s | Retried: Spotify
‚úÖ [RETRY] Chase Atlantic            | 0.6s | Retried: Spotify
‚úÖ [RETRY] Chasewest                 | 0.6s | Retried: Spotify
‚úÖ [RETRY] Chelsea Cutler And Jeremy Zucker | 0.5s | Retried: Spotify
‚úÖ [RETRY] Chris Avant Garde         | 0.5s | Retried: Spotify
‚úÖ [RETRY] Chris Luno                | 0.6s | Retried: Spotify
‚úÖ [RETRY] Chyl                      | 0.5s | Retried: Spotify
‚úÖ [RETRY] City And Colour           | 0.6s | Retried: Spotify
‚úÖ [RETRY] Clairo                    | 0.5s | Retried: Spotify
‚úÖ [RETRY] Claptone                  | 0.3s | Retried: Spotify
‚úÖ [RETRY] Cloonee                   | 0.3s | Retried: Spotify
‚úÖ [RETRY] Cochise                   | 0.4s | Retried: Spotify
‚úÖ [RETRY] Coldplay                  | 0.6s | Retried: Spotify
‚úÖ [RETRY] Cosmic Gate               | 0.4s | Retried: Spotify
‚úÖ [RETRY] Counterparts

KeyboardInterrupt: 

In [None]:
# Quit the `driver` session (created outside this cell), then report it.
driver.quit()
print('üõë Browser closed.')