In [1]:
import sys
print(sys.version)


3.9.6 (default, Oct 18 2022, 12:41:40) 
[Clang 14.0.0 (clang-1400.0.29.202)]


In [2]:
import requests

url = "https://httpbin.org/get"  # A test API that returns request details
response = requests.get(url)

print("Status Code:", response.status_code)  # Should print 200 if successful
print("Response JSON:", response.json())  # Prints the response in JSON format




Status Code: 200
Response JSON: {'args': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.32.3', 'X-Amzn-Trace-Id': 'Root=1-67cdda8b-50fb53ef6549d08b37c15266'}, 'origin': '172.59.152.206', 'url': 'https://httpbin.org/get'}


In [6]:
import requests
from bs4 import BeautifulSoup

# Target URL
url = "https://cars.ksl.com/search/body/Convertible"

# Use a user-agent to avoid basic bot detection
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Send request
response = requests.get(url, headers=headers)

# Check if request was successful
print("Status Code:", response.status_code)

# Parse the HTML
soup = BeautifulSoup(response.text, "html.parser")

# Extract car titles
car_titles = [title.text.strip() for title in soup.select(".listing-title")]

# Print results
print("Found Cars:", car_titles[:5])  # Print first 5 car titles


Status Code: 200
Found Cars: []


In [None]:
import asyncio
import nest_asyncio
from playwright.async_api import async_playwright
import pandas as pd

# Patch asyncio to work in Jupyter
nest_asyncio.apply()

# Target URL
url = "https://cars.ksl.com/search/body/Convertible"

async def scrape_ksl():
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)  # Set True to hide the browser
        context = await browser.new_context()
        page = await context.new_page()

        # Visit the page
        print("Opening KSL Cars...")
        await page.goto(url, wait_until="networkidle")

        # Extract car titles
        car_titles = await page.locator(".listing-title").all_inner_texts()

        await browser.close()
        return car_titles

# Create and get the event loop
loop = asyncio.get_event_loop()
# Run the async function
cars = loop.run_until_complete(scrape_ksl())

# Save results
df = pd.DataFrame({"Car Titles": cars})
df.to_csv("ksl_cars.csv", index=False)

print("✅ Data saved to ksl_cars.csv!")

Opening KSL Cars...


TimeoutError: Page.goto: Timeout 30000ms exceeded.
Call log:
  - navigating to "https://cars.ksl.com/search/body/Convertible", waiting until "networkidle"


In [10]:
import asyncio
import nest_asyncio
from playwright.async_api import async_playwright, TimeoutError
from playwright_stealth import stealth_async
import pandas as pd

# Patch asyncio to work in Jupyter
nest_asyncio.apply()

# Target URL
url = "https://cars.ksl.com/search/body/Convertible"

async def handle_popups(page):
    try:
                
        # Wait for and handle location popup
        print("Checking for location popup...")
        location_button = page.locator('button:has-text("Use this location")')
        if await location_button.count() > 0:
            print("Accepting location...")
            await location_button.click()
            await page.wait_for_timeout(1000)  # Wait for popup to disappear    

        # Wait for and handle cookie consent popup
        print("Checking for cookie consent popup...")
        cookie_button = page.locator('button[aria-label="Accept"]').first
        if await cookie_button.count() > 0:
            print("Accepting cookies...")
            await cookie_button.click()
            await page.wait_for_timeout(1000)  # Wait for popup to disappear

            
    except Exception as e:
        print(f"Error handling popups: {e}")

async def scrape_ksl():
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--disable-dev-shm-usage',
                '--no-sandbox'
            ]
        )
        
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        )
        
        page = await context.new_page()
        await stealth_async(page)
        
        try:
            print("Opening KSL Cars...")
            await page.goto(
                url,
                wait_until="domcontentloaded",
                timeout=60000
            )
            
            # Handle any popups before proceeding
            await handle_popups(page)
            
            print("Waiting for listings to load...")
            await page.wait_for_selector(".listing-title", timeout=60000)
            
            # Add a small delay to ensure dynamic content loads
            await page.wait_for_timeout(2000)
            
            # Extract car titles
            car_titles = await page.locator(".listing-title").all_inner_texts()
            print(f"Found {len(car_titles)} listings")
            
            # Optional: Take a screenshot to verify what the page looks like
            await page.screenshot(path="ksl_cars_page.png")
            
            return car_titles
            
        except TimeoutError as e:
            print(f"Timeout error: {e}")
            print("Taking error screenshot...")
            await page.screenshot(path="error_screenshot.png")
            return []
        except Exception as e:
            print(f"An error occurred: {e}")
            print("Taking error screenshot...")
            await page.screenshot(path="error_screenshot.png")
            return []
        finally:
            await browser.close()

try:
    loop = asyncio.get_event_loop()
    cars = loop.run_until_complete(scrape_ksl())

    if cars:
        df = pd.DataFrame({"Car Titles": cars})
        df.to_csv("ksl_cars.csv", index=False)
        print("✅ Data saved to ksl_cars.csv!")
    else:
        print("❌ No car listings were found")
        
except Exception as e:
    print(f"Failed to run scraper: {e}")

Opening KSL Cars...
Checking for location popup...
Checking for cookie consent popup...
Waiting for listings to load...
Found 24 listings
✅ Data saved to ksl_cars.csv!


In [12]:
import asyncio
import nest_asyncio
from playwright.async_api import async_playwright, TimeoutError
from playwright_stealth import stealth_async
import pandas as pd

# Patch asyncio to work in Jupyter
nest_asyncio.apply()

# Target URL
url = "https://cars.ksl.com/search/body/Convertible"

async def handle_popups(page):
    try:
        # Wait for and handle location popup
        print("Checking for location popup...")
        location_button = page.locator('button:has-text("Use this location")')
        if await location_button.count() > 0:
            print("Accepting location...")
            await location_button.click()
            await page.wait_for_timeout(1000)

        # Wait for and handle cookie consent popup
        print("Checking for cookie consent popup...")
        cookie_button = page.locator('button[aria-label="Accept"]').first
        if await cookie_button.count() > 0:
            print("Accepting cookies...")
            await cookie_button.click()
            await page.wait_for_timeout(1000)
            
    except Exception as e:
        print(f"Error handling popups: {e}")

async def scrape_ksl():
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--disable-dev-shm-usage',
                '--no-sandbox'
            ]
        )
        
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        )
        
        page = await context.new_page()
        await stealth_async(page)
        
        try:
            print("Opening KSL Cars...")
            await page.goto(
                url,
                wait_until="domcontentloaded",
                timeout=60000
            )
            
            await handle_popups(page)
            
            print("Waiting for listings to load...")
            await page.wait_for_selector(".listing-title", timeout=60000)
            await page.wait_for_timeout(2000)
            
            # Extract all listings
            listings = await page.locator(".listing").all()
            cars_data = []
            
            print("Extracting listing details...")
            for listing in listings:
                try:
                    # Extract title and URL
                    title_element = listing.locator(".listing-title a").first
                    title = await title_element.inner_text()
                    href = await title_element.get_attribute("href")
                    full_url = f"https://cars.ksl.com{href}" if href else ""
                    
                    # Extract price (handle potential missing prices)
                    price_element = listing.locator(".listing-price")
                    price = await price_element.inner_text() if await price_element.count() > 0 else "N/A"
                    
                    # Extract location (handle potential missing locations)
                    location_element = listing.locator(".seller-location")
                    location = await location_element.inner_text() if await location_element.count() > 0 else "N/A"
                    
                    cars_data.append({
                        "title": title,
                        "price": price,
                        "location": location,
                        "url": full_url
                    })
                except Exception as e:
                    print(f"Error extracting listing details: {e}")
                    continue
            
            print(f"Found {len(cars_data)} complete listings")
            return cars_data
            
        except TimeoutError as e:
            print(f"Timeout error: {e}")
            await page.screenshot(path="error_screenshot.png")
            return []
        except Exception as e:
            print(f"An error occurred: {e}")
            await page.screenshot(path="error_screenshot.png")
            return []
        finally:
            await browser.close()

try:
    loop = asyncio.get_event_loop()
    cars_data = loop.run_until_complete(scrape_ksl())

    if cars_data:
        # Create DataFrame with all the extracted data
        df = pd.DataFrame(cars_data)
        
        # Save to CSV with all columns
        df.to_csv("ksl_cars_detailed.csv", index=False)
        print("✅ Data saved to ksl_cars_detailed.csv!")
        
        # Display first few entries
        print("\nFirst few listings:")
        print(df.head())
    else:
        print("❌ No car listings were found")
        
except Exception as e:
    print(f"Failed to run scraper: {e}")

Opening KSL Cars...
Checking for location popup...
Checking for cookie consent popup...
Waiting for listings to load...
Extracting listing details...
Error extracting listing details: Locator.inner_text: Timeout 30000ms exceeded.
Call log:
  - waiting for locator(".listing").first.locator(".listing-title a").first

Error extracting listing details: Locator.inner_text: Timeout 30000ms exceeded.
Call log:
  - waiting for locator(".listing").nth(1).locator(".listing-title a").first



KeyboardInterrupt: 

Error extracting listing details: Locator.inner_text: Timeout 30000ms exceeded.
Call log:
  - waiting for locator(".listing").nth(2).locator(".listing-title a").first

Error extracting listing details: Locator.inner_text: Timeout 30000ms exceeded.
Call log:
  - waiting for locator(".listing").nth(3).locator(".listing-title a").first

Error extracting listing details: Locator.inner_text: Timeout 30000ms exceeded.
Call log:
  - waiting for locator(".listing").nth(4).locator(".listing-title a").first

Error extracting listing details: Locator.inner_text: Target page, context or browser has been closed
Call log:
  - waiting for locator(".listing").nth(5).locator(".listing-title a").first

Error extracting listing details: Locator.inner_text: Target page, context or browser has been closed
Error extracting listing details: Locator.inner_text: Target page, context or browser has been closed
Error extracting listing details: Locator.inner_text: Target page, context or browser has been closed


In [14]:
import asyncio
import nest_asyncio
from playwright.async_api import async_playwright, TimeoutError
from playwright_stealth import stealth_async
import pandas as pd

# Patch asyncio to work in Jupyter
nest_asyncio.apply()

# Target URL
url = "https://cars.ksl.com/search/body/Convertible"

async def handle_popups(page):
    try:
        # Wait for and handle location popup
        print("Checking for location popup...")
        location_button = page.locator('button:has-text("Use this location")')
        if await location_button.count() > 0:
            print("Accepting location...")
            await location_button.click()
            await page.wait_for_timeout(1000)

        # Wait for and handle cookie consent popup
        print("Checking for cookie consent popup...")
        cookie_button = page.locator('button[aria-label="Accept"]').first
        if await cookie_button.count() > 0:
            print("Accepting cookies...")
            await cookie_button.click()
            await page.wait_for_timeout(1000)
            
    except Exception as e:
        print(f"Error handling popups: {e}")

async def scrape_ksl():
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--disable-dev-shm-usage',
                '--no-sandbox'
            ]
        )
        
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        )
        
        page = await context.new_page()
        await stealth_async(page)
        
        try:
            print("Opening KSL Cars...")
            await page.goto(url, wait_until="domcontentloaded", timeout=60000)
            await handle_popups(page)
            
            print("Waiting for listings to load...")
            # Wait for any listing to be present (using the selector that worked before)
            await page.wait_for_selector(".listing-title", state="attached", timeout=60000)
            
            # Add a longer delay for dynamic content
            await page.wait_for_timeout(5000)
            
            print("Extracting listing details...")
            # Use evaluate to extract data directly from the page context
            cars_data = await page.evaluate("""() => {
                const listings = Array.from(document.querySelectorAll('.listing'));
                return listings.map(listing => {
                    const titleElement = listing.querySelector('.listing-title a');
                    const priceElement = listing.querySelector('.listing-price');
                    const locationElement = listing.querySelector('.seller-location');
                    
                    return {
                        title: titleElement ? titleElement.innerText.trim() : 'N/A',
                        price: priceElement ? priceElement.innerText.trim() : 'N/A',
                        location: locationElement ? locationElement.innerText.trim() : 'N/A',
                        url: titleElement ? 'https://cars.ksl.com' + titleElement.getAttribute('href') : ''
                    };
                });
            }""")
            
            print(f"Found {len(cars_data)} listings")
            
            # Take a screenshot for verification
            await page.screenshot(path="ksl_cars_page.png")
            return cars_data
            
        except TimeoutError as e:
            print(f"Timeout error: {e}")
            print("Taking error screenshot...")
            await page.screenshot(path="error_screenshot.png")
            return []
        except Exception as e:
            print(f"An error occurred: {e}")
            print("Taking error screenshot...")
            await page.screenshot(path="error_screenshot.png")
            return []
        finally:
            await browser.close()

try:
    loop = asyncio.get_event_loop()
    cars_data = loop.run_until_complete(scrape_ksl())

    if cars_data:
        # Create DataFrame with all the extracted data
        df = pd.DataFrame(cars_data)
        
        # Save to CSV with all columns
        df.to_csv("ksl_cars_detailed.csv", index=False)
        print("✅ Data saved to ksl_cars_detailed.csv!")
        
        # Display first few entries
        print("\nFirst few listings:")
        print(df.head())
    else:
        print("❌ No car listings were found")
        
except Exception as e:
    print(f"Failed to run scraper: {e}")

Opening KSL Cars...
Checking for location popup...
Checking for cookie consent popup...
Waiting for listings to load...
Extracting listing details...
Found 24 listings
✅ Data saved to ksl_cars_detailed.csv!

First few listings:
  title price location url
0   N/A   N/A      N/A    
1   N/A   N/A      N/A    
2   N/A   N/A      N/A    
3   N/A   N/A      N/A    
4   N/A   N/A      N/A    


In [15]:
import asyncio
import nest_asyncio
from playwright.async_api import async_playwright, TimeoutError
from playwright_stealth import stealth_async
import pandas as pd

# Patch asyncio to work in Jupyter
nest_asyncio.apply()

# Target URL
url = "https://cars.ksl.com/search/body/Convertible"

async def handle_popups(page):
    try:
        # Wait for and handle location popup
        print("Checking for location popup...")
        location_button = page.locator('button:has-text("Use this location")')
        if await location_button.count() > 0:
            print("Accepting location...")
            await location_button.click()
            await page.wait_for_timeout(1000)

        # Wait for and handle cookie consent popup
        print("Checking for cookie consent popup...")
        cookie_button = page.locator('button[aria-label="Accept"]').first
        if await cookie_button.count() > 0:
            print("Accepting cookies...")
            await cookie_button.click()
            await page.wait_for_timeout(1000)
            
    except Exception as e:
        print(f"Error handling popups: {e}")

async def scrape_ksl():
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--disable-dev-shm-usage',
                '--no-sandbox'
            ]
        )
        
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        )
        
        page = await context.new_page()
        await stealth_async(page)
        
        try:
            print("Opening KSL Cars...")
            await page.goto(url, wait_until="domcontentloaded", timeout=60000)
            await handle_popups(page)
            
            print("Waiting for listings to load...")
            # Wait for any listing to be present
            await page.wait_for_selector('[data-testid="test-responsive"]', state="attached", timeout=60000)
            
            # Add a longer delay for dynamic content
            await page.wait_for_timeout(5000)
            
            print("Extracting listing details...")
            # Use evaluate to extract data directly from the page context
            cars_data = await page.evaluate("""() => {
                const listings = Array.from(document.querySelectorAll('[data-testid="test-responsive"]'));
                return listings.map(listing => {
                    // Get the raw listing data from the data-listing attribute
                    const listingData = JSON.parse(listing.getAttribute('data-listing') || '{}');
                    
                    // Get elements using more specific selectors
                    const titleElement = listing.querySelector('.listing-title');
                    const priceElement = listing.querySelector('.Listing__Prices-sc-1v5k5vh-10 p');
                    const locationLink = listing.querySelector('.Listing__LocationLink-sc-1v5k5vh-9');
                    const msrpElement = listing.querySelector('p[style*="font-size: 0.8em"]');
                    const mileageElement = listing.querySelector('p[style*="margin-top: -2px"]');
                    
                    return {
                        title: titleElement ? titleElement.innerText.trim() : 'N/A',
                        price: priceElement ? priceElement.innerText.trim() : 'N/A',
                        location: locationLink ? locationLink.innerText.trim() : 'N/A',
                        mileage: mileageElement ? mileageElement.innerText.trim() : 'N/A',
                        msrp: msrpElement ? msrpElement.innerText.trim() : 'N/A',
                        // Data from the listing attribute
                        make: listingData.make || 'N/A',
                        model: listingData.model || 'N/A',
                        trim: listingData.trim || 'N/A',
                        year: listingData.makeYear || 'N/A',
                        vin: listingData.vin || 'N/A',
                        sellerType: listingData.sellerType || 'N/A',
                        url: listingData.id ? `https://cars.ksl.com/listing/${listingData.id}` : ''
                    };
                }).filter(item => item.title !== 'N/A');  // Filter out empty listings
            }""")
            
            print(f"Found {len(cars_data)} listings")
            
            # Take a screenshot for verification
            await page.screenshot(path="ksl_cars_page.png")
            return cars_data
            
        except TimeoutError as e:
            print(f"Timeout error: {e}")
            print("Taking error screenshot...")
            await page.screenshot(path="error_screenshot.png")
            return []
        except Exception as e:
            print(f"An error occurred: {e}")
            print("Taking error screenshot...")
            await page.screenshot(path="error_screenshot.png")
            return []
        finally:
            await browser.close()

try:
    loop = asyncio.get_event_loop()
    cars_data = loop.run_until_complete(scrape_ksl())

    if cars_data:
        # Create DataFrame with all the extracted data
        df = pd.DataFrame(cars_data)
        
        # Save to CSV with all columns
        df.to_csv("ksl_cars_detailed.csv", index=False)
        print("✅ Data saved to ksl_cars_detailed.csv!")
        
        # Display first few entries
        print("\nFirst few listings:")
        print(df.head())
    else:
        print("❌ No car listings were found")
        
except Exception as e:
    print(f"Failed to run scraper: {e}")

Opening KSL Cars...
Checking for location popup...
Checking for cookie consent popup...
Waiting for listings to load...
Extracting listing details...
Found 24 listings
✅ Data saved to ksl_cars_detailed.csv!

First few listings:
                                   title     price           location  \
0       2017 Buick Cascada Sport Touring   $12,997   South Jordan, UT   
1              2024 Ford Bronco Badlands   $60,684      Bountiful, UT   
2              2024 Ford Bronco Badlands   $63,310      Bountiful, UT   
3     2023 Ford Bronco Wildtrak Advanced   $52,659  American Fork, UT   
4  2025 Mercedes-Benz SL-Class AMG SL 55  $166,045         Draper, UT   

        mileage           msrp           make     model               trim  \
0  99,749 Miles            N/A          Buick   Cascada      Sport Touring   
1       2 Miles   MSRP $67,785           Ford    Bronco           Badlands   
2       2 Miles   MSRP $70,670           Ford    Bronco           Badlands   
3  15,120 Miles      