In [2]:
import re

import pandas as pd

import utils

In [None]:
# `search_cont` is expected to hold the search-results page as an
# already-parsed BeautifulSoup object.  If it is still raw HTML text, parse it
# first: soup = BeautifulSoup(search_cont, 'html.parser')
soup = search_cont

# Approximate multipliers for converting a listed price to USD, keyed by
# currency symbol.  These are example values — refresh them before relying on
# the converted prices.
EXCHANGE_RATES = {
    '֏': 0.0025,  # Armenian dram
    '€': 1.12,    # euro
    '£': 1.30,    # British pound
}

# Each listing is an <a> element whose href starts with "/item/<digits>".
item_href = re.compile(r'^/item/\d+')
car_listings = soup.find_all('a', href=item_href)

# Accumulator for the per-listing records built further down.
car_data = []

In [None]:
# Turn every listing element in `car_listings` into a flat record and load the
# records into a DataFrame.
# NOTE(review): parse_car_title, convert_price, convert_mileage and
# parse_armenian_date are not defined in this part of the notebook —
# presumably they come from another cell or from `utils`; confirm.

REFERENCE_YEAR = 2025  # baseline year behind the 'age_diff_2025' column
MISSING = "N/A"        # placeholder stored when a text field cannot be found


def _text_or_na(elem):
    """Return the stripped text of a parsed element, or "N/A" if it is absent."""
    return elem.get_text(strip=True) if elem else MISSING


# Start from an empty list on every run so that re-executing this cell does
# not append a second copy of every listing.
car_data = []

for car in car_listings:
    try:
        # Title: the first <div> that carries no class attribute.
        car_model = _text_or_na(car.find('div', class_=False))

        # Split the title into make, model, and free-form description.
        make, model, description = parse_car_title(car_model)

        # Price, converted to USD by the helper.
        price_str = _text_or_na(car.find('div', class_='p'))
        price_usd = convert_price(price_str)

        # Location and details share one element.
        # Expected format: "Location, Year, Mileage, FuelType"
        at_text = _text_or_na(car.find('div', class_='at'))
        details_parts = at_text.split(', ')

        if len(details_parts) >= 4:
            location = details_parts[0]
            # Drop the 'թ.' marker that follows the year (presumably the
            # Armenian abbreviation for "year" — confirm).
            year_str = details_parts[1].replace('թ.', '').strip()
            mileage_str = details_parts[2]
            fuel_type = details_parts[3]

            # str.isdigit() also accepts characters (e.g. superscript digits)
            # that int() rejects, so the ValueError guard is still needed.
            try:
                year = int(year_str) if year_str.isdigit() else None
            except ValueError:
                year = None

            # Convert mileage to km
            mileage_km = convert_mileage(mileage_str)
        else:
            # Unexpected layout: keep the raw text as the location and leave
            # the structured fields empty.
            location = at_text
            year = None
            mileage_km = None
            fuel_type = MISSING

        # Posting date
        date_str = _text_or_na(car.find('div', class_='d'))
        date_posted = parse_armenian_date(date_str)

        # Age of the car relative to the reference year
        age_diff = REFERENCE_YEAR - year if year else None

        # Image URL: prefer 'src', fall back to 'data-original'.  Use the same
        # "N/A" placeholder whether the <img> itself or both attributes are
        # missing, so the column never mixes None with "N/A".
        img_elem = car.find('img')
        if img_elem:
            img_url = img_elem.get('src') or img_elem.get('data-original') or MISSING
        else:
            img_url = MISSING

        car_data.append({
            'make': make,
            'model': model,
            'description': description,
            'price_usd': price_usd,
            'location': location,
            'year': year,
            'mileage_km': mileage_km,
            'fuel_type': fuel_type,
            'date_posted': date_posted,
            'age_diff_2025': age_diff,
            'image_url': img_url,
            'item_url': car.get('href', 'N/A')
        })
    except Exception as e:
        # Best effort: report the failure and move on to the next listing.
        print(f"Error processing car listing: {e}")

# Create DataFrame
df = pd.DataFrame(car_data)

print(f"Found {len(df)} car listings")

# Optional: Save to CSV
# df.to_csv('car_listings.csv', index=False, encoding='utf-8')

# Last expression of the cell: rendered as a rich table by the notebook.
df.head()