# Scrape data from Indeed

### Import necessary modules
Please note that this code requires a Firefox browser. 

In [None]:
import os  
import sys 
from csv import writer  

# Add utils directory to Python path.
# The path is relative, so it resolves against the working directory printed
# below. The membership check keeps the cell idempotent: re-running it in the
# notebook no longer piles up duplicate './utils' entries in sys.path.
print(os.getcwd())
if './utils' not in sys.path:
    sys.path.append('./utils')

# Import utility functions
import utils.web_scraping_utils as scraper

### Job title and country definitions

In [None]:
# Job titles to search for, one search per entry.
# Spaces are written as '+' (presumably so the value can be placed directly
# in a search URL; confirm against utils.web_scraping_utils).
JOB_SEARCH_KEYWORDS = [
    'Data+Analyst', 'Data+Scientist',
    'Product+Analyst', 'BI+Analyst',
]

# Per-country search settings, keyed by country name.
# Each value is a dict with two keys, in this order:
#   'base_url' - root URL of that country's Indeed site
#   'cities'   - list of location strings to search in that country
# The table below lists (country, base URL, cities) once per country and the
# comprehension expands each row into the nested dict.
COUNTRY_CONFIGS = {
    country_name: {'base_url': site_url, 'cities': city_list}
    for country_name, site_url, city_list in (
        ('USA', 'https://www.indeed.com',
         ['New+York, NY', 'Los+Angeles, CA', 'Chicago, IL']),
        ('France', 'https://www.indeed.fr',
         ['Paris (75)', 'Marseille (13)', 'Lyon (69)']),
        ('Italy', 'https://it.indeed.com',
         ['Roma, Lazio', 'Milano, Lombardia', 'Napoli, Campania']),
        ('Sweden', 'https://se.indeed.com',
         ['Stockholm', 'Göteborg', 'Malmö']),
    )
}

### Run this cell to select a country

In [None]:
# Show the configured countries as a 1-based numbered menu
available_countries = [*COUNTRY_CONFIGS]
print('Select a country from the following list:')
for i, country in enumerate(available_countries, start=1):
    print(f"{i}. {country}")

# Keep prompting until the reply is an integer that maps to a menu entry
while True:
    try:
        country_choice = int(input('Enter the number of your choice: '))
    except ValueError:
        # Reply was not an integer at all
        print('Please enter a valid number')
        continue

    if country_choice < 1 or country_choice > len(available_countries):
        # Integer, but outside the menu range
        print(f'Please enter a number between 1 and {len(available_countries)}')
        continue

    # Menu numbers are 1-based; list indices are 0-based
    selected_country = available_countries[country_choice - 1]
    break

print(f"You have selected: {selected_country}")

# Get the selected country's settings by explicit key lookup.
# Unpacking .values() positionally would silently swap the two values if the
# keys in COUNTRY_CONFIGS were ever reordered, and would raise if a third key
# were added; reading each key by name avoids both.
base_url = COUNTRY_CONFIGS[selected_country]['base_url']
location_search_keywords = COUNTRY_CONFIGS[selected_country]['cities']

### Main script 
Saves the data to a CSV file. 

In [None]:
# Start the browser session first; the finally clause below shuts it down
# whether or not the scrape succeeds.
driver = scraper.initialize_driver()
try:
    # One output file per country; 'w' mode overwrites any earlier run.
    output_path = f'indeed_jobs_{selected_country}.csv'
    column_names = [
        'page', 'country', 'job_link', 'search_keyword', 'search_location',
        'job_title', 'company_name', 'company_location', 'salary', 'job_description',
    ]

    with open(output_path, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = writer(csv_file)
        # Header row goes in before any scraped data
        csv_writer.writerow(column_names)

        # The scraper receives the open writer and the live driver
        # (presumably it writes one row per job posting; confirm in
        # utils.web_scraping_utils).
        scraper.scrape_jobs(
            csv_writer=csv_writer,
            driver=driver,
            job_keywords=JOB_SEARCH_KEYWORDS,
            location_keywords=location_search_keywords,
            selected_country=selected_country,
            base_url=base_url,
        )
finally:
    # Always release the browser, even if scraping raised
    if driver:
        driver.quit()