In [5]:
## 🧪 Lab Task: Data Collection from RemoteOK

#As a data scientist, the first step in any project is to collect relevant and structured data. In this exercise, your task is to extract job-related information from the RemoteOK job website: [https://remoteok.com/r](https://remoteok.com/r).

### 🎯 Objectives:
#- Collect the following data fields for each job posting:
 # - **Company Name**
  #- **Job Role**
  #- **Location**
  #- **Features or Tags** (e.g., technologies, benefits, job type)

### 🛠 Instructions:
#- Use Python along with libraries such as `requests`, `pandas`, and optionally `json` or `BeautifulSoup` if needed.
#- Retrieve the job data from the RemoteOK API or web page.
#- Parse the JSON or HTML response to extract the required fields.
#- Store the collected data in a structured format such as **CSV** for future analysis.

### 📦 Output:
#A CSV file (e.g., `remoteok_jobs.csv`) containing all extracted job listings with the specified fields.

#> ✅ This dataset will serve as the foundation for further data analysis and machine learning tasks in upcoming lab exercises.


In [6]:
# To run this script, you need to install the following libraries:
# pip install requests pandas

import requests
import pandas as pd

def scrape_remoteok_jobs():
    """
    Scrapes job listings from the RemoteOK API.
    Extracts company name, job role, location, and tags/features.
    """
    print("--- Starting RemoteOK Job Scraping ---")
    api_url = "https://remoteok.com/api"

    try:
        # Fetch JSON data from the API
        response = requests.get(api_url, headers={"User-Agent": "Mozilla/5.0"})
        response.raise_for_status()

        jobs_data = response.json()

        # The first element is metadata, so skip it
        jobs_list = []

        for idx, job in enumerate(jobs_data[1:], start=1):
            job_entry = {
                "Company Name": job.get("company", "N/A"),
                "Job Role": job.get("position", "N/A"),
                "Location": job.get("location", "Worldwide"),
                "Tags/Features": ", ".join(job.get("tags", []))
            }
            jobs_list.append(job_entry)

            # Print to console
            print(f"\nJob #{idx}")
            print(f"Company Name : {job_entry['Company Name']}")
            print(f"Job Role     : {job_entry['Job Role']}")
            print(f"Location     : {job_entry['Location']}")
            print(f"Tags/Features: {job_entry['Tags/Features']}")

        print(f"\n Successfully scraped {len(jobs_list)} job postings.")
        return jobs_list

    except requests.exceptions.RequestException as e:
        print(f" Error fetching data from RemoteOK API: {e}")
        return None
    except Exception as e:
        print(f" Error processing job data: {e}")
        return None

def save_to_csv(data, filename):
    """
    Saves scraped job data to a CSV file.
    """
    if not data:
        print(f"No data to save to {filename}. Skipping.")
        return

    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False, encoding='utf-8')
        print(f"\n Data successfully saved to {filename}")
    except IOError as e:
        print(f" Error saving to CSV: {e}")

# --- Main Execution ---
if __name__ == "__main__":
    jobs_data = scrape_remoteok_jobs()

    if jobs_data:
        save_to_csv(jobs_data, "remoteok_jobs.csv")

    print("\n--- Process Complete ---")


--- Starting RemoteOK Job Scraping ---

Job #1
Company Name : Bonfire Studios
Job Role     : Character Concept Artist
Location     : United States
Tags/Features: 3d, support, recruiting

Job #2
Company Name : Coins.ph
Job Role     : Strategy Manager
Location     : Taguig City, Metro Manila, Philippines
Tags/Features: manager, web3, crypto, cryptocurrencies, support, growth, financial, finance, mobile, strategy, management, operational

Job #3
Company Name : Aragon
Job Role     : Content &amp; Social Lead
Location     : Remote - Anywhere
Tags/Features: web3, full-stack, technical, support, software, growth, manager, seo, strategy, management, lead, content, marketing

Job #4
Company Name : The Biz Conquest
Job Role     : Mobile Developer
Location     : 
Tags/Features: dev, engineer, mobiledev, react native

Job #5
Company Name : Virta Health
Job Role     : Backend Software Engineer Client Group
Location     : 
Tags/Features: software, frontend, security, training, full-stack, technical,