Pada assignment ini dilakukan web scraping pada 2 website berbeda:
1. https://harga-emas.org/ (mengambil data harga emas)
2. https://cottonink.co.id/collections/women (mengambil data produk fashion)

Scraping dilakukan menggunakan modul `requests` untuk mendapatkan konten halaman web dan `BeautifulSoup` untuk parsing HTML.

# Import Libraries and Fetch with Error Handling

In [78]:
from bs4 import BeautifulSoup
from io import StringIO
import requests
import pandas as pd

In [79]:
def fetch_page(url, timeout=10):
    """Send an HTTP GET request to ``url`` and return the response on success.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server (connect/read) before giving
            up. ``requests`` applies no timeout by default, so without this a
            stalled server would block the notebook indefinitely.

    Returns:
        The ``requests.Response`` when the request completes and
        ``raise_for_status()`` does not raise (no 4xx/5xx status). ``None``
        when any request error occurs; the reason is printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        return response
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP error occurred while fetching {url}: {errh}")
    # Timeout is checked before ConnectionError because ConnectTimeout
    # subclasses both and should be reported as a timeout.
    except requests.exceptions.Timeout as errt:
        print(f"Timeout error occurred while fetching {url}: {errt}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Connection error occurred while fetching {url}: {errc}")
    except requests.exceptions.RequestException as err:
        print(f"An error occurred while fetching {url}: {err}")
    return None

# Harga Emas

In [80]:
# Scrape the gold-price website: download the landing page once.
# fetch_page returns None on any request error, so response_emas must be
# checked before it is parsed.
url_emas = 'https://harga-emas.org/'
response_emas = fetch_page(url_emas)

In [81]:
# Reset first so that a failed run cannot leave a table from an earlier
# execution of this cell in the kernel and have it exported as if it were fresh.
df_emas = None
if response_emas:
    try:
        soup_emas = BeautifulSoup(response_emas.content, 'html.parser')
        tables = soup_emas.find_all("table", {"class": "in_table"})
        # The price table is taken as the third "in_table" on the page; fail
        # with a descriptive message if the page no longer has that many.
        if len(tables) < 3:
            raise ValueError(
                f"expected at least 3 tables with class 'in_table', found {len(tables)}"
            )
        table = tables[2]
        # read_html expects a file-like object for literal HTML, hence StringIO.
        df_emas = pd.read_html(StringIO(str(table)))[0]
    except Exception as e:
        print(f"Error occurred while scraping {url_emas}: {e}")
else:
    print(f"Failed to retrieve data from {url_emas}")

In [82]:
# Display the scraped gold-price table as the cell output.
df_emas

Unnamed: 0,0,1,2,3,4
0,Harga Emas Hari Ini,Harga Emas Hari Ini,Harga Emas Hari Ini,Harga Emas Hari Ini,Harga Emas Hari Ini
1,Gram,Gedung Antam Jakarta,Gedung Antam Jakarta,Pegadaian,Pegadaian
2,Gram,per Gram (Rp),per Batangan (Rp),per Gram (Rp),per Batangan (Rp)
3,1000,1.454 (-30),1.453.600 (-30.000),1.043.040 (+8.200),1.043.040.000 (+8.200.000)
4,500,2.907 (-60),1.453.640 (-30.000),1.043.082 (+8.200),521.541.000 (+4.100.000)
5,250,5.816 (-120),1.454.060 (-30.000),1.043.512 (+8.200),260.878.000 (+2.050.000)
6,100,14.551 (-300),1.455.120 (-30.000),1.044.600 (+8.200),104.460.000 (+820.000)
7,50,29.118 (-600),1.455.900 (-30.000),1.045.400 (+8.200),52.270.000 (+410.000)
8,25,58.299 (-1.200),1.457.480 (-30.000),1.047.040 (+8.200),26.176.000 (+205.000)
9,10,146.250 (-3.000),1.462.500 (-30.000),1.052.200 (+8.200),10.522.000 (+82.000)


# Cotton Ink

In [83]:
# Scrape the fashion product website (Cotton Ink): download the women's
# collection page. fetch_page returns None on any request error, so
# response_fashion must be checked before it is parsed.
url_fashion = 'https://cottonink.co.id/collections/women'
response_fashion = fetch_page(url_fashion)

In [84]:
# Reset first so that a failed run cannot leave a table from an earlier
# execution of this cell in the kernel and have it exported as if it were fresh.
df_fashion = None
if response_fashion:  # skip parsing when the download did not succeed
    try:
        soup_fashion = BeautifulSoup(response_fashion.content, 'html.parser')
        product_items = soup_fashion.find_all('div', class_='ProductItem__Info ProductItem__Info--center')

        # Collect one record per product card. A card missing its link or
        # price is skipped and counted, so one malformed card does not
        # discard every other product.
        results_fashion = []
        skipped = 0
        for item in product_items:
            link = item.find('a')
            price = item.find('span')
            if link is None or price is None or not link.has_attr('href'):
                skipped += 1
                continue
            results_fashion.append({
                'product_name': link.text.strip(),
                'price': price.text.strip(),
                'url': link['href'],
            })
        if skipped:
            print(f"Skipped {skipped} product card(s) without a link or price on {url_fashion}")

        # Explicit columns keep the header row even when no product matched.
        df_fashion = pd.DataFrame(results_fashion, columns=['product_name', 'price', 'url'])

    except Exception as e:
        print(f"Error occurred while scraping {url_fashion}: {e}")
else:
    print(f"Failed to retrieve data from {url_fashion}")

In [85]:
# Display the scraped product table as the cell output.
df_fashion

Unnamed: 0,product_name,price,url
0,Off-white Embroidered Socia,Rp 359.000,/collections/women/products/off-white-embroide...
1,Navy Embroidered Elusi,Rp 329.000,/collections/women/products/navy-embroidered-e...
2,Off-white Striped Durov,Rp 279.000,/collections/women/products/off-white-striped-...
3,Black Bow Attal,Rp 319.000,/collections/women/products/black-bow-attal
4,Blue Bow Anja,Rp 409.000,/collections/women/products/blue-bow-anja
...,...,...,...
495,Off-white Striped Alizi,Rp 249.000,/collections/women/products/off-white-striped-...
496,Beige Elite,Rp 279.000,/collections/women/products/beige-elite
497,Brown Lotan,Rp 369.000,/collections/women/products/brown-lotan
498,Navy Sevyn,Rp 369.000,/collections/women/products/navy-sevyn


# Save to Drive

In [89]:
from google.colab import drive

# Mount Google Drive at an absolute path. The export cell writes to
# '/content/drive/...', so a mount point relative to the working directory
# would only line up with it while the notebook's cwd is /content.
drive.mount('/content/drive')

Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


In [90]:
# Write both scraped tables to the Drive folder as CSV files, leaving out the
# DataFrame index column, then confirm once both writes have finished.
emas_csv = '/content/drive/My Drive/dibimbing DE8/Harga Emas.csv'
fashion_csv = '/content/drive/My Drive/dibimbing DE8/Cotton Ink.csv'
df_emas.to_csv(emas_csv, index=False)
df_fashion.to_csv(fashion_csv, index=False)
print("Data berhasil diekspor")