# Import Library and Function.

Import the function that wraps the web-scraping process developed in the previous notebook; it lives in the src folder.

In [None]:
import sys
# Folder that holds the project's own modules.
src_path = "/home/er_bim/Indonesia_COE/src/"
# Put it at the front of sys.path so the import below can find the module.
sys.path.insert(0, src_path)

# Scraping helper from the website_scraping module (presumably located in src_path).
from website_scraping import COE_scrape

# Extract The Indonesia COE data from the website.

Extract the tables and save them as CSV files in the local directory.

In [None]:
# Scrape the Indonesia COE tables for 2021.
COE_scrape('indonesia', '2021')

# Scrape the Indonesia COE tables for 2022.
COE_scrape('indonesia', '2022')

# Scrape the Indonesia COE tables for 2023.
# NOTE(review): as the last expression in the cell, this call's return value
# is presumably the table rendered as the cell output - confirm.
COE_scrape('indonesia', '2023')

Unnamed: 0,Ranking,Score,Farm,Weight (lb),Price per lb,Total Price,Buyer
0,1A,91.41,Ijen Lestari,396.83,$45.50,"$18,055.77","Orsir International Co., Ltd."
1,1B,91.41,Ijen Lestari,391.19,$43.60,"$17,055.88",SAVAYA Coffee Market – USA WAVY Coffee Roaster...
2,2A,90.35,Wajamala,330.69,$30.00,"$9,920.70","Coffee Me Up (Korea), Slick Coffee Co., Coffee..."
3,2B,90.35,Wajamala,295.11,$30.50,"$9,000.86","BIG BLACK BOX (Thailand), Casa Lapin, Koff&Bun..."
4,3A,90.06,Angin-angin,298.51,$26.25,"$7,835.89","BLACK GOLD COFFEE (TAIWAN), COFFEE PARTNER INT..."
5,3B,90.06,Angin-angin,264.55,$27.30,"$7,222.22",CHG（手多多咖啡）
6,4,90.06,Avarata Coffee,582.50,$10.00,"$5,825.00",Coffee Beyond Borders
7,5,89.47,Central Sumatera Coffee,773.16,$8.60,"$6,649.18",List & Beisler LA BOHEME
8,6,89.35,Wanoja Coffee Plot Hj. Eti Sumiati,568.48,$10.37,"$5,895.14",SAVAYA COFFEE MARKET – USA LaB Coffee & Roasters
9,7,89.21,Atang Jungket,581.18,$15.80,"$9,182.64",Coffee Beyond Borders


# Extract The Ethiopia and Thailand data.

Modify the function so that the extracted data is saved in a location separate from the Indonesia COE data.

In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Directory where COE_scrape (defined below) writes its CSV files.
PATH = '/home/er_bim/Indonesia_COE/data/other/'


def COE_scrape(c: str,
               y: str,
               save_file: bool = True,
               return_file: bool = True) -> "pd.DataFrame | None":
    """Scrape one Cup of Excellence (COE) auction page and return its results.

    The page URL is built from the official website's homepage URL plus the
    country and year (``<homepage><c>-<y>``). The first tab linked from that
    page is downloaded and its HTML tables are parsed: the first table is
    treated as the COE Competition Result and the second as the COE Auction
    Result.

    When ``save_file`` is true, both tables are written as CSV files into the
    directory given by the module-level ``PATH`` variable, named
    ``<c>_<y>_coffee.csv`` and ``<c>_<y>_auction.csv``. The directory is
    created if it does not exist yet.

    Args:
        c (str): A country listed in Cup of Excellence, e.g. ``'ethiopia'``.
        y (str): The year of the Cup of Excellence award, e.g. ``'2022'``.
        save_file (bool, optional): Save both tables as CSV. Defaults to True.
        return_file (bool, optional): Return the auction result table.
            Defaults to True.

    Returns:
        pd.DataFrame | None: The Cup of Excellence auction result table, or
        ``None`` when ``return_file`` is false.

    Raises:
        requests.HTTPError: If either page request returns an error status.
        requests.Timeout: If the website does not answer in time.
        IndexError: If the page has no result tabs, the tabs contain no
            links, or fewer than two tables are found.
        ValueError: Raised by ``pd.read_html`` when the page has no tables.
    """
    # Local imports keep the block self-contained; both are standard library.
    import os
    from io import StringIO

    # Without a timeout, requests can block forever on an unresponsive server.
    timeout_seconds = 30

    parent_url = 'https://allianceforcoffeeexcellence.org/'
    url = f'{parent_url}{c}-{y}'

    page = requests.get(url, timeout=timeout_seconds)
    # Fail early on 404/500 instead of trying to parse an error page.
    page.raise_for_status()

    soup = BeautifulSoup(page.text, 'lxml')
    tab_lists = soup.select('ul.vc_tta-tabs-list')
    if not tab_lists:
        raise IndexError(
            f'No result tabs found at {url}; check the country and year.')

    # Only anchors that actually carry an href can be turned into a URL.
    tab_hrefs = [a['href'] for a in tab_lists[0].find_all('a', href=True)]
    if not tab_hrefs:
        raise IndexError(f'The result tabs at {url} contain no links.')

    # The result tables are read from the first tab.
    tables_response = requests.get(f'{url}/{tab_hrefs[0]}',
                                   timeout=timeout_seconds)
    tables_response.raise_for_status()

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas versions.
    tables = pd.read_html(StringIO(tables_response.text), header=0)
    if len(tables) < 2:
        raise IndexError(
            f'Expected at least 2 tables at {url}, found {len(tables)}.')
    competition_results, auction_results = tables[0], tables[1]

    if save_file:
        os.makedirs(PATH, exist_ok=True)
        competition_results.to_csv(os.path.join(PATH, f'{c}_{y}_coffee.csv'))
        auction_results.to_csv(os.path.join(PATH, f'{c}_{y}_auction.csv'))

    return auction_results if return_file else None

In [11]:
# Scrape the Ethiopia 2022 tables with the COE_scrape defined above.
COE_scrape('ethiopia', '2022')

Unnamed: 0,RANK,FARM,SCORE,Weight (lbs),Bid ($/lb),Total Value ($),Winner
0,1a,Legese,90.69,529.1,$ 400.50,"$ 211,904.55",LNK Coffee Trading DMCC for Lebunna弘顺
1,1b,Legese,90.69,529.1,$ 400.00,"$ 211,640.00",SAZA COFFEE ( Japan ) & Orsir Coffee (Taiwan)
2,2,Kenean,90.25,363.8,$ 189.10,"$ 68,794.58",Sulalat Coffee Trading
3,3a,A/ Temam,90.19,1190.0,$ 53.11,"$ 63,200.90","Simple Kaffa The Coffe One, Kakalove (Taiwan),..."
4,3b,A/ Temam,90.19,1124.0,$ 55.60,"$ 62,494.40",KALDI COFFEE FARM
5,4,Bukato,90.17,1190.0,$ 90.00,"$ 107,100.00",Sulalat Coffee Trading
6,5,Samuel,90.08,628.3,$ 58.50,"$ 36,755.55",SARUTAHIKO COFFEE
7,6,Tilahune,89.92,1918.0,$ 30.70,"$ 58,882.60",Kyokuto Fadie Corporation
8,7,Wolde,89.78,529.1,$ 51.99,"$ 27,507.91",Blue Bottle Coffee
9,8,Muluneh,89.56,1422.0,$ 26.70,"$ 37,967.40",BEAN SUPER豆超國際 / 百啡有光 / 云境農莊 / 麟閣咖啡 / 樂波波珈琲 / ...


In [12]:
# Scrape the Thailand 2023 tables with the COE_scrape defined above.
COE_scrape('thailand', '2023')

Unnamed: 0,Rank,Score,Farm,Weight (lb),High Bid,Total Price,Company name
0,1a,91.13,Coffee De Hmong Biofarm,79.76,$69.50,"$5,543.32","YN coffee, Vela Ethan International Co.,Ltd, C..."
1,1b,91.13,Coffee De Hmong Biofarm,66.14,$73.30,"$4,848.06",CHG（手多多咖啡）、Tasty&Oracle（塔苏瑞）、BEAN SUPER豆超國際 – ...
2,2,89.5,Phala Akha Coffee Farm,153.71,$25.30,"$3,888.86",Kyokuto Fadie Corporation – Japan
3,3,89.13,Soft Coffee Farm,177.08,$45.00,"$7,968.60",Cuppingpost – South Korea
4,4,88.93,Cherpong village,153.18,$18.80,"$2,879.78",The Coffee Tree Roasters – United States of Am...
5,5,88.8,Amazingcoffee at Mae Chan Tai,154.94,$17.30,"$2,680.46",Kizuna Roaster & Co. – Hong Kong
6,6,88.8,Doi Pakgud coffee,170.51,$15.10,"$2,574.70",Full Sweet Coffee – Taiwan
7,7,88.73,DoiTung,165.08,$25.60,"$4,226.05","KALDI COFFEE FARM (CAMEL COFFEE CO.,LTD.) – Japan"
8,8,88.63,Ma Family Coffee Farm,158.42,$15.20,"$2,407.98",Ryans Coffee Roasters – South Korea
9,9,88.6,Doi Nan,167.73,$12.10,"$2,029.53",Mercanta The Coffee Hunters for Alchemist Sing...
