## Data Scraping

In [4]:
import warnings

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Browser-like request headers sent with every page fetch.
# Long values are split across adjacent literals; Python joins them into the
# same single string at compile time.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/99.0.9999.999 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.9",
}

# Function to scrape data for a specific crop
def scrape_data(product_name, urls, timeout=60):
    """Scrape the first HTML table on each page and label its rows with a product.

    Each URL is fetched with the module-level ``headers``. When the response
    status is 200 and the page contains a ``<table>``, every ``<tr>`` holding
    at least 10 ``<td>`` cells yields one record: ``product_name`` followed by
    the stripped text of cells 4 through 9 (zero-based).

    Parameters
    ----------
    product_name : str
        Value stored in the ``Product`` column of every returned row.
    urls : iterable of str
        Page URLs to fetch, in order.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so that a stalled server cannot block
        the notebook indefinitely. Defaults to 60 seconds.

    Returns
    -------
    pandas.DataFrame
        Columns ``Product``, ``Market Location``, ``Wholesale Price``,
        ``Retail Price``, ``Supply Volume``, ``County`` and ``Date``. The
        frame is empty (but still has these columns) when nothing was scraped.

    Raises
    ------
    requests.RequestException
        Propagated unchanged from ``requests.get`` (connection failures,
        timeouts, ...).

    Notes
    -----
    Pages answering with a non-200 status are skipped, as before, but a
    ``UserWarning`` is now emitted so the missing data is visible.
    """
    columns = [
        "Product",
        "Market Location",
        "Wholesale Price",
        "Retail Price",
        "Supply Volume",
        "County",
        "Date",
    ]
    cleaned_data = []

    for url in urls:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            # Stay best-effort (skip the page) but do not hide the gap.
            warnings.warn(
                f"Skipping {url}: HTTP status {response.status_code}",
                stacklevel=2,
            )
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        table = soup.find("table")
        if not table:
            continue

        for row in table.find_all("tr"):
            # Look up the cells once per row instead of once for the length
            # check and again for the text extraction.
            cells = row.find_all("td")
            if len(cells) >= 10:
                values = [cell.get_text(strip=True) for cell in cells[4:10]]
                cleaned_data.append([product_name] + values)

    return pd.DataFrame(cleaned_data, columns=columns)

# URLs for different crops
# Crop name -> numeric id sent in the site's ``product`` query parameter.
PRODUCT_IDS = {
    "Beans": 30,
    "Dry Maize": 1,
    "Rice": 4,
    "Onion": 158,
}
# Value of the ``per_page`` query parameter; also the step between the
# consecutive numbers placed in the URL path.
PER_PAGE = 3000
# Largest number placed in the URL path (inclusive).
LAST_PAGE_START = 33000

# One list of page URLs per crop, generated from the constants above instead
# of four copy-pasted comprehensions.
urls = {
    product: [
        f"https://amis.co.ke/site/market/{start}?product={product_id}&per_page={PER_PAGE}"
        for start in range(0, LAST_PAGE_START + 1, PER_PAGE)
    ]
    for product, product_id in PRODUCT_IDS.items()
}

# Build one frame per crop, then stack them into a single table with a
# fresh 0..n-1 index.
crop_frames = [
    scrape_data(crop, crop_urls)
    for crop, crop_urls in urls.items()
]
combined_data = pd.concat(crop_frames, ignore_index=True)

# Show the stacked result
print(combined_data)


      Product  Market Location Wholesale Price Retail Price Supply Volume  \
0       Beans          Gikomba       150.00/Kg    210.00/Kg                 
1       Beans           Mabera       158.33/Kg    187.50/Kg       6000.00   
2       Beans            Kutus       130.00/Kg    150.00/Kg          0.00   
3       Beans             Molo               -    250.00/Kg         70.00   
4       Beans          Mwatate       170.00/Kg    180.00/Kg                 
...       ...              ...             ...          ...           ...   
87861   Onion           Kibuye        21.54/Kg            -                 
87862   Onion      Wath Ong'er        30.77/Kg            -                 
87863   Onion         Karatina        20.00/Kg            -                 
87864   Onion  Nakuru Wakulima        26.92/Kg            -                 
87865   Onion         Kongowea        32.31/Kg            -                 

             County        Date  
0           Nairobi  2024-04-25  
1      

In [5]:
# Distinct crop labels present in the scraped table
product_labels = combined_data["Product"].unique()
print(product_labels)


['Beans' 'Dry Maize' 'Rice' 'Onion']


: 