# Webscraping in Python

### Case Study: GoMarket Online Shopping Store

In [182]:
import requests
from bs4 import BeautifulSoup
import csv

## Functions

In [187]:
def get_title(card):
    '''Function to return all product titles'''
    
    if card.find('h2', class_='woo-loop-product__title') is None:
        title = ''
    else:
        title = card.find('h2', class_='woo-loop-product__title').text
        
    return title


def get_price(card):
    '''Function to return product prices'''
    
    if card.find('span', class_='price') is None:
        price = ''
    else:
        price = card.find('span', class_='price').text
        
    return price

def get_description(card):
    '''Function to return product description for all products'''
        
    if card.find('div', class_= 'woocommerce-product-details__short-description') is None:
        description = ''
    else:
        description = card.find('div', class_= 'woocommerce-product-details__short-description').text.strip()
        
    return description


def get_image(card):
    '''Function to return all product images'''
    
    image = card.find_all('img', class_='attachment-woocommerce_thumbnail')
        
    for index, img in enumerate(image):
        if image is None:
            image_url = ''
            
        image_url = img['src']
        return image_url


def make_request(url):
    '''Makes request to a url and returns a response content'''
    
    try:
        result = requests.get(url)  
        
        if result.status_code == 200:
            print('Request successful')
        else :
            print('Request unsuccessful')
    except Exception as e:
        print('An error occured')    
        
    return result.content

def dict_to_csv(data):
    '''Function to convert data into a csv file'''
    
    with open('groceries.csv', mode='w', newline='', encoding='utf-8') as csv_file:
        HEADERS = ['title', 'price', 'description', 'image']
        
        # write header
        csv_writer = csv.DictWriter(csv_file, fieldnames=HEADERS)
        csv_writer.writeheader()
        
        # write rows
        csv_writer.writerows(data)
    

### Data Schema

In [194]:
groceries = list()

if __name__ == '__main__':
    for i in range(1, 6):
        # make request and store content in a variable
        content = make_request(f'https://www.gomarket.com.ng/product-category/groceries/page/{i}')
        
        # soup object
        soup = BeautifulSoup(content, 'html.parser')
        cards = soup.find('ul', class_='products columns-4').find_all('li')
        
        for card in cards:
            grocery = {
                'title': get_title(card),
                'price': get_price(card),
                'description': get_description(card),
                'image': get_image(card),
            }
            groceries.append(grocery)
            
        dict_to_csv(groceries)

Request successful
Request successful
Request successful
Request successful
Request successful


In [190]:
groceries

[{'title': 'Nosak Famili Vegetable Oil 2.5L -×6(in a cartoon)',
  'price': '₦44,000.00₦49,500.00',
  'description': 'Nosak Famili Vegetable Oil is 100% pure vegetable oil is a healthy alternative to adulterated oil. It has no additives and is cholesterol free. It’s hard to find unadulterated oil these days.',
  'image': "data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%20220%20220'%3E%3C/svg%3E"},
 {'title': 'Nosak Famili Quality Palm Oil 2.5L -×6 (in a cartoon) cartoon)',
  'price': '₦38,500.00₦49,500.00',
  'description': 'Nosak Famili Palm Oil is your perfect choice for healthy cooking. It does not have any pungent smell, doesn’t stick to the tongue as noticed with adulterated oil and also gives your food an amazing flavor.\nREAD MORE',
  'image': "data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%20220%20220'%3E%3C/svg%3E"},
 {'title': 'Nosak Famili Vegetable Oil 2.5l',
  'price': '₦10,450.00',
  'description': 'Nosak Fam