# In [0]:
# TODO
# 1. Make a request to the necessary page and get its HTML
# 2. Collect data from each detail page
# 3. Collect all links to the detail pages of each product
# 4. Write the scraped data to a CSV file


from bs4 import BeautifulSoup
import requests
import csv
import lxml

# We need a function that makes requests to the target site. Let's define a new function named
# get_page() that takes a URL as its argument. get_page() makes the request with the
# Requests library, which we have installed.


def get_page(url):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to request.

    Returns:
        A ``BeautifulSoup`` tree built with the ``lxml`` parser, or ``None``
        when the request fails or the server answers with a non-OK status.
    """
    try:
        # A timeout keeps one unresponsive server from hanging the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print('Request failed: ', exc)
        return None

    if not response.ok:
        # Previously this path fell through to `return soup` with `soup`
        # unassigned, which raised UnboundLocalError.
        print('Server responded: ', response.status_code)
        return None

    return BeautifulSoup(response.text, 'lxml')


# Inside get_page(), the 'response' variable holds the response of the site's server: it is the return value of the
# 'requests.get()' call, and 'requests.get()' takes the URL as its argument.


# Next we create a main() function, which started out as a bare 'pass' placeholder.
# main() plays the role of a hub: it manages the calls to the other functions and
# collects the scraped data.


def get_detailed_data(soup):
    """Extract title, price, currency and sold count from a product page.

    Every field is looked up independently; a field whose markup is missing
    (or the whole page, when *soup* is ``None``) is reported as ``''``.

    Args:
        soup: Parsed product page exposing ``find()``, or ``None``.

    Returns:
        A dict with the keys ``'title'``, ``'price'``, ``'currency'`` and
        ``'total sold'``.
    """
    # Title: stored in the 'data-mtdes' attribute of the link inside the
    # <h1 id="itemTitle"> element.
    try:
        title = soup.find('h1', id='itemTitle').find('a').get('data-mtdes') or ''
    except AttributeError:
        title = ''

    # Price: regular items use id 'prcIsum', discounted items use
    # 'mm-saleDscPrc'. The text is split exactly once here; the original
    # split the regular price twice (a list has no .split), so the regular
    # path always ended up with an empty price and currency.
    try:
        price_tag = soup.find('span', id='prcIsum')
        if price_tag is None:
            price_tag = soup.find('span', id='mm-saleDscPrc')
        currency, price = price_tag.text.strip().split(' ')
    except (AttributeError, ValueError):
        # AttributeError: tag (or soup) missing.
        # ValueError: text is not exactly "<currency> <price>".
        currency = ''
        price = ''

    # Items sold: first space-separated token of the link text, with
    # non-breaking spaces removed.
    try:
        sold_text = soup.find('span', class_='vi-qtysw-hot-red').find('a').text
        sold = sold_text.strip().split(' ')[0].replace('\xa0', '')
    except AttributeError:
        sold = ''

    data = {
        'title': title,
        'price': price,
        'currency': currency,
        'total sold': sold
    }
    return data


def get_index_data(soup):
    """Return the product-page URLs found on a search-results page.

    Args:
        soup: Parsed results page exposing ``find_all()``, or ``None``.

    Returns:
        A list with the ``href`` of every ``<a class="s-item__link">``
        element, in document order. Anchors without an ``href`` are skipped.
        An empty list is returned when *soup* cannot be searched.
    """
    try:
        anchors = soup.find_all('a', class_='s-item__link')
    except AttributeError:
        # *soup* has no find_all (for example None was passed in).
        return []

    # Drop missing/empty hrefs so callers never receive a None URL.
    hrefs = (anchor.get('href') for anchor in anchors)
    return [href for href in hrefs if href]


def write_csv(data, url):
    """Append one product row to ``output.csv`` in the working directory.

    Args:
        data: Dict with the keys ``'title'``, ``'price'``, ``'currency'``
            and ``'total sold'``.
        url: Address of the product page; written as the last column.
    """
    # newline='' lets the csv module control line endings itself (without it
    # every row is followed by a blank line on Windows). UTF-8 is set
    # explicitly so non-ASCII titles do not depend on the platform's default
    # encoding.
    with open('output.csv', 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        row = [data['title'], data['price'], data['currency'], data['total sold'], url]
        writer.writerow(row)


def main():
    """Scrape the first results page for men's watches into the CSV file.

    Fetches the search page, extracts the product links from it, then for
    each link fetches the product page, extracts its details and writes
    them out together with the link.
    """
    search_url = 'https://www.ebay.com/sch/i.html?_nkw=mens+watches&_pgn=1'

    index_page = get_page(search_url)
    for product_url in get_index_data(index_page):
        product_page = get_page(product_url)
        details = get_detailed_data(product_page)
        write_csv(details, product_url)


# I suggest that we scrape the needed data from a detail page first, and then add the functionality to scrape all
# links to the inner pages. So we have a detail-page URL that we have to pass to the 'get_page()' function: let's call
# get_page() and pass the URL variable to it.


# Entry point: run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
# The IF condition checks whether this file was run directly from the console or not.
# If the file is run from the console, its '__name__' attribute is equal to '__main__', but if the file is
# imported into another script, its '__name__' attribute contains the name of the file (for example
# 'Stock_Data_Scrapping') instead.
# In that case the condition is False and 'main()' is not called. Only when the condition is True is the
# 'main()' function called.
