In [4]:
import requests, openpyxl
from bs4 import BeautifulSoup
from os import path

def get_one_page(url, timeout=10):
    """
    Download a web page and return its text decoded as UTF-8.

    param url: A string that stores the url
    param timeout: Seconds to wait for the server before giving up, so a stalled connection cannot block forever
    returns: A string that stores the decoded content of the web page. None if the request for this web page failed or a RequestException occurred
    """
    # Present a browser-like User-Agent with the request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException:
        # The exception class must be qualified: a bare RequestException is
        # not imported in this file and would raise NameError instead
        return None
    # Change the encoding used by Requests to utf-8 before reading .text
    response.encoding = 'utf-8'
    # Only a 200 OK response counts as a successful download
    if response.status_code != requests.codes.ok:
        return None
    return response.text

def parse_homepage(res_text):
    """
    Extract each school's entry from the table on the home page.

    param res_text: A string that stores the decoded content of a response. If it is None or empty (e.g. the download failed) nothing is generated
    returns: A generator that generates a dictionary containing the school's name and its corresponding date and url
    """
    # Nothing to parse: BeautifulSoup cannot be given None
    if not res_text:
        return
    soup = BeautifulSoup(res_text, features='html5lib')
    cells = soup.select('.willnum-body > table > tbody > tr > td')
    # Cells are consumed three at a time (name, date, link). The first three
    # cells are skipped -- presumably the table's header row; verify against
    # the page. Stopping at len(cells) - 2 ensures cells[i + 2] always exists,
    # so a trailing incomplete group is ignored instead of raising IndexError.
    for i in range(3, len(cells) - 2, 3):
        link = cells[i + 2].find('a')
        # Skip groups whose third cell holds no link at all
        if link is None:
            continue
        url = link.get('href')
        # Skip links without an href attribute instead of raising KeyError
        if url is None:
            continue
        yield {
            'name': cells[i].string,
            'date': cells[i + 1].string,
            'url': url,
        }

def write_to_excel(school_generator, filename='浙江省2019年三位一体招生信息.xlsx'):
    """
    Write the schools' information into a new Excel workbook and save it.

    param school_generator: A generator that generates a dictionary containing the school's name and its corresponding date and url
    param filename: A string that stores the path of the .xlsx file to write. Defaults to the file name this script has always used
    """
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = '报考简章'
    # Header row: list of universities, registration time, admissions brochure
    column_titles = ('高校名单', '报名时间', '招生简章')
    for column, title in enumerate(column_titles, start=1):
        sheet.cell(row=1, column=column).value = title
    # Data rows start directly below the header row
    for row, school in enumerate(school_generator, start=2):
        sheet.cell(row=row, column=1).value = school['name']
        sheet.cell(row=row, column=2).value = school['date']
        sheet.cell(row=row, column=3).value = school['url']

    wb.save(filename)
    # Build the message from the name actually passed to save() so the
    # reported file can never differ from the one written
    print("The file'{}'has been saved successfully".format(filename))


if __name__ == '__main__':
    # Home page that lists the schools and their enrollment information
    url = 'http://www.eol.cn/html/g/zjswyt/'
    res_text = get_one_page(url)
    if res_text is None:
        # get_one_page returns None when the download fails; stop here with a
        # clear message rather than handing None to the parser
        print('Failed to download {}; no file has been written'.format(url))
    else:
        school_generator = parse_homepage(res_text)
        write_to_excel(school_generator)


The file'浙江省三位一体招生信息.xlsx'has been saved successfully
