## 4.1 객체 계획 및 정의

## 4.2 다양한 웹사이트 레이아웃 다루기

In [1]:
import requests
from bs4 import BeautifulSoup

class Content:
    def __init__(self, url, title, body):
        self.url = url
        self.title = title
        self.body = body


def getPage(url):
    req = requests.get(url)
    return BeautifulSoup(req.text, 'html.parser')


def scrapeNYTimes(url):
    bs = getPage(url)
    title = bs.find('h1').text
    lines = bs.select('div.StoryBodyCompanionColumn div p')
    body = '\n'.join([line.text for line in lines])
    return Content(url, title, body)

def scrapeBrookings(url):
    bs = getPage(url)
    title = bs.find('h1').text
    body = bs.find('div', {'class', 'post-body'}).text
    return Content(url, title, body)

url = 'https://www.brookings.edu/blog/future-development/2018/01/26/delivering-inclusive-urban-access-3-uncomfortable-truths/'
content = scrapeBrookings(url)
print(f'Title: {content.title}')
print(f'URL: {content.url}\n')
print(content.body)

url = '''
https://www.nytimes.com/2018/01/25/opinion/sunday/silicon-valley-immortality.html
'''

content = scrapeNYTimes(url)
print(f'Title: {content.title}')
print(f'URL: {content.url}\n')
print(content.body)

Title: Delivering inclusive urban access: 3 uncomfortable truths
URL: https://www.brookings.edu/blog/future-development/2018/01/26/delivering-inclusive-urban-access-3-uncomfortable-truths/


The past few decades have been filled with a deep optimism about the role of cities and suburbs across the world. These engines of economic growth host a majority of world population, are major drivers of economic innovation, and have created pathways to opportunities for untold amounts of people.	






Jeffrey Gutman

					Former Nonresident Fellow, Global Economy and Development										







Adie Tomer

					Senior Fellow - Brookings Metro 

 Twitter
AdieTomer






But all is not well within our so-called Urban Century. Rapid urbanization, rising gentrification, concentrated poverty, and shortages of basic infrastructure have combined to create spatial inequity in cities and suburbs across the globe. The challenges of housing, moving, and employing so many people have led to longer travel t

In [2]:
class Content:
    """
    글/페이지 전체에 사용할 기반 클래스
    """
    
    def __init__(self, url, title, body):
        self.url = url
        self.title = title
        self.body = body
        
    def print(self):
        """
        출력 결과를 원하는 대로 바꿀 수 있는 함수
        """
        print(f'URL: {self.url}')
        print(f'TITLE: {self.title}')
        print(f'BODY:\n{self.body}')
        
        
class Website:
    """
    웹사이트 구조에 관한 정보를 저장할 클래스
    """
    
    def __init__(self, name, url, titleTag, bodyTag):
        self.name = name
        self.url = url
        self.titleTag = titleTag
        self.bodyTag = bodyTag

In [3]:
import requests
from bs4 import BeautifulSoup

class Crawler:
    def getPage(self, url):
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException:
            return None
        return BeautifulSoup(req.text, 'html.parser')
    
    def safeGet(self, pageObj, selector):
        """
        BeautifulSoup 객체와 선택자를 받아 콘텐츠 문자열을 추출하는 함수
        주어진 선택자로 검색된 결과가 없다면 빈 문자열을 반환합니다.
        """
        
        selectedElems = pageObj.select(selector)
        if selectedElems is not None and len(selectedElems) > 0:
            return '\n'.join([elem.get_text() for elem in selectedElems])
        return ''
    
    def parse(self, site, url):
        """
        URL을 받아 콘텐츠를 추출합니다.
        """
        
        bs = self.getPage(url)
        if bs is not None:
            title = self.safeGet(bs, site.titleTag)
            body = self.safeGet(bs, site.bodyTag)
            if title != '' and body != '':
                content = Content(url, title, body)
                content.print()

In [4]:
# 객체 정의 후 프로세스 시작 코드

crawler = Crawler()

siteData = [
    ['0\'Reilly Media', 'http://oreilly.com', 'h1', 'section#product-description'],
    ['Reuters', 'http://reuters.com', 'h1', 'div.StandardArticleBody_body_1gnLA'],
    ['Brookings', 'http://www.brooking.edu', 'h1', 'div.post-body']
]

websites = []
urls = [
    'http://shop.oreilly.com/product/0636920028154.do',
    'http://www.reuters.com/article/us-usa-epa-pruitt-idUSKBN19W2D0',
    'https://www.brookings.edu/blog/techtank/2016/03/01/idea-to-retire-old-methods-of-policy-education/'
]

for row in siteData:
    websites.append(Website(row[0], row[1], row[2], row[3]))
    
crawler.parse(websites[0], urls[0])
crawler.parse(websites[1], urls[1])
crawler.parse(websites[2], urls[2])

URL: https://www.brookings.edu/blog/techtank/2016/03/01/idea-to-retire-old-methods-of-policy-education/
TITLE: Idea to Retire: Old methods of policy education
Idea to Retire: Old methods of policy education
BODY:

Public policy and public affairs schools aim to train competent creators and implementers of government policy. While drawing on the principles that gird our economic and political systems to provide a well-rounded education, like law schools and business schools, policy schools provide professional training. They are quite distinct from graduate programs in political science or economics which aim to train the next generation of academics. As professional training programs, they add value by imparting both the skills which are relevant to current employers, and skills which we know will be relevant as organizations and societies evolve. 
The relevance of the skills that policy programs impart to address problems of today and tomorrow bears further discussion. We are living t

## 4.3 크롤러 구성

#### 4.3.1 검색을 통한 사이트 크롤링

In [5]:
class Content:
    """
    글/페이지 전체에 사용할 기반 클래스
    """
    
    def __init__(self, topic, url, title, body):
        self.topic = topic
        self.url = url
        self.title = title
        self.body = body
        
    def print(self):
        """
        출력 결과를 원하는 대로 바꿀 수 있는 함수
        """
        print(f'New article found for topic: {self.topic}')
        print(f'URL: {self.url}')
        print(f'TITLE: {self.title}')
        print(f'BODY:\n{self.body}')

In [6]:
       
class Website:
    """
    웹사이트 구조에 관한 정보를 저장할 클래스
    """
    
    def __init__(self, name, url, searchUrl, resultListing, resultUrl, absoluteUrl, titleTag, bodyTag):
        self.name = name
        self.url = url
        self.searchUrl = searchUrl
        self.resultListing = resultListing
        self.resultUrl = resultUrl
        self.absoluteUrl = absoluteUrl
        self.titleTag = titleTag
        self.bodyTag = bodyTag

In [7]:
import requests
from bs4 import BeautifulSoup

class Crawler:

    def getPage(self, url):
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException:
            return None
        return BeautifulSoup(req.text, 'html.parser')

    def safeGet(self, pageObj, selector):
        childObj = pageObj.select(selector)
        if childObj is not None and len(childObj) > 0:
            return childObj[0].get_text()
        return ''
    
    def search(self, topic, site):
        """
        주어진 검색어로 주어진 웹사이트를 검색해 결과 페이지를 모두 기록합니다.
        """
        bs = self.getPage(site.searchUrl + topic)
        searchResults = bs.select(site.resultListing)
        for result in searchResults:
            url = result.select(site.resultUrl)[0].attrs['href']
            # 상대 URL인지 절대 URL인지 확인합니다.
            if(site.absoluteUrl):
                bs = self.getPage(url)
            else:
                bs = self.getPage(site.url + url)
            if bs is None:
                print('Something was wrong with that page or URL. Skipping!')
                return
            title = self.safeGet(bs, site.titleTag)
            body = self.safeGet(bs, site.bodyTag)
            if title != '' and body != '':
                content = Content(topic, title, body, url)
                content.print()
                
                
crawler = Crawler()

siteData = [
    ['O\'Reilly Media', 'http://oreilly.com', 'https://ssearch.oreilly.com/?q=',
        'article.product-result', 'p.title a', True, 'h1', 'section#product-description'],
    ['Reuters', 'http://reuters.com', 'http://www.reuters.com/search/news?blob=', 'div.search-result-content',
        'h3.search-result-title a', False, 'h1', 'div.StandardArticleBody_body_1gnLA'],
    ['Brookings', 'http://www.brookings.edu', 'https://www.brookings.edu/search/?s=',
        'div.list-content article', 'h4.title a', True, 'h1', 'div.post-body']]

sites = []
for row in siteData:
    sites.append(Website(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]))
    
topics= ['python', 'data science']
for topic in topics:
    print('GETTING INFO ABOUT: ' + topic)
    for targetSite in sites:
        crawler.search(topic, targetSite)

GETTING INFO ABOUT: python
New article found for topic: python
URL: How open-source software shapes AI policy
TITLE: 
Open-source software quietly affects nearly every issue in AI policy, but it is largely absent from discussions around AI policy—policymakers need to more actively consider OSS’s role in AI.
Open-source software (OSS), software that is free to access, use, and change without restrictions, plays a central role in the development and use of artificial intelligence (AI). Across open-source programming languages such as Python, R, C++, Java, Scala, Javascript, Julia, and others, there are thousands of implementations of machine learning algorithms. OSS frameworks for machine learning, including tidymodels in R and Scikit-learn in Python, have helped consolidate many diverse algorithms into a consistent machine learning process and enabled far easier use for the everyday data scientist. There are also OSS tools specific to the especially important subfield of deep learning, 

New article found for topic: python
URL: Preventing pandemics through biodiversity conservation and smart wildlife trade regulation
TITLE: 

Contents 

Summary
Challenge
Limits of historic and existing policies
Policy recommendations
Conclusion


Summary







Vanda Felbab-Brown

					Director - Initiative on Nonstate Armed Actors 

					Co-Director - Africa Security Initiative 

					Senior Fellow - Foreign Policy, Center for Security, Strategy, and Technology 

 Twitter
VFelbabBrown





 @import url(https://c24215cec6c97b637db6-9c0895f07c3474f6636f95b6bf3db172.ssl.cf1.rackcdn.com/interactives/2021/blueprints-printer-paper/app.css); 
The global public health and economic devastation caused by the coronavirus (COVID-19) outbreak dramatically reinforces the urgent imperative to minimize the chances of another zoonotic pandemic. Reducing the likelihood of another viral spillover sweeping the world requires a fundamental change in how we interact with nature. It requires minimizing huma

New article found for topic: python
URL: Leveraging the disruptive power of artificial intelligence for fairer opportunities
TITLE: 
According to President Obama’s Council of Economic Advisers (CEA), approximately 3.1 million jobs will be rendered obsolete or permanently altered as a consequence of artificial intelligence technologies. Artificial intelligence (AI) will, for the foreseeable future, have a significant disruptive impact on jobs. That said, this disruption can create new opportunities if policymakers choose to harness them—including some with the potential to help address long-standing social inequities. Investing in quality training programs that deliver premium skills, such as computational analysis and cognitive thinking, provides a real opportunity to leverage AI’s disruptive power.







Makada Henry-Nickie

					Robert and Virginia Hartley Fellow - Governance Studies 

 Twitter
mhnickie





AI’s disruption presents a clear challenge: competition to traditional skil

New article found for topic: python
URL: The Silicon Valley Wage Premium
TITLE: 


Software application developers earn large salaries in the United States, $96,260 a year on average. But in metropolitan San Jose they earn $131,270, the highest in the country. There are many partial explanations for this—local cost of living, differences in education levels, experience, and industry—but none of them quite account for it. It turns out that developers living in San Jose have acquired the specific skills most valued by employers.
 
As the map below shows, there is a huge amount of variation in earnings for software application developers across regional labor markets. In large metropolitan areas like New York, they earn $105,000, but in Louisville, they earn just $72,000.
 

Average Salary of Software Application Developers by Metropolitan Area, 2013

 


 
Similar patterns could be shown for other occupations, of course; for even within the same job title, people vary by education and ex

New article found for topic: python
URL: Idea to Retire: Old methods of policy education
TITLE: 
Public policy and public affairs schools aim to train competent creators and implementers of government policy. While drawing on the principles that gird our economic and political systems to provide a well-rounded education, like law schools and business schools, policy schools provide professional training. They are quite distinct from graduate programs in political science or economics which aim to train the next generation of academics. As professional training programs, they add value by imparting both the skills which are relevant to current employers, and skills which we know will be relevant as organizations and societies evolve. 
The relevance of the skills that policy programs impart to address problems of today and tomorrow bears further discussion. We are living through an era in which societies are increasingly interconnected. The wide-scale adoption of devices such as the smar

New article found for topic: python
URL: Skills, success, and why your choice of college matters
TITLE: 


Amidst growing frustration with the cost of higher education, complaints also abound about its quality. One critique, launched in the book Academically Adrift by two sociologists, finds little evidence that college students score better on measures of critical thinking, writing, and reasoning after attending college. This is something of a paradox, since strong evidence shows that attending college tends to raise earnings power, even for students who start with mediocre preparation. 
Our recent study uses a different approach to assess the value of a college education. We find that the particular skills listed by a college’s alumni on their resumes predict how well graduates from those schools perform in terms of earning a living, meeting debt obligations, and working for high-paying or innovative companies. Since jobs requiring more valuable skills typically require at least some

New article found for topic: python
URL: Making waves in India: Media and the COVID-19 pandemic
TITLE: 
Introduction
India has been struck hard by the second wave of the COVID-19 pandemic—daily cases and deaths peaked at more than 400,000 cases and 4,000 deaths, respectively, almost four to five times higher than the peak number of cases and deaths in the first wave.1 The second wave was largely attributed to complacency by the Indian government.2 As important as this may have been, it is crucial to examine the role of the media during the pandemic. In particular, what were the discussion topics on the eve of the second wave, and was COVID-19 a fading topic of discussion when the tragedy struck? In this paper, we answer this question and discuss how inadequate media coverage may have slowed India’s COVID-19 response.







Mudit Kapoor

					Associate Professor - Indian Statistical Institute 

 Twitter
muditkapoor








Shamika Ravi

					Nonresident Senior Fellow - Governance Studi

New article found for topic: python
URL: 
				BUILDING SKILLS FOR LIFE			
TITLE: 



 Menu
 





						Introduction					



						Why expand CS education?					





										Economic argument									



										Inequality argument									



										Beyond economics									





						Global advances					





										Progress across the globe									



										Key barriers and challenges									





						Lessons learned					



						Conclusion					



						Case studies					






		October 25, 2021	


This report makes the case for expanding computer science education in primary and secondary schools around the world, and outlines the key challenges standing in the way. Through analysis of regional and national education systems at various stages of progress in implementing computer science education programs, the report offers transferable lessons learned across a wide range of settings with the aim that all students—regardless of income level, race, or sex—can on

New article found for topic: python
URL: Inside the Pentagon’s Secret Afghan Spy Machine
TITLE: 
The Pentagon’s top researchers have rushed a classified and controversial intelligence program into Afghanistan. Known as “Nexus 7,” and previously undisclosed as a war-zone surveillance effort, it ties together everything from spy radars to fruit prices in order to glean clues about Afghan instability.
The program has been pushed hard by the leadership of the Defense Advanced Research Projects Agency (DARPA). They see Nexus 7 as both a breakthrough data-analysis tool and an opportunity to move beyond its traditional, long-range research role and into a more active wartime mission. 
But those efforts are drawing fire from some frontline intel operators who see Nexus 7 as little more than a glorified grad-school project, wasting tens of millions on duplicative technology that has nothing to do with stopping the Taliban. 
“There are no models and there are no algorithms,” says one person fami

New article found for topic: python
URL: Think Bigger on North Korea
TITLE: 
While the world is fixated on Iraq and the Middle East, North Korea continues to pose at least as great a threat to Western security interests. Six-party talks with the North Koreans in Beijing have just showed that the Bush administration hasn’t yet found a way out of the nuclear crisis. Although negotiations appear likely to resume in a couple of months, their prospects for success seem poor.
The basic dilemma is easy to understand. North Korea will not surrender its nuclear capabilities, which are among its only valuable national assets, unless offered a very good deal for giving them up. President Bush refuses to offer such a deal because he sees the North Korean demand as blackmail. He insists that before any talks about better diplomatic relations or economic interaction occur, North Korea first relinquish—with verification—a nuclear program it had pledged nine years ago to abandon completely. At most, B

New article found for topic: python
URL: Modeling with Data: Tools and Techniques for Scientific Computing
TITLE: 

PREFACE


Should you use the book? This book is intended to be a complement to the standard stats textbook, in three ways.
First, descriptive and inferential statistics are kept separate beginning with the first sentence of the first chapter. I believe that the fusing of the two is the number one cause of confusion among statistics students.
Once descriptive modeling is given its own space, and models do not necessarily have to be just preparation for a test, the options blossom. There are myriad ways to convert a subjective understanding of the world into a mathematical model, including simulations, models like the Bernoulli/Poisson distributions from traditional probability theory, ordinary least squares, and who knows what else.
If those options aren’t enough, simple models can be combined to form multilevel models to describe situations of arbitrary complexity. That i

New article found for topic: data science
URL: Investment in science and technology is key to an African economic boom
TITLE: 
The African continent represents 20 percent of the earth’s surface and is home to 1.3 billion people—likely reaching 2.53 billion people by 2050. It boasts 60 percent of the world’s arable lands, large swathes of forests, 30 percent of the world’s reserve of minerals, and the youngest population of any continent. Yet, despite these riches, it produces only 3 percent of global GDP, accounts for less than 3 percent of international trade (mainly primary commodities and natural resources), and shoulders 25 percent of the global disease burden. The picture is particularly bleak when it comes to research and innovation: Africa contributes just 2 percent of world research output, accounts for only 1.3 percent of research spending, and produces 0.1 percent of all patents.
How can a continent that has fueled the world’s industrial revolutions, that helped drive the dom

New article found for topic: data science
URL: How the National Science Foundation is taking on fairness in AI
TITLE: 
Most of the public discourse around artificial intelligence (AI) policy focuses on one of two perspectives: how the government can support AI innovation, and how the government can deter its harmful or negligent use. Yet there can also be a role for government in making it easier to use AI beneficially—in this niche, the National Science Foundation (NSF) has found a way to contribute. Through a grant-making program called Fairness in Artificial Intelligence (FAI), the NSF is providing $20 million in funding to researchers working on difficult ethical problems in AI. The program, a collaboration with Amazon, has now funded 21 projects in its first two years, with an open call for applications in its third and final year. This is an important endeavor, furthering a trend of federal support for the responsible advancement of technology, and the NSF should continue this im

New article found for topic: data science
URL: Measuring racism and discrimination in economic data
TITLE: 
Although researchers in economics are increasingly cognizant that race and ethnicity are key determinants of economic outcomes, credibly assessing potential causes and identifying solutions is often complicated by the lack of high-quality data. The typical economist’s work primarily focuses on proposing relationships and testing for causal mechanisms across a broad set of economic phenomena. The study of race and the consequences of race in market interactions have long been hampered by the relative lack of longitudinal data collected on relevant markers of discrimination, racism, and related long-term outcomes.







Randall Akee

					Nonresident Fellow - Economic Studies, Center on Children and Families, Future of the Middle Class Initiative 

					Former Brookings Rubenstein Fellow										

 Twitter
indigenalysis








Marcus Casey

					Nonresident Fellow - Economic Stu

New article found for topic: data science
URL: 
				BUILDING SKILLS FOR LIFE			
TITLE: 



 Menu
 





						Introduction					



						Why expand CS education?					





										Economic argument									



										Inequality argument									



										Beyond economics									





						Global advances					





										Progress across the globe									



										Key barriers and challenges									





						Lessons learned					



						Conclusion					



						Case studies					






		October 25, 2021	


This report makes the case for expanding computer science education in primary and secondary schools around the world, and outlines the key challenges standing in the way. Through analysis of regional and national education systems at various stages of progress in implementing computer science education programs, the report offers transferable lessons learned across a wide range of settings with the aim that all students—regardless of income level, race, or sex—

New article found for topic: data science
URL: Gender and racial diversity of federal government economists: 2020 data
TITLE: 







David Wessel

					Director - The Hutchins Center on Fiscal and Monetary Policy 

					Senior Fellow - Economic Studies 

 Twitter
davidmwessel








Lorena Hernandez Barcena

					Senior Research Assistant - The Hutchins Center on Fiscal and Monetary Policy 







Nasiha Salwati

					Research Assistant - The Hutchins Center on Fiscal and Monetary Policy 




The lack of diversity in the economics profession has drawn increasing attention in recent years, but much of the focus has been on academic institutions. This report – an update of one we did in 2018 – looks at the diversity of the more than 2,200 Ph.D. economists employed by the federal government, including in the Federal Reserve System, the executive branch, and the research arms of Congress.
Our major findings for 2020:

29% of Ph.D. economists in the federal government are women, compared 

New article found for topic: data science
URL: Bridging the gender data gap
TITLE: 
More men than women are killed in car crashes each year, partly because men drive more and engage in riskier driving behavior. On the other hand, women are 17% more likely to be killed and 47% more likely to be injured in crashes than men are. Women are at increased risk simply because they are women: cars are primarily designed, built, and tested by male engineers using male data, so they are built with men in mind. Scaled-down versions of male crash test dummies, meant to represent women, were not used until 2003—and are primarily tested in the passenger seat. In car design, development, and testing, male bodies are the standard and female bodies the outlier. This creates a gender data gap with very real impacts on the lives of Americans.





J



Jeanette Gaudry Haynie

					Founder and Executive Director - Athena Leadership Project 

					Lieutenant Colonel - U.S. Marine Corps Reserve 




The gend

New article found for topic: data science
URL: Data governance in Africa
TITLE: 
As African tech continues to advance, Lesly Goh and Buhle Goslar discuss why and how the private sector can step up in protecting African consumers from digital harms and offer recommendations for businesses looking to be proactive in data protection. Goh is a senior technology advisor to the World Bank; Goslar is chief executive officer, Africa region and Pakistan, at JUMO.



Related Content






 



Africa in Focus
The private sector must do its part on data governance in Africa

Lesly Goh and Buhle Goslar
Tuesday, February 1, 2022







 



Report
Foresight Africa 2022

Thursday, January 20, 2022







 



Future Development
The digital transformation of agriculture in Indonesia

Lesly Goh
Friday, January 21, 2022





See also:

Investor Guidelines for Responsible Investing in Digital Financial Services (IFC)
NIST Privacy Framework








Lesly Goh

					Senior Technology Advisor - World Bank 


IndexError: list index out of range

#### 4.3.2 링크를 통한 사이트 크롤링

In [15]:
class Website:
    def __init__(self, name, url, targetPattern, absoluteUrl, titleTag, bodyTag):
        self.name = name
        self.url = url
        self.targetPattern = targetPattern
        self.absoluteUrl = absoluteUrl
        self.titleTag = titleTag
        self.bodyTag = bodyTag
        
class Content:
    def __init__(self, url, title, body):
        self.url = url
        self.title = title
        self.body = body
        
    def print(self):
        print(f'URL: {self.url}')
        print(f'TITLE: {self.title}')
        print(f'BODY:\n{self.body}')

In [16]:
import re

class Crawler:
    def __init__(self, site):
        self.site = site
        self.visited = []
        
    def getPage(self, url):
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException:
            return None
        return BeautifulSoup(req.text, 'html.parser')
    
    def safeGet(self, pageObj, selector):
        selectedElems = pageObj.select(selector)
        if selectedElems is not None and len(selectedElems) > 0:
            return '\n'.join([elem.get_text() for elem in selectedElems])
        return ''
    
    def parse(self, url):
        bs = self.getPage(url)
        if bs is not None:
            title = self.safeGet(bs, self.site.titleTag)
            body = self.safeGet(bs, self.site.bodyTag)
            if title != '' and body != '':
                content = Content(url, title, body)
                content.print()
                
    def crawl(self):
        """
        사이트 홈페이지에서 페이지를 가져옵니다.
        """
        bs = self.getPage(self.site.url)
        targetPages = bs.findAll('a', href = re.compile(self.site.targetPattern))
        for targetPage in targetPages:
            targetPage = targetPage.attrs['href']
            if targetPage not in self.visited:
                self.visited.append(targetPage)
                if not self.site.absoluteUrl:
                    targetPage = f'{self.site.url}{targetPage}'
                self.parse(targetPage)
                
                
                
reuters = Website('Reuters',
                 'https://www.reuters.com',
                 '^(/article/)',
                 False,
                 'h1',
                 'div.StandardArticleBody_body')

crawler = Crawler(reuters)
crawler.crawl()

#### 4.3.3 여러 페이지 유형 크롤링

In [12]:
# 기본 웹사이트 속성에 pageType 추가

class Website:
    """Common base class for all articles/pages"""
    
    def __init__(self, type, name, url, searchUrl, resultListing, resultUrl, absoluteUrl, titleTag, bodyTag):
        self.name = name
        self.url = url
        self.searchUrl = searchUrl
        self.resultListing = resultListing
        self.resultUrl = resultUrl
        self.absoluteUrl = absoluteUrl
        self.titleTag = titleTag
        self.bodyTag = bodyTag
        self.pageType = pageType

In [13]:
# 하위 클래스 만들기

class Website:
    """Common base class for all articles/pages"""
    
    def __init__(self, name, url, titleTag):
        self.name = name
        self.url = url
        self.titleTage = titleTag
        
class Product(Website):
    """제품 페이지 스크랩에 필요한 정보를 저장하는 클래스"""
    def __init__(self, name, url, titleTag, productNumberTag, priceTag):
        Website.__init__(self, name, url, titleTag)
        self.productNumberTag = ProductNumberTag
        self.priceTag = priceTag
        
class Article(Website):
    """기사 페이지 스크랩에 필요한 정보를 저장하는 클래스"""
    def __init__(self, name, url, titleTag, bodyTag, dateTag):
        Website.__init__(self, name, url, titleTag)
        self.bodyTag = bodyTag
        self.dateTag = dateTag