# pyquery

## 初始化 

### 字符串初始化

In [7]:
# Import first so the cell's only dependency is visible up front.
from pyquery import PyQuery as pq

# Sample markup reused by the following cells.
# NOTE: each list item is terminated with a second opening `<li>` instead of
# `</li>`. The parsed document therefore contains an extra, whitespace-only
# <li> after every real item, which is why empty <li> elements appear in the
# printed results.
html = '''

<div class = "panel">
    <div class = "panel-heading">
        <h4>hello</h4>
    </div>
    <div class = "panel-body">
        <ul class = "list" id = "list-1">
            <li class = "elements">foo<li>
            <li class = "elements">foo1<li>
            <li class = "elements">foo2<li>
        </ul>
        <ul class = "list list-small" id = "list-2">
            <li class = "elements">foo3<li>
            <li class = "elements">foo4<li>
            <li class = "elements">foo5<li>
        </ul>
    </div>
</div>
'''

# Build the document directly from the markup string and select every <li>.
doc = pq(html)
print(doc('li'))

<li class="elements">foo</li><li>
            </li><li class="elements">foo1</li><li>
            </li><li class="elements">foo2</li><li>
        </li><li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
        </li>


### URL 初始化

In [4]:
# Build the document from a page fetched over HTTP.
# The page declares charset=utf-8 in its <meta> tag, yet without an explicit
# encoding the Chinese <title> was printed as mojibake; forcing UTF-8
# decoding of the response makes the text come out correctly.
doc = pq(url="http://www.baidu.com", encoding="utf-8")

print(doc('head'))

<head><meta http-equiv="content-type" content="text/html;charset=utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=Edge"/><meta content="always" name="referrer"/><link rel="stylesheet" type="text/css" href="http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css"/><title>百度一下，你就知道</title></head> 


### 文件初始化

In [None]:
# Build the document from a local HTML file instead of a string or URL.
# Expects a file named demo.html (relative path -- presumably resolved
# against the notebook's working directory; this cell has no recorded run).
doc = pq(filename='demo.html')
print(doc('li'))

## 基本CSS选择器

In [9]:
# Re-parse the sample markup, then narrow down with a descendant selector:
# class "panel" -> class "panel-body" -> the element with id "list-1".
doc = pq(html)
print(doc('.panel .panel-body #list-1'))

<ul class="list" id="list-1">
            <li class="elements">foo</li><li>
            </li><li class="elements">foo1</li><li>
            </li><li class="elements">foo2</li><li>
        </li></ul>
        


## 查找元素 

### 子元素

In [11]:
# Select the heading block, then look up the <h4> inside it with find().
items = doc('.panel-heading')
h4 = items.find('h4')

# Show that both the selection and the find() result are PyQuery objects,
# followed by the matched <h4> itself.
print(type(items), type(h4), h4, sep='\n')

<class 'pyquery.pyquery.PyQuery'>
<class 'pyquery.pyquery.PyQuery'>
<h4>hello</h4>
    


### 父元素

In [12]:
# parent() gives the element that directly encloses the selection; the
# result is again a PyQuery object. `items` comes from the previous cell.
container = items.parent()
print(type(container), container, sep='\n')

<class 'pyquery.pyquery.PyQuery'>
<div class="panel">
    <div class="panel-heading">
        <h4>hello</h4>
    </div>
    <div class="panel-body">
        <ul class="list" id="list-1">
            <li class="elements">foo</li><li>
            </li><li class="elements">foo1</li><li>
            </li><li class="elements">foo2</li><li>
        </li></ul>
        <ul class="list list-small" id="list-2">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
        </li></ul>
    </div>
</div>



In [13]:
# 查找所有祖先节点
parents = items.parents()
print(type(parents))
print(parents)

<class 'pyquery.pyquery.PyQuery'>
<html><body><div class="panel">
    <div class="panel-heading">
        <h4>hello</h4>
    </div>
    <div class="panel-body">
        <ul class="list" id="list-1">
            <li class="elements">foo</li><li>
            </li><li class="elements">foo1</li><li>
            </li><li class="elements">foo2</li><li>
        </li></ul>
        <ul class="list list-small" id="list-2">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
        </li></ul>
    </div>
</div>
</body></html><body><div class="panel">
    <div class="panel-heading">
        <h4>hello</h4>
    </div>
    <div class="panel-body">
        <ul class="list" id="list-1">
            <li class="elements">foo</li><li>
            </li><li class="elements">foo1</li><li>
            </li><li class="elements">foo2</li><li>
        </li></ul>
        <ul class="list list-small" id="list-2">
    

### 兄弟元素

In [17]:
# '.list.list-small' (no space) matches an element carrying both classes.
ul = doc('.list.list-small')

# Print that element, then its siblings filtered by the '.list' selector.
print(ul, ul.siblings('.list'), sep='\n')

<ul class="list list-small" id="list-2">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
        </li></ul>
    
<ul class="list" id="list-1">
            <li class="elements">foo</li><li>
            </li><li class="elements">foo1</li><li>
            </li><li class="elements">foo2</li><li>
        </li></ul>
        


## 遍历

In [19]:
# items() returns a generator (see the printed type) that yields the matched
# <li> elements one at a time, so it can only be consumed once.
lis = doc('li').items()
print(type(lis))

# NOTE: the loop variable `li` stays bound in the notebook namespace after
# the loop finishes.
for li in lis:
    print(li)

<class 'generator'>
<li class="elements">foo</li>
<li>
            </li>
<li class="elements">foo1</li>
<li>
            </li>
<li class="elements">foo2</li>
<li>
        </li>
<li class="elements">foo3</li>
<li>
            </li>
<li class="elements">foo4</li>
<li>
            </li>
<li class="elements">foo5</li>
<li>
        </li>


## 获取信息

### 获取属性

In [23]:
# NOTE: despite its name, `li` holds the <ul id="list-2"> element here; the
# name is kept because later cells reuse it.
li = doc('.list.list-small')

# attr('id') and attr.id are two equivalent ways of reading an attribute.
print(li, li.attr('id'), li.attr.id, sep='\n')

<ul class="list list-small" id="list-2">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
        </li></ul>
    
list-2
list-2


### 获取文本

In [24]:
# text() returns the text content of the selection with the markup stripped.
# `li` is the <ul id="list-2"> selection made in the previous cell.
print(li.text())

foo3
foo4
foo5


### 获取html

In [26]:
# Same sample markup as before, with an <a> link added at the end of the
# second list. As in the first sample, items are closed with `<li>` rather
# than `</li>`, which produces the empty <li> nodes seen in the output.
# NOTE: this rebinds the notebook-wide `html` and `doc` names.
html = '''

<div class = "panel">
    <div class = "panel-heading">
        <h4>hello</h4>
    </div>
    <div class = "panel-body">
        <ul class = "list" id = "list-1">
            <li class = "elements">foo<li>
            <li class = "elements">foo1<li>
            <li class = "elements">foo2<li>
        </ul>
        <ul class = "list list-small" id = "list-2">
            <li class = "elements">foo3<li>
            <li class = "elements">foo4<li>
            <li class = "elements">foo5<li>
            <a href="link2.html">link</a>
        </ul>
    </div>
</div>
'''
doc = pq(html)

# html() returns the inner markup of the selection (the <ul> tag itself is
# not included in the printed result).
print(doc('.list.list-small').html())


            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
            <a href="link2.html">link</a>
        </li>


## DOM 操作

### addClass、removeClass

In [27]:
# Start from the <ul> carrying both classes and show its initial state.
# (`li` actually refers to the <ul id="list-2"> element.)
li = doc('.list.list-small')
print(li)

# removeClass() / addClass() modify the element in place and return the
# selection, so each call can be printed directly: first without
# "list-small", then with it restored.
print(li.removeClass('list-small'))
print(li.addClass('list-small'))

<ul class="list list-small" id="list-2">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
            <a href="link2.html">link</a>
        </li></ul>
    
<ul class="list" id="list-2">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
            <a href="link2.html">link</a>
        </li></ul>
    
<ul class="list list-small" id="list-2">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
            <a href="link2.html">link</a>
        </li></ul>
    


### attr、css

In [29]:
# attr(name, value) sets an attribute on the selected element and returns
# the selection, which is printed to show the new name="link" attribute.
print(li.attr('name', 'link'))

# css(property, value) adds an inline style declaration; it shows up as a
# style="..." attribute in the printed element.
print(li.css('font-size', '14px'))

<ul class="list list-small" id="list-2" name="link">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
            <a href="link2.html">link</a>
        </li></ul>
    
<ul class="list list-small" id="list-2" name="link" style="font-size: 14px">
            <li class="elements">foo3</li><li>
            </li><li class="elements">foo4</li><li>
            </li><li class="elements">foo5</li><li>
            <a href="link2.html">link</a>
        </li></ul>
    


### remove

In [30]:
# A wrapper that mixes bare text with a nested <p>; used to show how to get
# the wrapper's own text without the paragraph's text.
# NOTE: this rebinds the notebook-wide `html` and `doc` names.
html = '''
<div class='wrap'>
    hello world
    <p>this is a paragraph.</p>
</div>
'''

doc = pq(html)
wrap = doc('.wrap')
# Before removal, text() includes the paragraph's text as well.
print(wrap.text())
wrap.find('p').remove()  # drop the <p> element from the document
# After removal only the wrapper's own text is left.
print(wrap.text())

hello world
this is a paragraph.
hello world


## 伪类选择器

In [37]:
# Variant of the earlier sample: the first item has class "elements1" and the
# second list has class "list2", so the '.list' selectors below only reach
# the first <ul>. Items are still closed with `<li>` instead of `</li>`,
# which is why whitespace-only <li> nodes appear among the results.
html = '''

<div class = "panel">
    <div class = "panel-heading">
        <h4>hello</h4>
    </div>
    <div class = "panel-body">
        <ul class = "list" id = "list-1">
            <li class = "elements1">foo<li>
            <li class = "elements">foo1<li>
            <li class = "elements">foo2<li>
        </ul>
        <ul class = "list2 list-small" id = "list-2">
            <li class = "elements">foo3<li>
            <li class = "elements">foo4<li>
            <li class = "elements">foo5<li>
            <a href="link2.html">link</a>
        </ul>
    </div>
</div>
'''

doc = pq(html)

# Evaluate each pseudo-class selector in turn and print what it matches.
# map() is lazy, so every selection is computed right before it is printed;
# `li` ends up bound to the last result.
for li in map(doc, (
    '.list li:first-child',    # first child of its parent
    '.list li:last-child',     # last child of its parent
    '.list li:nth-child(2)',   # second child of its parent
    '.list li:gt(2)',          # matches whose zero-based index is greater than 2
    '.list li:nth-child(2n)',  # children at even positions
    'li:contains(foo3)',       # items whose text contains "foo3"
)):
    print(li)

<li class="elements1">foo</li>
<li>
        </li>
<li>
            </li>
<li>
            </li><li class="elements">foo2</li><li>
        </li>
<li>
            </li><li>
            </li><li>
        </li>
<li class="elements">foo3</li>
