In [3]:
# 进程是资源单位，每一个进程至少要有一个线程
# 线程是执行单位

from threading import Thread

def func():
    """Print "func 0" through "func 9", one line per print call."""
    step = 0
    while step < 10:
        print("func", step)
        step += 1

if __name__ == '__main__':
    # Run func() on a second thread while this thread prints its own lines.
    worker = Thread(target=func)
    # start() only makes the thread runnable; when it actually gets CPU time
    # is decided by the scheduler, so the two outputs may interleave.
    worker.start()
    for step in range(10):
        print("main", step)

# 另一种写法
# class MyThread(Thread):
#     def run(self):
#         for i in range(10):
#             print("func", i)
#
# if __name__ == '__main__':
#     t = MyThread()
#     t.start()
#     for i in range(10):
#         print("main", i)

funcmain 0
func 1
 0func 2
func 3
func 4
func 5
main 1
main 2
func 6
func 7
func 8
func 9

main 3
main 4
main 5
main 6
main 7
main 8
main 9


In [5]:
from multiprocessing import Process

# 不推荐使用

def func(name):
    """Print ``name`` followed by each counter value 0..9, one line per call."""
    counter = 0
    while counter < 10:
        print(name, counter)
        counter += 1

if __name__ == '__main__':
    # Start one child process per label; `p` ends up bound to the last one,
    # exactly as when the two launches are written out by hand.
    for label in ("进程1", "进程2"):
        p = Process(target=func, args=(label,))
        p.start()
    # The parent keeps printing without waiting for the children.
    for step in range(10):
        print("主进程", step)

主进程 0
主进程 1
主进程 2
主进程 3
主进程 4
主进程 5
主进程 6
主进程 7
主进程 8
主进程 9


In [None]:
# 线程池

# 线程任务调度由线程池来完成

from concurrent.futures import ThreadPoolExecutor

# 进程池 ProcessPoolExecutor

def fn(name):
    """Task body for the pool demo: print ``name`` with the numbers 0..9."""
    for tick in range(0, 10):
        print(name, tick)

if __name__ == '__main__':
    with ThreadPoolExecutor(50) as pool:
        for index in range(10):
            pool.submit(fn, name=f"线程{index}")
    # Leaving the `with` block waits until every submitted task has finished;
    # only then does execution continue to the line below.
    print("over")

In [None]:
from concurrent.futures import ThreadPoolExecutor
import requests
from lxml import etree
import csv

# Shared output file and CSV writer used by download_one_page().
# newline="" is what the csv module expects for file objects: the writer emits
# its own "\r\n" terminators, and without it platforms that translate "\n"
# would produce "\r\r\n" (a blank line after every row).
f = open("data.csv", mode="w", encoding="utf-8", newline="")
wr = csv.writer(f)

def download_one_page(url):
    """Fetch one listing page and write its table rows to the shared CSV writer.

    The table is located by a fixed absolute XPath. Its first ``<tr>`` is
    skipped (presumably the header row -- confirm against the page), backslash
    and pipe characters are removed from every cell text, and each remaining
    row is written through the module-level ``wr``. Prints "<url> over" once
    the page has been processed.
    """
    page = etree.HTML(requests.get(url).text)
    # [0] raises IndexError when the page does not contain the table at this path.
    table = page.xpath("/html/body/div[2]/div[4]/div[1]/table")[0]
    for row in table.xpath("./tr")[1:]:
        cells = [text.replace("\\", "").replace("|", "") for text in row.xpath("./td/text()")]
        wr.writerow(cells)
    print(f"{url} over")

if __name__ == '__main__':
    try:
        # Bind the executor to its own name: reusing `f` would shadow the CSV
        # file handle and make it impossible to close afterwards.
        with ThreadPoolExecutor(50) as pool:
            # Hand each page to the pool so downloads actually run concurrently
            # (calling download_one_page directly would fetch them one by one).
            futures = [
                pool.submit(
                    download_one_page,
                    f'http://www.xinfadi.com.cn/marketanalysis/0/list/{page}.shtml',
                )
                for page in range(1, 100)
            ]
            # result() re-raises any exception from a worker, which submit()
            # alone would silently swallow.
            for future in futures:
                future.result()
            # NOTE(review): workers write through the shared csv writer; add a
            # lock around wr.writerow if interleaved rows are ever observed.
    finally:
        # Flush and release data.csv even when a page fails.
        f.close()
    print("done")

In [None]:
# 协程，微线程
import time
import asyncio

async def func1():
    """Print "func1", suspend for 2 seconds, then print "func1" again."""
    tag = "func1"
    print(tag)
    # A synchronous time.sleep(2) here would break the async behaviour;
    # awaiting asyncio.sleep suspends this coroutine and lets other tasks run.
    await asyncio.sleep(2)
    print(tag)

async def func2():
    """Print "func2", suspend for 3 seconds, then print "func2" again."""
    tag = "func2"
    print(tag)
    # Non-blocking pause (a time.sleep(3) would be the synchronous variant).
    await asyncio.sleep(3)
    print(tag)

async def func3():
    """Print "func3", suspend for 4 seconds, then print "func3" again."""
    tag = "func3"
    print(tag)
    # Non-blocking pause (a time.sleep(4) would be the synchronous variant).
    await asyncio.sleep(4)
    print(tag)

async def main():
    """Schedule func1, func2 and func3 as tasks and wait until all have finished."""
    # create_task() schedules each coroutine on the running loop, in this order.
    pending = [asyncio.create_task(job()) for job in (func1, func2, func3)]
    await asyncio.wait(pending)

if __name__ == '__main__':
    # Time the whole run: the three sleeps overlap when the tasks run concurrently.
    # NOTE(review): if this cell runs in a kernel that already has an event loop,
    # asyncio.run() raises RuntimeError and `await main()` is needed instead -- confirm.
    started = time.time()
    asyncio.run(main())
    elapsed = time.time() - started
    print(elapsed)

In [None]:
# 异步操作 pip install aiohttp

import asyncio
import aiohttp # 异步的http请求，效率高

urls = [
    'http://kr.shanghai-jiuxin.com/file/2021/0625/276b38c1489ff8d6fbcf442b309a5442.jpg',
    'http://kr.shanghai-jiuxin.com/file/2021/0625/e6f5048c93874005041102d7a3e1b2f6.jpg',
    'http://kr.shanghai-jiuxin.com/file/2021/0625/797c11c199cac132428510bd093bc56c.jpg'
]


async def aiodownload(url):
    """Download ``url`` and save the response body under ``img/``.

    The file name is whatever follows the last "/" in the URL. The ``img/``
    directory must already exist; ``open`` does not create it.
    """
    # Split once from the right: [1] is the part after the final slash.
    filename = url.rsplit('/', 1)[1]
    target = "img/" + filename
    async with aiohttp.ClientSession() as client, client.get(url) as response:
        # The file write itself is synchronous; aiofiles could be used instead.
        with open(target, mode='wb') as out:
            payload = await response.content.read()
            out.write(payload)


async def main():
    """Start one download task per entry in ``urls`` and wait for all of them."""
    downloads = [asyncio.create_task(aiodownload(link)) for link in urls]
    await asyncio.wait(downloads)


if __name__ == '__main__':
    # Build the top-level coroutine, then drive it to completion on a fresh event loop.
    download_all = main()
    asyncio.run(download_all)

In [None]:
# 百度小说 西游记 https://dushu.baidu.com/pc/detail?gid=4306063500

# 所有章节的内容
# https://dushu.baidu.com/api/pc/getCatalog?data={%22book_id%22:%224306063500%22}

# 查看每一章的内容
# https://dushu.baidu.com/api/pc/getChapterContent
# data: {"book_id":"4306063500","cid":"4306063500|11348571","need_bookinfo":1}

import json

import requests
import asyncio
import aiofiles
import aiohttp

# async 异步的意思
# 同步操作获取所有章节的cid和名称
# 再异步访问所有章节的链接，下载文章内容

async def aiodownload(cid, title):
    """Fetch one chapter by ``cid`` and write its text to ``西游记/<title>``.

    The request parameters are serialised to a JSON string and passed in the
    ``data`` query parameter. The chapter text is read from
    ``["data"]["novel"]["content"]`` of the JSON response. The ``西游记/``
    directory is expected to exist already.
    """
    # Encode the query object as a JSON string for the `data` parameter.
    query = json.dumps({
        "book_id": "4306063500",
        "cid": f"4306063500|{cid}",
        "need_bookinfo": 1
    })
    chapter_url = f"https://dushu.baidu.com/api/pc/getChapterContent?data={query}"
    async with aiohttp.ClientSession() as client, client.get(chapter_url) as response:
        body = await response.json()

        async with aiofiles.open("西游记/" + title, mode="w", encoding="utf-8") as out:
            await out.write(body["data"]["novel"]["content"])


async def getCatalog(url):
    """Fetch the chapter catalog and download every chapter concurrently.

    Reads the catalog JSON from ``url``, prints each chapter's title and cid,
    and schedules one ``aiodownload`` task per entry in
    ``["data"]["novel"]["items"]``. Returns once all tasks have finished; an
    empty catalog is a no-op.
    """
    # Synchronous request: it completes before any task is created, so it
    # does not hold up other coroutines.
    resp = requests.get(url)
    dic = resp.json()
    tasks = []
    for item in dic['data']['novel']['items']:
        title = item["title"]
        cid = item["cid"]
        print(title, cid)
        tasks.append(asyncio.create_task(aiodownload(cid, title)))
    # asyncio.wait() raises ValueError when given no tasks, so skip it for an
    # empty catalog.
    if tasks:
        await asyncio.wait(tasks)


if __name__ == '__main__':
    # Catalog endpoint for book 4306063500; the query is already percent-encoded.
    catalog_url = r"https://dushu.baidu.com/api/pc/getCatalog?data={%22book_id%22:%224306063500%22}"
    asyncio.run(getCatalog(catalog_url))
