## 数据库操作模块

In [110]:
import sqlite3

class Database:
    """Small wrapper around a SQLite database holding a single ``BOOKS`` table.

    Opening the database creates the file (if missing) and the ``BOOKS``
    table (if missing).  The connection and a shared cursor are exposed as
    ``self.conn`` and ``self.cursor``.

    The object can be used as a context manager; the connection is closed
    when the ``with`` block exits.
    """

    # Column names of BOOKS.  update() only accepts these: a column name
    # cannot be bound as a ``?`` parameter, so it has to be validated before
    # it is placed into the SQL text.
    _COLUMNS = ('BID', 'TITLE', 'AUTHR', 'PTIME', 'DESCR', 'CONTENT')

    def __init__(self, path='data.db'):
        """Connect to the database at *path* and make sure BOOKS exists.

        Args:
            path: Database file to open; defaults to ``'data.db'`` in the
                current directory.  ``':memory:'`` gives a throwaway
                in-memory database.
        """
        self.conn = sqlite3.connect(path)
        self.cursor = self.conn.cursor()
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS BOOKS
            (BID   INT PRIMARY KEY NOT NULL,
             TITLE CHAR(30) NOT NULL,
             AUTHR CHAR(30) NOT NULL,
             PTIME INT,
             DESCR TEXT,
             CONTENT TEXT);
        ''')
        self.conn.commit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def insert(self, bid, title, authr, ptime, descr, content):
        """Insert one book row and commit.

        Args are the column values in table order: BID, TITLE, AUTHR,
        PTIME, DESCR, CONTENT.

        Raises:
            sqlite3.IntegrityError: if a constraint is violated, e.g. *bid*
                is already present (BID is the primary key).
        """
        # ``with self.conn`` commits on success and rolls back on error, so a
        # failed insert does not leave a half-open transaction behind.
        with self.conn:
            self.cursor.execute('''
                INSERT INTO BOOKS(BID, TITLE, AUTHR, PTIME, DESCR, CONTENT)
                VALUES(?, ?, ?, ?, ?, ?);''', (bid, title, authr, ptime, descr, content))

    def printb(self, title):
        """Print BID, TITLE, AUTHR, PTIME and DESCR of every book titled *title*.

        CONTENT is deliberately left out of the printout.
        """
        rows = self.cursor.execute('''SELECT * FROM BOOKS WHERE TITLE=?''', (title, ))
        # Distinct loop names so the ``title`` parameter is not shadowed.
        for bid, row_title, authr, ptime, descr, _ in rows:
            print(f'[  BID]: {bid}\n[TITLE]: {row_title}\n[AUTHR]: {authr}\n[PTIME]: {ptime}\n[DESCR]: {descr}\n')

    def update(self, bid, field, value):
        """Set column *field* of the book with id *bid* to *value* and commit.

        Args:
            bid: BID of the row to change.
            field: Column name, case-insensitive (it is upper-cased).
            value: New value for that column.

        Raises:
            ValueError: if *field* is not a column of BOOKS.
        """
        column = field.upper()
        if column not in self._COLUMNS:
            # Reject anything that is not a known column instead of pasting
            # caller-supplied text into the SQL statement.
            raise ValueError(f'unknown BOOKS column: {field!r}')
        with self.conn:
            self.cursor.execute(
                f'UPDATE BOOKS SET {column} = ? WHERE BID = ?;', (value, bid))

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()

## 使用说明
参考博客：[python + sqlite3 基本操作](https://blog.csdn.net/weixin_43789195/article/details/88201873)

### 初始化数据库
连接（不存在则自动创建）、建表（不存在则自动创建）

+ 表名为`BOOKS`

|字段名|含义|数据类型|要求|
|---|---|---|---|
|BID|书的编号|INT|PRIMARY KEY、NOT NULL|
|TITLE|标题|CHAR(30)| NOT NULL|
|AUTHR|作者|CHAR(30)| NOT NULL|
|PTIME|出版时间（爬虫实际写入形如 `2008-03-07` 的日期字符串）|INT|-|
|DESCR|描述|TEXT|-|
|CONTENT|内容|TEXT|-|

In [112]:
# Connect to the SQLite database and create the BOOKS table if it is missing.
database = Database()

### 插入一本书
参数依次为BID，TITLE，AUTHR，PTIME，DESCR，CONTENT，注意传入的数据类型

In [None]:
# Insert a book with BID 0, using a single space as placeholder for DESCR and
# CONTENT.  `title`, `authr` and `ptime` must already be defined in the
# session (the crawler cell assigns them).
database.insert(0, title, authr, ptime, ' ', ' ')

### 更新一本书的属性
参数依次为：BID、要更新的字段名 field（不区分大小写，须为表中已有的列名）、更新后的值 value

In [114]:
# Set the DESCR column of the book with BID 0 to `descr`; update() upper-cases
# the field name, so 'descr' addresses the DESCR column.
database.update(0, 'descr', descr)

### 输出一本书的简要信息
参数为书名

In [115]:
# Print BID, TITLE, AUTHR, PTIME and DESCR of every row whose TITLE equals
# `title` (CONTENT is not printed).
database.printb(title)

[  BID]: 0
[TITLE]: Boob Tube (a Soap Opera Novel)
[AUTHR]: Mark Coker; & Lesleyann Coker
[PTIME]: 2008-03-07
[DESCR]: Actress Gina Martin lands a role on a top daytime television soap opera, and days later her predecessor is found dead in the Hollywood hills. Will Gina be next? Boob Tube offers readers a fast-paced read filled with intrigue and humor, and a surprise ending that will leave you breathless. Co-written by Lesleyann Coker, a former reporter for Soap Opera Weekly Magazine.


### 手动关闭数据库

In [111]:
# Close the underlying SQLite connection.
database.close()

## 爬第一个网站的书

网站地址：[https://www.smashwords.com/](https://www.smashwords.com/)

这里使用了`parsel`模块，可以参考[官方文档](https://parsel.readthedocs.io/en/latest/usage.html#using-selectors)

In [None]:
import requests, time
from parsel import Selector

# Crawl Smashwords book pages by numeric id and store every book that has a
# downloadable .txt/.epub file in the BOOKS table.
#
# `title`, `authr`, `descr`, `ptime` and `database` stay module-level names on
# purpose: other cells of this notebook read them.  `database` is left open
# for the same reason; call database.close() when finished.

REQUEST_TIMEOUT = 10  # seconds; without a timeout a request can block forever

database = Database()
url = "https://www.smashwords.com/books/view/{bid}"
# One Session reuses the underlying connection for all requests.
with requests.Session() as session:
    for i in range(3, 80000):
        try:
            res = session.get(url.format(bid=i), timeout=REQUEST_TIMEOUT)
            if res.status_code == 404:
                print("No Book", i)
                continue
            if not res.ok:
                # Any other error status is an error page, not a book page,
                # so it must not be parsed as one.
                print("Request Failed", i, res.status_code)
                continue
            selector = Selector(res.text)
            title = selector.css("h1[itemprop='name']::text").extract_first()
            authr = selector.css("meta[name='Author']").xpath('@content').extract_first()
            descr = selector.css("meta[name='Description']").xpath('@content').extract_first()
            ptime = selector.css("li[itemprop='datePublished']").xpath('@content').extract_first()
            download_links = [link for link in selector.css("#download a").xpath("@href").getall()
                              if link.endswith((".txt", ".epub"))]
            content = ""
            if download_links:
                # NOTE(review): the last matching link is used as before; if it
                # is an .epub the decoded text is presumably not readable
                # prose -- confirm whether .txt links should be preferred.
                download = session.get("https://www.smashwords.com" + download_links[-1],
                                       timeout=REQUEST_TIMEOUT)
                # Only keep the body of a successful download; an error page
                # must not be stored as book content.
                if download.ok:
                    content = download.text
            if not (content and title and authr and ptime):
                print("Not Free", i)
            else:
                database.insert(i, title, authr, ptime, descr, content)
                database.printb(title)
        except requests.RequestException as err:
            # Timeouts / connection errors skip this id instead of aborting
            # the whole crawl.
            print("Request Failed", i, err)
        except sqlite3.IntegrityError:
            # BID is the primary key: the book is already stored, e.g. when
            # the crawl is run a second time.
            print("Already Stored", i)
        finally:
            # Throttle every iteration, including the early `continue` paths,
            # so missing ids do not hit the server without a pause.
            time.sleep(0.3)

[  BID]: 0
[TITLE]: Boob Tube (a Soap Opera Novel)
[AUTHR]: Mark Coker; & Lesleyann Coker
[PTIME]: 2008-03-07
[DESCR]: Actress Gina Martin lands a role on a top daytime television soap opera, and days later her predecessor is found dead in the Hollywood hills. Will Gina be next? Boob Tube offers readers a fast-paced read filled with intrigue and humor, and a surprise ending that will leave you breathless. Co-written by Lesleyann Coker, a former reporter for Soap Opera Weekly Magazine.
[  BID]: 3
[TITLE]: Boob Tube (a Soap Opera Novel)
[AUTHR]: Mark Coker; & Lesleyann Coker
[PTIME]: 2008-03-07
[DESCR]: Actress Gina Martin lands a role on a top daytime television soap opera, and days later her predecessor is found dead in the Hollywood hills. Will Gina be next? Boob Tube offers readers a fast-paced read filled with intrigue and humor, and a surprise ending that will leave you breathless. Co-written by Lesleyann Coker, a former reporter for Soap Opera Weekly Magazine.
Not Free 4
Not Free 