In [2]:
import requests
from urllib.parse import urlencode
from pyquery import PyQuery as pq
from pymongo import MongoClient

# Number of timeline pages the script walks through (pages 1..max_page).
max_page = 10

# Endpoint of the m.weibo.cn JSON API; the query string is appended to it.
base_url = 'https://m.weibo.cn/api/container/getIndex?'

# Headers sent with every request. 'X-Requested-With' marks the call as an
# XMLHttpRequest; the other values are fixed strings copied verbatim.
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/u/2830678474',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    ),
    'X-Requested-With': 'XMLHttpRequest',
}

# MongoDB handles: default client, database 'weibo', collection 'weibo'.
client = MongoClient()
db = client['weibo']
collection = db['weibo']


def get_page(page):
    """Fetch one page of the timeline from the m.weibo.cn JSON API.

    Args:
        page: Page number sent as the ``page`` query parameter.

    Returns:
        A ``(payload, page)`` tuple, where ``payload`` is the decoded JSON
        body, when the server answers with status 200 and a valid JSON body.
        ``None`` when the request fails, the status is not 200, or the body
        cannot be decoded as JSON.
    """
    params = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'page': page
    }
    url = base_url + urlencode(params)
    try:
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        # RequestException is the base class of ConnectionError and Timeout,
        # so every transport-level failure is reported the same way.
        print('Error', e.args)
        return None
    if response.status_code != 200:
        print('Error', ('unexpected status code', response.status_code))
        return None
    try:
        return response.json(), page
    except ValueError as e:
        # Raised when the body is not valid JSON (e.g. an HTML error page).
        print('Error', e.args)
        return None


def parse_page(json, page: int):
    """Yield a flat summary dict for each post found in one API payload.

    Args:
        json: Decoded response body (a dict) or a falsy value. The posts are
            read from ``json['data']['cards'][i]['mblog']``.
        page: Page number the payload was fetched for.

    Yields:
        Dicts with the keys ``id``, ``text`` (the post's HTML reduced to
        plain text), ``attitudes``, ``comments`` and ``reposts``.
    """
    if not json:
        return
    # 'data' or 'cards' may be missing or null; treat that as "no cards"
    # instead of failing on None.get(...).
    cards = (json.get('data') or {}).get('cards') or []
    for index, card in enumerate(cards):
        # Kept from the original logic: the second card of the first page is
        # always skipped (presumably a non-post card -- TODO confirm).
        if page == 1 and index == 1:
            continue
        mblog = card.get('mblog')
        if not mblog:
            # Cards without an 'mblog' entry carry no post to extract.
            continue
        text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # Only hand non-empty markup to PyQuery; a missing text becomes ''.
            'text': pq(text).text() if text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }


def save_to_mongo(result):
    """Insert one parsed post into the module-level ``collection``.

    Args:
        result: Dict describing a single post, stored as one document.
    """
    # Collection.insert() was deprecated in PyMongo 3 and removed in
    # PyMongo 4; insert_one() is the single-document replacement.
    if collection.insert_one(result).inserted_id is not None:
        print('Saved to Mongo')


# Script entry point: fetch pages 1..max_page, print every parsed post and
# store it in MongoDB.
if __name__ == '__main__':
    for page in range(1, max_page + 1):
        fetched = get_page(page)
        # get_page() returns None when a page could not be fetched; skip that
        # page instead of failing while unpacking None.
        if not fetched:
            continue
        payload, fetched_page = fetched
        for result in parse_page(payload, fetched_page):
            print(result)
            save_to_mongo(result)

{'id': '4278160793059549', 'text': '我中了就再抽奖送出去//@NLPJob:问一下右边，要是中奖了怎么处理？//@崔庆才丨静觅: 拉低中奖率//@Fengari:才哥稳的一笔@崔庆才丨静觅', 'attitudes': 0, 'comments': 2, 'reposts': 4}
Saved to Mongo
{'id': '4277759108996221', 'text': '拉低中奖率//@Fengari:才哥稳的一笔@崔庆才丨静觅', 'attitudes': 2, 'comments': 5, 'reposts': 30}
Saved to Mongo
{'id': '4276611929338402', 'text': '最近压力很大，勿扰，谢谢理解。', 'attitudes': 11, 'comments': 6, 'reposts': 0}
Saved to Mongo
{'id': '4275594835262800', 'text': '佛祖保佑，面试顺利！🙏', 'attitudes': 23, 'comments': 14, 'reposts': 1}
Saved to Mongo
{'id': '4273274626313421', 'text': '富士拍立得！想要的速来看！', 'attitudes': 1, 'comments': 0, 'reposts': 0}
Saved to Mongo
{'id': '4272501490351473', 'text': '哪里可以大批量收集到标题党的新闻啊？', 'attitudes': 2, 'comments': 8, 'reposts': 0}
Saved to Mongo
{'id': '4272328815388640', 'text': '有点焦虑，好好调整。', 'attitudes': 7, 'comments': 5, 'reposts': 1}
Saved to Mongo
{'id': '4272004045840806', 'text': '求转发！不要去北京五道口的“信服”维修任何设备！这是一家黑店！ 五道口的“信服”你是一家黑店吗？', 'attitudes': 26, 'comments': 31, 'reposts': 



{'id': '4269792297417984', 'text': '顺利完成一件人生大事😂😂😂 北京·欧美汇购物中心', 'attitudes': 20, 'comments': 14, 'reposts': 2}
Saved to Mongo
{'id': '4269358379826342', 'text': '转发微博', 'attitudes': 1, 'comments': 1, 'reposts': 7}
Saved to Mongo
{'id': '4269354386698664', 'text': '', 'attitudes': 6, 'comments': 0, 'reposts': 2}
Saved to Mongo
{'id': '4269262711525357', 'text': '6666//@大佬:两台宝马！前所未有！反正转了也不要钱，没准儿就是你的！转我这条加抽666红包！8.10号开，记得关注原po的两个金主！@微博抽奖平台 大佬的秒拍视频', 'attitudes': 0, 'comments': 1, 'reposts': 1}
Saved to Mongo
{'id': '4269257619611350', 'text': '抽奖了//@蚁工厂：不让大家看看作者小哥哥吗 @崔庆才丨静觅', 'attitudes': 2, 'comments': 3, 'reposts': 1}
Saved to Mongo
{'id': '4268541799176271', 'text': '一文了解 Elasticsearch 及其与 Python 的对接实现一文了解 Elasticsearch 及其与 Python 的对接实现', 'attitudes': 4, 'comments': 2, 'reposts': 6}
Saved to Mongo
{'id': '4268519887406018', 'text': '', 'attitudes': 0, 'comments': 0, 'reposts': 2}
Saved to Mongo
{'id': '4267861067307512', 'text': '威廉哥的演讲干货满满！另外更重要的是真人比照片更帅！@王威廉 北京·微软亚太研发集团总部 崔庆才丨静觅的秒拍视频'

{'id': '4231378067071372', 'text': 'Splash压力过大？来试试负载均衡吧！ Splash压力过大？来试试负载均衡吧！', 'attitudes': 0, 'comments': 0, 'reposts': 0}
Saved to Mongo
{'id': '4230984784339627', 'text': '哎呦不错哦', 'attitudes': 7, 'comments': 10, 'reposts': 0}
Saved to Mongo
{'id': '4230983370192151', 'text': '大佬们求帮投下70号，其他的几组随便投少的几个就好啦，谢谢！🙏🙏🙏爱读摄影大赛 | 开始投票啦！你的作品入围了吗？', 'attitudes': 2, 'comments': 1, 'reposts': 1}
Saved to Mongo
{'id': '4230089727551510', 'text': '', 'attitudes': 1, 'comments': 1, 'reposts': 1}
Saved to Mongo
{'id': '4230075035874699', 'text': 'Scrapy对接Docker网页链接', 'attitudes': 3, 'comments': 1, 'reposts': 0}
Saved to Mongo
{'id': '4230074364576785', 'text': '正品，有需要的吗？', 'attitudes': 0, 'comments': 3, 'reposts': 0}
Saved to Mongo
{'id': '4229064983585072', 'text': '求大家给一号投一下票谢谢！！', 'attitudes': 0, 'comments': 1, 'reposts': 0}
Saved to Mongo
{'id': '4229062484907660', 'text': '只会用Selenium爬网页？Appium爬App了解一下只会用Selenium爬网页？Appium爬App了解一下', 'attitudes': 0, 'comments': 3, 'reposts': 8}
Saved to Mongo
{'id'