/
items.py
90 lines (78 loc) · 2.22 KB
/
items.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class UserInfoItem(scrapy.Item):
    """Scraped profile fields for one user."""

    user_id = scrapy.Field()  # User ID.
    user_name = scrapy.Field()  # Nickname.
    gender = scrapy.Field()  # Gender.
    district = scrapy.Field()  # Location of the user.
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).
class FollowItem(scrapy.Item):
    """Scraped list of the accounts a user follows."""

    user_id = scrapy.Field()
    follow_list = scrapy.Field()  # Everyone this user follows.
    # NOTE(review): presumably the length of follow_list; confirm in the spider.
    size = scrapy.Field()
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).
class FanItem(scrapy.Item):
    """Scraped list of a user's fans (followers)."""

    user_id = scrapy.Field()
    fan_list = scrapy.Field()  # All fans of this user.
    # NOTE(review): presumably the length of fan_list; confirm in the spider.
    size = scrapy.Field()
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).
class PostItem(scrapy.Item):
    """Scraped Weibo post record tied to a user."""

    user_id = scrapy.Field()
    # The original note reads "all Weibo posts".
    # NOTE(review): unclear whether this holds one ID or several; confirm in the spider.
    post_id = scrapy.Field()
    publish_time = scrapy.Field()  # Publish time.
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).
class TextItem(scrapy.Item):
    """Scraped text content of a Weibo post."""

    user_id = scrapy.Field()
    post_id = scrapy.Field()
    text = scrapy.Field()  # Text of each Weibo post.
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).
class ImageItem(scrapy.Item):
    """Scraped images attached to a Weibo post.

    Derives from ``scrapy.Item`` like the other item classes in this module.
    ``scrapy.Field`` is only a dict-based metadata holder, so subclassing it
    would not register the fields declared below.
    """

    user_id = scrapy.Field()
    post_id = scrapy.Field()
    # All images of each Weibo post.
    image_list = scrapy.Field()
    # NOTE(review): presumably the length of image_list; confirm in the spider.
    size = scrapy.Field()
    # Crawl time (year, month, day).
    crawl_date = scrapy.Field()
class CommentItem(scrapy.Item):
    """Scraped comments on a Weibo post."""

    user_id = scrapy.Field()
    post_id = scrapy.Field()
    comment_list = scrapy.Field()  # All comments on each Weibo post.
    # NOTE(review): presumably the length of comment_list; confirm in the spider.
    size = scrapy.Field()
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).
class ForwardItem(scrapy.Item):
    """Scraped forwards (reposts) of a Weibo post."""

    user_id = scrapy.Field()
    post_id = scrapy.Field()
    forward_list = scrapy.Field()  # All forwards of each Weibo post.
    # NOTE(review): presumably the length of forward_list; confirm in the spider.
    size = scrapy.Field()
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).
class ThumbupItem(scrapy.Item):
    """Scraped thumbs-up (likes) on a Weibo post."""

    user_id = scrapy.Field()
    post_id = scrapy.Field()
    thumbup_list = scrapy.Field()  # All thumbs-up on each Weibo post.
    # NOTE(review): presumably the length of thumbup_list; confirm in the spider.
    size = scrapy.Field()
    crawl_date = scrapy.Field()  # Crawl time (year, month, day).