/
reddit_keywords_search.py
60 lines (45 loc) · 1.35 KB
/
reddit_keywords_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Search Method 1
import praw
import re
# Search Method 1: ask PRAW's search call for a keyword inside each subreddit
# and print every hit together with a link to it.

# Replace the placeholder with your own user agent string before running.
user_agent = ("[use your own user agent]")
r = praw.Reddit(user_agent = user_agent)
subreddit_lst = ["cuddlebuddies"]
reddit_prefix = "https://www.reddit.com/r/"
# NOTE: limit_num and posts are defined here but not used by this method.
limit_num = 1000
posts = {}

for st in subreddit_lst:
    subreddit = r.get_subreddit(st)
    url_prefix = reddit_prefix + st + "/"
    # I tried to set the period and author here, but the search results were
    # not accurate at all.
    x = r.search('seattle', subreddit)
    for e in x:
        # Single-argument print(...) produces the same output on Python 2 and
        # Python 3, unlike the bare print statement.
        print(e)
        # Link is the subreddit URL followed by the submission id.
        print(url_prefix + e.id)
# Search Method 2 - more flexibility, can control where to search
import praw
import re
# Search Method 2: download the newest submissions of each subreddit into a
# local dict, then filter them locally by key term.

user_agent = ("cuddle_analysis 1.01")
r = praw.Reddit(user_agent = user_agent)
subreddit_lst = ["cuddlebuddies"]
reddit_prefix = "https://www.reddit.com/r/"
# Passed as the `limit` argument of get_new() for each subreddit.
limit_num = 1000

# Maps submission id -> dict with "title", "text", "score", "comments_num"
# and "url" for that submission.
posts = {}

for st in subreddit_lst:
    subreddit = r.get_subreddit(st)
    url_prefix = reddit_prefix + st + "/"
    for s in subreddit.get_new(limit = limit_num):
        sid = s.id
        posts[sid] = {
            "title": s.title,
            # Title and body joined, so a search can cover both if wanted.
            "text": s.title + ' ' + s.selftext,
            "score": s.score,
            "comments_num": s.num_comments,
            "url": url_prefix + sid,
        }

# Number of distinct submissions collected across all subreddits.
print(len(posts))

key_terms = ['seattle']
for v in posts.values():
    # Lowercase once per post; the match below is case-insensitive.
    title = v['title'].lower()
    # any() reports each post once even when several key terms match it.
    # Only the title is searched; use v['text'] to include the body as well.
    if any(key_term.lower() in title for key_term in key_terms):
        print(v['title'])
        print(v['url'])
        # Blank separator line; print("") works on both Python 2 and 3.
        print("")