This repository has been archived by the owner on Nov 24, 2020. It is now read-only.
/
linktitle.py
146 lines (135 loc) · 5.36 KB
/
linktitle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from api import *
from utils.pastebins import *
from BeautifulSoup import BeautifulSoup
import re
import urllib2
import HTMLParser
# Shared parser instance; used only for its unescape() helper when
# converting HTML entities in page titles back to plain text.
htmlparser = HTMLParser.HTMLParser()
def load():
    """Shows page titles of all URLs spoken in channel."""
    # DDL uses MySQL-isms (auto_increment, inline index()) -- assumes a
    # MySQL-compatible backend behind dbExecute.
    dbExecute('''create table if not exists urls (
        urlID int auto_increment primary key,
        url varchar(255),
        title text,
        unique(url) )''')
    dbExecute('''create table if not exists blacklists (
        blacklistID int auto_increment primary key,
        domain varchar(255),
        index(domain) )''')
    # None pattern: run searchLinks on every channel message to spot URLs.
    registerMessageHandler(None, searchLinks)
    registerFunction("links %S", showLinks, "links <search term>")
    registerFunction("all links %S", showAllLinks, "all links <search term>", restricted = True)
    registerFunction("show blacklist", showBlacklist, None, restricted = True)
    # NOTE(review): "links %S" and "all links %S" patterns overlap; if the
    # dispatcher is first-match, registration order matters -- preserve it.
    registerFunction("blacklist %s", blacklistDomain, "blacklist <domain>", restricted = True)
    registerFunction("remove blacklist %s", unBlacklistDomain, "remove blacklist <domain>", restricted = True)
# Register this plugin with the bot core; presumably the core invokes
# load() when the module is (re)loaded -- verify against the api module.
registerModule('LinkTitle', load)
def _isBlacklisted(domain):
    """Return True if *domain* or any parent domain is in the blacklist.

    Checks the full hostname first, then strips one leading label at a
    time (e.g. a.b.example.com -> b.example.com -> example.com) so a
    blacklisted parent domain also blocks all of its subdomains.
    """
    candidate = domain
    while True:
        rows = dbQuery("SELECT domain FROM blacklists WHERE domain LIKE %s", [candidate])
        if len(rows) > 0:
            return True
        _, sep, rest = candidate.partition('.')
        if not sep:
            # No more labels to strip; nothing matched.
            return False
        candidate = rest
def _parseTitle(html):
    """Extract the <title> text from an HTML fragment.

    Returns the HTML-entity-unescaped title encoded as UTF-8, or None
    when the document has no usable <title> element (the caller treats
    None as "not a titled page").
    """
    dom = BeautifulSoup(html)
    # Original fell through here when there was no <title>, hitting a
    # NameError on the return line instead of returning None.
    if dom.title is None or dom.title.string is None:
        return None
    title = dom.title.string
    # ``title`` is already the title text (a NavigableString); the
    # original accessed ``title.string`` on it, which raises
    # AttributeError in BeautifulSoup 3.  Debug ``print`` removed.
    return htmlparser.unescape(title).encode("utf-8")
def _fetchTitle(url):
    """Fetch *url* and return its page title, or its MIME type.

    Side effect: sets the module-global ``ismime`` flag -- True when the
    return value is a Content-Type string (non-HTML response, or HTML
    without a title), False when it is an actual page title.  Raises
    urllib2.URLError (and subclasses) on fetch failure.
    """
    global ismime
    opener = urllib2.build_opener()
    # Browser-like User-Agent: some sites reject obvious bot requests.
    opener.addheaders = [
        ('User-agent',"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"),
        ('Accept-Language','en-us')
    ]
    response = opener.open(url)
    try:
        mime = response.info().gettype()
        if mime != 'text/html':
            ismime = True
            return mime
        # Read only the first 10 KiB -- enough to cover <head>/<title>.
        title = _parseTitle(response.read(10240))
    finally:
        # Original leaked the socket; always release it.
        response.close()
    if title is None:
        ismime = True
        return mime
    ismime = False
    return title
def searchLinks(channel, sender, message):
match = re.search('(http(s)?://([^/#\s]+)[^#\s]*)(#|\\b)', message)
if match == None:
return
url = match.group(1)
domain = match.group(3)
if _isBlacklisted(domain):
log.info('Domain in blacklist: %s' % domain)
return
cache = dbQuery('SELECT title FROM urls WHERE url=%s LIMIT 1', [url])
if len(cache) > 0:
sendMessage(channel, '%s: %s' % ("Content-Type" if ismime is True else "Site title", cache[0][0]))
return
try:
title = _fetchTitle(url)
except urllib2.URLError, e:
if hasattr(e, 'reason'):
error = e.reason
else:
error = e.code
sendMessage(channel, 'Failed to fetch url: %s' % error)
return
dbExecute('INSERT INTO urls (url, title) VALUES (%s, %s)', [url, title])
sendMessage(channel, '%s: %s' % ("Content-Type" if ismime is True else "Site title", title))
def showLinks(channel, sender, searchterm):
    """Shows URLs whose titles match a search term."""
    pattern = '%' + searchterm + '%'
    matches = dbQuery('SELECT url, title FROM urls WHERE title LIKE %s OR url LIKE %s',
                      [pattern, pattern])
    # Nothing found -> say so; too many -> ask for a narrower term.
    if len(matches) == 0:
        sendMessage(channel, 'No results found.')
        return
    if len(matches) > 3:
        sendMessage(channel, '%s entries found, refine your search' % len(matches))
        return
    for linkurl, linktitle in matches:
        sendMessage(channel, '%s %s' % (linkurl, linktitle))
def showAllLinks(channel, sender, searchterm):
    """Posts all URLs whose titles match a search term on a pastebin."""
    links = dbQuery('SELECT url, title FROM urls WHERE title like %s OR url like %s',
        ['%' + searchterm + '%','%' + searchterm + '%'])
    if len(links) == 0:
        sendMessage(channel, "No links found")
        return
    # join() instead of quadratic string += concatenation.
    linklist = ''.join("%s : %s\n" % (link[0], link[1]) for link in links)
    try:
        url = paste(linklist)
    except Exception:
        # Original bare ``except:`` also swallowed SystemExit and
        # KeyboardInterrupt; keep the best-effort behavior but narrower.
        log.warning('Failed to upload link list')
        sendMessage(channel, "Error uploading link list")
        return
    sendMessage(channel, url)
def showBlacklist(channel, sender):
    """Lists the currently blacklisted domains."""
    # join() instead of quadratic string += concatenation.
    blacklist = ''.join(row[0] + "\n" for row in dbQuery('SELECT domain FROM blacklists'))
    try:
        url = paste(blacklist)
    except Exception:
        # Original bare ``except:`` also swallowed SystemExit and
        # KeyboardInterrupt; keep the best-effort behavior but narrower.
        log.warning('Failed to upload blacklist')
        sendMessage(channel, "Error uploading blacklist")
        return
    sendMessage(channel, url)
def blacklistDomain(channel, sender, domain):
    """Blocks URLs from a given domain from being summarized."""
    existing = dbQuery('SELECT domain FROM blacklists WHERE domain=%s', [domain])
    if existing:
        # Already present -- exact-match dedup only.
        sendMessage(channel, 'domain already blacklisted')
        return
    dbExecute('INSERT INTO blacklists (domain) VALUES (%s)', [domain])
    log.info('Domain blacklisted: %s' % domain)
    sendMessage(channel, 'Blacklisted %s' % domain)
def unBlacklistDomain(channel, sender, domain):
    """Unblocks URLs from a given domain from being summarized."""
    # Deleting a missing row is a harmless no-op, so no existence check.
    dbExecute('DELETE FROM blacklists WHERE domain=%s', [domain])
    log.info('Domain removed from blacklist: %s' % domain)
    reply = 'Removed %s from blacklist' % domain
    sendMessage(channel, reply)