Skip to content

Commit

Permalink
Unicode sandwich 🍞
Browse files Browse the repository at this point in the history
> Bytes on the outside, unicode on the inside
> Encode/decode at the edges

http://nedbatchelder.com/text/unipain/unipain.html#35

Close #11.
Close #12.
  • Loading branch information
dentarg committed Jan 3, 2016
1 parent 9cc7b04 commit d918643
Show file tree
Hide file tree
Showing 12 changed files with 84 additions and 225 deletions.
7 changes: 4 additions & 3 deletions ircbot.py
Expand Up @@ -46,7 +46,7 @@ def __cmp__(self, other):
class IRCBot:
def __init__(self, address, port, nick, username, realname):
self.client = IRCClient(address, port, nick, username, realname)
self.client.callbacks = { "on_connected": self.on_connected, "on_join": self.on_join, "on_nick_change": self.on_nick_change, "on_notice": self.on_notice, "on_part": self.on_part, "on_privmsg": self.on_privmsg, "on_quit": self.on_quit }
self.client.callbacks = { "on_connected": self.on_connected, "on_join": self.on_join, "on_nick_change": self.on_nick_change, "on_notice": self.on_notice, "on_part": self.on_part, "on_privmsg": self.on_privmsg, "on_raw_privmsg": self.on_raw_privmsg, "on_quit": self.on_quit }
self.plugins = []
self.timer_heap = PriorityQueue()

Expand Down Expand Up @@ -83,10 +83,11 @@ def on_part(self, nick, channel, reason):
self.execute_plugins("on_part", nick, channel, reason)

def on_privmsg(self, nick, target, message):
#for plugin in plugin_handler.all_plugins():
# plugin.on_privmsg(self, nick, target, message)
self.execute_plugins("on_privmsg", nick, target, message)

def on_raw_privmsg(self, nick, target, message, raw_line):
self.execute_plugins("on_raw_privmsg", nick, target, message, raw_line)

def on_quit(self, nick, reason):
self.execute_plugins("on_quit", nick, reason)

Expand Down
23 changes: 16 additions & 7 deletions ircclient/ircclient.py
Expand Up @@ -71,7 +71,9 @@ def send(self, line):

self.last_action = datetime.datetime.now()

data = line + "\r\n"
full_line = line + "\r\n"
# unicode -> bytes
data = full_line.encode('utf-8')

while data:
sent = self.s.send(data)
Expand Down Expand Up @@ -202,14 +204,17 @@ def on_ping(self, tupels):
self.send("PONG :" + tupels[4])

def on_privmsg(self, tupels):
source, target, message = tupels[2], tupels[4], tupels[5]
source, target, message, raw_line = tupels[2], tupels[4], tupels[5], tupels[6]

if target[0] != '#':
target = source

if "on_privmsg" in self.callbacks:
self.callbacks["on_privmsg"](source, target, message)

if "on_raw_privmsg" in self.callbacks:
self.callbacks["on_raw_privmsg"](source, target, message, raw_line)

def on_notice(self, tupels):
source, target, message = tupels[2], tupels[4], tupels[5]

Expand Down Expand Up @@ -253,16 +258,20 @@ def tick(self):
self.recv_buf += retn
recv_lines = self.recv_buf.splitlines(True)
self.recv_buf = ''
for line in recv_lines:
if not line.endswith("\r\n"):
self.recv_buf = line
for raw_line in recv_lines:
if not raw_line.endswith("\r\n"):
self.recv_buf = raw_line
else:
line = line.rstrip("\r\n")
raw_line = raw_line.rstrip("\r\n")

# bytes -> unicode
line = raw_line.decode('utf-8', 'replace')

self.log_line(timestamp() + " RECV: " + line)
m = self.irc_message_pattern.match(line)
if m:
if m.group(3) in self.message_handlers:
self.message_handlers[m.group(3)](m.group(0, 1, 2, 3, 4, 5))
self.message_handlers[m.group(3)](m.group(0, 1, 2, 3, 4, 5) + (raw_line,))

except socket.error, (error_code, error_message):
if error_code != errno.EWOULDBLOCK:
Expand Down
2 changes: 1 addition & 1 deletion offline_tester.py
Expand Up @@ -37,7 +37,7 @@ def tell(self, target, message):
print "all right. let's go!"
while 1:
print "> ",
line = sys.stdin.readline()
line = sys.stdin.readline().decode('utf-8')
if not line:
break
while len(line) > 0 and (line[-1] == '\n' or line[-1] == '\r'):
Expand Down
2 changes: 1 addition & 1 deletion plugins/__init__.py
Expand Up @@ -7,4 +7,4 @@
'tenta', 'prisjakt', 'spotify', 'down', 'metacritic',
'notes', 'fml', 'systembolaget', 'randombuy', 'festern_bbq',
'compliment', 'roulette', 'tyda', 'char', 'tweet',
'erekebabfredag', 'github', 'quote']
'erekebabfredag', 'github', 'quote', 'detect_encoding']
4 changes: 0 additions & 4 deletions plugins/command_catcher.py
Expand Up @@ -49,10 +49,6 @@ def on_command(self, bot, source, target, trigger, arguments):
except utility.TimeoutException:
return "Command '%s' took too long to execute." % trigger
except:
boll = list(traceback.extract_tb(sys.exc_info()[2]))
bolliStr = ", ".join(map(lambda x: str(x), boll))
bot.tell('#botnik', "%s triggered an error by typing \'%s %s\': %s. %s" % (source, trigger, arguments, sys.exc_info(), bolliStr))

print sys.exc_info()
print 'Error when executing command \'', trigger, '\':', traceback.extract_tb(sys.exc_info()[2])

Expand Down
31 changes: 31 additions & 0 deletions plugins/detect_encoding.py
@@ -0,0 +1,31 @@
# coding: utf-8

from __future__ import unicode_literals
from commands import Command
import chardet

class DetectEncodingCommmand(Command):
    # Plugin that answers ".chardet" / ".åäö" messages with a guess at the
    # character encoding of the line the sender's client transmitted.

    # NOTE(review): presumably the callback names the plugin framework
    # dispatches to this plugin -- confirm against the plugin loader.
    hooks = ['on_raw_privmsg']

    def __init__(self):
        pass

    def trig_chardet(self, bot, source, target, message, raw_line):
        """Detect your encoding. Usage: .chardet text with åäö (alias: .åäö)"""
        # NOTE(review): deliberately has no body; it presumably exists only so
        # the docstring above is available as help text -- confirm. The real
        # work happens in on_raw_privmsg.

    def on_raw_privmsg(self, bot, source, target, message, raw_line):
        """Reply to *target* with the detected encoding of *raw_line*.

        Only acts when *message* starts with one of the triggers prefixed by
        a dot; any other message is ignored.  *raw_line* is handed unchanged
        to detect_encoding() and the resulting text is sent via bot.tell().
        """
        triggers = ['chardet', 'åäö']
        prefixes = tuple('.%s' % (trigger) for trigger in triggers)

        # str.startswith accepts a tuple and matches if any prefix matches.
        if message.startswith(prefixes):
            bot.tell(target, detect_encoding(raw_line))

def detect_encoding(raw_line):
    """Guess the character encoding of *raw_line* and describe it in Swedish.

    raw_line -- the byte string to analyse; it is passed as-is to
                chardet.detect().

    Returns a message naming the detected encoding (upper-cased) together
    with chardet's confidence truncated to a whole-number percentage.  When
    chardet has no guess, a fallback message is returned instead of raising.
    """
    result = chardet.detect(raw_line)
    encoding = result.get("encoding")

    if not encoding:
        # chardet reports encoding=None when it cannot decide (e.g. empty
        # input); calling .upper() on that would raise AttributeError.
        return "Jag kan inte avgöra vilken teckenkodning du använder"

    # Treat a missing/None confidence as 0 so the formatting never fails.
    confidence = int((result.get("confidence") or 0) * 100)
    return "Du använder nog %s (%s%% säker)" % (encoding.upper(), confidence)

142 changes: 0 additions & 142 deletions plugins/httpget.py

This file was deleted.

3 changes: 3 additions & 0 deletions plugins/plugins.py
Expand Up @@ -34,6 +34,9 @@ def on_part(self, bot, nick, channel, reason):
def on_privmsg(self, bot, source, target, message):
pass

def on_raw_privmsg(self, bot, source, target, message, raw_line):
pass

def on_quit(self, bot, nick, reason):
pass

22 changes: 4 additions & 18 deletions plugins/standard.py
@@ -1,5 +1,6 @@
# coding: utf-8

from __future__ import unicode_literals
from commands import Command
import htmlentitydefs
import string
Expand Down Expand Up @@ -138,9 +139,6 @@ def trig_help(self, bot, source, target, trigger, argument):
else:
return "That's not a command! Try `help <command>`"

# def can_trigger(self, source, trigger):
# return source in ['serp!~serp@85.8.2.181.se.wasadata.net']

_get_temp_re = re.compile('^\s*(.+)\s*$')
class TempCommand(Command):
def __init__(self):
Expand Down Expand Up @@ -194,7 +192,7 @@ def on_unload(self):

class GoogleCommand(Command):
def trig_google(self, bot, source, target, trigger, argument):
url = 'http://www.google.com/search?rls=en&q=' + utility.escape(argument) + '&ie=UTF-8&oe=UTF-8'
url = 'http://www.google.com/search?rls=en&q=' + argument + '&ie=UTF-8&oe=UTF-8'

response = utility.read_url(url)

Expand All @@ -203,8 +201,6 @@ def trig_google(self, bot, source, target, trigger, argument):
data = re.sub(r"\n|\r|\r\n", "", data)
data = re.sub(r" +", " ", data)

print data

# try to extract video result
m = re.search(r'Video results for <em>.*?<\/em>.*?<td valign=top style="padding-right:10px"><a href="(.*?)" class=l.*?>(.*?)</a><br>',data)
if m:
Expand All @@ -218,7 +214,8 @@ def trig_google(self, bot, source, target, trigger, argument):
m = re.search('.*?font-size:138%">(.*?)<', data)
if m:
answer = m.group(1)
answer = answer.replace(' &#215;', '×').replace('<sup>', '^')
answer = utility.unescape(answer)
answer = answer.replace('<sup>', '^')
answer = re.sub('<.+?>', '', answer)
return answer

Expand Down Expand Up @@ -312,17 +309,6 @@ def trig_wp(self, bot, source, target, trigger, argument):

return "I couldn't find an article... :("

class AAOCommand(Command):
triggers = ['}{|', 'åäö', 'åäö']

def on_trigger(self, bot, source, target, trigger, argument):
if trigger == 'åäö':
return source+": Du använder nog Latin-1"
elif trigger == '}{|':
return source+": Du använder nog ISO-646"
else:
return source+": Du använder nog UTF-8"

class CollectCommand(Command):
def trig_collect(self, bot, source, target, trigger, argument):
import gc
Expand Down
1 change: 1 addition & 0 deletions plugins/title_reader.py
Expand Up @@ -74,6 +74,7 @@ def on_privmsg(self, bot, source, target, message):
if not tweetbool and target in settings.title_channels:
bot.tell(target, self.clean(url, title))
except utility.TimeoutException:
print "TitleReaderPlugin utility.TimeoutException for %s" % (url)
pass


Expand Down
14 changes: 6 additions & 8 deletions plugins/tweet.py
Expand Up @@ -38,13 +38,12 @@ def get_tweet_text_and_user(tweet):

status = api.GetStatus(tweet.idno)

# Use latin-1 to make IRCClient.send() happy
tweet.text = status.text.encode('latin-1', 'replace')
tweet.screen_name = status.user.screen_name.encode('latin-1', 'replace')
tweet.text = status.text
tweet.screen_name = status.user.screen_name

for url in status.urls:
latin_url = url.url.encode('latin-1', 'replace')
latin_expanded_url = url.expanded_url.encode('latin-1', 'replace')
latin_url = url.url
latin_expanded_url = url.expanded_url
tweet.text = tweet.text.replace(latin_url, latin_expanded_url)

return tweet
Expand All @@ -57,9 +56,8 @@ def get_user_description(tweet):

user = api.GetUser(screen_name=tweet.screen_name)

# Use latin-1 to make IRCClient.send() happy
tweet.user_name = user.name.encode('latin-1', 'replace')
tweet.user_description = user.description.encode('latin-1', 'replace')
tweet.user_name = user.name
tweet.user_description = user.description
return tweet

def get_tweet(message):
Expand Down

0 comments on commit d918643

Please sign in to comment.