Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,13 @@ cp config.py.sample config.py
cp feeds.sql.sample feeds.sql
```

Edit `configs.py` to fit your needs and IRC settings. All feeds from `feeds.sql` will be imported one the first start.
Edit `config.py` to fit your needs and IRC settings. All feeds from `feeds.sql` will be imported on the first start.

You might want to update all feeds before connecting to the IRC server to avoid spamming the channel (and possibly getting banned from your IRC server). Either set `update_before_connecting = True` in `config.py` or run the update script before starting the bot:

```
python2 feedupdater.py
```

To start the bot, run:

Expand Down
69 changes: 14 additions & 55 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from colour import Colours
from db import FeedDB
from config import Config
from feedupdater import FeedUpdater

class IRCBot(irc.bot.SingleServerIRCBot):
def __init__(self, config, db, on_connect_cb):
Expand Down Expand Up @@ -152,69 +153,27 @@ class Bot(object):
def __init__(self):
    """Build the configuration, database, feed updater and IRC client."""
    config = Config()
    database = FeedDB(config)

    self.__config = config
    self.__db = database
    # The updater shares the same config and database as the IRC client.
    self.__feedupdater = FeedUpdater(config, database)
    # on_started is handed to the IRC client as its on-connect callback.
    self.__irc = IRCBot(config, database, self.on_started)
    self.__threads = []
    # Flipped to True by on_started so its body only runs once.
    self.__connected = False

def start(self):
    """Run the IRC bot's start() on a new thread and return immediately."""
    irc_thread = threading.Thread(target=self.__irc.start)
    irc_thread.start()

def initial_feed_update(self):
    """Optionally fetch every feed once before the IRC connection is made.

    Runs only when the config enables `update_before_connecting`. New items
    found during this pass are printed to stdout instead of being posted to
    IRC, and the call blocks until the pass has finished (forever=False).
    """
    def print_feed_update(feed_title, news_title, news_url, news_date):
        # Same '||'-separated line format the standalone updater script prints.
        print("[+]: {}||{}||{}||{}".format(feed_title, news_title, news_url, news_date))

    # A config.py copied from an older sample does not define this option;
    # treat a missing attribute as "disabled" instead of crashing at startup.
    if not getattr(self.__config, 'update_before_connecting', False):
        return

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and keeps the block valid on Python 3.
    print("Started pre-connection updates!")
    self.__feedupdater.update_feeds(print_feed_update, False)
    print("DONE!")

def on_started(self):
"""Gets executed after the IRC thread has successfully established a connection."""
if not self.__connected:
print "Connected!"

# Start one fetcher thread per feed
for feed in self.__db.get_feeds():
t = threading.Thread(target=self.__fetch_feed, args=(feed,))
t.start()
self.__threads.append(t)
print "Started fetcher threads!"
self.__connected = True

def __fetch_feed(self, feed_info):
"""Fetches a RSS feed, parses it and updates the database and/or announces new news."""
while 1:
try:
# Parse a feed's url
news = feedparser.parse( feed_info[2] )

# Reverse the ordering. Oldest first.
for newsitem in news.entries[::-1]:
newstitle = newsitem.title
if self.__config.shorturls:
newsurl = tinyurl.create_one(newsitem.link) # Create a short link
if newsurl == "Error": #If that fails, use the long version
newsurl = newsitem.link
else:
newsurl = newsitem.link

# Try to get the published or updated date. Otherwise set it to 'no date'
try:
# Get date and parse it
newsdate = dateutil.parser.parse(newsitem.published)
# Format date based on 'dateformat' in config.py
newsdate = newsdate.strftime(self.__config.dateformat)

except Exception as e:
try:
# Get date and parse it
newsdate = dateutil.parser.parse(newsitem.updated)
# Format date based on 'dateformat' in config.py
newsdate = newsdate.strftime(self.__config.dateformat)

except Exception as e:
newsdate = "no date"

# Update the database. If it's a new issue, post it to the channel
is_new = self.__db.insert_news(feed_info[0], newstitle, newsitem.link, newsdate)
if is_new:
self.__irc.post_news(feed_info[1], newstitle, newsurl, newsdate)
print "Updated: " + feed_info[1]
except Exception as e:
print e
print "Failed: " + feed_info[1]

# sleep frequency minutes
time.sleep(int(feed_info[3])*60)
self.__feedupdater.update_feeds(self.__irc.post_news, True)
print "Started feed updates!"
self.__connected = True
1 change: 1 addition & 0 deletions config.py.sample
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ class Config(object):
self.shorturls = False
self.dateformat = '%Y-%m-%d %H:%M:%S %z'
self.feedlimit = 10
self.update_before_connecting = True #Update all feeds before connecting to the IRC server
107 changes: 107 additions & 0 deletions feedupdater.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/python2.7

import feedparser
import datetime
import dateutil.parser
import signal
import time
import tinyurl
import threading
import os
from db import FeedDB
from config import Config

class FeedUpdater(object):
    """Fetches all feeds listed in the database, one worker thread per feed.

    Each worker parses its feed, stores the entries through the database
    object and reports entries the database flags as new to an optional
    callback.
    """

    def __init__(self, config, db):
        """Keep references to the collaborators.

        config -- object read for `shorturls` and `dateformat`.
        db     -- object providing `get_feeds()` and `insert_news(...)`.
        """
        self.__config = config
        self.__db = db
        # Only long-running (forever=True) workers are kept here; one-shot
        # workers are joined and dropped inside update_feeds.
        self.__threads = []

    def update_feeds(self, callback=None, forever=False):
        """Start one fetcher thread per feed returned by `db.get_feeds()`.

        callback -- optional callable invoked as
                    callback(feed_title, news_title, news_url, news_date)
                    for every entry the database reports as new.
        forever  -- when False, every feed is fetched once and this call
                    blocks until all workers are done; when True, the workers
                    keep polling and this call returns right after starting
                    them.
        """
        started = []
        for feed in self.__db.get_feeds():
            feed_info = {
                'id': feed[0],
                'title': feed[1],
                'url': feed[2],
                # Column 3 is used as the polling interval in minutes.
                'frequency': feed[3],
            }
            worker = threading.Thread(target=self.__fetch_feed,
                                      args=(feed_info, callback, forever))
            worker.start()
            started.append(worker)

        if forever:
            self.__threads.extend(started)
            return

        # Join only the workers started by this call. Removing from a shared
        # list while iterating it skipped every other thread, and joining
        # older forever-workers would never return.
        for worker in started:
            worker.join()

    def __display_url(self, link):
        """Return the URL to announce: shortened if configured, else `link`."""
        if not self.__config.shorturls:
            return link
        try:
            short = tinyurl.create_one(link)
        except Exception:
            # Shortening is best-effort; fall back to the long version.
            return link
        # The shortener signals failure with the literal string "Error".
        return link if short == "Error" else short

    def __format_date(self, newsitem):
        """Return the entry's published (else updated) date, formatted with
        `config.dateformat`, or 'no date' when neither can be parsed."""
        for attribute in ('published', 'updated'):
            try:
                parsed = dateutil.parser.parse(getattr(newsitem, attribute))
                return parsed.strftime(self.__config.dateformat)
            except Exception:
                # Missing attribute or unparsable value: try the next source.
                continue
        return "no date"

    def __fetch_feed(self, feed_info, callback, forever):
        """Fetches a RSS feed, parses it and updates the database and/or announces new news.

        feed_info -- dict with the keys 'id', 'title', 'url' and 'frequency'.
        callback  -- see update_feeds; may be None.
        forever   -- when True, sleep 'frequency' minutes and fetch again.
        """
        while True:
            try:
                # Parse a feed's url
                news = feedparser.parse(feed_info['url'])

                # Reverse the ordering. Oldest first.
                for newsitem in news.entries[::-1]:
                    newstitle = newsitem.title
                    newslink = newsitem.link
                    newsdate = self.__format_date(newsitem)

                    # Update the database. If it's a new issue, report it.
                    is_new = self.__db.insert_news(feed_info['id'], newstitle, newslink, newsdate)
                    if is_new and callback is not None:
                        # Shorten lazily: only entries that are actually
                        # announced cost a request to the URL shortener.
                        callback(feed_info['title'], newstitle,
                                 self.__display_url(newslink), newsdate)
                # Single-argument print(...) prints the same text on Python 2.
                print("Updated: " + feed_info['title'])
            except Exception as e:
                # A failing feed must not kill its worker; log and carry on.
                print(e)
                print("Failed: " + feed_info['title'])

            if not forever:
                break

            # sleep frequency minutes
            time.sleep(int(feed_info['frequency']) * 60)

if __name__ == "__main__":
    def signal_handler(signum, frame):
        """Report the interrupt and leave the process at once via os._exit."""
        print("Caught SIGINT, terminating.")
        os._exit(0)

    def print_line(feed_title, news_title, news_url, news_date):
        """Print one new news item as a single '||'-separated line."""
        fields = (feed_title, news_title, news_url, news_date)
        print("[+]: {}||{}||{}||{}".format(*fields))

    def main():
        """Fetch every feed once, printing the items reported as new."""
        settings = Config()
        database = FeedDB(settings)
        FeedUpdater(settings, database).update_feeds(print_line, False)

    # Install the Ctrl-C handler before the update pass starts.
    signal.signal(signal.SIGINT, signal_handler)
    main()
2 changes: 2 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bot import Bot
from feedupdater import FeedUpdater
import os
import signal

Expand All @@ -11,6 +12,7 @@ def signal_handler(signal, frame):
if __name__ == "__main__":
bot = Bot()
bot._Bot__irc.connection.buffer_class.errors = 'replace' # prevent utf-8 error in jaraco.stream
bot.initial_feed_update()
bot.start()
signal.signal(signal.SIGINT, signal_handler)
while True:
Expand Down