Permalink
Browse files

Initial commit

  • Loading branch information...
0 parents commit 069db0506e21a1d8d9f99c6d04e21ef7ae70810e @christianhellsten committed Sep 28, 2012
@@ -0,0 +1,20 @@
+*.gem
+*.rbc
+**/*.log
+**/*.sqlite3
+Gemfile.lock
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
@@ -0,0 +1,4 @@
+# Resolve gems from the public RubyGems index.
+source 'https://rubygems.org'
+
+# Specify your gem's dependencies in feedr.gemspec
+# (the `gemspec` directive below tells Bundler to read them from that file).
+gemspec
@@ -0,0 +1,5 @@
+guard 'minitest' do
+ watch(%r|^test/*/*/test_(.*)\.rb|)
+ watch(%r{^lib/(.*/)?([^/]+)\.rb$}) { |m| "test/#{m[1]}test_#{m[2]}.rb" }
+ watch(%r|^test/test_helper\.rb|) { "test" }
+end
22 LICENSE
@@ -0,0 +1,22 @@
+Copyright (c) 2012 Christian Hellsten
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,32 @@
+# Feedr
+
+Feedr helps you fetch and store Atom and RSS feeds.
+
+Use it, for example, to build a feed reader.
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+ gem 'feedr'
+
+And then execute:
+
+ $ bundle
+
+Or install it yourself as:
+
+ $ gem install feedr
+
+## Usage
+
+See feedr-example:
+http://github.com/christianhellsten/feedr-example
+
+## Contributing
+
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Added some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request
@@ -0,0 +1,9 @@
+#!/usr/bin/env rake
+require "bundler/gem_tasks"
+
+task :console do
+ require './test/test_helper'
+ require 'pry'
+ include Feedr::ActiveRecord
+ binding.pry
+end
@@ -0,0 +1,8 @@
+development: &sqlite
+ adapter: sqlite3
+ database: db/development.sqlite3
+ pool: 5
+ timeout: 5000
+test:
+ <<: *sqlite
+ database: db/test.sqlite3
@@ -0,0 +1,7 @@
+feeds:
+ # How often should we update feeds (minutes)
+ update_interval: 20
+ # How many errors before we stop trying (integer)
+ invalidate_after_n_errors: 10
+ # How often do we fetch invalid feeds (days)
+ retry_invalid_after: 7
@@ -0,0 +1,27 @@
+# -*- encoding: utf-8 -*-
+require File.expand_path('../lib/feedr/version', __FILE__)
+
+Gem::Specification.new do |gem|
+ gem.authors = ["Christian Hellsten"]
+ gem.email = ["christian@aktagon.com"]
+ gem.description = %q{Fetches and stores feeds in your database. Can be used as a backend for apps that read feeds.}
+ gem.summary = %q{Backend for feed readers.}
+ gem.homepage = ""
+
+ gem.add_dependency "feedzirra"
+ gem.add_dependency "activerecord", '>=3.1'
+
+ gem.add_development_dependency "pry"
+ gem.add_development_dependency "guard"
+ gem.add_development_dependency "fakeweb"
+ gem.add_development_dependency "sqlite3"
+ gem.add_development_dependency "guard-minitest"
+ gem.add_development_dependency "database_cleaner"
+
+ gem.files = `git ls-files`.split($\)
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+ gem.name = "feedr" # LOL
+ gem.require_paths = ["lib"]
+ gem.version = Feedr::VERSION
+end
@@ -0,0 +1,54 @@
+require 'feedr/version'
+require 'active_support/all'
+#require 'pry'
+
+module Feedr
+ require_relative 'feedr/active_record'
+ #require_relative 'feedr/workers/update_worker'
+
+ mattr_accessor :update_interval
+ mattr_accessor :configuration
+ mattr_accessor :logger
+ mattr_accessor :invalidate_after_n_errors
+ mattr_accessor :retry_invalid_after
+
+ def self.migrate
+ require_relative 'feedr/active_record/schema'
+ Feedr::ActiveRecord::Schema.new.up
+ end
+
+ def self.init_environment
+ environment = (ENV['RACK_ENV'] || ENV['RAILS_ENV'])
+ raise "No environment specified. Use RACK_ENV or RAILS_ENV." unless environment
+ environment = environment.to_sym
+ ::ActiveRecord::Base.logger = Logger.new(File.join('log', "#{environment}.log"))
+ ::ActiveRecord::Base.logger.level = Logger::DEBUG
+
+ config = YAML::load(File.new(File.join('config', 'database.yml')))
+ config = deep_symbolize_keys(config)
+ ::ActiveRecord::Base.establish_connection(config[environment])
+ end
+
+ # From Rails
+ def self.deep_symbolize_keys(hash)
+ result = {}
+ hash.each do |key, value|
+ result[(key.to_sym rescue key)] = value.is_a?(Hash) ? deep_symbolize_keys(value) : value
+ end
+ result
+ end
+
+ def self.init(config_file=File.join('config', 'feedr.yml'))
+ raise "config/feedr.yml is missing" unless File.exist?(config_file)
+ config = YAML.load(File.new(config_file))
+ config = deep_symbolize_keys(config)
+ self.configuration = config
+ self.update_interval = config[:feeds][:update_interval]
+ self.invalidate_after_n_errors = config[:feeds][:invalidate_after_n_errors]
+ self.retry_invalid_after = config[:feeds][:retry_invalid_after]
+ end
+ self.configuration = init
+end
+
+# Default logger for Feedr; 'log/feedr.log' is a relative path, so it is
+# resolved against the current working directory.
+Feedr.logger = Logger.new(File.join('log', "feedr.log"))
+#Feedr.logger.level = Logger::DEBUG
@@ -0,0 +1,9 @@
+require 'active_record'
+
+# Namespace for the ActiveRecord-backed parts of Feedr; loads the sanitizer
+# mixin and the Feed and Entry models.
+module Feedr
+ module ActiveRecord
+ # The sanitizer is required first: both Feed and Entry include it in their class bodies.
+ require_relative 'active_record/sanitizer'
+ require_relative 'active_record/feed'
+ require_relative 'active_record/entry'
+ end
+end
@@ -0,0 +1,37 @@
+module Feedr
+ module ActiveRecord
+ class Entry < ::ActiveRecord::Base
+ include Sanitizer
+ self.table_name = :feedr_entries
+ belongs_to :feed
+ validates :feed, presence: true
+ validates :url, presence: true
+ validates :guid, presence: true
+ before_validation :sanitize_attributes
+ before_validation :create_guid
+ attr_accessible :feed, :title, :url, :published_at, :summary, :author, :content
+ scope :latest, order('published_at desc')
+
+ def exists?
+ Entry.exists?(guid: create_guid)
+ end
+
+ def copy!(other)
+ self.title = other.title
+ self.url = other.url
+ self.published_at = other.published_at
+ self.summary = other.summary
+ self.author = other.author
+ self.guid = other.guid
+ self.content = other.content
+ save!
+ end
+
+ protected
+ # GUID is digest of <item URL>:<feed URL>
+ def create_guid
+ self.guid = Digest::MD5.hexdigest([feed.url, url].join(':'))
+ end
+ end
+ end
+end
@@ -0,0 +1,107 @@
+require 'feedzirra'
+
+module Feedr
+ module ActiveRecord
+ class Feed < ::ActiveRecord::Base
+ include Feedr::ActiveRecord::Sanitizer
+ self.table_name = :feedr_feeds
+ attr_accessible :url, :title
+ has_many :entries, dependent: :delete_all
+ before_validation :sanitize_attributes
+ validates :url, presence: true, uniqueness: true
+ scope :invalid, lambda {
+ where(["error_count >= ?", Feedr.invalidate_after_n_errors])
+ }
+ scope :fresh, lambda {
+ where(["last_fetched_at >= ?", Feedr.update_interval.minutes.ago])
+ }
+ scope :stale, lambda {
+ sql = [
+ "last_fetched_at < ?",
+ "(last_error_at > ? or last_error_at is null)" # No need to check error count
+ ].join(' and ')
+ sql = [sql, Feedr.update_interval.minutes.ago, Feedr.retry_invalid_after.days.ago]
+ where(sql)
+ }
+
+ def self.fetch_stale(force = false)
+ feeds = force ? Feed.all : Feed.stale
+ urls = feeds.map(&:url)
+ on_success = lambda { |url, feed|
+ # NOTE never called. bug.
+ }
+ on_failure = lambda { |url, code, headers, body|
+ # NOTE never called. bug.
+ }
+ Feedzirra::Feed.fetch_and_parse(
+ urls,
+ :on_success => on_success,
+ :on_failure => on_failure
+ ).each do |url, parsed_feed|
+ feed = feeds.detect {|feed| feed.url == url}
+ unless parsed_feed.is_a?(Fixnum) # Great...
+ feed.after_parse(parsed_feed)
+ else
+ feed.on_error(parsed_feed)
+ end
+ feed.save!
+ end
+ feeds
+ end
+
+ def on_error(feed)
+ self.last_fetched_at = Time.now
+ self.last_error_at = Time.now
+ self.error_count = error_count + 1
+ self.error = feed # Fixnum = HTTP status
+ end
+
+ def after_parse(feed)
+ new_entries = []
+ self.last_fetched_at = Time.now
+ self.last_error_at = nil
+ self.error = nil
+ self.error_count = nil
+ self.title = feed.title
+ self.etag = feed.etag
+ self.last_modified_at = feed.last_modified
+ feed.entries.each do |feed_item|
+ begin
+ entry = to_entry(feed_item)
+ if entry.exists?
+ Entry.find_by_guid(entry.guid).copy!(entry)
+ else
+ self.entries << entry
+ end
+ rescue Exception => ex
+ Feedr.logger.warn [ex.message, ex.backtrace.join("\n")].join("\n")
+ end
+ end
+ new_entries
+ end
+
+ def fetch
+ feed = Feedzirra::Feed.fetch_and_parse(url)
+ after_parse(feed)
+ save!
+ end
+
+ def invalid?
+ errors >= Feedr.invalidate_after_n_errors
+ end
+
+ def to_entry(feed_item)
+ Entry.new do |entry|
+ entry.feed = self
+ entry.title = feed_item.title
+ entry.url = feed_item.url
+ entry.published_at = feed_item.published
+ entry.summary = feed_item.summary
+ entry.author = feed_item.author
+ #entry.guid = feed_item.entry_id # we are paranoid and use f**kin* URLs
+ entry.content = feed_item.content
+ end
+ end
+ end
+ end
+end
@@ -0,0 +1,13 @@
+module Feedr
+ module ActiveRecord
+ module Sanitizer
+ protected
+ def sanitize_attributes
+ attributes.each do |key, value|
+ value.strip! if value && value.respond_to?(:strip)
+ end
+ true
+ end
+ end
+ end
+end
Oops, something went wrong.

0 comments on commit 069db05

Please sign in to comment.