Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make lastfm work with new pagination/recursing scheme
- Loading branch information
Philip (flip) Kromer
committed
Sep 1, 2009
1 parent
b161305
commit d2b80dc
Showing
13 changed files
with
677 additions
and
418 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,12 @@ | ||
require 'monkeyshines/scrape_request/raw_json_contents' | ||
require 'wuclan/lastfm/scrape/base.rb' | ||
require 'wuclan/lastfm/scrape/concrete.rb' | ||
|
||
module Wuclan | ||
module Lastfm | ||
module Scrape | ||
# Registers an autoload entry for each Last.fm scraper constant, pairing the | ||
# constant with the file path given on its line. | ||
# NOTE(review): this hunk is rendered without +/- markers, so the explicit | ||
# `require` lines above and the autoload lines here may be the old and new | ||
# versions of the same file shown together -- confirm against the real diff. | ||
autoload :Base, 'wuclan/lastfm/scrape/base.rb' | ||
autoload :LastfmJob, 'wuclan/lastfm/scrape/lastfm_job.rb' | ||
autoload :LastfmRequestStream, 'wuclan/lastfm/scrape/lastfm_request_stream.rb' | ||
autoload :LastfmArtistShoutsRequest, 'wuclan/lastfm/scrape/base.rb' | ||
autoload :LastfmArtistTopFansRequest, 'wuclan/lastfm/scrape/base.rb' | ||
autoload :LastfmArtistTopTracksRequest, 'wuclan/lastfm/scrape/base.rb' | ||
autoload :LastfmTrackTopFansRequest, 'wuclan/lastfm/scrape/base.rb' | ||
autoload :LastfmUserRequest, 'wuclan/lastfm/scrape/base.rb' | ||
end | ||
end | ||
end |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
require 'wuclan/lastfm/scrape/recursive_requests' | ||
|
||
module Wuclan | ||
module Lastfm | ||
module Scrape | ||
|
||
# Reopens Base to mix in the pagination modules, set the per-job item target, | ||
# and define how the request for a given page is built. | ||
Base.class_eval do | ||
# | ||
# Pagination | ||
# | ||
include Monkeyshines::ScrapeRequestCore::Paginating | ||
include Monkeyshines::ScrapeRequestCore::PaginatedWithLimit | ||
# include Monkeyshines::ScrapeRequestCore::PaginatedWithRate | ||
|
||
# Items to get each re-visit. As an example of the scale: if there are up to | ||
# 50 items per page, a target_items_per_job of 1000 will try to reschedule so | ||
# that its return visit makes about twenty page requests. The value of 150 | ||
# set here works out to about three such pages. | ||
self.target_items_per_job = 150 | ||
|
||
# Creates the paginated request for +page+. | ||
# Appends `rpp` (from req.max_items) to the request URL, and `max_id` | ||
# (one less than sess_span.min) when sess_span.min is set. | ||
# NOTE(review): this builds a TwitterSearchRequest with the same `rpp` / | ||
# `max_id` parameters as TwitterSearchJob#request_for_page -- it looks like a | ||
# copy-paste from the Twitter scraper. Confirm which Last.fm request class and | ||
# query parameters belong here. The Twitter version also accepts an optional | ||
# second `info` argument that this one does not. | ||
def request_for_page page | ||
req = TwitterSearchRequest.new(obj[:key], page) | ||
req.url << "&rpp=#{req.max_items}" | ||
req.url << "&max_id=#{sess_span.min - 1}" if sess_span.min | ||
req | ||
end | ||
end | ||
|
||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
require 'monkeyshines/scrape_request/raw_json_contents' | ||
module Wuclan | ||
module Lastfm | ||
module Scrape | ||
|
||
# | ||
# Simple requestables | ||
# | ||
|
||
# Reopens LastfmArtistInfoRequest to assign the list of artist-related request | ||
# classes held in its `requestables` setting. | ||
# NOTE(review): presumably these are the follow-up requests issued for each | ||
# artist -- confirm where `requestables` is consumed. | ||
class LastfmArtistInfoRequest | ||
self.requestables = [ | ||
LastfmArtistSimilarRequest, | ||
LastfmArtistTopAlbumsRequest, | ||
LastfmArtistTopTracksRequest, | ||
LastfmArtistShoutsRequest, | ||
LastfmArtistEventsRequest, | ||
LastfmArtistTopFansRequest, | ||
# LastfmArtistTopTagsRequest, LastfmArtistImagesRequest, LastfmArtistPodcastRequest, | ||
] | ||
end | ||
|
||
# Reopens LastfmTrackInfoRequest to assign the track-related request classes | ||
# held in its `requestables` setting. | ||
class LastfmTrackInfoRequest | ||
self.requestables = [LastfmTrackSimilarRequest, LastfmTrackTopFansRequest, LastfmTrackTopTagsRequest] | ||
end | ||
# Reopens LastfmEventInfoRequest to assign the event-related request classes | ||
# held in its `requestables` setting. | ||
class LastfmEventInfoRequest | ||
self.requestables = [LastfmEventAttendeesRequest, LastfmEventShoutsRequest] | ||
end | ||
# Reopens LastfmUserTopTagsRequest to assign the user-related request classes | ||
# held in its `requestables` setting. | ||
# NOTE(review): the trailing `# LastfmUserInfoRequest` on the class line, and | ||
# the "needs auth" list below naming LastfmUserInfoRequest, suggest this class | ||
# stands in for the user-info request -- confirm. | ||
class LastfmUserTopTagsRequest # LastfmUserInfoRequest | ||
self.requestables = [ | ||
# LastfmUserTopTagsRequest, | ||
LastfmUserEventsRequest, | ||
LastfmUserPastEventsRequest, | ||
LastfmUserFriendsRequest, # recenttracks | ||
LastfmUserNeighboursRequest, | ||
LastfmUserLovedTracksRequest, | ||
LastfmUserRecentTracksRequest, | ||
LastfmUserShoutsRequest, | ||
LastfmUserTopAlbumsRequest, # period (Optional) : overall | 7day | 3month | 6month | 12month | ||
LastfmUserTopArtistsRequest, # period (Optional) : overall | 7day | 3month | 6month | 12month | ||
LastfmUserTopTracksRequest, # period (Optional) : overall | 7day | 3month | 6month | 12month | ||
# uninteresting(?): LastfmUserPlaylistsRequest, LastfmUserWeeklyAlbumChartRequest, LastfmUserWeeklyArtistChartRequest, LastfmUserWeeklyChartListRequest, LastfmUserWeeklyTrackChartRequest, | ||
# needs auth: LastfmUserInfoRequest, LastfmUserRecentStationsRequest, LastfmUserRecommendedArtistsRequest, LastfmUserRecommendedEventsRequest, | ||
] | ||
end | ||
|
||
# | ||
# Recursive requests based on contents | ||
# | ||
|
||
# Mixin for requests that can be repeated over several time periods. | ||
module LastfmTimeWindowed | ||
# Runs the inherited recursion, then -- only when this request's identifier | ||
# does not already contain `&period=` -- yields one more request of the same | ||
# class per period ('7day', '3month', '6month'), with `&period=<period>` | ||
# appended to the identifier. | ||
# The yielded requests keep this request's generation (it is not incremented). | ||
def recursive_requests *args, &block | ||
super(*args, &block) | ||
unless (identifier =~ /&period=/) | ||
['7day', '3month', '6month'].each do |period| | ||
req = self.class.new(identifier+"&period=#{period}") | ||
req.generation = generation.to_i | ||
yield req | ||
end | ||
end | ||
end | ||
end | ||
# Mixes LastfmTimeWindowed into each of the user "top" request classes listed. | ||
[LastfmUserTopArtistsRequest, LastfmUserTopAlbumsRequest, LastfmUserTopTracksRequest | ||
].each do |klass| | ||
klass.class_eval do include LastfmTimeWindowed ; end | ||
end | ||
|
||
# Mixin for requests whose items are artists. | ||
module LastfmContainsArtists | ||
# Runs the inherited recursion, then yields one LastfmArtistInfoRequest per | ||
# entry of `items`, built from the URL-encoded 'name' of the entry and placed | ||
# one generation deeper than this request. | ||
def recursive_requests *args, &block | ||
super(*args, &block) | ||
items.each do |artist| | ||
req = LastfmArtistInfoRequest.new(url_encode(artist['name'])) | ||
req.generation = generation.to_i + 1 | ||
yield req | ||
end | ||
end | ||
end | ||
# Mixes LastfmContainsArtists into each listed request class. | ||
[ LastfmArtistSimilarRequest, LastfmGeoTopArtistsRequest, LastfmTagTopArtistsRequest, | ||
LastfmUserRecommendedArtistsRequest, LastfmUserTopArtistsRequest, | ||
].each do |klass| | ||
klass.class_eval do include LastfmContainsArtists ; end | ||
end | ||
|
||
# Mixin for requests whose items are albums.
module LastfmContainsAlbums
  # Runs the inherited recursion, then yields one LastfmAlbumInfoRequest per
  # entry of +items+, one generation deeper than this request.
  #
  # Each follow-up request is built from the item's 'name', its 'mbid', and
  # the artist name found under the item's 'artist' entry.
  def recursive_requests *args, &block
    super(*args, &block)
    items.each do |item|
      artist = item['artist']
      # The artist name is read from a hash-like value under 'name' or,
      # failing that, '#text'. Anything else (nil, a String, an Array) gives
      # no artist. The explicit check replaces a blanket `rescue nil`, which
      # hid unrelated errors and let String#[] return the literal substring
      # "name" for a string such as "Unnamed".
      obj_artist = artist.respond_to?(:key?) ? (artist['name'] || artist['#text']) : nil
      req = LastfmAlbumInfoRequest.from_identifier_hash(
        item['name'], :artist => obj_artist, :mbid => item['mbid'])
      req.generation = generation.to_i + 1
      yield req
    end
  end
end
# Mixes LastfmContainsAlbums into each listed request class. | ||
[ LastfmArtistTopAlbumsRequest, LastfmTagTopAlbumsRequest, LastfmUserTopAlbumsRequest, | ||
].each do |klass| | ||
klass.class_eval do include LastfmContainsAlbums ; end | ||
end | ||
|
||
# Mixin for requests whose items are tracks.
module LastfmContainsTracks
  # Runs the inherited recursion, then yields one LastfmTrackInfoRequest per
  # entry of +items+, one generation deeper than this request.
  #
  # Each follow-up request is built from the track's 'name', its 'mbid', and
  # the artist name found under the track's 'artist' entry.
  def recursive_requests *args, &block
    super(*args, &block)
    items.each do |track|
      artist = track['artist']
      # The artist name is read from a hash-like value under 'name' or,
      # failing that, '#text'. Anything else (nil, a String, an Array) gives
      # no artist. The explicit check replaces a blanket `rescue nil`, which
      # hid unrelated errors and let String#[] return the literal substring
      # "name" for a string such as "Unnamed".
      obj_artist = artist.respond_to?(:key?) ? (artist['name'] || artist['#text']) : nil
      req = LastfmTrackInfoRequest.from_identifier_hash(
        track['name'], :artist => obj_artist, :mbid => track['mbid'])
      req.generation = generation.to_i + 1
      yield req
    end
  end
end
# Mixes LastfmContainsTracks into each listed request class. | ||
[ LastfmArtistTopTracksRequest, LastfmGeoTopTracksRequest, LastfmTagTopTracksRequest, | ||
LastfmTrackSimilarRequest, LastfmUserLovedTracksRequest, LastfmUserRecentTracksRequest, | ||
LastfmUserTopTracksRequest, | ||
].each do |klass| | ||
klass.class_eval do include LastfmContainsTracks ; end | ||
end | ||
|
||
# Mixin for requests whose items are events. | ||
module LastfmContainsEvents | ||
# Runs the inherited recursion, then yields one LastfmEventInfoRequest per | ||
# entry of `items`, built from the entry's 'id' and placed one generation | ||
# deeper than this request. | ||
def recursive_requests *args, &block | ||
super(*args, &block) | ||
items.each do |event| | ||
req = LastfmEventInfoRequest.new(event['id']) | ||
req.generation = generation.to_i + 1 | ||
yield req | ||
end | ||
end | ||
end | ||
# Mixes LastfmContainsEvents into each listed request class. | ||
[ LastfmArtistEventsRequest, LastfmGeoEventsRequest, LastfmUserEventsRequest, | ||
LastfmUserPastEventsRequest, LastfmUserRecommendedEventsRequest, LastfmVenueEventsRequest, | ||
LastfmVenuePastEventsRequest, | ||
].each do |klass| | ||
klass.class_eval do include LastfmContainsEvents ; end | ||
end | ||
|
||
# Mixin for requests whose items are users. | ||
module LastfmContainsUsers | ||
# Runs the inherited recursion, then yields one LastfmUserTopTagsRequest per | ||
# entry of `items`, built from the URL-encoded 'name' of the entry and placed | ||
# one generation deeper than this request. | ||
def recursive_requests *args, &block | ||
super(*args, &block) | ||
items.each do |user| | ||
req = LastfmUserTopTagsRequest.new(url_encode(user['name'])) | ||
req.generation = generation.to_i + 1 | ||
yield req | ||
end | ||
end | ||
end | ||
# Mixes LastfmContainsUsers into each listed request class. | ||
[ LastfmArtistTopFansRequest, LastfmEventAttendeesRequest, LastfmGroupMembersRequest, | ||
LastfmTrackTopFansRequest, LastfmUserFriendsRequest, LastfmUserNeighboursRequest, | ||
].each do |klass| | ||
klass.class_eval do include LastfmContainsUsers ; end | ||
end | ||
|
||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# | ||
# # | ||
# # | ||
# # query terms must be URL-encoded | ||
# # (use '+' for space; %23 # %27 ' etc) | ||
# # | ||
# def initialize *args | ||
# super *args | ||
# raise "Query term missing" if self.query_term.blank? | ||
# self[:query_term].strip! | ||
# [:priority, :prev_items, :prev_span_min, :prev_span_max].each{|attr| self[attr] = self[attr].to_i if self[attr] } | ||
# self[:prev_rate] = self[:prev_rate].to_f | ||
# self[:priority] = DEFAULT_PRIORITY if (self[:priority] == 0) | ||
# self[:prev_rate] = nil if (self[:prev_rate] < 1e-6) | ||
# end | ||
# | ||
# class TwitterSearchStream < Monkeyshines::RequestStream::SimpleRequestStream | ||
# # | ||
# # for the given user_id, | ||
# # gets the user | ||
# # and then each of the requests in more_request_klasses | ||
# # | ||
# def each *args, &block | ||
# request_store.each do |*raw_job_args| | ||
# job = klass.new(*raw_job_args) | ||
# # do_faking(job) | ||
# job.each_request(*args, &block) | ||
# end | ||
# end | ||
# end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,25 @@ | ||
# require 'monkeyshines/scrape_request' | ||
# require 'monkeyshines/scrape_request/paginated' | ||
# require 'monkeyshines/scrape_request/raw_json_contents' | ||
# module Wuclan | ||
# module Twitter | ||
# module Scrape | ||
# # | ||
# # TwitterSearchJob for the twitter Search API | ||
# # | ||
# # * Manages a series of paginated requests from first result back to last item in | ||
# # the previous scrape_job. | ||
# # | ||
# # | ||
# class TwitterSearchJob < Struct.new( | ||
# :query_term, | ||
# :priority, | ||
# :prev_rate, :prev_items, :prev_span_min, :prev_span_max | ||
# ) | ||
# | ||
# end | ||
# end | ||
# end | ||
# end | ||
# Paginated job for the Twitter search API, defined as a subclass of Edamame::Job. | ||
class TwitterSearchJob < Edamame::Job | ||
# | ||
# Pagination | ||
# | ||
include Monkeyshines::ScrapeRequestCore::Paginating | ||
include Monkeyshines::ScrapeRequestCore::PaginatedWithLimit | ||
include Monkeyshines::ScrapeRequestCore::PaginatedWithRate | ||
# API max pages | ||
self.hard_request_limit = 15 | ||
|
||
# Items to get each re-visit. If there are up to 50 items per page, | ||
# target_items_per_job of 1000 will try to reschedule so that its return visit | ||
# makes about twenty page requests. | ||
# | ||
# For Twitter, 1500 is the max, so 1000 gives a safety margin. | ||
self.target_items_per_job = 1000 | ||
|
||
# Creates the paginated request for +page+. | ||
# +info+ is accepted but not used inside this method. | ||
# Appends `rpp` (from req.max_items) to the request URL, and `max_id` | ||
# (one less than sess_span.min) when sess_span.min is set. | ||
def request_for_page page, info=nil | ||
req = TwitterSearchRequest.new(obj[:key], page) | ||
req.url << "&rpp=#{req.max_items}" | ||
req.url << "&max_id=#{sess_span.min - 1}" if sess_span.min | ||
req | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.