Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
79 lines (62 sloc) 2.01 KB
require 'time' # Time.parse is provided by the stdlib 'time' extension, not by core Time.

# Twitter search source: pages through search results for a query and
# normalizes every tweet into a plain Hash.
#
# `fetch_json` is not defined in this class; presumably it is inherited from
# Illtra::Base and returns the decoded JSON payload -- TODO confirm.
class Illtra::Twitter < Illtra::Base
  # Endpoint every page request is issued against.
  BASE_URL = 'https://api.twitter.com/1.1/search/tweets.json'.freeze

  # Page size forced onto every request (initial and follow-up pages).
  RESULTS_PER_PAGE = 100

  # Upper bound on the number of pages a single #search call will fetch.
  MAX_PAGES = 10

  # @param query [String] search terms, handed to Illtra::Base#initialize
  #   (presumably stored as @query -- TODO confirm).
  def initialize(query)
    super(query)
    @refresh = nil
    @base_url = BASE_URL
    # The page size is given exactly once; two conflicting `rpp` values in the
    # same query string leave it to the server to pick one.
    # NOTE(review): @query is interpolated verbatim. If Illtra::Base does not
    # URL-escape it, queries containing spaces, '#' or '&' produce a broken URL.
    @next_page = "?q=#{@query}&include_entities=false&result_type=recent&rpp=#{RESULTS_PER_PAGE}"
    @last_next_page = nil
    @data = []
    @total = 0
    @kept = 0
  end

  # Fetches up to MAX_PAGES pages, starting from the current @next_page.
  #
  # Stops early when the payload reports no further page or when the reported
  # next page equals the one just fetched (guards against looping forever).
  # Running totals are accumulated in @total / @kept and printed to stdout;
  # the fetched tweets are also appended to @data.
  #
  # NOTE(review): once pagination is exhausted @next_page is nil, so a later
  # call requests the bare endpoint; @refresh is stored but never read here.
  #
  # @return [Array<Hash>] normalized tweets fetched by this call only
  def search
    data = []
    MAX_PAGES.times do
      puts "[Twitter] Fetching: #{@next_page}"
      raw_chunk = fetch_json("#{@base_url}#{@next_page}")
      @last_next_page = @next_page
      @next_page, @refresh, results, total, kept = normalize_chunk(raw_chunk)
      @total += total
      @kept += kept
      data.concat(results)
      break if @next_page.nil? || @last_next_page == @next_page
    end
    puts "[Twitter] Total: #{@total}"
    puts "[Twitter] Kept: #{@kept}"
    @data += data
    data
  end

  # Rebuilds a "next page" query string with the page size forced to
  # RESULTS_PER_PAGE.
  #
  # The argument is not modified. Parameter order is preserved; an existing
  # `rpp` keeps its position, otherwise `rpp` is appended. Values containing
  # '=' and parameters with an empty value survive the round trip unchanged.
  #
  # @param next_page [String, Object] query string, with or without leading '?'
  # @return [String, nil] rebuilt query string starting with '?', or nil when
  #   the argument is not a String
  def get_next_page(next_page)
    return nil unless next_page.is_a?(String)

    params = {}
    next_page.sub(/\A\?/, '').split('&').each do |pair|
      next if pair.empty?

      # Limit of 2 keeps any '=' inside the value intact.
      key, value = pair.split('=', 2)
      params[key] = value
    end
    params['rpp'] = RESULTS_PER_PAGE # Force 100 per page.

    # A parameter that had no '=' at all has a nil value and is emitted bare.
    query = params.map { |pair| pair.compact.join('=') }.join('&')
    "?#{query}"
  end

  # Converts one decoded response page into pagination info plus normalized
  # tweets. The payload itself is left unmodified.
  #
  # NOTE(review): the keys read here (`next_page`, `refresh_url`, `results`)
  # look like the legacy v1 search payload, while BASE_URL targets v1.1 --
  # confirm that fetch_json adapts the response shape.
  #
  # @param json [Hash] decoded response page
  # @return [Array] [next_page, refresh, results, total, kept] where `total`
  #   is the number of tweets on the page and `kept` the number of those
  #   carrying non-nil 'geo' data
  def normalize_chunk(json)
    next_page = get_next_page(json['next_page'])
    refresh = json['refresh_url']
    tweets = json['results']
    total = tweets.length
    # Tweets without geo data stay in the results; only the `kept` count
    # leaves them out.
    results = tweets.map { |tweet| normalize_tweet(tweet) }
    kept = results.count { |tweet| !tweet['geo'].nil? }
    [next_page, refresh, results, total, kept]
  end

  private

  # Maps a raw tweet Hash onto the normalized record used by this class.
  def normalize_tweet(tweet)
    {
      'time' => Time.parse(tweet['created_at']).utc,
      'geo' => tweet['geo'],
      'network' => 'twitter',
      'id' => tweet['id_str'],
      # Apparently anonymizing the data is against the API terms of service,
      # so let's keep it around!
      # More info: https://dev.twitter.com/terms/api-terms
      # Section 4E under "Twitter Content."
      'raw' => tweet
    }
  end
end
Something went wrong with that request. Please try again.