Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
bbbco committed Dec 10, 2015
1 parent 9f7413b commit 4344416
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Gemfile
@@ -0,0 +1,4 @@
# Gem dependencies for the Facebook word-count script.
source "https://rubygems.org"

# Provides Koala::Facebook::API, used by bin/fb_leader_script.rb.
gem "koala"
# Pulled from the author's GitHub fork (master branch) instead of rubygems.org.
gem "magic_cloud", github: "bbbco/magic_cloud", branch: "master"
# Required by bin/fb_leader_script.rb.
gem "pry"
40 changes: 40 additions & 0 deletions Gemfile.lock
@@ -0,0 +1,40 @@
GIT
remote: git://github.com/bbbco/magic_cloud.git
revision: 54917d79c82982cab5e8c9af44bf08c92833d0b5
branch: master
specs:
magic_cloud (0.0.3)
rmagick
slop (~> 3.6.0)

GEM
remote: https://rubygems.org/
specs:
addressable (2.4.0)
coderay (1.1.0)
faraday (0.9.2)
multipart-post (>= 1.2, < 3)
koala (2.2.0)
addressable
faraday
multi_json
method_source (0.8.2)
multi_json (1.11.2)
multipart-post (2.0.0)
pry (0.10.3)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
rmagick (2.15.4)
slop (3.6.0)

PLATFORMS
ruby

DEPENDENCIES
koala
magic_cloud!
pry

BUNDLED WITH
1.10.6
61 changes: 61 additions & 0 deletions bin/fb_leader_script.rb
@@ -0,0 +1,61 @@
#!/usr/bin/env ruby
#encoding: utf-8

script_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift File.join(script_dir, '..', 'lib')

require 'koala'
require 'json'
require 'pry'
require 'text_word_count'

# Main body of the script: for every leader listed in conf/leaders_ids.json,
# download their Facebook posts since 2015-01-01, count the words used, and
# write the 150 most frequent words per leader to data/facebook/results.json.
#
# Usage: fb_leader_script.rb <facebook-graph-api-token>
#
# Exits with status 1 (message on stderr) when the token is missing or
# rejected, or when the leaders file is missing or is not valid JSON.

# Needed only for creating the output directory below.
require 'fileutils'

fb_token = ARGV[0]

unless fb_token
  warn "Usage: #{$0} <token>"
  warn "This script requires a Facebook API Graph token passed as its first parameter."
  warn "You can get a temporary one here: https://developers.facebook.com/tools/explorer/"
  exit 1
end

leaders_path = File.join(script_dir, '..', 'conf', 'leaders_ids.json')

begin
  # Array() tolerates a config file that has no "leaders" key.
  leaders = Array(JSON.parse(File.read(leaders_path))["leaders"])
rescue Errno::ENOENT
  warn "Cannot find the ./conf/leaders_ids.json file!"
  exit 1
rescue JSON::ParserError => e
  warn "Cannot parse the ./conf/leaders_ids.json file: #{e.message}"
  exit 1
end

begin
  graph = Koala::Facebook::API.new(fb_token)
  # Cheap request whose only purpose is to validate the token up front.
  graph.get_object("me")
rescue Koala::Facebook::AuthenticationError
  warn "Your token has expired! Please get a new token at: https://developers.facebook.com/tools/explorer/"
  exit 1
end

leaders_text = {}

leaders.each do |leader|
  puts "Culling #{leader["id"]}"

  messages = []
  posts = graph.get_connection(leader["facebook_id"], 'posts', {fields: ['message','from', 'created_time'], limit: 10, since: "2015-01-01"})

  # Walk every page of results; next_page is falsy once the last page is reached.
  while posts
    # Posts without text have no "message" field, so drop the resulting nils.
    messages.concat(posts.map { |post| post["message"] }.compact)
    posts = posts.next_page
  end

  counts = TextWordCount.new(messages.join(" ")).word_counts
  # Keep the 150 most frequent [word, count] pairs, highest count first.
  leaders_text[leader["id"]] = counts.sort_by { |_word, count| -count }.first(150)
end

results_path = File.join(script_dir, '..', 'data', 'facebook', 'results.json')
# The data directory is not part of the repository, so create it on demand.
FileUtils.mkdir_p(File.dirname(results_path))
File.write(results_path, leaders_text.to_json)





77 changes: 77 additions & 0 deletions conf/blacklist.txt
@@ -0,0 +1,77 @@
the
and
of
to
a
in
on
this
for
is
from
was
an
it
as
with
are
that
be
we
but
at
have
their
has
if
its
us
not
by
where
were
they
dont
there
there
so
also
thats
some
havent
been
his
her
who
although
neither
nor
things
go
or
had
can
all
week
both
no
which
werent
hasnt
than
what
do
could
theyve
himself
herself
him
her
our
-
weeks
just
these
because
them
too
24 changes: 24 additions & 0 deletions conf/leaders_ids.json
@@ -0,0 +1,24 @@
{
"leaders": [
{
"id": "jdgreear",
"name": "J. D. Greear",
"facebook_id": "pastorgreear"
},
{
"id": "timkeller",
"name": "Tim Keller",
"facebook_id": "TimKellerNYC"
},
{
"id": "joelolsteen",
"name": "Joel Osteen",
"facebook_id": "JoelOsteen"
},
{
"id": "markdriscoll",
"name": "Mark Driscoll",
"facebook_id": "pastormark"
}
]
}
63 changes: 63 additions & 0 deletions lib/text_word_count.rb
@@ -0,0 +1,63 @@
require 'csv'
require 'uri'

# Counts how often each word occurs in a piece of text.
#
# The text is upper-cased, URLs are dropped, punctuation is stripped, and the
# remaining words are tallied. Two optional configuration files refine the
# result; a file that is missing or unreadable is simply treated as empty:
#
# * blacklist     - one word per line; these words are removed from the tally.
# * substitutions - CSV rows of "word,replacement"; the count of `word` is
#                   folded into `replacement`.
#
# Readers:
#   blacklist     - Array of upper-cased blacklisted words.
#   substitutions - Hash of upper-cased word => upper-cased replacement.
#   word_counts   - Hash of word => count (missing words read as 0).
class TextWordCount

  # Directory that holds the optional word-list files (../conf from this file).
  CONF_DIR = File.expand_path(File.join('..', 'conf'), File.dirname(__FILE__))
  DEFAULT_BLACKLIST_PATH = File.join(CONF_DIR, 'blacklist.txt')
  DEFAULT_SUBSTITUTIONS_PATH = File.join(CONF_DIR, 'substitutions.txt')

  attr_reader :blacklist, :substitutions, :word_counts

  # text               - the text to analyse; nil is treated as empty text.
  # blacklist_path     - file of words to ignore (defaults to conf/blacklist.txt).
  # substitutions_path - CSV file of word,replacement pairs
  #                      (defaults to conf/substitutions.txt).
  def initialize(text, blacklist_path: DEFAULT_BLACKLIST_PATH, substitutions_path: DEFAULT_SUBSTITUTIONS_PATH)
    @blacklist = load_blacklist(blacklist_path)
    @substitutions = load_substitutions(substitutions_path)
    @word_counts = count_words(text)
  end

  private

  # Reads the blacklist: one word per line, upper-cased, blank lines ignored.
  # Returns an empty list when the file cannot be read.
  def load_blacklist(path)
    # scrub replaces invalid byte sequences so upcase cannot raise on them;
    # strip also removes the "\r" left behind by CRLF line endings.
    File.read(path, encoding: 'UTF-8').scrub
        .each_line
        .map { |line| line.strip.upcase }
        .reject(&:empty?)
  rescue SystemCallError
    []
  end

  # Reads "word,replacement" rows into an upper-cased Hash.
  # Returns an empty Hash when the file cannot be read or is not valid CSV.
  def load_substitutions(path)
    table = {}
    CSV.foreach(path) do |row|
      word, replacement = row
      # Skip blank or incomplete rows.
      next if word.nil? || replacement.nil?

      table[word.strip.upcase] = replacement.strip.upcase
    end
    table
  rescue SystemCallError
    {}
  rescue CSV::MalformedCSVError => e
    warn "Ignoring malformed substitutions file #{path}: #{e.message}"
    {}
  end

  # Tallies the words of text, then applies the blacklist and substitutions.
  def count_words(text)
    counts = Hash.new(0)
    sanitize_and_split(text).each { |word| counts[word] += 1 }

    # Tokens made up only of punctuation collapse to the empty string.
    counts.delete('')
    @blacklist.each { |word| counts.delete(word) }

    @substitutions.each do |word, replacement|
      # Only words that actually occurred have a count to move.
      next if word == replacement || !counts.key?(word)

      counts[replacement] += counts.delete(word)
    end

    counts
  end

  # Upper-cases text and splits it into cleaned-up words.
  # Returns an Array of Strings; entries may be empty.
  def sanitize_and_split(text)
    url_pattern = URI.regexp

    text.to_s.upcase.strip
        .gsub(/--+/, ' ') # a run of dashes separates words ("foo--bar")
        .split
        .reject { |token| token =~ url_pattern }
        .map do |token|
          # Keep letters, digits, inner hyphens and apostrophes (IT'S, WELL-KNOWN).
          token.gsub(/[^A-Z0-9\-']/, '').gsub(/\A-+|-+\z/, '')
        end
  end

end

0 comments on commit 4344416

Please sign in to comment.