-
Notifications
You must be signed in to change notification settings - Fork 1
/
process.rb
234 lines (156 loc) · 7 KB
/
process.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
####
# Coding tutorial: Congressmiles
# PART 2: Processing
# a. Crop each Senator's mugshot using Face.com metadata and RMagick
# b. Create a webpage ranking faces by smile, glasses, and gender confidence
#
require 'rubygems'
require 'rmagick' # a Ruby wrapper for the awesome ImageMagick library
require 'crack' # to do easy parsing of JSON
## Step A: Crop photos
## - Read Senator data (NYT API) and face meta-data (Face.com API)
## from JSON files downloaded in PART 1 (fetch.rb)
## - Crop files using RMagick and save
## PREREQS: Images and meta-data stored in a folder called '200x250'
IMAGES_DIR = '200x250'
NYT_CONGRESS_JSON_NAME = 'nyt-congress.json'
CROP_DIR = "crop"

# File.exist? is used because File.exists? was deprecated and later removed
# (Ruby 3.2), which would make this line raise NoMethodError.
Dir.mkdir(CROP_DIR) unless File.exist?(CROP_DIR)

# File.read opens, reads and closes the file in one call, so no file handle
# is left dangling the way File.open(...).read leaves one.
senate_json = Crack::JSON.parse(File.read(NYT_CONGRESS_JSON_NAME))
senators = senate_json['results'][0]['members']

# For every senator: load the face metadata, crop the mugshot around the
# detected face, save a 90x120 thumbnail, and annotate the senator hash with
# 'f_json', 'crop_image' and 'name_title' for the webpage step.
senators.each do |senator|
  puts "Cropping #{senator['id']} - #{senator['first_name']} #{senator['last_name']}"

  ## Face.com's API returns an array of photos with an array of tags
  f_fname = File.join(IMAGES_DIR, "#{senator['id']}.json")
  ## Since each JSON response we got has just one photo, we use index 0
  fjson = Crack::JSON.parse(File.read(f_fname))['photos'][0]

  ## But there may be more than one face tagged...so let's pick the most
  ## prominent one, i.e. the tag with the highest face confidence
  f = fjson['tags'].max_by { |t| t['attributes']['face']['confidence'] }
  # Fail with a message naming the file instead of a NoMethodError on nil.
  raise "No face tags found in #{f_fname}" if f.nil?

  # adding to the senator hash for later reference...
  senator['f_json'] = f

  ## Now open image with RMagick
  img_name = "#{senator['id']}.jpg"
  img = Magick::Image.read("#{IMAGES_DIR}/#{img_name}")[0]
  w = img.columns
  h = img.rows

  ## let's crop to the specified face center, height, and width attributes
  ## http://studio.imagemagick.org/RMagick/doc/image1.html#crop
  ## Face.com API returns the relative point (i.e. from 0-100) of a feature,
  ## not exact pixels.
  ## Assuming f_width/f_height cover only the face and not all of the head,
  ## pad them (5 and 15 percentage points respectively) while capping at 100.0.
  f_center = f['center']
  f_height = [f['height'] + 15.0, 100.0].min
  f_width  = [f['width'] + 5.0, 100.0].min

  # The crop height is derived from the crop width (12/9 ratio) so that the
  # result matches the 90x120 thumbnail; f_height only positions the top edge.
  face_width  = w * f_width / 100.0
  face_height = face_width * 12 / 9.0

  img.crop!(
    w * (f_center['x'] - f_width / 2) / 100.0,  # x-coord of top-left corner
    h * (f_center['y'] - f_height / 2) / 100.0, # y-coord of top-left corner
    face_width,
    face_height
  )

  cname = "#{CROP_DIR}/#{img_name}"
  img.resize(90, 120).write(cname)

  # add some convenience attributes
  senator['crop_image'] = cname
  senator['name_title'] = "Sen. #{senator['last_name']} (#{senator['party']}-#{senator['state']})"
end
## Step B: Let's make a webpage
# This is just messy HTML construction
## define a div/img printing helper function
# Builds the HTML snippet for one senator: the cropped image plus a caption.
#
# sen  - senator hash; reads the 'crop_image' and 'name_title' keys.
# val  - zero or more extra values (e.g. a confidence score) appended to the
#        caption after the name, separated by single spaces.
#
# Returns the markup as a String terminated by a newline.
def foo_div_img(sen, *val)
  title = sen['name_title']
  # val is a splat, hence always an Array: interpolating it directly would
  # print its inspect form (e.g. "[95]", or "[]" when nothing was passed),
  # so join the values instead.
  caption = "#{title} #{val.join(' ')}"
  %(<div class="face"><img src="#{sen['crop_image']}" alt="#{title}"/><div class="name">#{caption}</div></div>\n)
end
html_fname = "smiles.html"
html_file = File.open(html_fname, 'w')
html_file.puts "<html><body>"

## Best smile
# Each senator hash carries the face tag under 'f_json'.
# Rank from most to least confident smile; ties fall back to the mouth
# width expressed as a proportion of the face width.
smile_confidence = lambda { |s| s['f_json']['attributes']['smiling']['confidence'] }

ascending_by_smile = senators.sort_by do |s|
  face = s['f_json']
  mouth_span = face['mouth_right']['x'] - face['mouth_left']['x']
  [face['attributes']['smiling']['confidence'], mouth_span / face['width']]
end
senators = ascending_by_smile.reverse

smiles = senators.select do |s|
  s['f_json']['attributes']['smiling']['value'] == 'true'
end
puts "#{smiles.length} senators had a smile"

# Top of the smiling list: highest confidence first.
html_file.puts "<h2>10 Biggest Smiles</h2>"
smiles.first(10).each do |senator|
  html_file << foo_div_img(senator, smile_confidence.call(senator))
end

# Bottom of the smiling list, least confident smile first.
html_file.puts "<h2>10 Most Ambiguous Smiles</h2>"
smiles.last(10).reverse.each do |senator|
  html_file << foo_div_img(senator, smile_confidence.call(senator))
end

# Everyone not in the smiling list (smiling value other than 'true'); the
# higher the confidence, the more non-smiley the face.
html_file.puts "<h2>The Non-Smilers</h2>"
non_smiles = senators - smiles
non_smiles.each do |senator|
  html_file << foo_div_img(senator, smile_confidence.call(senator))
end
## Now for some partisanship
# Writes one table row per party: smiler count, non-smiler count, and the
# mean smile confidence among that party's smilers.
html_file.puts("<h2>Smiles by party</h2>")
html_file.puts("
<table><thead><tr><th>Party</th><th>Smiles</th><th>Non-smiles</th><th>Avg. Smile Confidence</th></tr></thead>
<tbody>")
['D','R','I'].each do |party|
  party_smilers     = smiles.select { |sen| sen['party'] == party }
  party_non_smilers = non_smiles.select { |sen| sen['party'] == party }

  # A party with no smilers has no average: show 'n/a' instead of dividing
  # by zero. Otherwise divide as floats so integer confidences are not
  # truncated.
  avg_confidence =
    if party_smilers.empty?
      'n/a'
    else
      total = party_smilers.inject(0) do |sum, sen|
        sum + sen['f_json']['attributes']['smiling']['confidence']
      end
      format('%.1f', total.to_f / party_smilers.length)
    end

  cells = [party, party_smilers.length, party_non_smilers.length, avg_confidence]
  html_file.puts("<tr>" + cells.map { |v| "<td>#{v}</td>" }.join(' ') + "</tr>")
end
html_file.puts("</tbody></table>")
## The 'glasses' attribute gets the same treatment as the smiles:
## keep the senators tagged with glasses, most confident detection first.
glasses_attr = lambda { |s| s['f_json']['attributes']['glasses'] }

wearers = senators.select { |s| glasses_attr.call(s)['value'] == 'true' }
glasses = wearers.sort_by { |s| glasses_attr.call(s)['confidence'] }.reverse
puts "#{glasses.length} senators wear glasses"

html_file.puts "<h2>10 Most Bespectacled Senators</h2>"
glasses.first(10).each do |senator|
  html_file << foo_div_img(senator, glasses_attr.call(senator)['confidence'])
end
## Last rating: the Face.com API's gender confidence
# Keep only senators whose tag has a gender attribute, ordered by
# confidence (highest first) regardless of the detected gender.
gender_of = lambda { |s| s['f_json']['attributes']['gender'] }

with_gender = senators.select { |s| gender_of.call(s) }
sens = with_gender.sort_by { |s| gender_of.call(s)['confidence'] }.reverse

# Split by detected value; group_by keeps the confidence ordering in each group.
by_gender = sens.group_by { |s| gender_of.call(s)['value'] }
males   = by_gender['male'] || []
females = by_gender['female'] || []
puts "Face.com thinks there are #{males.length} men and #{females.length} women in the Senate"

html_file.puts "<h2>10 Most Masculine-Featured Senators</h2>"
males.first(10).each do |senator|
  html_file << foo_div_img(senator, gender_of.call(senator)['confidence'])
end

html_file.puts "<h2>10 Most Feminine-Featured Senators</h2>"
females.first(10).each do |senator|
  html_file << foo_div_img(senator, gender_of.call(senator)['confidence'])
end

## Finish the document and release the file handle
html_file.puts "</body></html>"
html_file.close
#
# CREDITS:
# by Dan Nguyen dan@danwin.com / twitter: @dancow / http://danwin.com
#
# APIs:
# Sunlight Labs: http://services.sunlightlabs.com/docs/Sunlight_Congress_API/
# NYT Congress API: http://developer.nytimes.com/docs/congress_api/
# Face API: http://developers.face.com/
# http://developers.face.com/docs/api/faces-detect/
# More programming help at:
# http://ruby.bastardsbook.com/chapters/image-manipulation/
# http://studio.imagemagick.org/RMagick/doc/