-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
annotations.go
259 lines (226 loc) · 9.43 KB
/
annotations.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vision
import (
"image"
pb "google.golang.org/genproto/googleapis/cloud/vision/v1"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
)
// Annotations contains all the annotations performed by the API on a single image.
// A nil field indicates either that the corresponding feature was not requested,
// or that annotation failed for that feature.
type Annotations struct {
// Faces holds the results of face detection.
Faces []*FaceAnnotation
// Landmarks holds the results of landmark detection.
Landmarks []*EntityAnnotation
// Logos holds the results of logo detection.
Logos []*EntityAnnotation
// Labels holds the results of label detection.
Labels []*EntityAnnotation
// Texts holds the results of text detection.
Texts []*EntityAnnotation
// SafeSearch holds the results of safe-search detection.
SafeSearch *SafeSearchAnnotation
// ImageProps contains properties of the annotated image.
ImageProps *ImageProps
// Error, if non-nil, indicates that one or more of the attempted
// annotations failed. Non-nil annotations are guaranteed to be correct,
// even if Error is non-nil. annotationsFromProto builds it as a gRPC
// error from the response status, so the status code is kept as a
// separate field.
Error error
}
// annotationsFromProto converts the API response for a single image into an
// Annotations value. Slice fields stay nil when the response carries no
// annotations of that kind, and Error is set only when the response reports
// a status.
func annotationsFromProto(res *pb.AnnotateImageResponse) *Annotations {
	// toEntities converts one list of entity annotations; an empty input
	// yields a nil slice.
	toEntities := func(src []*pb.EntityAnnotation) []*EntityAnnotation {
		var dst []*EntityAnnotation
		for i := range src {
			dst = append(dst, entityAnnotationFromProto(src[i]))
		}
		return dst
	}

	var faces []*FaceAnnotation
	for i := range res.FaceAnnotations {
		faces = append(faces, faceAnnotationFromProto(res.FaceAnnotations[i]))
	}

	out := &Annotations{
		Faces:      faces,
		Landmarks:  toEntities(res.LandmarkAnnotations),
		Logos:      toEntities(res.LogoAnnotations),
		Labels:     toEntities(res.LabelAnnotations),
		Texts:      toEntities(res.TextAnnotations),
		SafeSearch: safeSearchAnnotationFromProto(res.SafeSearchAnnotation),
		ImageProps: imagePropertiesFromProto(res.ImagePropertiesAnnotation),
	}

	if st := res.Error; st != nil {
		// The response error is a google.rpc.Status; turn it into a Go
		// error. A gRPC error is used because it keeps the status code in
		// its own field rather than folding it into the message.
		// TODO(jba): preserve the details field.
		// NOTE(review): grpc.Errorf is marked deprecated in newer grpc-go
		// releases in favor of status.Errorf — confirm before upgrading.
		out.Error = grpc.Errorf(codes.Code(st.Code), "%s", st.Message)
	}
	return out
}
// A FaceAnnotation describes the results of face detection on an image.
type FaceAnnotation struct {
// BoundingPoly is the bounding polygon around the face. The coordinates of
// the bounding box are in the original image's scale, as returned in
// ImageParams. The bounding box is computed to "frame" the face in
// accordance with human expectations. It is based on the landmarker
// results. Note that one or more x and/or y coordinates may not be
// generated in the BoundingPoly (the polygon will be unbounded) if only a
// partial face appears in the image to be annotated.
BoundingPoly []image.Point
// FDBoundingPoly is tighter than BoundingPoly, and
// encloses only the skin part of the face. Typically, it is used to
// eliminate the face from any image analysis that detects the "amount of
// skin" visible in an image. It is not based on the landmarker results, only
// on the initial face detection, hence the fd (face detection) prefix.
FDBoundingPoly []image.Point
// Face holds the detected face landmarks.
Face FaceLandmarks
// RollAngle indicates the amount of clockwise/anti-clockwise rotation of
// the face relative to the image vertical, about the axis perpendicular to
// the face. Range [-180,180].
RollAngle float32
// PanAngle is the yaw angle: the leftward/rightward angle that the face is
// pointing, relative to the vertical plane perpendicular to the image. Range
// [-180,180].
PanAngle float32
// TiltAngle is the pitch angle: the upwards/downwards angle that the face is
// pointing relative to the image's horizontal plane. Range [-180,180].
TiltAngle float32
// DetectionConfidence is the detection confidence. The range is [0, 1].
DetectionConfidence float32
// LandmarkingConfidence is the face landmarking confidence. The range is [0, 1].
LandmarkingConfidence float32
// Likelihoods expresses the likelihood of various aspects of the face
// (joy, sorrow, anger, surprise, under-exposure, blur and headwear).
Likelihoods *FaceLikelihoods
}
// faceAnnotationFromProto converts a proto FaceAnnotation into the package's
// FaceAnnotation, copying the polygons, angles, confidences and likelihoods,
// and then filling in the face landmarks.
func faceAnnotationFromProto(pfa *pb.FaceAnnotation) *FaceAnnotation {
	result := new(FaceAnnotation)

	result.BoundingPoly = boundingPolyFromProto(pfa.BoundingPoly)
	result.FDBoundingPoly = boundingPolyFromProto(pfa.FdBoundingPoly)

	result.RollAngle = pfa.RollAngle
	result.PanAngle = pfa.PanAngle
	result.TiltAngle = pfa.TiltAngle

	result.DetectionConfidence = pfa.DetectionConfidence
	result.LandmarkingConfidence = pfa.LandmarkingConfidence

	// Likelihoods is always allocated, even when every value is zero.
	likelihoods := new(FaceLikelihoods)
	likelihoods.Joy = Likelihood(pfa.JoyLikelihood)
	likelihoods.Sorrow = Likelihood(pfa.SorrowLikelihood)
	likelihoods.Anger = Likelihood(pfa.AngerLikelihood)
	likelihoods.Surprise = Likelihood(pfa.SurpriseLikelihood)
	likelihoods.UnderExposed = Likelihood(pfa.UnderExposedLikelihood)
	likelihoods.Blurred = Likelihood(pfa.BlurredLikelihood)
	likelihoods.Headwear = Likelihood(pfa.HeadwearLikelihood)
	result.Likelihoods = likelihoods

	// The landmarks are written in place into the embedded Face value.
	populateFaceLandmarks(pfa.Landmarks, &result.Face)
	return result
}
// An EntityAnnotation describes the results of a landmark, label, logo or text
// detection on an image.
type EntityAnnotation struct {
// ID is an opaque entity ID. Some IDs might be available in Knowledge Graph (KG).
// For more details on KG please see:
// https://developers.google.com/knowledge-graph/
// It is populated from the Mid field of the proto annotation.
ID string
// Locale is the language code for the locale in which the entity textual
// description (the Description field) is expressed.
Locale string
// Description is the entity textual description, expressed in the language of Locale.
Description string
// Score is the overall score of the result. Range [0, 1].
Score float32
// Confidence is the accuracy of the entity detection in an image.
// For example, for an image containing the Eiffel Tower, this field represents
// the confidence that there is a tower in the query image. Range [0, 1].
Confidence float32
// Topicality is the relevancy of the ICA (Image Content Annotation) label to the
// image. For example, the relevancy of 'tower' to an image containing
// 'Eiffel Tower' is likely higher than an image containing a distant towering
// building, though the confidence that there is a tower may be the same.
// Range [0, 1].
Topicality float32
// BoundingPoly is the image region to which this entity belongs. Not filled currently
// for label detection. For text detection, BoundingPolys
// are produced for the entire text detected in an image region, followed by
// BoundingPolys for each word within the detected text.
BoundingPoly []image.Point
// Locations contains the location information for the detected entity.
// Multiple LatLng structs can be present since one location may indicate the
// location of the scene in the query image, and another the location of the
// place where the query image was taken. Location information is usually
// present for landmarks.
Locations []LatLng
// Properties are additional optional Property fields.
// For example, a different kind of score or string that qualifies the entity.
Properties []Property
}
// entityAnnotationFromProto converts a proto EntityAnnotation into the
// package's EntityAnnotation. Locations and Properties stay nil when the
// proto carries none.
func entityAnnotationFromProto(e *pb.EntityAnnotation) *EntityAnnotation {
	ea := new(EntityAnnotation)

	// Convert the repeated fields first, one element at a time.
	for i := range e.Locations {
		ea.Locations = append(ea.Locations, latLngFromProto(e.Locations[i].LatLng))
	}
	for i := range e.Properties {
		ea.Properties = append(ea.Properties, propertyFromProto(e.Properties[i]))
	}

	// The proto's Mid becomes the entity ID; the other scalars copy across
	// under the same names.
	ea.ID = e.Mid
	ea.Locale = e.Locale
	ea.Description = e.Description
	ea.Score = e.Score
	ea.Confidence = e.Confidence
	ea.Topicality = e.Topicality
	ea.BoundingPoly = boundingPolyFromProto(e.BoundingPoly)
	return ea
}
// SafeSearchAnnotation describes the results of a SafeSearch detection on an image.
// Each field is a Likelihood rating for one category of content.
type SafeSearchAnnotation struct {
// Adult is the likelihood that the image contains adult content.
Adult Likelihood
// Spoof is the likelihood that an obvious modification was made to the
// image's canonical version to make it appear funny or offensive.
Spoof Likelihood
// Medical is the likelihood that this is a medical image.
Medical Likelihood
// Violence is the likelihood that this image represents violence.
Violence Likelihood
}
// safeSearchAnnotationFromProto converts a proto SafeSearchAnnotation into the
// package's SafeSearchAnnotation. A nil input yields a nil result.
func safeSearchAnnotationFromProto(s *pb.SafeSearchAnnotation) *SafeSearchAnnotation {
	if s == nil {
		return nil
	}
	ssa := new(SafeSearchAnnotation)
	ssa.Adult = Likelihood(s.Adult)
	ssa.Spoof = Likelihood(s.Spoof)
	ssa.Medical = Likelihood(s.Medical)
	ssa.Violence = Likelihood(s.Violence)
	return ssa
}
// ImageProps describes properties of the image itself, like the dominant colors.
type ImageProps struct {
// DominantColors describes the dominant colors of the image, one ColorInfo
// per color reported in the response.
DominantColors []*ColorInfo
}
// imagePropertiesFromProto converts proto ImageProperties into ImageProps.
// It returns nil when the input is nil or has no DominantColors message.
// When the message is present but lists no colors, the result is non-nil
// with a nil DominantColors slice.
func imagePropertiesFromProto(ip *pb.ImageProperties) *ImageProps {
	if ip == nil {
		return nil
	}
	dominant := ip.DominantColors
	if dominant == nil {
		return nil
	}

	props := new(ImageProps)
	for i := range dominant.Colors {
		props.DominantColors = append(props.DominantColors, colorInfoFromProto(dominant.Colors[i]))
	}
	return props
}