1
+ __author__ = 'Virginia'
2
+ from propertysuggester .evaluator .ResultEvaluation import ResultEvaluation
3
+ from propertysuggester .utils .WikidataApi import WikidataApi
4
+ from Entity import Entity
5
+ import csv
6
+ from manuelle_auswertung import UserFeedbackEvaluation
7
+ import json
8
+
9
+
10
class Missing_Evaluation(ResultEvaluation):
    """Rank reported missing properties against PropertySuggester output.

    For every entity that has reported missing properties, the suggester API
    is queried and the rank at which each missing property shows up among the
    returned suggestions is stored in ``ranked_dict``.

    NOTE(review): ``foundMissingProperties``, ``foundMissingPropertiesRankSum``
    and ``foundEntities`` are read/updated below but never initialised in this
    class -- presumably ``ResultEvaluation.__init__`` sets them; confirm
    against the base class.
    """

    def __init__(self):
        self.api = WikidataApi("http://suggester.wmflabs.org/w/")
        self.ranking_amounts = None
        # Raw string so that the backslashes of the Windows path can never be
        # read as escape sequences; the resulting value is unchanged.
        self.filename = r"C:\Users\Virginia\Documents\GitHub\PropertySuggester-Python\propertysuggester\evaluator\wikidatawiki-20140526-pages-articles.csv"
        self.samplesize = 1000
        self.outputfile = "20130526_dump" + str(self.samplesize) + ".csv"
        # Maps an entity title (with its first five characters removed) to a
        # list of {"missing": <property id>, "rank": <1-based position>}.
        self.ranked_dict = {}
        self.foundMissing = 0
        self.notFoundMissing = 0
        ResultEvaluation.__init__(self, self.filename, self.samplesize,
                                  self.outputfile)

    # input: entity, missing_properties
    # for every property, look up where it appears in the suggestion list

    def get_missing(self, eval_list):
        """Collect the entities of ``eval_list`` that report missing properties.

        Items whose ``missing`` attribute equals the string ``"[]"`` are
        skipped. The number of collected entities is printed.

        :param eval_list: iterable of objects with ``entity`` and ``missing``
            attributes.
        :return: dict mapping ``item.entity`` to ``str(item.missing)``.
        """
        entity_dict = {}
        for entity in eval_list:
            if entity.missing == "[]":
                continue
            entity_dict[entity.entity] = str(entity.missing)
        print(str(len(entity_dict)))
        return entity_dict

    def find_missing(self, entity_dict):
        """Rank the missing properties of every entity and print a summary.

        :param entity_dict: dict mapping an entity id to a JSON string that
            describes its missing properties (as built by ``get_missing``).
        """
        for entity, missing in entity_dict.items():
            entity_json = self.api.wb_getentities(entityid=entity, language="en")
            entity_description = entity_json["entities"][str(entity)]
            self.process_entities(entity_description, json.loads(missing))

        # Guard the average: when no missing property was ranked at all the
        # divisor is zero, which used to abort before the summary was printed.
        if self.foundMissingProperties:
            average = float(self.foundMissingPropertiesRankSum) / self.foundMissingProperties
            print("AVG of found ones " + str(average))
        else:
            print("AVG of found ones n/a")
        print("Total amount of found(dealed with) missing properties " + str(self.foundEntities))
        print("Total amount entities: " + str(len(entity_dict)))
        print(" Found with property_suggester: " + str(self.foundMissingProperties) + " " + str(self.foundMissing) + " not found: " + str(self.notFoundMissing))

    def process_entities(self, entity, missing_dict):
        """Fetch suggestions for one entity and rank its missing properties.

        :param entity: entity description; only ``entity["title"]`` is read.
        :param missing_dict: list of dicts; the first key of each dict is a
            property id whose first character is dropped before ``int()``.
        """
        missing_properties = [int(str(list(item)[0])[1:]) for item in missing_dict]
        title = entity["title"]
        print("\n Missing properties {0} for entity {1}".format(missing_properties, title))
        print("Item {0} - properties".format(title))
        # The first five characters of the title are dropped before querying
        # (rank_suggestions describes them as the "Item:" prefix).
        entity_string = title[5:]
        suggestions = self.api.wbs_getsuggestions(entity=entity_string, limit=50, cont=0)
        self.rank_suggestions(title, suggestions, missing_properties)

    def rank_suggestions(self, entity, suggestions, missing_properties):
        """Record at which rank each missing property is suggested.

        Updates ``ranked_dict`` and the found / not-found counters.

        :param entity: entity title; its first five characters are cut off.
        :param suggestions: API response; ``suggestions["search"]`` is iterated
            and each entry's ``"id"`` is read.
        :param missing_properties: list of numeric property ids.
        """
        self.appearsWithinFirst50 = False
        entity = entity[5:]  # cut off "Item:"
        wanted = set(missing_properties)  # built once for O(1) membership tests
        local_found_missing = 0
        for rank, suggestion in enumerate(suggestions["search"], 1):
            numeric_id = suggestion["id"][1:]
            print("currently checking: " + numeric_id)
            if int(numeric_id) not in wanted:
                continue
            print("Found missing property " + str(numeric_id) + " at rank " + str(rank))
            local_found_missing += 1
            self.ranked_dict.setdefault(entity, []).append(
                {"missing": suggestion["id"], "rank": rank})
            self.foundMissingPropertiesRankSum += rank
            self.foundMissingProperties += 1

        self.foundMissing += local_found_missing
        self.notFoundMissing += len(missing_properties) - local_found_missing

    def print_result(self):
        """Print ``ranked_dict`` and dump it to ``missing_analysis___2.csv``.

        One row (item, missing, rank) is written per recorded entry.
        """
        print(str(self.ranked_dict))
        # "wb": the Python 2 csv module expects a file opened in binary mode.
        with open("missing_analysis___2.csv", "wb") as csv_file:
            missing_writer = csv.writer(csv_file, delimiter=';', quotechar='|')
            missing_writer.writerow(["item", "missing", "rank"])
            for item, result in self.ranked_dict.items():
                for entry in result:
                    # entry looks like {'rank': 12, 'missing': u'P1082'}
                    missing_writer.writerow([item, entry["missing"], entry["rank"]])

    def make_readableoutput(self, entity_missing_dict):
        """Write entity, English label, property and description to a CSV file.

        The output file is ``missing_readable.csv``.

        NOTE(review): an entity without an English label would raise KeyError
        on the label lookup -- confirm whether that can occur.

        :param entity_missing_dict: dict mapping an entity id to a JSON string
            holding a list of ``{property: description}`` dicts.
        """
        with open("missing_readable.csv", "wb") as csv_file:
            missing_writer = csv.writer(csv_file, delimiter=';', quotechar='|')
            for entity, missing in entity_missing_dict.items():
                entity_json = self.api.wb_getentities(entityid=entity, language="en")
                label = entity_json["entities"][entity]["labels"]["en"]["value"]
                for pair in json.loads(missing):
                    for prop_id, description in pair.items():
                        missing_writer.writerow([entity, label.encode("utf-8"), prop_id, description.encode("utf-8")])
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
# Driver: load the user feedback, keep the entities that report missing
# properties, rank those properties against the suggester and dump the result.
# NOTE(review): these statements run at import time, not only when the file is
# executed as a script.
luser = UserFeedbackEvaluation()
eval_list = luser.preprocess_file()

m = Missing_Evaluation()
entity_dict = m.get_missing(eval_list)
# Optional human-readable CSV export (disabled):
#m.make_readableoutput(entity_dict)
m.find_missing(entity_dict)
m.print_result()
0 commit comments