/
test_resource_search.py
186 lines (162 loc) · 7.32 KB
/
test_resource_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
from webob.multidict import UnicodeMultiDict, MultiDict
from nose.tools import assert_raises, assert_equal
from ckan.tests import *
from ckan.tests import is_search_supported
import ckan.lib.search as search
from ckan import model
from ckan.lib.create_test_data import CreateTestData
class TestSearch(object):
    """Tests for resource search through ``search.query_for(model.Resource)``.

    The fixture consists of two packages holding six resources in total,
    spread over three distinct URLs (``ab``, ``cd`` and ``ef``).  Most tests
    run a fielded search and compare the set of URLs of the matching
    resources with the expected set.
    """

    @classmethod
    def setup_class(cls):
        """Create the fixture packages, or skip when search is unavailable.

        Raises:
            SkipTest: if ``is_search_supported()`` returns a false value.
        """
        if not is_search_supported():
            raise SkipTest("Search not supported")
        cls.ab = 'http://site.com/a/b.txt'
        cls.cd = 'http://site.com/c/d.txt'
        cls.ef = 'http://site.com/e/f.txt'
        cls.pkgs = [
            {'name': 'pkg1',
             'resources': [
                 {'url': cls.ab,
                  'description': 'This is site ab.',
                  'format': 'Excel spreadsheet',
                  'hash': 'abc-123',
                  'alt_url': 'alt1',
                  'extras': {'size_extra': '100'},
                  },
                 {'url': cls.cd,
                  'description': 'This is site cd.',
                  'format': 'Office spreadsheet',
                  'hash': 'qwe-456',
                  'alt_url': 'alt2',
                  'extras': {'size_extra': '200'},
                  },
             ]
             },
            {'name': 'pkg2',
             'resources': [
                 {'url': cls.cd,
                  'alt_url': 'alt1',
                  'description': 'This is site cd.'},
                 {'url': cls.ef,
                  'description': 'This is site ef.'},
                 {'url': cls.ef,
                  'description': 'This is site gh.'},
                 {'url': cls.ef,
                  'description': 'This is site ij.'},
             ]
             },
        ]
        CreateTestData.create_arbitrary(cls.pkgs)

    @classmethod
    def teardown_class(cls):
        """Reset the database via ``model.repo.rebuild_db()`` after the tests."""
        model.repo.rebuild_db()

    @staticmethod
    def _urls_of(result):
        """Return the set of URLs of the resources listed in *result*.

        *result* is the dict returned by a resource query; every entry of
        ``result['results']`` is looked up as a resource id.
        """
        resources = [model.Session.query(model.Resource).get(resource_id)
                     for resource_id in result['results']]
        return set([resource.url for resource in resources])

    def _run_ordered_by_id(self, fields, limit=None, offset=None):
        """Run a resource search ordered by id, optionally paged.

        ``limit`` and ``offset`` are only set on the query options when
        given, so omitting them leaves the options at their defaults.
        """
        options = search.QueryOptions(order_by='id')
        if limit is not None:
            options.limit = limit
        if offset is not None:
            options.offset = offset
        return search.query_for(model.Resource).run(fields=fields,
                                                    options=options)

    def res_search(self, query='', fields=None, terms=None, options=None):
        """Run a resource search and return the set of matching URLs.

        Args:
            query: free-text query string passed through to the search.
            fields: mapping of field name to search value; ``None`` means
                an empty dict.
            terms: list of search terms; ``None`` means an empty list.
            options: a ``search.QueryOptions`` instance; ``None`` means a
                freshly constructed one.

        Returns:
            The set of ``url`` values of the resources that matched.
        """
        # Build the defaults per call: a mutable default in the signature is
        # created once and shared, so any mutation made while running one
        # query would silently carry over into the next test.
        if fields is None:
            fields = {}
        if terms is None:
            terms = []
        if options is None:
            options = search.QueryOptions()
        result = search.query_for(model.Resource).run(
            query=query, fields=fields, terms=terms, options=options)
        return self._urls_of(result)

    def test_01_search_url(self):
        """A URL fragment shared by every resource matches all six."""
        fields = {'url': 'site.com'}
        result = search.query_for(model.Resource).run(fields=fields)
        assert result['count'] == 6, result
        urls = self._urls_of(result)
        assert set([self.ab, self.cd, self.ef]) == urls, urls

    def test_02_search_url_2(self):
        """A URL fragment found in only one URL matches just that URL."""
        urls = self.res_search(fields={'url': 'a/b'})
        assert set([self.ab]) == urls, urls

    def test_03_search_url_multiple_words(self):
        """Two values for the ``url`` field ('e' and 'f') match only ``ef``."""
        fields = UnicodeMultiDict(MultiDict(url='e'))
        fields.add('url', 'f')
        urls = self.res_search(fields=fields)
        assert set([self.ef]) == urls, urls

    def test_04_search_url_none(self):
        """A URL fragment that occurs nowhere yields no results."""
        urls = self.res_search(fields={'url': 'nothing'})
        assert set() == urls, urls

    def test_05_search_description(self):
        """Searching the description field finds the ``cd`` resources."""
        urls = self.res_search(fields={'description': 'cd'})
        assert set([self.cd]) == urls, urls

    def test_06_search_format(self):
        """A lower-case format term matches the 'Excel spreadsheet' format."""
        urls = self.res_search(fields={'format': 'excel'})
        assert set([self.ab]) == urls, urls

    def test_07_search_format_2(self):
        """A fragment inside the format value matches both spreadsheets."""
        urls = self.res_search(fields={'format': 'sheet'})
        assert set([self.ab, self.cd]) == urls, urls

    def test_08_search_hash_complete(self):
        """The complete hash value matches its resource."""
        urls = self.res_search(fields={'hash': 'abc-123'})
        assert set([self.ab]) == urls, urls

    def test_09_search_hash_partial(self):
        """The leading part of a hash matches its resource."""
        urls = self.res_search(fields={'hash': 'abc'})
        assert set([self.ab]) == urls, urls

    def test_10_search_hash_partial_but_not_initial(self):
        """A hash fragment that is not at the start matches nothing."""
        urls = self.res_search(fields={'hash': '123'})
        assert set() == urls, urls

    def test_11_search_several_fields(self):
        """Constraints on several fields must all hold for a match."""
        urls = self.res_search(fields={'description': 'ab',
                                       'format': 'sheet'})
        assert set([self.ab]) == urls, urls

    def test_12_search_all_fields(self):
        """With ``all_fields=True`` each result is a dict of resource fields."""
        fields = {'url': 'a/b'}
        options = search.QueryOptions(all_fields=True)
        result = search.query_for(model.Resource).run(fields=fields,
                                                      options=options)
        assert result['count'] == 1, result
        res_dict = result['results'][0]
        assert isinstance(res_dict, dict)

        # The dict must expose exactly the resource columns plus these
        # additional keys.
        res_keys = set(res_dict.keys())
        expected_res_keys = set(model.Resource.get_columns())
        expected_res_keys.update([
            'id', 'resource_group_id', 'package_id', 'position',
            'size_extra', 'tracking_summary',
        ])
        assert_equal(res_keys, expected_res_keys)

        # Compare the values against the first resource of pkg1.
        pkg1 = model.Package.by_name(u'pkg1')
        ab = pkg1.resources[0]
        assert res_dict['id'] == ab.id
        assert res_dict['package_id'] == pkg1.id
        assert res_dict['url'] == ab.url
        assert res_dict['description'] == ab.description
        # FIXME: This needs to be fixed before this branch is merged to master
        from ckan.lib.dictization.model_dictize import _unified_resource_format
        assert res_dict['format'] == _unified_resource_format(ab.format)
        assert res_dict['hash'] == ab.hash
        assert res_dict['position'] == 0

    def test_13_pagination(self):
        """``limit`` and ``offset`` select slices of the full ordered result."""
        fields = {'url': 'site'}

        # large search: the unpaged reference result.  Every query here is
        # ordered by id so that the pages can be compared with its slices.
        all_results = self._run_ordered_by_id(fields)
        all_resources = all_results['results']
        all_resource_count = all_results['count']
        assert all_resource_count >= 6, all_results

        # limit
        result = self._run_ordered_by_id(fields, limit=2)
        resources = result['results']
        count = result['count']
        assert len(resources) == 2, resources
        # 'count' reports the total number of matches, not the page size.
        assert count == all_resource_count, (count, all_resource_count)
        assert resources == all_resources[:2], \
            '%r, %r' % (resources, all_resources)

        # offset
        result = self._run_ordered_by_id(fields, limit=2, offset=2)
        resources = result['results']
        assert len(resources) == 2, resources
        assert resources == all_resources[2:4]

        # larger offset
        result = self._run_ordered_by_id(fields, limit=2, offset=4)
        resources = result['results']
        assert len(resources) == 2, resources
        assert resources == all_resources[4:6]

    def test_14_extra_info(self):
        """``alt_url`` is searchable; ``size_extra`` raises ``SearchError``."""
        fields = {'alt_url': 'alt1'}
        result = search.query_for(model.Resource).run(fields=fields)
        assert result['count'] == 2, result

        fields = {'alt_url': 'alt2'}
        result = search.query_for(model.Resource).run(fields=fields)
        assert result['count'] == 1, result

        # Document that resource extras not in ckan.extra_resource_fields
        # can't be searched
        fields = {'size_extra': '100'}
        assert_raises(search.SearchError,
                      search.query_for(model.Resource).run, fields=fields)