forked from snarfed/mockfacebook
-
Notifications
You must be signed in to change notification settings - Fork 0
/
schemautil.py
407 lines (320 loc) · 10.7 KB
/
schemautil.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
"""Code for reading, writing, and representing the FQL schema and example data.
"""
__author__ = ['Ryan Barrett <mockfacebook@ryanb.org>']
import collections
import copy
import datetime
import json
import os
import pprint
import re
import sqlite3
def thisdir(filename):
return os.path.join(os.path.dirname(__file__), filename)
FQL_SCHEMA_PY_FILE = thisdir('fql_schema.py')
FQL_SCHEMA_SQL_FILE = thisdir('fql_schema.sql')
FQL_DATA_PY_FILE = thisdir('fql_data.py')
FQL_DATA_SQL_FILE = thisdir('fql_data.sql')
GRAPH_SCHEMA_PY_FILE = thisdir('graph_schema.py')
GRAPH_DATA_PY_FILE = thisdir('graph_data.py')
GRAPH_DATA_SQL_FILE = thisdir('graph_data.sql')
MOCKFACEBOOK_SCHEMA_SQL_FILE = thisdir('mockfacebook.sql')
PY_HEADER = """\
# Do not edit! Generated automatically by mockfacebook.
# https://github.com/rogerhu/mockfacebook
# %s
""" % datetime.datetime.now()
SQL_HEADER = PY_HEADER.replace('#', '--')
# TODO: other object types have aliases too, e.g. name for applications?
# see: http://graph.facebook.com/FarmVille
# but not all, e.g. http://graph.facebook.com/256884317673197 (bridgy)
# maybe the "link" field?
ALIAS_FIELD = 'username'
DEFAULT_DB_FILE = thisdir('mockfacebook.db')
def get_db(filename):
"""Returns a SQLite db connection to the given file.
Also creates the mockfacebook and FQL schemas if they don't already exist.
Args:
filename: the SQLite database file
"""
conn = sqlite3.connect(filename)
for schema in MOCKFACEBOOK_SCHEMA_SQL_FILE, FQL_SCHEMA_SQL_FILE:
with open(schema) as f:
conn.executescript(f.read())
return conn
def values_to_sqlite(input):
"""Serializes Python values into a comma separated SQLite value string.
The returned string can be used in the VALUES(...) section of an INSERT
statement.
"""
output = []
for val in input:
if isinstance(val, bool):
val = str(int(val))
elif isinstance(val, basestring):
# can't use json.dumps() because SQLite doesn't support backslash escapes.
# also note that sqlite escapes 's by doubling them.
val = "'%s'" % val.replace("'", "''").encode('utf8')
# val = string_to_sqlite(val)
elif val is None:
val = 'NULL'
else:
val = json.dumps(val)
output.append(val)
return ',\n '.join(output)
class PySqlFiles(object):
"""A mixin that stores data in a Python file and a SQL file.
Subclasses must override to_sql() and py_attrs if they want SQL and Python
file output, respectively.
Attributes:
py_file: string filename
sql_file: string filename
Class attributes:
py_attrs: tuple of string attributes to store in the .py file
"""
py_attrs = ()
def __init__(self, py_file, sql_file=None):
self.py_file = py_file
self.sql_file = sql_file
def to_sql(self):
pass
def write(self, db_file=None):
"""Writes to the Python and optionally SQL and SQLite database files.
Args:
db_file: string, SQLite database filename
"""
with open(self.py_file, 'w') as f:
print >> f, PY_HEADER
data = dict((attr, getattr(self, attr)) for attr in self.py_attrs)
pprint.pprint(data, f)
self.wrote_message(self.py_file)
sql = self.to_sql()
if self.sql_file:
with open(self.sql_file, 'w') as f:
print >> f, SQL_HEADER
print >> f, sql
self.wrote_message(self.sql_file)
if db_file:
get_db(db_file).executescript(sql)
@classmethod
def read(cls):
"""Factory method.
"""
inst = cls()
with open(inst.py_file) as f:
for attr, val in eval(f.read()).items():
setattr(inst, attr, val)
return inst
def wrote_message(self, filename):
print 'Wrote %s to %s.' % (self.__class__.__name__, filename)
# A column in a table.
#
# Defined at the top level so that Column(...)'s in .py files can be eval'ed.
#
# Attributes:
# name: string
# fb_type: string FQL type
# sqlite_type: string SQLite type
# indexable: boolean
Column = collections.namedtuple(
'Column', ('name', 'fb_type', 'sqlite_type', 'indexable'))
class Schema(PySqlFiles):
"""An FQL or Graph API schema.
Attributes:
tables: dict mapping string table name to tuple of Column
"""
py_attrs = ('tables',)
def __init__(self, *args, **kwargs):
super(Schema, self).__init__(*args, **kwargs)
self.tables = {}
def get_column(self, table, column):
"""Looks up a column.
Args:
table: string
column: string
Returns: Column or None
"""
# TODO: store schema columns in ordered-dict (python 2.7), then remove this.
for col in self.tables[table]:
if col.name == column:
return col
def to_sql(self):
"""Returns the SQL CREATE TABLE statements for this schema.
"""
tables = []
# order tables alphabetically
for table, cols in sorted(self.tables.items()):
col_defs = ',\n'.join(' %s %s' % (c.name, c.sqlite_type) for c in cols)
col_names = ', '.join(c.name for c in cols)
tables.append("""
CREATE TABLE IF NOT EXISTS `%s` (
%s,
UNIQUE (%s)
);
""" % (table, col_defs, col_names))
return ''.join(tables)
def json_to_sqlite(self, object, table):
"""Serializes a JSON object into a comma separated SQLite value string.
The order of the values will match the order of the columns in the schema.
Args:
object: decoded JSON dict
table: string
Returns: string
"""
columns = self.tables[table]
values = []
for i, col in enumerate(columns):
val = object.get(col.name, '')
if isinstance(val, (list, dict)):
# store composite types as JSON strings
val = json.dumps(val)
values.append(val)
return values_to_sqlite(values)
def sqlite_to_json(self, cursor, table):
"""Converts SQLite query results to JSON result objects.
This is used in fql.py.
Args:
cursor: SQLite query cursor
table: string
Returns:
list of dicts representing JSON result objects
"""
colnames = [d[0] for d in cursor.description]
columns = [self.get_column(table, name) for name in colnames]
objects = []
for row in cursor.fetchall():
object = {}
for colname, column, val in zip(colnames, columns, row):
# by default, use the SQLite type
object[colname] = val
# ...except for a couple special cases
if column:
if val and not column.sqlite_type:
# composite types are stored as JSON strings
object[colname] = json.loads(val)
elif column.fb_type == 'bool':
object[colname] = bool(val)
objects.append(object)
return objects
class FqlSchema(Schema):
"""The FQL schema.
"""
def __init__(self):
super(FqlSchema, self).__init__(FQL_SCHEMA_PY_FILE, FQL_SCHEMA_SQL_FILE)
class GraphSchema(Schema):
"""The Graph API schema.
Attributes:
connections: dict mapping string table name to tuple of string connection
names
"""
py_attrs = ('tables', 'connections')
connections = {}
def __init__(self):
super(GraphSchema, self).__init__(GRAPH_SCHEMA_PY_FILE)
# A collection of objects for a given FQL or Graph API table.
#
# Defined at the top level so that Data(...)'s in .py files can be eval'ed.
#
# Attributes:
# query: FQL query or Graph API path used to fetch the data.
# data: decoded JSON object (usually dict or list of dicts)
Data = collections.namedtuple('Data', ('table', 'query', 'data'))
# A single Graph API connection.
#
# Attributes:
# table: table name
# id: id of the source object of this connection
# name: name of this connection
# data: decoded JSON object (usually dict or list of dicts)
# query (derived, read only): Graph API path used to fetch the data.
Connection = collections.namedtuple('Connection', ('table', 'id', 'name', 'data'))
@property
def _Connection_query(self):
return '%s/%s' % (self.id, self.name)
Connection.query = _Connection_query
class Dataset(PySqlFiles):
"""A set of FQL or Graph API example data.
Attributes:
schema: Schema
data: dict mapping string FQL table name or Graph API object id to Data
"""
py_attrs = ('data',)
def __init__(self, py_file, sql_file=None, schema=None):
super(Dataset, self).__init__(py_file, sql_file)
self.schema = schema
self.data = {}
class FqlDataset(Dataset):
"""An FQL dataset.
"""
def __init__(self, schema=None):
if not schema:
schema = FqlSchema.read()
super(FqlDataset, self).__init__(FQL_DATA_PY_FILE, FQL_DATA_SQL_FILE, schema)
def to_sql(self):
"""Returns a string with the SQL INSERT statements for this data.
"""
output = ['BEGIN TRANSACTION;\n']
# order tables alphabetically
for table, data in sorted(self.data.items()):
output.append("""
-- %s
--
-- %s
""" % (table, data.query))
columns_str = ', '.join('`%s`' % col.name
for col in self.schema.tables[table])
for object in data.data:
# order columns to match schema (which is the order in FQL docs)
try:
values_str = self.schema.json_to_sqlite(object, table)
except:
continue
output.append("""\
INSERT OR IGNORE INTO `%s` (
%s
) VALUES (
%s
);
""" % (table, columns_str, values_str))
output.append('COMMIT;')
return '\n'.join(output)
class GraphDataset(Dataset):
"""A Graph API dataset.
Attributes:
connections: list of (table name, Connection) tuples
"""
py_attrs = ('data', 'connections')
def __init__(self, schema=None):
if not schema:
schema = GraphSchema.read()
super(GraphDataset, self).__init__(GRAPH_DATA_PY_FILE, GRAPH_DATA_SQL_FILE,
schema)
self.connections = {}
def to_sql(self):
"""Generate SQL INSERT statements for the Graph API tables.
One insert per row in SQLite, unfortunately. Details:
http://stackoverflow.com/questions/1609637/
"""
output = ['BEGIN TRANSACTION;']
# objects and aliases
for data in self.data.values():
id = data.data['id']
alias = data.data.get(ALIAS_FIELD)
output.append(self.make_insert('graph_objects',
id, alias, json.dumps(data.data)))
# connections
for conn in self.connections.values():
for object in conn.data['data']:
output.append(self.make_insert('graph_connections',
conn.id, conn.name, json.dumps(object)))
output.append('COMMIT;')
return '\n'.join(output)
def make_insert(self, table, *values):
"""Generates an INSERT statement for the given table and column values.
Args:
table: string
values: string column values
Returns: string
"""
return """INSERT OR IGNORE INTO %s VALUES (\n %s\n);""" % (
table, values_to_sqlite(values))