-
Notifications
You must be signed in to change notification settings - Fork 0
/
schema.sql
559 lines (401 loc) · 16 KB
/
schema.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
-- DATABASE: imdb
-- DROP DATABASE IF EXISTS imdb;
CREATE DATABASE imdb WITH
OWNER = postgres
TEMPLATE = template0
ENCODING = 'UTF8'
CONNECTION LIMIT = 10;
-- SCHEMA: imdb_schema
-- DROP SCHEMA IF EXISTS imdb_schema;
CREATE SCHEMA imdb_schema
AUTHORIZATION postgres;
COMMENT ON SCHEMA imdb_schema
IS 'IMDb schema as used in Leis et al. (2018).';
GRANT USAGE ON SCHEMA imdb_schema TO postgres;
GRANT ALL ON SCHEMA imdb_schema TO postgres;
-- imdb_schema.char_name definition
-- Drop table
-- DROP TABLE imdb_schema.char_name;
CREATE TABLE imdb_schema.char_name (
id int4 NOT NULL,
"name" text NOT NULL,
imdb_index varchar(12) NULL,
imdb_id int4 NULL,
name_pcode_nf varchar(5) NULL,
surname_pcode varchar(5) NULL,
md5sum varchar(32) NULL,
CONSTRAINT char_name_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.char_name OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.char_name TO postgres;
-- imdb_schema.comp_cast_type definition
-- Drop table
-- DROP TABLE imdb_schema.comp_cast_type;
CREATE TABLE imdb_schema.comp_cast_type (
id int4 NOT NULL,
kind varchar(32) NOT NULL,
CONSTRAINT comp_cast_type_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.comp_cast_type OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.comp_cast_type TO postgres;
-- imdb_schema.company_name definition
-- Drop table
-- DROP TABLE imdb_schema.company_name;
CREATE TABLE imdb_schema.company_name (
id int4 NOT NULL,
"name" text NOT NULL,
country_code varchar(255) NULL,
imdb_id int4 NULL,
name_pcode_nf varchar(5) NULL,
name_pcode_sf varchar(5) NULL,
md5sum varchar(32) NULL,
CONSTRAINT company_name_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.company_name OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.company_name TO postgres;
-- imdb_schema.company_type definition
-- Drop table
-- DROP TABLE imdb_schema.company_type;
CREATE TABLE imdb_schema.company_type (
id int4 NOT NULL,
kind varchar(32) NOT NULL,
CONSTRAINT company_type_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.company_type OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.company_type TO postgres;
-- imdb_schema.info_type definition
-- Drop table
-- DROP TABLE imdb_schema.info_type;
CREATE TABLE imdb_schema.info_type (
id int4 NOT NULL,
info varchar(32) NOT NULL,
CONSTRAINT info_type_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.info_type OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.info_type TO postgres;
-- imdb_schema.keyword definition
-- Drop table
-- DROP TABLE imdb_schema.keyword;
CREATE TABLE imdb_schema.keyword (
id int4 NOT NULL,
keyword text NOT NULL,
phonetic_code varchar(5) NULL,
CONSTRAINT keyword_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.keyword OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.keyword TO postgres;
-- imdb_schema.kind_type definition
-- Drop table
-- DROP TABLE imdb_schema.kind_type;
CREATE TABLE imdb_schema.kind_type (
id int4 NOT NULL,
kind varchar(15) NOT NULL,
CONSTRAINT kind_type_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.kind_type OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.kind_type TO postgres;
-- imdb_schema.link_type definition
-- Drop table
-- DROP TABLE imdb_schema.link_type;
CREATE TABLE imdb_schema.link_type (
id int4 NOT NULL,
link varchar(32) NOT NULL,
CONSTRAINT link_type_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.link_type OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.link_type TO postgres;
-- imdb_schema."name" definition
-- Drop table
-- DROP TABLE imdb_schema."name";
CREATE TABLE imdb_schema."name" (
id int4 NOT NULL,
"name" text NOT NULL,
imdb_index varchar(12) NULL,
imdb_id int4 NULL,
gender varchar(1) NULL,
name_pcode_cf varchar(5) NULL,
name_pcode_nf varchar(5) NULL,
surname_pcode varchar(5) NULL,
md5sum varchar(32) NULL,
CONSTRAINT name_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema."name" OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema."name" TO postgres;
-- imdb_schema.role_type definition
-- Drop table
-- DROP TABLE imdb_schema.role_type;
CREATE TABLE imdb_schema.role_type (
id int4 NOT NULL,
"role" varchar(32) NOT NULL,
CONSTRAINT role_type_pkey PRIMARY KEY (id)
);
-- Permissions
ALTER TABLE imdb_schema.role_type OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.role_type TO postgres;
-- imdb_schema.aka_name definition
-- Drop table
-- DROP TABLE imdb_schema.aka_name;
CREATE TABLE imdb_schema.aka_name (
id int4 NOT NULL,
person_id int4 NOT NULL,
"name" text NOT NULL,
imdb_index varchar(12) NULL,
name_pcode_cf varchar(5) NULL,
name_pcode_nf varchar(5) NULL,
surname_pcode varchar(5) NULL,
md5sum varchar(32) NULL,
CONSTRAINT aka_name_pkey PRIMARY KEY (id),
CONSTRAINT aka_name_name_fk FOREIGN KEY (person_id) REFERENCES imdb_schema."name"(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX person_id_aka_name ON imdb_schema.aka_name USING btree (person_id);
-- Permissions
ALTER TABLE imdb_schema.aka_name OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.aka_name TO postgres;
-- imdb_schema.person_info definition
-- Drop table
-- DROP TABLE imdb_schema.person_info;
CREATE TABLE imdb_schema.person_info (
id int4 NOT NULL,
person_id int4 NOT NULL,
info_type_id int4 NOT NULL,
info text NOT NULL,
note text NULL,
CONSTRAINT person_info_pkey PRIMARY KEY (id),
CONSTRAINT person_info_info_type_fk FOREIGN KEY (info_type_id) REFERENCES imdb_schema.info_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT person_info_name_fk FOREIGN KEY (person_id) REFERENCES imdb_schema."name"(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX info_type_id_person_info ON imdb_schema.person_info USING btree (info_type_id);
CREATE INDEX person_id_person_info ON imdb_schema.person_info USING btree (person_id);
-- Permissions
ALTER TABLE imdb_schema.person_info OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.person_info TO postgres;
-- imdb_schema.title definition
-- Drop table
-- DROP TABLE imdb_schema.title;
CREATE TABLE imdb_schema.title (
id int4 NOT NULL,
title text NOT NULL,
imdb_index varchar(12) NULL,
kind_id int4 NOT NULL,
production_year int4 NULL,
imdb_id int4 NULL,
phonetic_code varchar(5) NULL,
episode_of_id int4 NULL,
season_nr int4 NULL,
episode_nr int4 NULL,
series_years varchar(49) NULL,
md5sum varchar(32) NULL,
CONSTRAINT title_pkey PRIMARY KEY (id),
CONSTRAINT title_kind_type_fk FOREIGN KEY (kind_id) REFERENCES imdb_schema.kind_type(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX kind_id_title ON imdb_schema.title USING btree (kind_id);
-- Permissions
ALTER TABLE imdb_schema.title OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.title TO postgres;
-- NOTE: Additional insert into the "title" and "kind_type" relations.
-- This is required to create foreign keys correctly in the IMDb dataset used in Leis et al. (2018).
INSERT INTO imdb_schema.kind_type (id, kind) VALUES
(7, 'episode');
INSERT INTO imdb_schema.title (id,title,imdb_index,kind_id,production_year,imdb_id,phonetic_code,episode_of_id,season_nr,episode_nr,series_years,md5sum) VALUES
(0,'*NOTE: Added to create Foreign Key in aka_title*',NULL,7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
-- imdb_schema.aka_title definition
-- Drop table
-- DROP TABLE imdb_schema.aka_title;
CREATE TABLE imdb_schema.aka_title (
id int4 NOT NULL,
movie_id int4 NOT NULL,
title text NOT NULL,
imdb_index varchar(12) NULL,
kind_id int4 NOT NULL,
production_year int4 NULL,
phonetic_code varchar(5) NULL,
episode_of_id int4 NULL,
season_nr int4 NULL,
episode_nr int4 NULL,
note text NULL,
md5sum varchar(32) NULL,
CONSTRAINT aka_title_pkey PRIMARY KEY (id),
CONSTRAINT aka_title_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX kind_id_aka_title ON imdb_schema.aka_title USING btree (kind_id);
CREATE INDEX movie_id_aka_title ON imdb_schema.aka_title USING btree (movie_id);
-- Permissions
ALTER TABLE imdb_schema.aka_title OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.aka_title TO postgres;
-- imdb_schema.cast_info definition
-- Drop table
-- DROP TABLE imdb_schema.cast_info;
CREATE TABLE imdb_schema.cast_info (
id int4 NOT NULL,
person_id int4 NOT NULL,
movie_id int4 NOT NULL,
person_role_id int4 NULL,
note text NULL,
nr_order int4 NULL,
role_id int4 NOT NULL,
CONSTRAINT cast_info_pkey PRIMARY KEY (id),
CONSTRAINT cast_info_char_name_fk FOREIGN KEY (person_role_id) REFERENCES imdb_schema.char_name(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT cast_info_name_fk FOREIGN KEY (person_id) REFERENCES imdb_schema."name"(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT cast_info_role_type_fk FOREIGN KEY (role_id) REFERENCES imdb_schema.role_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT cast_info_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX movie_id_cast_info ON imdb_schema.cast_info USING btree (movie_id);
CREATE INDEX person_id_cast_info ON imdb_schema.cast_info USING btree (person_id);
CREATE INDEX person_role_id_cast_info ON imdb_schema.cast_info USING btree (person_role_id);
CREATE INDEX role_id_cast_info ON imdb_schema.cast_info USING btree (role_id);
-- Permissions
ALTER TABLE imdb_schema.cast_info OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.cast_info TO postgres;
-- imdb_schema.complete_cast definition
-- Drop table
-- DROP TABLE imdb_schema.complete_cast;
CREATE TABLE imdb_schema.complete_cast (
id int4 NOT NULL,
movie_id int4 NULL,
subject_id int4 NOT NULL,
status_id int4 NOT NULL,
CONSTRAINT complete_cast_pkey PRIMARY KEY (id),
CONSTRAINT complete_cast_comp_cast_type_fk FOREIGN KEY (subject_id) REFERENCES imdb_schema.comp_cast_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT complete_cast_status_id_comp_cast_type_fk FOREIGN KEY (status_id) REFERENCES imdb_schema.comp_cast_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT complete_cast_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX movie_id_complete_cast ON imdb_schema.complete_cast USING btree (movie_id);
-- Permissions
ALTER TABLE imdb_schema.complete_cast OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.complete_cast TO postgres;
-- imdb_schema.movie_companies definition
-- Drop table
-- DROP TABLE imdb_schema.movie_companies;
CREATE TABLE imdb_schema.movie_companies (
id int4 NOT NULL,
movie_id int4 NOT NULL,
company_id int4 NOT NULL,
company_type_id int4 NOT NULL,
note text NULL,
CONSTRAINT movie_companies_pkey PRIMARY KEY (id),
CONSTRAINT movie_companies_company_name_fk FOREIGN KEY (company_id) REFERENCES imdb_schema.company_name(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT movie_companies_company_type_fk FOREIGN KEY (company_type_id) REFERENCES imdb_schema.company_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT movie_companies_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX company_id_movie_companies ON imdb_schema.movie_companies USING btree (company_id);
CREATE INDEX company_type_id_movie_companies ON imdb_schema.movie_companies USING btree (company_type_id);
CREATE INDEX movie_id_movie_companies ON imdb_schema.movie_companies USING btree (movie_id);
-- Permissions
ALTER TABLE imdb_schema.movie_companies OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.movie_companies TO postgres;
-- imdb_schema.movie_info definition
-- Drop table
-- DROP TABLE imdb_schema.movie_info;
CREATE TABLE imdb_schema.movie_info (
id int4 NOT NULL,
movie_id int4 NOT NULL,
info_type_id int4 NOT NULL,
info text NOT NULL,
note text NULL,
CONSTRAINT movie_info_pkey PRIMARY KEY (id),
CONSTRAINT movie_info_info_type_fk FOREIGN KEY (info_type_id) REFERENCES imdb_schema.info_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT movie_info_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX info_type_id_movie_info ON imdb_schema.movie_info USING btree (info_type_id);
CREATE INDEX movie_id_movie_info ON imdb_schema.movie_info USING btree (movie_id);
-- Permissions
ALTER TABLE imdb_schema.movie_info OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.movie_info TO postgres;
-- imdb_schema.movie_info_idx definition
-- Drop table
-- DROP TABLE imdb_schema.movie_info_idx;
CREATE TABLE imdb_schema.movie_info_idx (
id int4 NOT NULL,
movie_id int4 NOT NULL,
info_type_id int4 NOT NULL,
info text NOT NULL,
note text NULL,
CONSTRAINT movie_info_idx_pkey PRIMARY KEY (id),
CONSTRAINT movie_info_idx_info_type_fk FOREIGN KEY (info_type_id) REFERENCES imdb_schema.info_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT movie_info_idx_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX info_type_id_movie_info_idx ON imdb_schema.movie_info_idx USING btree (info_type_id);
CREATE INDEX movie_id_movie_info_idx ON imdb_schema.movie_info_idx USING btree (movie_id);
-- Permissions
ALTER TABLE imdb_schema.movie_info_idx OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.movie_info_idx TO postgres;
-- imdb_schema.movie_keyword definition
-- Drop table
-- DROP TABLE imdb_schema.movie_keyword;
CREATE TABLE imdb_schema.movie_keyword (
id int4 NOT NULL,
movie_id int4 NOT NULL,
keyword_id int4 NOT NULL,
CONSTRAINT movie_keyword_pkey PRIMARY KEY (id),
CONSTRAINT movie_keyword_keyword_fk FOREIGN KEY (keyword_id) REFERENCES imdb_schema.keyword(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT movie_keyword_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX keyword_id_movie_keyword ON imdb_schema.movie_keyword USING btree (keyword_id);
CREATE INDEX movie_id_movie_keyword ON imdb_schema.movie_keyword USING btree (movie_id);
-- Permissions
ALTER TABLE imdb_schema.movie_keyword OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.movie_keyword TO postgres;
-- imdb_schema.movie_link definition
-- Drop table
-- DROP TABLE imdb_schema.movie_link;
CREATE TABLE imdb_schema.movie_link (
id int4 NOT NULL,
movie_id int4 NOT NULL,
linked_movie_id int4 NOT NULL,
link_type_id int4 NOT NULL,
CONSTRAINT movie_link_pkey PRIMARY KEY (id),
CONSTRAINT movie_link_link_type_fk FOREIGN KEY (link_type_id) REFERENCES imdb_schema.link_type(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT movie_link_linked_movie_id_title_fk FOREIGN KEY (linked_movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT movie_link_title_fk FOREIGN KEY (movie_id) REFERENCES imdb_schema.title(id) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE INDEX link_type_id_movie_link ON imdb_schema.movie_link USING btree (link_type_id);
CREATE INDEX linked_movie_id_movie_link ON imdb_schema.movie_link USING btree (linked_movie_id);
CREATE INDEX movie_id_movie_link ON imdb_schema.movie_link USING btree (movie_id);
-- Permissions
ALTER TABLE imdb_schema.movie_link OWNER TO postgres;
GRANT ALL ON TABLE imdb_schema.movie_link TO postgres;
-- Permissions
GRANT ALL ON SCHEMA imdb_schema TO postgres;
-- Data import
-- Recommended order of importing the CSV data, one-by-one,
-- to avoid referential integrity constraint errors:
-- company_name (234,997 rows)
-- company_type (4 rows)
-- keyword (134,170 rows)
-- info_type (113 rows)
-- role_type (12 rows)
-- name (4,167,491 rows)
-- kind_type (7 rows)
-- person_info (2,963,664 rows)
-- link_type (18 rows)
-- comp_cast_type (4 rows)
-- char_name (3,140,339 rows)
-- aka_name (901,343 rows)
-- title (2,528,313 rows)
-- movie_keyword (4,523,930 rows)
-- movie_info_idx (1,380,035 rows)
-- aka_title (361,472 rows)
-- complete_cast (135,086 rows)
-- movie_link (29,997 rows)
-- movie_companies (2,609,129 rows)
-- movie_info (14,835,720 rows)
-- cast_info (36,244,344 rows)
-- Used to record the training data from the query workload
CREATE TABLE query_log (
id SERIAL PRIMARY KEY,
query_text TEXT,
qep TEXT,
actual_rows INTEGER,
estimated_rows INTEGER,
q_error INTEGER,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);