-
Notifications
You must be signed in to change notification settings - Fork 0
/
createRedShiftDB.py
80 lines (77 loc) · 2.31 KB
/
createRedShiftDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
"""
Created on Sat Sept 13 2020
@author: gari.ciodaro.guerra
DAG of AirFlow tables schemas for Arxiv database.
Run on demand.
"""
import datetime
from airflow.operators.postgres_operator import PostgresOperator
from airflow import DAG
from airflow.utils.dates import days_ago
args = {
'owner': 'arXiv-haystack-app',
'start_date': days_ago(2),
'catchup': False,
'depends_on_past':False
}
#Default configuration for DAG
#prevent parallelized process with max_active_runs
dag = DAG('create_tables_redshift',
default_args=args,
description='create tables on redshift',
max_active_runs=1,
schedule_interval=None
)
# set of queries for start schema.
create_table = PostgresOperator(
task_id="create_tables",
postgres_conn_id="redshift",
dag=dag,
sql=("""
BEGIN;
DROP TABLE IF EXISTS public.papers;
CREATE TABLE public.papers (
id varchar(256) NOT NULL,
title varchar(256) NOT NULL,
categories varchar(256) NULL,
doi varchar(256) NULL,
comments varchar(256) NULL,
journalref varchar(256) NULL,
license varchar(256) NULL,
reportno varchar(256) NULL,
submitter varchar(256) NULL,
updatedate varchar(256) NULL
) diststyle even;
DROP TABLE IF EXISTS public.authors;
CREATE TABLE public.authors (
id varchar(256) NOT NULL,
author varchar(256) NOT NULL
) diststyle all;
DROP TABLE IF EXISTS public.abstracts;
CREATE TABLE public.abstracts (
id varchar(256) NOT NULL PRIMARY KEY,
abstract varchar(2048) NOT NULL,
origin varchar(256) NOT NULL
) diststyle auto;
DROP TABLE IF EXISTS public.categories;
CREATE TABLE public.categories (
id varchar(256) NOT NULL,
category varchar(256) NOT NULL
) diststyle all;
DROP TABLE IF EXISTS public.versions;
CREATE TABLE public.versions (
id varchar(256) NOT NULL,
created varchar(256) NOT NULL,
version varchar(10) NOT NULL,
month varchar(10) NOT NULL,
year varchar(10) NOT NULL
) diststyle all;
DROP TABLE IF EXISTS public.titles;
CREATE TABLE public.titles (
id varchar(256) NOT NULL PRIMARY KEY,
title varchar(256) NOT NULL
) diststyle all;
COMMIT;
"""),
)