/
pgsql.py
345 lines (323 loc) · 11.8 KB
/
pgsql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# -*- coding: UTF-8 -*-
"""
@author: hhyo、yyukai
@license: Apache Licence
@file: pgsql.py
@time: 2019/03/29
"""
import re
import psycopg2
import logging
import traceback
import sqlparse
from common.config import SysConfig
from common.utils.timer import FuncTimer
from sql.utils.sql_utils import get_syntax_type
from . import EngineBase
from .models import ResultSet, ReviewSet, ReviewResult
from sql.utils.data_masking import simple_column_mask
__author__ = "hhyo、yyukai"
logger = logging.getLogger("default")
class PgSQLEngine(EngineBase):
test_query = "SELECT 1"
def get_connection(self, db_name=None):
db_name = db_name or self.db_name or "postgres"
if self.conn:
return self.conn
self.conn = psycopg2.connect(
host=self.host,
port=self.port,
user=self.user,
password=self.password,
dbname=db_name,
connect_timeout=10,
)
return self.conn
@property
def name(self):
return "PgSQL"
@property
def info(self):
return "PgSQL engine"
def get_all_databases(self):
"""
获取数据库列表
:return:
"""
result = self.query(sql=f"SELECT datname FROM pg_database;")
db_list = [
row[0]
for row in result.rows
if row[0] not in ["postgres", "template0", "template1"]
]
result.rows = db_list
return result
def get_all_schemas(self, db_name, **kwargs):
"""
获取模式列表
:return:
"""
result = self.query(
db_name=db_name, sql=f"select schema_name from information_schema.schemata;"
)
schema_list = [
row[0]
for row in result.rows
if row[0]
not in [
"information_schema",
"pg_catalog",
"pg_toast_temp_1",
"pg_temp_1",
"pg_toast",
]
]
result.rows = schema_list
return result
def get_all_tables(self, db_name, **kwargs):
"""
获取表列表
:param db_name:
:param schema_name:
:return:
"""
schema_name = kwargs.get("schema_name")
sql = f"""SELECT table_name
FROM information_schema.tables
where table_schema ='{schema_name}';"""
result = self.query(db_name=db_name, sql=sql)
tb_list = [row[0] for row in result.rows if row[0] not in ["test"]]
result.rows = tb_list
return result
def get_all_columns_by_tb(self, db_name, tb_name, **kwargs):
"""
获取字段列表
:param db_name:
:param tb_name:
:param schema_name:
:return:
"""
schema_name = kwargs.get("schema_name")
sql = f"""SELECT column_name
FROM information_schema.columns
where table_name='{tb_name}'
and table_schema ='{schema_name}';"""
result = self.query(db_name=db_name, sql=sql)
column_list = [row[0] for row in result.rows]
result.rows = column_list
return result
def describe_table(self, db_name, tb_name, **kwargs):
"""
获取表结构信息
:param db_name:
:param tb_name:
:param schema_name:
:return:
"""
schema_name = kwargs.get("schema_name")
sql = f"""select
col.column_name,
col.data_type,
col.character_maximum_length,
col.numeric_precision,
col.numeric_scale,
col.is_nullable,
col.column_default,
des.description
from
information_schema.columns col left join pg_description des on
col.table_name::regclass = des.objoid
and col.ordinal_position = des.objsubid
where table_name = '{tb_name}'
and col.table_schema = '{schema_name}'
order by ordinal_position;"""
result = self.query(db_name=db_name, schema_name=schema_name, sql=sql)
return result
def query_check(self, db_name=None, sql=""):
# 查询语句的检查、注释去除、切分
result = {"msg": "", "bad_query": False, "filtered_sql": sql, "has_star": False}
# 删除注释语句,进行语法判断,执行第一条有效sql
try:
sql = sqlparse.format(sql, strip_comments=True)
sql = sqlparse.split(sql)[0]
result["filtered_sql"] = sql.strip()
except IndexError:
result["bad_query"] = True
result["msg"] = "没有有效的SQL语句"
if re.match(r"^select", sql, re.I) is None:
result["bad_query"] = True
result["msg"] = "不支持的查询语法类型!"
if "*" in sql:
result["has_star"] = True
result["msg"] = "SQL语句中含有 * "
return result
def query(self, db_name=None, sql="", limit_num=0, close_conn=True, **kwargs):
"""返回 ResultSet"""
schema_name = kwargs.get("schema_name")
result_set = ResultSet(full_sql=sql)
try:
conn = self.get_connection(db_name=db_name)
max_execution_time = kwargs.get("max_execution_time", 0)
cursor = conn.cursor()
try:
cursor.execute(f"SET statement_timeout TO {max_execution_time};")
except:
pass
if schema_name:
cursor.execute(f"SET search_path TO {schema_name};")
cursor.execute(sql)
effect_row = cursor.rowcount
if int(limit_num) > 0:
rows = cursor.fetchmany(size=int(limit_num))
else:
rows = cursor.fetchall()
fields = cursor.description
result_set.column_list = [i[0] for i in fields] if fields else []
result_set.rows = rows
result_set.affected_rows = effect_row
except Exception as e:
logger.warning(f"PgSQL命令执行报错,语句:{sql}, 错误信息:{traceback.format_exc()}")
result_set.error = str(e)
finally:
if close_conn:
self.close()
return result_set
def filter_sql(self, sql="", limit_num=0):
# 对查询sql增加limit限制,# TODO limit改写待优化
sql_lower = sql.lower().rstrip(";").strip()
if re.match(r"^select", sql_lower):
if re.search(r"limit\s+(\d+)$", sql_lower) is None:
if re.search(r"limit\s+\d+\s*,\s*(\d+)$", sql_lower) is None:
return f"{sql.rstrip(';')} limit {limit_num};"
return f"{sql.rstrip(';')};"
def query_masking(self, db_name=None, sql="", resultset=None):
"""简单字段脱敏规则, 仅对select有效"""
if re.match(r"^select", sql, re.I):
filtered_result = simple_column_mask(self.instance, resultset)
filtered_result.is_masked = True
else:
filtered_result = resultset
return filtered_result
def execute_check(self, db_name=None, sql=""):
"""上线单执行前的检查, 返回Review set"""
config = SysConfig()
check_result = ReviewSet(full_sql=sql)
# 禁用/高危语句检查
line = 1
critical_ddl_regex = config.get("critical_ddl_regex", "")
p = re.compile(critical_ddl_regex)
check_result.syntax_type = 2 # TODO 工单类型 0、其他 1、DDL,2、DML
for statement in sqlparse.split(sql):
statement = sqlparse.format(statement, strip_comments=True)
# 禁用语句
if re.match(r"^select", statement.lower()):
result = ReviewResult(
id=line,
errlevel=2,
stagestatus="驳回不支持语句",
errormessage="仅支持DML和DDL语句,查询语句请使用SQL查询功能!",
sql=statement,
)
# 高危语句
elif critical_ddl_regex and p.match(statement.strip().lower()):
result = ReviewResult(
id=line,
errlevel=2,
stagestatus="驳回高危SQL",
errormessage="禁止提交匹配" + critical_ddl_regex + "条件的语句!",
sql=statement,
)
# 正常语句
else:
result = ReviewResult(
id=line,
errlevel=0,
stagestatus="Audit completed",
errormessage="None",
sql=statement,
affected_rows=0,
execute_time=0,
)
# 判断工单类型
if get_syntax_type(statement) == "DDL":
check_result.syntax_type = 1
check_result.rows += [result]
line += 1
# 统计警告和错误数量
for r in check_result.rows:
if r.errlevel == 1:
check_result.warning_count += 1
if r.errlevel == 2:
check_result.error_count += 1
return check_result
def execute_workflow(self, workflow, close_conn=True):
"""执行上线单,返回Review set"""
sql = workflow.sqlworkflowcontent.sql_content
execute_result = ReviewSet(full_sql=sql)
# 删除注释语句,切分语句,将切换CURRENT_SCHEMA语句增加到切分结果中
sql = sqlparse.format(sql, strip_comments=True)
split_sql = sqlparse.split(sql)
line = 1
statement = None
db_name = workflow.db_name
try:
conn = self.get_connection(db_name=db_name)
cursor = conn.cursor()
# 逐条执行切分语句,追加到执行结果中
for statement in split_sql:
statement = statement.rstrip(";")
with FuncTimer() as t:
cursor.execute(statement)
conn.commit()
execute_result.rows.append(
ReviewResult(
id=line,
errlevel=0,
stagestatus="Execute Successfully",
errormessage="None",
sql=statement,
affected_rows=cursor.rowcount,
execute_time=t.cost,
)
)
line += 1
except Exception as e:
logger.warning(
f"PGSQL命令执行报错,语句:{statement or sql}, 错误信息:{traceback.format_exc()}"
)
execute_result.error = str(e)
# 追加当前报错语句信息到执行结果中
execute_result.rows.append(
ReviewResult(
id=line,
errlevel=2,
stagestatus="Execute Failed",
errormessage=f"异常信息:{e}",
sql=statement or sql,
affected_rows=0,
execute_time=0,
)
)
line += 1
# 报错语句后面的语句标记为审核通过、未执行,追加到执行结果中
for statement in split_sql[line - 1 :]:
execute_result.rows.append(
ReviewResult(
id=line,
errlevel=0,
stagestatus="Audit completed",
errormessage=f"前序语句失败, 未执行",
sql=statement,
affected_rows=0,
execute_time=0,
)
)
line += 1
finally:
if close_conn:
self.close()
return execute_result
def close(self):
if self.conn:
self.conn.close()
self.conn = None