Skip to content

Commit d236c8f

Browse files
committed
new features
1 parent 4abecbf commit d236c8f

File tree

1 file changed

+172
-79
lines changed

1 file changed

+172
-79
lines changed

sql/index_advisor.py

Lines changed: 172 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,177 @@
1-
import mysql.connector
21
import re
3-
import os
4-
5-
def extract_columns_from_filter(filter_text):
    """Return every ``table.column`` reference found in *filter_text*.

    Each hit is reported as a ``(table, column)`` tuple, in order of
    appearance. Both parts must start with a letter or an underscore, so
    plain decimal literals such as ``1.5`` are not reported.
    """
    qualified_name = r'([a-zA-Z_][\w]*)\.([a-zA-Z_][\w]*)'
    return re.findall(qualified_name, filter_text)
9-
10-
def calculate_cardinality(cursor, table_name, column_name):
    """Return the ratio of distinct values to total rows for one column.

    Args:
        cursor: An open DB-API cursor; it is used to run one SELECT.
        table_name: Table to inspect.
        column_name: Column whose distinct-value ratio is wanted.

    Returns:
        ``COUNT(DISTINCT column) / COUNT(*)`` as delivered by the driver, or
        ``0`` when no row comes back or the ratio is NULL (an empty table
        makes the divisor zero, which MySQL reports as NULL).
    """
    def quote(identifier):
        # Backtick-quote so reserved words or unusual names cannot break
        # (or alter) the statement; embedded backticks are doubled.
        return "`{}`".format(str(identifier).replace("`", "``"))

    query = "SELECT COUNT(DISTINCT {})/COUNT(*) AS cardinality FROM {}".format(
        quote(column_name), quote(table_name)
    )
    cursor.execute(query)
    result = cursor.fetchone()
    # A NULL ratio must not leak out as None: callers sort on this value.
    if not result or result[0] is None:
        return 0
    return result[0]
15-
16-
def index_advisor(sql_query, database_ip, database_port, database_name, username, password):
    """Print ``create index`` suggestions for the columns a query filters on.

    Runs ``EXPLAIN FORMAT=tree`` on *sql_query*, collects every
    ``table.column`` reference in the plan text, drops columns that already
    appear in an index of their table, ranks the remaining columns of each
    table by distinct-value ratio (highest first) and prints one
    ``create index`` statement per column. The statements are only printed,
    never executed.

    Args:
        sql_query: The statement to analyse.
        database_ip: Server host.
        database_port: Server port.
        database_name: Default schema for the connection.
        username: Login user.
        password: Login password.
    """
    conn = mysql.connector.connect(
        host=database_ip,
        port=database_port,
        user=username,
        passwd=password,
        database=database_name,
    )
    try:
        cursor = conn.cursor()
        try:
            def find_indexes(table_name):
                """Return the column names that appear in any index of *table_name*."""
                cursor.execute("SHOW INDEX FROM {}".format(table_name))
                # The fifth field of each row is the one the advisor compares
                # against (Column_name in MySQL's SHOW INDEX output).
                return {row[4] for row in cursor.fetchall()}

            cursor.execute("EXPLAIN FORMAT=tree " + sql_query)
            plan_text = "\n".join(str(row[0]) for row in cursor.fetchall())

            # Group columns under the table they were qualified with, so each
            # column is checked against its own table rather than whichever
            # table happened to be mentioned last.
            columns_by_table = {}
            for table, column in extract_columns_from_filter(plan_text):
                known = columns_by_table.setdefault(table, [])
                if column not in known:
                    known.append(column)

            for table, columns in columns_by_table.items():
                try:
                    indexed = find_indexes(table)
                    candidates = [
                        (column, calculate_cardinality(cursor, table, column))
                        for column in columns
                        if column not in indexed
                    ]
                except mysql.connector.Error as exc:
                    # NOTE(review): the plan text may qualify columns with an
                    # alias instead of a real table name; skip such entries
                    # instead of aborting the whole run.
                    print("skipping {}: {}".format(table, exc))
                    continue

                candidates.sort(key=lambda item: item[1], reverse=True)
                for column, _cardinality in candidates:
                    hex_str = os.urandom(4).hex()
                    print("create index ix_auto_{} on {}({})".format(hex_str, table, column))
        finally:
            cursor.close()
    finally:
        # Always release the connection, including on a failed query.
        conn.close()
72-
73-
# Interactive entry point: collect the statement and the connection settings,
# then print index suggestions for that statement.
import getpass  # local to this script section; used only for the password prompt

sql_query = input("SQL Query:")
database_ip = input("Database ip: ")
database_port = input("Database port: ")
database_name = input("Database name: ")
username = input("Username: ")
# getpass keeps the password from being echoed to the terminal.
password = getpass.getpass("Password: ")

index_advisor(sql_query, database_ip, database_port, database_name, username, password)
2+
import mysql.connector
3+
from mysql.connector import Error
4+
from datetime import datetime
5+
6+
def fetch_sample_value(connection, table, column):
    """Fetch one non-NULL sample value from ``table.column``.

    Args:
        connection: Open database connection; a cursor is created from it
            and closed again before returning.
        table: Table name (resolved against the connection's default schema).
        column: Column name.

    Returns:
        The first non-NULL value the server returns, or ``None`` when the
        column has no non-NULL value or the lookup fails with a connector
        ``Error`` (which is reported on stdout).
    """
    def quote(identifier):
        # Backtick-quote identifiers; they originate from parsed query text
        # and may be reserved words. Embedded backticks are doubled.
        return "`" + str(identifier).replace("`", "``") + "`"

    cursor = None
    try:
        cursor = connection.cursor()
        sample_query = (
            f"SELECT {quote(column)} FROM {quote(table)} "
            f"WHERE {quote(column)} IS NOT NULL LIMIT 1"
        )
        cursor.execute(sample_query)
        result = cursor.fetchone()
        return result[0] if result else None
    except Error as e:
        print(f"Error fetching sample value for {table}.{column}: {e}")
        return None
    finally:
        # Release the cursor on every path so repeated lookups do not pile
        # up open cursors on the shared connection.
        if cursor is not None:
            cursor.close()
17+
18+
def get_column_data_type(connection, table, column):
    """Retrieve the data type of a column in the connection's default schema.

    Args:
        connection: Open database connection; a cursor is created from it
            and closed again before returning.
        table: Table name.
        column: Column name.

    Returns:
        The ``DATA_TYPE`` string from ``INFORMATION_SCHEMA.COLUMNS``, or
        ``None`` when the column is unknown or the lookup fails with a
        connector ``Error`` (which is reported on stdout).
    """
    cursor = None
    try:
        cursor = connection.cursor()
        # Restrict to the current schema: without it a same-named table in
        # another schema could answer instead (or return several rows).
        cursor.execute(
            """
            SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS
            WHERE TABLE_SCHEMA = DATABASE()
              AND TABLE_NAME = %s AND COLUMN_NAME = %s
            LIMIT 1
            """, (table, column))
        result = cursor.fetchone()
        return result[0] if result else None
    except Error as e:
        print(f"Error fetching data type for {table}.{column}: {e}")
        return None
    finally:
        if cursor is not None:
            cursor.close()
32+
33+
def replace_query_placeholders(connection, query):
    """Replace ``?`` placeholders with concrete values so the query is complete.

    The text is first normalised (backticks and single quotes removed, spaces
    around dots removed). Each placeholder is then replaced as follows:

    * directly after ``LIMIT`` (any case): ``10``;
    * directly after ``<column> =``: a sample value of that column taken from
      the first table named after ``FROM``; values that are not numbers are
      emitted as quoted SQL string literals. When no sample is available the
      comparison is rewritten to ``<column> IS NULL``;
    * anywhere else: ``NULL``.

    Args:
        connection: Open database connection used for the sample lookups.
        query: Statement text containing ``?`` placeholders.

    Returns:
        The rewritten statement. A connector ``Error`` raised during
        replacement is reported on stdout and the normalised text is
        returned as it stood at that point.
    """
    import numbers

    try:
        query = query.replace("`", "").replace("'", "")
        query = re.sub(r"\s*\.\s*", ".", query)  # Remove spaces around dots

        if "?" not in query:
            return query

        query_parts = query.split("?")

        # Only the first FROM table is consulted for samples; look it up once.
        # NOTE(review): in multi-table queries a column may belong to another
        # table, in which case the lookup simply yields no sample.
        table_match = re.search(r"FROM\s+([a-zA-Z0-9_]+)", query, re.IGNORECASE)
        table = table_match.group(1) if table_match else None

        pieces = [query_parts[0]]
        for prev_part, next_part in zip(query_parts, query_parts[1:]):
            prev_text = prev_part.strip()
            replacement = "NULL"

            if re.search(r"\bLIMIT$", prev_text, re.IGNORECASE):
                replacement = "10"
            else:
                equality = re.search(r"([a-zA-Z0-9_]+)\s*=\s*$", prev_text)
                if equality:
                    column = equality.group(1)
                    sample_value = None
                    data_type = None
                    if table:
                        sample_value = fetch_sample_value(connection, table, column)
                        data_type = get_column_data_type(connection, table, column)

                    if sample_value is None:
                        # "col = IS NULL" is not valid SQL: drop the trailing
                        # "=" from the text already emitted and use IS NULL.
                        pieces[-1] = re.sub(r"\s*=\s*$", "", pieces[-1])
                        replacement = " IS NULL"
                    elif (data_type in ["char", "varchar", "text"]
                          or not isinstance(sample_value, numbers.Number)):
                        # Dates, enums etc. also need quoting; embedded
                        # quotes are doubled to keep the literal intact.
                        escaped = str(sample_value).replace("'", "''")
                        replacement = f"'{escaped}'"
                    else:
                        replacement = str(sample_value)

            pieces.append(replacement)
            pieces.append(next_part)

        return "".join(pieces)
    except Error as e:
        print(f"Error replacing query placeholders: {e}")
        return query
79+
80+
def fetch_top_queries(connection, database_name):
    """Fetch the 10 SELECT digests with the highest total wait time.

    Reads ``performance_schema.events_statements_summary_by_digest`` for
    *database_name*, skipping statements that mention ``performance_schema``
    or ``INFORMATION_SCHEMA``, ordered by ``SUM_TIMER_WAIT`` descending.
    Each digest text is passed through ``replace_query_placeholders``.

    Args:
        connection: Open database connection.
        database_name: Schema whose statements are wanted.

    Returns:
        A list of statement strings; empty when nothing matches or when the
        lookup fails with a connector ``Error`` (reported on stdout).
    """
    query = """
        SELECT DIGEST_TEXT
        FROM performance_schema.events_statements_summary_by_digest
        WHERE SCHEMA_NAME = %s
        AND UPPER(DIGEST_TEXT) LIKE 'SELECT%%'
        AND DIGEST_TEXT NOT LIKE '%%performance_schema%%'
        AND DIGEST_TEXT NOT LIKE '%%INFORMATION_SCHEMA%%'
        ORDER BY SUM_TIMER_WAIT DESC
        LIMIT 10;
    """
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(query, (database_name,))
        rows = cursor.fetchall()
    except Error as e:
        print(f"Error fetching top queries: {e}")
        return []
    finally:
        # Close before the per-row lookups below open their own cursors on
        # the same connection.
        if cursor is not None:
            cursor.close()

    return [replace_query_placeholders(connection, row[0]) for row in rows]
99+
100+
def is_column_indexed(connection, table, column):
    """Check whether a column takes part in any index of its table.

    Only the connection's default schema is considered, so a same-named
    table elsewhere on the server cannot produce a false positive.

    NOTE(review): a column counts as indexed even when it is only a
    non-leading part of a composite index; confirm that is the intent.

    Args:
        connection: Open database connection; a cursor is created from it
            and closed again before returning.
        table: Table name.
        column: Column name.

    Returns:
        ``True`` when ``INFORMATION_SCHEMA.STATISTICS`` lists the column,
        otherwise ``False`` (also when the lookup fails with a connector
        ``Error``, which is reported on stdout).
    """
    cursor = None
    try:
        cursor = connection.cursor()
        query = """
            SELECT COUNT(*)
            FROM INFORMATION_SCHEMA.STATISTICS
            WHERE TABLE_SCHEMA = DATABASE()
            AND TABLE_NAME = %s
            AND COLUMN_NAME = %s
        """
        cursor.execute(query, (table, column))
        return cursor.fetchone()[0] > 0
    except Error as e:
        print(f"Index check error: {e}")
        return False
    finally:
        if cursor is not None:
            cursor.close()
115+
116+
def suggest_missing_indexes(connection, query):
    """Print a ``CREATE INDEX`` suggestion per table for unindexed columns.

    Table aliases are resolved from the ``FROM``/``JOIN`` clauses, every
    ``qualifier.column`` reference in the statement is mapped to its table,
    and for each table the referenced columns that ``is_column_indexed``
    reports as unindexed are combined into one suggested index. Unqualified
    column names are not analysed.

    Args:
        connection: Open database connection used for the index checks.
        query: Statement text to analyse.

    Returns:
        None. Suggestions (and connector errors) are written to stdout.
    """
    try:
        # Blank out string literals first so dotted text inside them
        # (e.g. an e-mail address) is not mistaken for table.column.
        scan_text = re.sub(r"'(?:[^'\\]|\\.|'')*'", "''", query)

        # Words that may follow a table name but are never its alias. The
        # lookahead keeps them from being consumed, so in "FROM a JOIN b x"
        # the JOIN is still seen as the start of the next table reference.
        non_alias = (
            "WHERE|JOIN|INNER|LEFT|RIGHT|CROSS|NATURAL|STRAIGHT_JOIN|ON|USING|"
            "GROUP|ORDER|HAVING|LIMIT|UNION|WINDOW|FOR|USE|FORCE|IGNORE|"
            "PARTITION|AS"
        )
        alias_pattern = (
            r"\b(?:FROM|JOIN|STRAIGHT_JOIN)\s+(\w+)"
            r"(?:\s+(?:AS\s+)?(?!(?:" + non_alias + r")\b)(\w+))?"
        )
        alias_mapping = {}
        for table, alias in re.findall(alias_pattern, scan_text, re.IGNORECASE):
            alias_mapping[alias or table] = table

        # Identifiers must start with a letter/underscore, which excludes
        # decimal literals such as 1.5.
        column_pattern = r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)"
        table_column_map = {}
        for qualifier, column in re.findall(column_pattern, scan_text):
            table = alias_mapping.get(qualifier, qualifier)
            columns = table_column_map.setdefault(table, [])
            if column not in columns:  # Avoid duplicate column entries
                columns.append(column)

        for table, columns in table_column_map.items():
            missing_columns = [col for col in columns if not is_column_indexed(connection, table, col)]
            if missing_columns:
                # Digits only: "-" and ":" from an ISO timestamp are not
                # allowed in an unquoted identifier.
                suffix = "_" + datetime.now().strftime("%Y%m%d%H%M")
                base = f"idx_{table}_" + "_".join(missing_columns)
                # MySQL identifiers are limited to 64 characters.
                index_name = base[:64 - len(suffix)] + suffix
                print(
                    f"Suggested Index for {table}: CREATE INDEX {index_name} ON {table}({', '.join(missing_columns)});\n")
    except Error as e:
        print(f"Error suggesting missing indexes: {e}")
144+
145+
def main(db_config=None):
    """Connect to the database and print index suggestions for its top queries.

    Args:
        db_config: Optional mapping of keyword arguments for
            ``mysql.connector.connect``. When omitted, the built-in local
            development settings below are used.
    """
    if db_config is None:
        # Default database configuration.
        db_config = {
            "host": "localhost",
            "port": "3309",
            "database": "classicmodels",
            "user": "root",
            "password": "1234",
        }

    # Bind the name up front so the cleanup below is safe even when
    # connect() itself raises.
    connection = None
    try:
        connection = mysql.connector.connect(**db_config)
        if connection.is_connected():
            print("Fetching top SELECT queries...")
            top_queries = fetch_top_queries(connection, db_config["database"])
            if not top_queries:
                print("No SELECT queries found.")
                return

            for idx, query in enumerate(top_queries, 1):
                print(f"\n=== Query {idx} ===\n{query}\n")
                suggest_missing_indexes(connection, query)

    except Error as e:
        print(f"Database error: {e}")
    finally:
        if connection is not None and connection.is_connected():
            connection.close()
            print("MySQL connection closed.")


if __name__ == "__main__":
    main()
84177

0 commit comments

Comments
 (0)