|
1 | | -import mysql.connector |
2 | 1 | import re |
3 | | -import os |
4 | | - |
def extract_columns_from_filter(filter_text):
    """Return every (table, column) pair written as a dotted reference in *filter_text*."""
    dotted_ref = r'([a-zA-Z_][\w]*)\.([a-zA-Z_][\w]*)'
    return re.findall(dotted_ref, filter_text)
9 | | - |
def calculate_cardinality(cursor, table_name, column_name):
    """Return COUNT(DISTINCT column)/COUNT(*) for a column (selectivity in 0..1).

    Args:
        cursor: an open DB-API cursor.
        table_name: table to sample; must be a plain SQL identifier.
        column_name: column to measure; must be a plain SQL identifier.

    Returns:
        The cardinality ratio from the first result row, or 0 when the query
        yields no row.

    Raises:
        ValueError: if either name is not a bare identifier — identifiers
            cannot be bound as query parameters, so without this check the
            str.format() below is an SQL-injection vector.
    """
    ident = re.compile(r'[A-Za-z_][A-Za-z0-9_]*$')
    if not ident.match(table_name) or not ident.match(column_name):
        raise ValueError(f"invalid SQL identifier: {table_name}.{column_name}")
    query = "SELECT COUNT(DISTINCT {})/COUNT(*) AS cardinality FROM {}".format(column_name, table_name)
    cursor.execute(query)
    result = cursor.fetchone()
    return result[0] if result else 0
15 | | - |
def index_advisor(sql_query, database_ip, database_port, database_name, username, password):
    """Print CREATE INDEX suggestions for columns referenced by *sql_query*.

    Runs ``EXPLAIN FORMAT=tree`` against the given MySQL database, extracts
    the table.column references that appear in the plan text, and for every
    referenced column that is not already indexed prints a CREATE INDEX
    statement, highest-cardinality columns first.

    Args:
        sql_query: the SELECT statement to analyze (trusted input — it is
            concatenated into the EXPLAIN statement verbatim).
        database_ip / database_port / database_name / username / password:
            MySQL connection parameters.

    Returns:
        None; suggestions are printed to stdout.
    """
    conn = mysql.connector.connect(
        host=database_ip,
        port=database_port,
        user=username,
        passwd=password,
        database=database_name,
    )
    try:
        cursor = conn.cursor()
        try:
            def find_indexes(table_name):
                # SHOW INDEX's 5th output column (index 4) is Column_name.
                cursor.execute("SHOW INDEX FROM {}".format(table_name))
                return {row[4] for row in cursor.fetchall()}

            cursor.execute("EXPLAIN FORMAT=tree " + sql_query)
            plan_rows = cursor.fetchall()

            # Group referenced columns per table.  The original code kept only
            # the last table seen (`t_name` leaked out of the loop), silently
            # dropping suggestions for every other table in a join.
            columns_by_table = {}
            for row in plan_rows:
                for table, column in extract_columns_from_filter(row[0]):
                    columns_by_table.setdefault(table, set()).add(column)

            for table, columns in columns_by_table.items():
                existing = find_indexes(table)
                candidates = [c for c in columns if c not in existing]
                ranked = sorted(
                    ((c, calculate_cardinality(cursor, table, c)) for c in candidates),
                    key=lambda pair: pair[1],
                    reverse=True,
                )
                for column, _cardinality in ranked:
                    # Random suffix keeps generated index names unique.
                    hex_str = os.urandom(4).hex()
                    print("create index ix_auto_{} on {}({})".format(hex_str, table, column))
        finally:
            cursor.close()
    finally:
        # Read-only workload: the old conn.commit() was a no-op, dropped.
        # try/finally guarantees the connection is released even on error.
        conn.close()
72 | | - |
# Collect the query and connection details interactively, then run the advisor.
query_text = input("SQL Query:")
host_value = input("Database ip: ")
port_value = input("Database port: ")
schema_value = input("Database name: ")
user_value = input("Username: ")
secret_value = input("Password: ")

index_advisor(query_text, host_value, port_value, schema_value, user_value, secret_value)
| 2 | +import mysql.connector |
| 3 | +from mysql.connector import Error |
| 4 | +from datetime import datetime |
| 5 | + |
def fetch_sample_value(connection, table, column):
    """Fetch one non-NULL sample value from ``table.column``.

    Args:
        connection: an open mysql.connector connection.
        table: table name; must be a bare SQL identifier.
        column: column name; must be a bare SQL identifier.

    Returns:
        The first non-NULL value found, or None when there is no data, the
        identifiers are unsafe, or a database error occurs.
    """
    # Identifiers cannot be bound as query parameters; whitelist-validate them
    # so hostile digest text cannot inject SQL through the f-string below.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", table) or not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", column):
        print(f"Invalid identifier {table}.{column}; skipping sample lookup")
        return None
    try:
        cursor = connection.cursor()
        try:
            sample_query = f"SELECT {column} FROM {table} WHERE {column} IS NOT NULL LIMIT 1"
            cursor.execute(sample_query)
            result = cursor.fetchone()
            return result[0] if result else None
        finally:
            cursor.close()  # fix: the old code leaked one cursor per call
    except Error as e:
        print(f"Error fetching sample value for {table}.{column}: {e}")
        return None
| 17 | + |
def get_column_data_type(connection, table, column):
    """Return the INFORMATION_SCHEMA DATA_TYPE of ``table.column``, or None.

    NOTE(review): the lookup is not filtered by TABLE_SCHEMA, so a same-named
    table in another database on this server could shadow the answer —
    confirm the server hosts a single application schema.
    """
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(
                """
                SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS
                WHERE TABLE_NAME = %s AND COLUMN_NAME = %s
                """, (table, column))
            result = cursor.fetchone()
            return result[0] if result else None
        finally:
            cursor.close()  # fix: was never closed, leaking a cursor per call
    except Error as e:
        print(f"Error fetching data type for {table}.{column}: {e}")
        return None
| 32 | + |
def replace_query_placeholders(connection, query):
    """Replace ``?`` placeholders in a digest query with concrete values.

    ``LIMIT ?`` becomes ``LIMIT 10``; ``col = ?`` is filled with a sample
    value fetched from the table named in the first FROM clause (quoted when
    the column type is textual).  When no sample can be found, the predicate
    is rewritten as ``col IS NULL``.

    Args:
        connection: open database connection used to fetch sample values.
        query: normalized digest text from performance_schema.

    Returns:
        The query with placeholders substituted; on a database error the
        partially-normalized query is returned unchanged.
    """
    try:
        query = query.replace("`", "").replace("'", "")
        query = re.sub(r"\s*\.\s*", ".", query)  # normalize "t . c" -> "t.c"

        if query.count('?') == 0:
            return query

        query_parts = query.split('?')
        final_query = query_parts[0]

        for i in range(1, len(query_parts)):
            # Text between the previous placeholder and this one tells us
            # what kind of value is expected.
            prev_text = query_parts[i - 1].strip()

            if prev_text.endswith("LIMIT"):
                replacement = "10"
            else:
                replacement = None
                match = re.search(r"([a-zA-Z0-9_]+)\s*=\s*$", prev_text)
                if match:
                    column = match.group(1)
                    # NOTE(review): always uses the FIRST table after FROM;
                    # columns of joined tables may resolve wrongly — confirm.
                    table_match = re.search(r"FROM\s+([a-zA-Z0-9_]+)", query, re.IGNORECASE)
                    if table_match:
                        table = table_match.group(1)
                        sample_value = fetch_sample_value(connection, table, column)
                        if sample_value is not None:
                            data_type = get_column_data_type(connection, table, column)
                            if data_type in ("char", "varchar", "text"):
                                replacement = f"'{sample_value}'"
                            else:
                                replacement = str(sample_value)
                if replacement is None:
                    # BUG FIX: the old code appended "IS NULL" after the
                    # comparison operator, producing invalid SQL like
                    # "col = IS NULL"; strip the dangling operator first.
                    final_query = re.sub(r"[=<>!]+\s*$", "", final_query).rstrip()
                    replacement = " IS NULL"

            final_query += replacement + query_parts[i]

        return final_query
    except Error as e:
        print(f"Error replacing query placeholders: {e}")
        return query
| 79 | + |
def fetch_top_queries(connection, database_name):
    """Return up to 10 most time-consuming SELECT digests, placeholders filled.

    Reads performance_schema.events_statements_summary_by_digest for
    *database_name*, keeps SELECT statements that do not touch the system
    schemas, orders by total timer wait, and runs each digest through
    replace_query_placeholders().  Returns [] on database error.
    """
    query = """
    SELECT DIGEST_TEXT
    FROM performance_schema.events_statements_summary_by_digest
    WHERE SCHEMA_NAME = %s
    AND UPPER(DIGEST_TEXT) LIKE 'SELECT%%'
    AND DIGEST_TEXT NOT LIKE '%%performance_schema%%'
    AND DIGEST_TEXT NOT LIKE '%%INFORMATION_SCHEMA%%'
    ORDER BY SUM_TIMER_WAIT DESC
    LIMIT 10;
    """
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query, (database_name,))
            rows = cursor.fetchall()
        finally:
            cursor.close()  # fix: was never closed, leaking a cursor per call
        return [replace_query_placeholders(connection, row[0]) for row in rows]
    except Error as e:
        print(f"Error fetching top queries: {e}")
        return []
| 99 | + |
def is_column_indexed(connection, table, column):
    """Return True when ``table.column`` appears in any index, else False.

    NOTE(review): STATISTICS is not filtered by TABLE_SCHEMA, so an index on
    a same-named table in another database also counts — confirm the server
    hosts a single application schema.  Returns False on database error.
    """
    try:
        cursor = connection.cursor()
        try:
            query = """
            SELECT COUNT(*)
            FROM INFORMATION_SCHEMA.STATISTICS
            WHERE TABLE_NAME = %s
            AND COLUMN_NAME = %s
            """
            cursor.execute(query, (table, column))
            return cursor.fetchone()[0] > 0
        finally:
            cursor.close()  # fix: was never closed, leaking a cursor per call
    except Error as e:
        print(f"Index check error: {e}")
        return False
| 115 | + |
def suggest_missing_indexes(connection, query):
    """Print a CREATE INDEX suggestion per table whose referenced columns lack indexes.

    Resolves table aliases from FROM/JOIN clauses, collects every table.column
    reference in *query*, and for each table with unindexed columns prints one
    composite CREATE INDEX statement.
    """
    try:
        alias_pattern = r"(?:FROM|JOIN)\s+(\w+)\s+(?:AS\s+)?(\w+)?"
        alias_matches = re.findall(alias_pattern, query, re.IGNORECASE)

        # BUG FIX: the word after a table name is often an SQL keyword
        # ("FROM t WHERE ..."), which the old code happily treated as an
        # alias; filter those out and fall back to the table's own name.
        keywords = {"WHERE", "GROUP", "ORDER", "ON", "LIMIT", "JOIN", "INNER",
                    "LEFT", "RIGHT", "USING", "SET", "HAVING", "UNION"}
        alias_mapping = {}
        for table, alias in alias_matches:
            if alias and alias.upper() not in keywords:
                alias_mapping[alias] = table
            else:
                alias_mapping[table] = table

        column_references = re.findall(r"(\w+)\.(\w+)", query)
        resolved_columns = [(alias_mapping.get(table, table), column) for table, column in column_references]

        table_column_map = {}
        for table, column in resolved_columns:
            columns = table_column_map.setdefault(table, [])
            if column not in columns:  # avoid duplicate column entries
                columns.append(column)

        for table, columns in table_column_map.items():
            missing_columns = [col for col in columns if not is_column_indexed(connection, table, col)]
            if missing_columns:
                # BUG FIX: datetime.isoformat() contains '-' and ':' which are
                # illegal in an unquoted MySQL identifier; use digits only.
                timestamp = datetime.now().strftime("%Y%m%d%H%M")
                index_name = f"idx_{table}_" + "_".join(missing_columns)
                print(
                    f"Suggested Index for {table}: CREATE INDEX {index_name}_{timestamp} ON {table}({', '.join(missing_columns)});\n")
    except Error as e:
        print(f"Error suggesting missing indexes: {e}")
| 144 | + |
def main():
    """Connect to MySQL, fetch the top SELECT digests, and print index suggestions."""
    # NOTE(review): hard-coded credentials are fine for a local demo only —
    # move them to environment variables/config before sharing this script.
    db_config = {
        "host": "localhost",
        "port": "3309",
        "database": "classicmodels",
        "user": "root",
        "password": "1234",
    }
    connection = None  # fix: guarantee the name is bound for the finally block
    try:
        connection = mysql.connector.connect(**db_config)
        if connection.is_connected():
            print("Fetching top SELECT queries...")
            top_queries = fetch_top_queries(connection, db_config["database"])
            if not top_queries:
                print("No SELECT queries found.")
                return

            for idx, query in enumerate(top_queries, 1):
                print(f"\n=== Query {idx} ===\n{query}\n")
                suggest_missing_indexes(connection, query)

    except Error as e:
        print(f"Database error: {e}")
    finally:
        # BUG FIX: when connect() itself raised, `connection` was unbound and
        # this block crashed with NameError, masking the real error.
        if connection is not None and connection.is_connected():
            connection.close()
            print("MySQL connection closed.")
83 | 174 |
|
| 175 | +if __name__ == "__main__": |
| 176 | + main() |
84 | 177 |
|
0 commit comments