In [14]:
import re

def extract_sql_details(sql_query):
    # Extract table names from FROM and JOIN clauses
    table_pattern = r"FROM\s+([a-zA-Z0-9_]+)(?:\s+AS\s+\w+)?|JOIN\s+([a-zA-Z0-9_]+)(?:\s+AS\s+\w+)?"
    tables = re.findall(table_pattern, sql_query)
    tables = [table for table_tuple in tables for table in table_tuple if table]  # Flatten and filter out empty strings

    # Extract column names from SELECT, ON, WHERE, and ORDER BY clauses
    column_pattern = r"SELECT\s+(.*?)\s+FROM|ON\s+([\w\.]+)\s*=\s*([\w\.]+)|WHERE\s+([\w\.]+)\s*=\s*['\"]?(.*?)['\"]?|ORDER\s+BY\s+([\w\.]+)"
    columns = re.findall(column_pattern, sql_query)
    columns = [item for sublist in columns for item in sublist if item]  # Flatten and filter out empty strings

    # Extract JOIN clause
    join_pattern = r"(JOIN\s+[a-zA-Z0-9_]+\s+(?:AS\s+\w+\s+)?ON\s+[\w\.]+\s*=\s*[\w\.]+)"
    join_clause = re.findall(join_pattern, sql_query)

    # Extract encoded values from WHERE clause
    where_pattern = r"WHERE\s+([\w\.]+\s*=\s*['\"].*?['\"]|\d+)"
    encoded_values = re.findall(where_pattern, sql_query)

    # Output formatting
    print("Table Names:\n")
    for table in sorted(set(tables)):
        print(f"- {table}")

    print("\nColumn Names:\n")
    for column in sorted(set(columns)):
        print(f"- {column}")

    if join_clause:
        print("\nJoin Clause:\n")
        for join in join_clause:
            print(f"{join} (This joins the tables using the specified ON condition.)")

    if encoded_values:
        print("\nEncoded Values:\n")
        for value in encoded_values:
            print(f"- {value.strip()} (This filters the results to include only those rows where {value.split('=')[0].strip()} is equal to {value.split('=')[-1].strip()})")
    else:
        print("\nEncoded Values:\n- None")

# Example 1: With JOIN and WHERE clause
sql_query_with_join = """
SELECT ic.pcinstnm
FROM adm2022 a
JOIN ic2022campuses ic
ON a.UNITID = ic.UNITID
WHERE admcon3 = 1;
"""

# Example 2: With JOIN, WHERE, and ORDER BY clause
sql_query_ex3 = """
SELECT ic.pcinstnm
FROM ic2022campuses AS ic
JOIN gr2022 AS gr
ON ic.unitid = gr.unitid
WHERE ic.pcstabbr = 'TX'
ORDER BY gr.grtotlm DESC
LIMIT 1;
"""

# Test the function with both SQL queries
print("Example 1 Output:")
extract_sql_details(sql_query_with_join)
print("\n\nExample 2 Output:")
extract_sql_details(sql_query_ex3)


Example 1 Output:
Table Names:

- adm2022
- ic2022campuses

Column Names:

- a.UNITID
- admcon3
- ic.UNITID
- ic.pcinstnm

Encoded Values:
- None


Example 2 Output:
Table Names:

- gr2022
- ic2022campuses

Column Names:

- gr.grtotlm
- gr.unitid
- ic.pcinstnm
- ic.pcstabbr
- ic.unitid

Join Clause:

JOIN gr2022 AS gr 
ON ic.unitid = gr.unitid (This joins the tables using the specified ON condition.)

Encoded Values:

- ic.pcstabbr = 'TX' (This filters the results to include only those rows where ic.pcstabbr is equal to 'TX')


In [2]:
# Example SQL query
sql_query_ex3 = """
SELECT ic.pcinstnm
FROM ic2022campuses AS ic
JOIN gr2022 AS gr
ON ic.unitid = gr.unitid
WHERE ic.pcstabbr = 'TX'
ORDER BY gr.grtotlm DESC
LIMIT 1;
"""

extract_sql_details(sql_query_ex3)

Table Names:

- gr2022
- ic2022campuses

Column Names:

- gr.grtotlm
- gr.unitid
- ic.pcinstnm
- ic.pcstabbr
- ic.unitid

Join Clause:

JOIN gr2022 AS gr 
ON ic.unitid = gr.unitid (This joins the tables using the specified ON condition.)

Encoded Values:

- ic.pcstabbr = 'TX' (This filters the results to include only those rows where ic.pcstabbr is equal to 'TX')


In [3]:


# Example 1: With JOIN and WHERE clause
sql_query_with_join = """
SELECT ic.pcinstnm
FROM adm2022 a
JOIN ic2022campuses ic
ON a.UNITID = ic.UNITID
WHERE admcon3 = 1;
"""

# Test the function with different SQL queries
print("Example 1 Output:")
extract_sql_details(sql_query_with_join)


Example 1 Output:
Table Names:

- adm2022
- ic2022campuses

Column Names:

- a.UNITID
- admcon3
- ic.UNITID
- ic.pcinstnm

Encoded Values:
- None


In [13]:
# Example 2: Without JOIN, with WHERE clause
sql_query_without_join = """
SELECT webaddr
FROM hd2022
WHERE instnm = 'Boston University';
"""
print("\n\nExample 2 Output:")
extract_sql_details(sql_query_without_join)



Example 2 Output:
Table Names:

- hd2022

Column Names:

- instnm
- webaddr

Encoded Values:

- instnm = 'Boston University' (This filters the results to include only those rows where instnm is equal to 'Boston University')


In [15]:
import re

def extract_sql_details(sql_query):
    # Extract table names from FROM and JOIN clauses
    table_pattern = r"FROM\s+([a-zA-Z0-9_]+)(?:\s+AS\s+\w+)?|JOIN\s+([a-zA-Z0-9_]+)(?:\s+AS\s+\w+)?"
    tables = re.findall(table_pattern, sql_query)
    tables = [table for table_tuple in tables for table in table_tuple if table]  # Flatten and filter out empty strings

    # Extract column names from SELECT, ON, WHERE, and ORDER BY clauses
    column_pattern = r"SELECT\s+(.*?)\s+FROM|ON\s+([\w\.]+)\s*=\s*([\w\.]+)|WHERE\s+([\w\.]+)\s*=\s*['\"]?(.*?)['\"]?|ORDER\s+BY\s+([\w\.]+)"
    columns = re.findall(column_pattern, sql_query)
    columns = [item for sublist in columns for item in sublist if item]  # Flatten and filter out empty strings

    # Extract JOIN clause
    join_pattern = r"JOIN\s+[a-zA-Z0-9_]+\s+(?:AS\s+\w+\s+)?ON\s+[\w\.]+\s*=\s*[\w\.]+"
    join_clause = re.findall(join_pattern, sql_query)

    # Extract encoded values from WHERE clause
    where_pattern = r"WHERE\s+([\w\.]+\s*=\s*['\"].*?['\"]|\d+)"
    encoded_values = re.findall(where_pattern, sql_query)

    # Output formatting
    print("Table Names:\n")
    for table in sorted(set(tables)):
        print(f"- {table}")

    print("\nColumn Names:\n")
    for column in sorted(set(columns)):
        print(f"- {column}")

    if join_clause:
        print("\nJoin Clause:\n")
        for join in join_clause:
            print(f"{join} (This joins the tables using the specified ON condition.)")

    if encoded_values:
        print("\nEncoded Values:\n")
        for value in encoded_values:
            key_value = value.split('=')
            print(f"- {key_value[0].strip()} = {key_value[1].strip()} (This filters the results to include only those rows where {key_value[0].strip()} is equal to {key_value[1].strip()})")
    else:
        print("\nEncoded Values:\n- None")

# Example 1: With JOIN and WHERE clause
sql_query_with_join = """
SELECT ic.pcinstnm
FROM adm2022 a
JOIN ic2022campuses ic
ON a.UNITID = ic.UNITID
WHERE admcon3 = 1;
"""

# Example 2: With JOIN, WHERE, and ORDER BY clause
sql_query_ex3 = """
SELECT ic.pcinstnm
FROM ic2022campuses AS ic
JOIN gr2022 AS gr
ON ic.unitid = gr.unitid
WHERE ic.pcstabbr = 'TX'
ORDER BY gr.grtotlm DESC
LIMIT 1;
"""

# Example 3: Without JOIN, with WHERE clause
sql_query_without_join = """
SELECT webaddr
FROM hd2022
WHERE instnm = 'Boston University';
"""

# Test the function with the SQL queries
print("Example 1 Output:")
extract_sql_details(sql_query_with_join)
print("\n\nExample 2 Output:")
extract_sql_details(sql_query_ex3)
print("\n\nExample 3 Output:")
extract_sql_details(sql_query_without_join)


Example 1 Output:
Table Names:

- adm2022
- ic2022campuses

Column Names:

- a.UNITID
- admcon3
- ic.UNITID
- ic.pcinstnm

Encoded Values:
- None


Example 2 Output:
Table Names:

- gr2022
- ic2022campuses

Column Names:

- gr.grtotlm
- gr.unitid
- ic.pcinstnm
- ic.pcstabbr
- ic.unitid

Join Clause:

JOIN gr2022 AS gr 
ON ic.unitid = gr.unitid (This joins the tables using the specified ON condition.)

Encoded Values:

- ic.pcstabbr = 'TX' (This filters the results to include only those rows where ic.pcstabbr is equal to 'TX')


Example 3 Output:
Table Names:

- hd2022

Column Names:

- instnm
- webaddr

Encoded Values:

- instnm = 'Boston University' (This filters the results to include only those rows where instnm is equal to 'Boston University')
