In [1]:
import pandas as pd

In [2]:
# Load the Excel data
excel_file_path = 'ValidationReport.xlsx'
excel_data = pd.read_excel(excel_file_path, sheet_name=None)
excel_data

{'Validation':     SourceSchema   SourceTable
 0   AustinSchema   AustinTable
 1   DallasSchema   DallasTable
 2  HoustonSchema  HoustonTable
 3  SanJoseSchema  SanJoseTable,
 'AustinTable':           Name  Age      City
 0  AustinAlice   30    Austin
 1    AustinBob   25    Dallas
 2  AustinCarol   35   Houston
 3  AustinDavid   40  San Jose
 4    AustinEve   28    Austin,
 'DallasTable':           Name  Age      City
 0  DallasAlice   30    Austin
 1    DallasBob   25    Dallas
 2  DallasCarol   35   Houston
 3  DallasDavid   40  San Jose
 4    DallasEve   28    Austin,
 'HoustonTable':            Name  Age      City
 0  HoustonAlice   30    Austin
 1    HoustonBob   25    Dallas
 2  HoustonCarol   35   Houston
 3  HoustonDavid   40  San Jose
 4    HoustonEve   28    Austin,
 'SanJoseTable':            Name  Age      City
 0  SanJoseAlice   30    Austin
 1    SanJoseBob   25    Dallas
 2  SanJoseCarol   35   Houston
 3  SanJoseDavid   40  San Jose
 4    SanJoseEve   28    Austin}

In [3]:
# Extract SourceSchema and SourceTable from the 'Validation' sheet
validation_data = excel_data['Validation']
validation_data

Unnamed: 0,SourceSchema,SourceTable
0,AustinSchema,AustinTable
1,DallasSchema,DallasTable
2,HoustonSchema,HoustonTable
3,SanJoseSchema,SanJoseTable


In [4]:
validation_data['SourceSchema']

0     AustinSchema
1     DallasSchema
2    HoustonSchema
3    SanJoseSchema
Name: SourceSchema, dtype: object

In [5]:
source_schema = validation_data['SourceSchema'][0]
source_schema

'AustinSchema'

In [6]:
validation_data['SourceTable']

0     AustinTable
1     DallasTable
2    HoustonTable
3    SanJoseTable
Name: SourceTable, dtype: object

In [7]:
source_table = validation_data['SourceTable'][0]
source_table

'AustinTable'

In [8]:
# Generate the SELECT * statement using SourceSchema and SourceTable
select_statement = f"SELECT * FROM {source_schema}.{source_table}"
select_statement

'SELECT * FROM AustinSchema.AustinTable'

In [9]:
# Generate the WHERE clause using all columns
data_sheet_names = [sheet_name for sheet_name in excel_data.keys()
                    if sheet_name != 'Validation']
data_sheet_names

['AustinTable', 'DallasTable', 'HoustonTable', 'SanJoseTable']

In [10]:
where_conditions = []

for data_sheet_name in data_sheet_names:
    data = excel_data[data_sheet_name].iloc[1:, :] # Skip the header row
    for index, row in data.iterrows():
        conditions = []
        for column, value in row.items():
            if isinstance(value, str):
                condition = f"{column} = '{value}'"
            else:
                condition = f"{column} = {value}"
            conditions.append(condition)
        where_conditions.append(" AND ".join(conditions))

where_conditions

["Name = 'AustinBob' AND Age = 25 AND City = 'Dallas'",
 "Name = 'AustinCarol' AND Age = 35 AND City = 'Houston'",
 "Name = 'AustinDavid' AND Age = 40 AND City = 'San Jose'",
 "Name = 'AustinEve' AND Age = 28 AND City = 'Austin'",
 "Name = 'DallasBob' AND Age = 25 AND City = 'Dallas'",
 "Name = 'DallasCarol' AND Age = 35 AND City = 'Houston'",
 "Name = 'DallasDavid' AND Age = 40 AND City = 'San Jose'",
 "Name = 'DallasEve' AND Age = 28 AND City = 'Austin'",
 "Name = 'HoustonBob' AND Age = 25 AND City = 'Dallas'",
 "Name = 'HoustonCarol' AND Age = 35 AND City = 'Houston'",
 "Name = 'HoustonDavid' AND Age = 40 AND City = 'San Jose'",
 "Name = 'HoustonEve' AND Age = 28 AND City = 'Austin'",
 "Name = 'SanJoseBob' AND Age = 25 AND City = 'Dallas'",
 "Name = 'SanJoseCarol' AND Age = 35 AND City = 'Houston'",
 "Name = 'SanJoseDavid' AND Age = 40 AND City = 'San Jose'",
 "Name = 'SanJoseEve' AND Age = 28 AND City = 'Austin'"]

In [11]:
# Construct the final SQL query with SELECT statement and WHERE clause
final_query = f"""{select_statement} WHERE {' OR '.join(f'({condition})'
        for condition in where_conditions)}"""

print(final_query)

SELECT * FROM AustinSchema.AustinTable WHERE (Name = 'AustinBob' AND Age = 25 AND City = 'Dallas') OR (Name = 'AustinCarol' AND Age = 35 AND City = 'Houston') OR (Name = 'AustinDavid' AND Age = 40 AND City = 'San Jose') OR (Name = 'AustinEve' AND Age = 28 AND City = 'Austin') OR (Name = 'DallasBob' AND Age = 25 AND City = 'Dallas') OR (Name = 'DallasCarol' AND Age = 35 AND City = 'Houston') OR (Name = 'DallasDavid' AND Age = 40 AND City = 'San Jose') OR (Name = 'DallasEve' AND Age = 28 AND City = 'Austin') OR (Name = 'HoustonBob' AND Age = 25 AND City = 'Dallas') OR (Name = 'HoustonCarol' AND Age = 35 AND City = 'Houston') OR (Name = 'HoustonDavid' AND Age = 40 AND City = 'San Jose') OR (Name = 'HoustonEve' AND Age = 28 AND City = 'Austin') OR (Name = 'SanJoseBob' AND Age = 25 AND City = 'Dallas') OR (Name = 'SanJoseCarol' AND Age = 35 AND City = 'Houston') OR (Name = 'SanJoseDavid' AND Age = 40 AND City = 'San Jose') OR (Name = 'SanJoseEve' AND Age = 28 AND City = 'Austin')
