In [1]:
import pandas as pd
import sqlalchemy as db

# SQLAlchemy engine for the SQLite file RawHealthData.db, addressed by a relative path.
engine = db.create_engine('sqlite:///../data/RawHealthData.db')
# NOTE(review): the cells visible here pass `engine` (not `connection`) to pandas and
# never close this connection — confirm it is still needed further down.
connection = engine.connect()

In [7]:
from collections import defaultdict

# Names of the database tables that the following cells read and join.
# Order matters: the first entry ends up as the FROM table and every later entry is
# INNER JOINed to the one before it on [Code] and [Year].
table_names = [
    'average_price_of_a_pack_of_cigarettes',
    'comparing_the_share_of_men_and_women_who_are_smoking',
    'consumption_per_smoker_per_day',
    'consumption_per_smoker_per_day_bounds',
    'daily_smoking_prevalence_bounds',
    'enforcement_of_bans_on_tobacco_advertising',
    'number_of_deaths_from_secondhand_smoke',
    'number_of_deaths_from_tobacco_smoking',
    'number_of_total_daily_smokers',
    'sales_of_cigarettes_per_adult_per_day',
    'secondhand_smoke_deaths_by_age'
]

In [8]:
# Map every table name to the list of column labels that will go into the SELECT list.
select_map = defaultdict(list)
for table in table_names:
    # A single row is enough: only the column labels of the result are used.
    header_query = "SELECT * FROM {} LIMIT 1".format(table)
    header_df = pd.read_sql_query(header_query, con=engine)
    # Everything from the fifth column onwards is kept.
    # NOTE(review): presumably the first four columns are index/entity/Code/Year — confirm.
    value_columns = header_df.columns[4:]
    select_map[table].extend(value_columns)

In [9]:
# Give every table a one-letter SQL alias, handing letters out from the end of the
# list (z, y, x, ...). pop() consumes `alphabet` as it goes and raises IndexError once
# the letters run out. Note that the literal below contains no 'q' (25 letters).
alphabet = list('abcdefghijklmnoprstuvwxyz')
alias_map = {table: alphabet.pop() for table in select_map}

In [10]:
# Build one SELECT statement that chains all tables of select_map together:
#   * the first table becomes the FROM table and supplies [Code] and [Year],
#   * every following table is INNER JOINed to the previous one on [Code] and [Year],
#   * every column listed in select_map is added to the SELECT list under its table alias.
# Results are left in `sql_select`, `sql_join` and the complete statement `sql`.

# Pair each table with its alias once, in select_map (insertion) order.
aliased_tables = [(alias_map[table], table, columns) for table, columns in select_map.items()]
if not aliased_tables:
    raise ValueError("select_map is empty: there is no table to select from")

# Take the key columns from the FROM table's alias instead of hard-coding it.
first_alias = aliased_tables[0][0]
select_items = ["{}.[Code]".format(first_alias), "{}.[Year]".format(first_alias)]
join_clauses = []
last_alias = None  # alias used by the previous iteration, not the last alias that exists
for alias, table, columns in aliased_tables:
    # Collect items in a list and join once at the end: a table that contributes no
    # columns then adds nothing, instead of leaving a stray comma in the SELECT list.
    select_items.extend("{}.[{}]".format(alias, column) for column in columns)

    if last_alias is None:  # first table -> FROM
        join_clauses.append("FROM {} {}".format(table, alias))
    else:  # every later table -> INNER JOIN against the previous alias
        join_clauses.append(
            "INNER JOIN {table} {alias}"
            "\n\tON {alias}.[Code] = {last_alias}.[Code]"
            " AND {alias}.[Year] = {last_alias}.[Year]".format(
                table=table, alias=alias, last_alias=last_alias
            )
        )
    last_alias = alias

# NOTE(review): a chain of INNER JOINs keeps only (Code, Year) pairs present in every
# table; the recorded output of the following cell is shape (0, 15), i.e. no rows.
# Confirm whether an outer join is what is actually wanted.
sql_select = "SELECT " + ",\n\t".join(select_items)
sql_join = "\n".join(join_clauses)
sql = sql_select + "\n" + sql_join
print(sql)

SELECT z.[Code],
	z.[Year],
	z.[Indicator:Average -  cigarette price in international dollars (],
	y.[Share of women (% of women)], 
	y.[Share of men (% of men)], 
	y.[Unnamed: 5],
	x.[Cigarette consumption per smoker per day (cigarettes)],
	w.[Estimated daily consumption (cigarettes)], 
	w.[Upper bound (cigarettes)], 
	w.[Lower bound (cigarettes)],
	u.[Indicator:Enforce bans on tobacco advertising],
	t.[Tobacco smoking]
FROM average_price_of_a_pack_of_cigarettes z
INNER JOIN comparing_the_share_of_men_and_women_who_are_smoking y         
	ON y.[Code] = z.[Code] AND y.[Year] = z.[Year]
INNER JOIN consumption_per_smoker_per_day x         
	ON x.[Code] = y.[Code] AND x.[Year] = y.[Year]
INNER JOIN consumption_per_smoker_per_day_bounds w         
	ON w.[Code] = x.[Code] AND w.[Year] = x.[Year]
INNER JOIN daily_smoking_prevalence_bounds v         
	ON v.[Code] = w.[Code] AND v.[Year] = w.[Year]
INNER JOIN enforcement_of_bans_on_tobacco_advertising u         
	ON u.[Code] = v.[Code] AND u.[

In [11]:
# Run the generated join query and load the result into a DataFrame.
# NOTE(review): the recorded output below is (0, 15) — the query returned columns but
# no rows, so the INNER JOIN chain matched nothing. Confirm this is expected.
df = pd.read_sql_query(sql, con=engine)
# Bare last expression so the notebook displays (rows, columns).
df.shape

(0, 15)