In [120]:
#!python3 -m pip install scipy
import time
import scipy
import openai
import re
import pandas as pd

from scipy.spatial.distance import cosine
from textwrap import dedent
from openai.api_requestor import error

In [116]:
def _build_table_prompt(tables, question):
    """Return the few-shot prompt asking which tables are needed for `question`."""
    # Dedent the static template *before* substituting values. Dedenting the
    # already-formatted text does nothing, because the joined table names start
    # at column 0 and leave no common indentation to strip.
    template = dedent('''\
        List at most three tables, separated by '|', needed to answer each question.
        You have the following SQL tables of US Census data to choose from:

        {table_list}

        Question: `Where do people have more dial-up internet than other kinds?`
        Answer: acs_internet_access

        Question: `Which areas have the most kids?`
        Answer: acs_sex_by_age

        Question: `Where has the most rich people and people on Medicare?`
        Answer: acs_ratio_of_income_to_poverty_level|acs_medicare

        Question: `areas with long commutes`
        Answer: acs_commute_times

        Question: `{question}`
        Answer:''')
    return template.format(table_list='\n'.join(tables), question=question)


def _parse_table_list(completion_text):
    """Split a '|'- or newline-separated completion into non-empty, stripped names."""
    fragments = (part.strip() for part in re.split(r'\||\n', completion_text))
    return [name for name in fragments if name]


def choose_table(tables, question):
    """Ask the completion model which tables are needed to answer `question`.

    Args:
        tables: Iterable of table-name strings offered to the model.
        question: Natural-language question to route.

    Returns:
        List of table-name strings parsed from the model's answer, with
        surrounding whitespace and empty fragments removed. The names are not
        checked against `tables` here.

    Side effects:
        Calls `openai.Completion.create` and prints the parsed list.
    """
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=_build_table_prompt(tables, question),
        temperature=0.9,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    chosen = _parse_table_list(response.choices[0].text)
    print(chosen)
    return chosen

def draft_query(query_tables, DDLs, question, df_similar, most_similar_indexes):
    """Build the few-shot text-to-SQL prompt for `question`.

    Args:
        query_tables: Table names whose column definitions go in the prompt.
        DDLs: Mapping of table name -> column-definition string; must contain
            every name in `query_tables`.
        question: The question to be translated to SQL.
        df_similar: DataFrame with 'question' and 'query' columns holding the
            worked examples.
        most_similar_indexes: Index labels into `df_similar` (looked up with
            `.loc`) of the examples to include, in the order given. Any
            number of examples is accepted.

    Returns:
        The prompt string, ending in 'SQL:' so the model completes the query.
    """
    # The prompt is assembled from explicit pieces rather than dedenting
    # formatted text: dedent only strips indentation common to *every* line, so
    # an un-indented first line (or a multi-line example query) would leave the
    # rest of the prompt indented.
    sections = ['Convert text to SQL.\n\nYou have the following tables and columns:\n']

    for table in query_tables:
        sections.append(f'\n{table}: {DDLs[table]}\n')

    for example_index in most_similar_indexes:
        example = df_similar.loc[example_index]
        sections.append(f"\nQuestion: {example['question']}\nSQL: {example['query']}\n")

    sections.append(f'\nQuestion: {question}\nSQL:')
    return ''.join(sections)

def _call_with_rate_limit_retry(api_call, wait_seconds=15):
    """Run `api_call()` until it stops raising RateLimitError, sleeping between tries."""
    while True:
        try:
            return api_call()
        except openai.error.RateLimitError:
            print('RateLimitError')
            time.sleep(wait_seconds)


def text_to_sql(tables, DDLs, question, question_embeddings, df_similar):
    """Translate a natural-language question into SQL text via the completion model.

    Steps: pick the relevant tables, drop any the model invented, find the
    three stored questions most similar to `question`, build a few-shot prompt
    from them, and request the completion.

    Args:
        tables: Known table names; model-chosen names not in here are discarded.
        DDLs: Mapping of table name -> column-definition string.
        question: The question to translate.
        question_embeddings: Sequence of embeddings of the example questions,
            aligned with the rows of `df_similar`.
        df_similar: DataFrame of worked examples ('question' / 'query' columns).

    Returns:
        The raw completion text (not stripped).

    Side effects:
        Makes OpenAI API calls and prints the completion. Each API step is
        retried indefinitely on RateLimitError with a 15 second pause.
    """
    # Retry each API step on its own so a rate limit on a later call does not
    # repeat (and re-bill) the earlier calls that already succeeded.
    query_tables = _call_with_rate_limit_retry(lambda: choose_table(tables, question))

    # Remove tables it hallucinated
    query_tables = [t for t in query_tables if t in tables]

    new_question_embedding = _call_with_rate_limit_retry(lambda: generate_embeddings(question))
    most_similar_indexes = find_n_most_similar(new_question_embedding, question_embeddings, 3)
    prompt = draft_query(query_tables, DDLs, question, df_similar, most_similar_indexes)

    response = _call_with_rate_limit_retry(lambda: openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0.9,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    ))

    sql_text = response['choices'][0]['text']
    print(sql_text)
    return sql_text

def generate_embeddings(prompt):
    """Return the embedding of `prompt` from the "text-embedding-ada-002" model.

    The value returned is the 'embedding' field of the first item in the
    response's 'data' list.
    """
    api_result = openai.Embedding.create(model="text-embedding-ada-002", input=prompt)
    first_item = api_result['data'][0]
    return first_item['embedding']

def find_most_similar(new_question_embedding, question_embeddings):
    """Return the position of the embedding most similar to the new question.

    Similarity is cosine similarity (1 - scipy cosine distance).

    Args:
        new_question_embedding: Embedding vector of the question being matched.
        question_embeddings: Iterable of candidate embedding vectors.

    Returns:
        Index of the candidate with the highest similarity (the first one on
        ties), or None when there are no candidates.
    """
    most_similar_index = None
    # Start below every attainable value: cosine similarity can be exactly -1
    # (or a hair under it from rounding), which a -1 sentinel would never beat.
    highest_similarity = float('-inf')

    for i, embedding in enumerate(question_embeddings):
        similarity = 1 - cosine(embedding, new_question_embedding)
        if similarity > highest_similarity:
            highest_similarity = similarity
            most_similar_index = i

    return most_similar_index

def find_n_most_similar(new_question_embedding, question_embeddings, n):
    """Return the positions of the `n` embeddings most similar to the new question.

    Similarity is cosine similarity (1 - scipy cosine distance).

    Args:
        new_question_embedding: Embedding vector of the question being matched.
        question_embeddings: Iterable of candidate embedding vectors.
        n: Maximum number of positions to return.

    Returns:
        At most `n` indexes into `question_embeddings`, in ascending index
        order. When candidates tie at the cut-off, the lower index wins, so
        the result never exceeds `n` entries.
    """
    similarities = [
        1 - cosine(embedding, new_question_embedding)
        for embedding in question_embeddings
    ]

    # Rank positions rather than values: selecting by value membership would
    # return every candidate that shares a top-n similarity, i.e. more than n.
    # sorted() is stable even with reverse=True, so ties keep index order.
    ranked = sorted(range(len(similarities)), key=similarities.__getitem__, reverse=True)

    return sorted(ranked[:n])



In [90]:
# Names of the tables the model may pick from. When passed to text_to_sql(),
# this list is shown in the table-selection prompt and any table the model
# names that is not in it is discarded. Every name here has an entry in DDLs.
tables = [
    "acs_sex_by_age",
    "acs_race",
    "acs_hispanic",
    "acs_commute_times",
    "acs_employment_by_industry",
    "acs_commute_modes",
    "acs_education_subjects",
    "acs_housing_year_built",
    "acs_poverty_status",
    "acs_ratio_of_income_to_poverty_level",
    "acs_earners_in_household",
    "acs_housing",
    "acs_medicare",
    "acs_gross_rent_household_income_ratio",
    "acs_household_size_by_vehicles_available",
    "acs_internet_access",
    "acs_educational_attainment",
    "acs_hh_income"
]

# Column definitions for each table, keyed by table name. Each value is a
# comma-separated "column_name TYPE" string; draft_query() pastes it verbatim
# into the text-to-SQL prompt next to the table name.
DDLs = {
"acs_sex_by_age": "zcta VARCHAR, total_population REAL, num_male REAL, num_male_under_5_years REAL, num_male_5_to_9_years REAL, num_male_10_to_14_years REAL, num_male_15_to_17_years REAL, num_male_18_and_19_years REAL, num_male_20_years REAL, num_male_21_years REAL, num_male_22_to_24_years REAL, num_male_25_to_29_years REAL, num_male_30_to_34_years REAL, num_male_35_to_39_years REAL, num_male_40_to_44_years REAL, num_male_45_to_49_years REAL, num_male_50_to_54_years REAL, num_male_55_to_59_years REAL, num_male_60_and_61_years REAL, num_male_62_to_64_years REAL, num_male_65_and_66_years REAL, num_male_67_to_69_years REAL, num_male_70_to_74_years REAL, num_male_75_to_79_years REAL, num_male_80_to_84_years REAL, num_male_85_years_and_over REAL, num_female REAL, num_female_under_5_years REAL, num_female_5_to_9_years REAL, num_female_10_to_14_years REAL, num_female_15_to_17_years REAL, num_female_18_and_19_years REAL, num_female_20_years REAL, num_female_21_years REAL, num_female_22_to_24_years REAL, num_female_25_to_29_years REAL, num_female_30_to_34_years REAL, num_female_35_to_39_years REAL, num_female_40_to_44_years REAL, num_female_45_to_49_years REAL, num_female_50_to_54_years REAL, num_female_55_to_59_years REAL, num_female_60_and_61_years REAL, num_female_62_to_64_years REAL, num_female_65_and_66_years REAL, num_female_67_to_69_years REAL, num_female_70_to_74_years REAL, num_female_75_to_79_years REAL, num_female_80_to_84_years REAL, num_female_85_years_and_over REAL"
,"acs_race": "zcta VARCHAR, total_population REAL, num_white_alone REAL, num_black_or_african_american_alone REAL, num_american_indian_and_alaska_native_alone REAL, num_asian_alone REAL, num_native_hawaiian_and_other_pacific_islander_alone REAL, num_some_other_race_alone REAL, num_two_or_more_races REAL, num_two_or_more_races_two_races_including_some_other_race REAL, num_two_or_more_races_two_races_excluding_some_other_race_and_three_or_more_races REAL"
,"acs_hispanic": "zcta VARCHAR, total_population REAL, num_not_hispanic_or_latino REAL, num_not_hispanic_or_latino_white_alone REAL, num_not_hispanic_or_latino_black_or_african_american_alone REAL, num_not_hispanic_or_latino_american_indian_and_alaska_native_alone REAL, num_not_hispanic_or_latino_asian_alone REAL, num_not_hispanic_or_latino_native_hawaiian_and_other_pacific_islander_alone REAL, num_not_hispanic_or_latino_some_other_race_alone REAL, num_not_hispanic_or_latino_two_or_more_races REAL, num_not_hispanic_or_latino_two_or_more_races_two_races_including_some_other_race REAL, num_not_hispanic_or_latino_two_or_more_races_two_races_excluding_some_other_race_and_three_or_more_races REAL, num_hispanic_or_latino REAL, num_hispanic_or_latino_white_alone REAL, num_hispanic_or_latino_black_or_african_american_alone REAL, num_hispanic_or_latino_american_indian_and_alaska_native_alone REAL, num_hispanic_or_latino_asian_alone REAL, num_hispanic_or_latino_native_hawaiian_and_other_pacific_islander_alone REAL, num_hispanic_or_latino_some_other_race_alone REAL, num_hispanic_or_latino_two_or_more_races REAL, num_hispanic_or_latino_two_or_more_races_two_races_including_some_other_race REAL, num_hispanic_or_latino_two_or_more_races_two_races_excluding_some_other_race_and_three_or_more_races REAL"
,"acs_commute_times": "zcta VARCHAR, total_commuter_population REAL, num_less_than_5_minutes REAL, num_5_to_9_minutes REAL, num_10_to_14_minutes REAL, num_15_to_19_minutes REAL, num_20_to_24_minutes REAL, num_25_to_29_minutes REAL, num_30_to_34_minutes REAL, num_35_to_39_minutes REAL, num_40_to_44_minutes REAL, num_45_to_59_minutes REAL, num_60_to_89_minutes REAL, num_90_or_more_minutes REAL"
,"acs_employment_by_industry": "zcta VARCHAR, total_working_population REAL, num_agriculture_forestry_fishing_and_hunting_and_mining REAL, num_construction REAL, num_manufacturing REAL, num_transportation_and_warehousing_and_utilities REAL, num_information REAL, num_finance_and_insurance_and_real_estate_and_rental_and_leasing REAL, num_professional_scientific_and_management_and_administrative_and_waste_management_services REAL, num_educational_services_and_health_care_and_social_assistance REAL, num_arts_entertainment_and_recreation_and_accommodation_and_food_services REAL, num_other_services REAL, num_public_administration REAL, num_armed_forces REAL"
,"acs_commute_modes": "zcta VARCHAR, total_working_population REAL, num_drove_alone REAL, num_carpooled REAL, num_public_transportation REAL, num_walked REAL, num_taxicab_motorcycle_bicycle_or_other REAL, num_worked_from_home REAL"
,"acs_education_subjects": "zcta VARCHAR, total_num_bachelors_holders REAL, num_science_and_engineering_computers_mathematics_and_statistics REAL, num_science_and_engineering_biological_agricultural_and_environmental_sciences REAL, num_science_and_engineering_physical_and_related_sciences REAL, num_science_and_engineering_psychology REAL, num_science_and_engineering_social_sciences REAL, num_science_and_engineering_engineering REAL, num_science_and_engineering_multidisciplinary_studies REAL, num_science_and_engineering_related_fields REAL, num_business REAL, num_education REAL, num_arts_humanities_and_other_literature_and_languages REAL, num_arts_humanities_and_other_liberal_arts_and_history REAL, num_arts_humanities_and_other_visual_and_performing_arts REAL, num_arts_humanities_and_other_communications REAL, num_arts_humanities_and_other_other REAL"
,"acs_housing_year_built": "zcta VARCHAR, total_structures REAL, num_built_2020_or_later REAL, num_built_2010_to_2019 REAL, num_built_2000_to_2009 REAL, num_built_1990_to_1999 REAL, num_built_1980_to_1989 REAL, num_built_1970_to_1979 REAL, num_built_1960_to_1969 REAL, num_built_1950_to_1959 REAL, num_built_1940_to_1949 REAL, num_built_1939_or_earlier REAL"
,"acs_poverty_status": "zcta VARCHAR, total_households REAL, num_income_in_the_past_12_months_below_poverty_level REAL, num_income_in_the_past_12_months_below_poverty_level_under_6_years REAL, num_income_in_the_past_12_months_below_poverty_level_6_to_11_years REAL, num_income_in_the_past_12_months_below_poverty_level_12_to_17_years REAL, num_income_in_the_past_12_months_below_poverty_level_18_to_59_years REAL, num_income_in_the_past_12_months_below_poverty_level_60_to_74_years REAL, num_income_in_the_past_12_months_below_poverty_level_75_to_84_years REAL, num_income_in_the_past_12_months_below_poverty_level_85_years_and_over REAL, num_income_in_the_past_12_months_at_or_above_poverty_level REAL, num_income_in_the_past_12_months_at_or_above_poverty_level_under_6_years REAL, num_income_in_the_past_12_months_at_or_above_poverty_level_6_to_11_years REAL, num_income_in_the_past_12_months_at_or_above_poverty_level_12_to_17_years REAL, num_income_in_the_past_12_months_at_or_above_poverty_level_18_to_59_years REAL, num_income_in_the_past_12_months_at_or_above_poverty_level_60_to_74_years REAL, num_income_in_the_past_12_months_at_or_above_poverty_level_75_to_84_years REAL, num_income_in_the_past_12_months_at_or_above_poverty_level_85_years_and_over REAL"
,"acs_ratio_of_income_to_poverty_level": "zcta VARCHAR, total_households REAL, num_under_50_pct REAL, num_50_to_74_pct REAL, num_75_to_99_pct REAL, num_100_to_124_pct REAL, num_125_to_149_pct REAL, num_150_to_174_pct REAL, num_175_to_184_pct REAL, num_185_to_199_pct REAL, num_200_to_299_pct REAL, num_300_to_399_pct REAL, num_400_to_499_pct REAL, num_500_pct_and_over REAL"
,"acs_earners_in_household": "zcta VARCHAR, total_num_earners FLOAT, num_no_earners FLOAT, num_1_earner FLOAT, num_2_earners FLOAT, num_3_or_more_earners FLOAT"
,"acs_housing": "zcta TEXT, num_housing_units REAL, num_heated_by_utility_gas REAL, num_heated_by_bottled_tank_or_lp_gas REAL, num_heated_by_electricity REAL, num_heated_by_fuel_oil_kerosene_etc REAL, num_heated_by_coal_or_coke REAL, num_heated_by_wood REAL, num_heated_by_solar_energy REAL, num_heated_by_other_fuel REAL, num_heated_by_no_fuel_used REAL, num_owner_occupied REAL, num_renter_occupied REAL, num_complete_kitchen_facilities REAL, num_lacking_complete_kitchen_facilities REAL, aggregate_gross_rent REAL, aggregate_price_asked REAL, median_home_value REAL, num_housing_units_with_a_mortgage REAL, num_housing_units_with_a_mortgage_with_no_second_mortgage REAL, num_housing_units_without_a_mortgage REAL, total_not_charged_not_used_or_payment_included_in_other_fees REAL, total_charged_for_electricity REAL, total_charged_for_electricity_less_than_usd_50 REAL, total_charged_for_electricity_usd_50_to_usd_99 REAL, total_charged_for_electricity_usd_100_to_usd_149 REAL, total_charged_for_electricity_usd_150_to_usd_199 REAL, total_charged_for_electricity_usd_200_to_usd_249 REAL, total_charged_for_electricity_usd_250_or_more REAL, total_not_charged_or_payment_included_in_other_fees REAL, total_charged_for_water_and_sewer REAL, total_charged_for_water_and_sewer_less_than_usd_125 REAL, total_charged_for_water_and_sewer_usd_125_to_usd_249 REAL, total_charged_for_water_and_sewer_usd_250_to_usd_499 REAL, total_charged_for_water_and_sewer_usd_500_to_usd_749 REAL, total_charged_for_water_and_sewer_usd_750_to_usd_999 REAL, total_charged_for_water_and_sewer_usd_1000_or_more REAL, num_has_one_or_more_types_of_computing_devices REAL, num_has_one_or_more_types_of_computing_devices_desktop_or_laptop REAL, num_no_computer REAL"
,"acs_medicare": "zcta TEXT, total_population REAL, num_allocated_medicare REAL, num_not_allocated_medicare REAL"
,"acs_gross_rent_household_income_ratio": "zcta VARCHAR, num_hh_less_than_10pct REAL, num_hh_10pct_to_15pct REAL, num_hh_15pct_to_20pct REAL, num_hh_20pct_to_25pct REAL, num_hh_25pct_to_30pct REAL, num_hh_30pct_to_35pct REAL, num_hh_35pct_to_40pct REAL, num_hh_40pct_to_45pct REAL, num_hh_45pct_to_50pct REAL, num_hh_50pct_or_more REAL, num_hh_ratio_not_computed REAL"
,"acs_household_size_by_vehicles_available": "zcta STRING, num_hh REAL, num_hh_0_vehicles REAL, num_hh_1_vehicle REAL, num_hh_2_vehicles REAL, num_hh_3_vehicles REAL, num_hh_4_or_more_vehicles REAL, num_1_person_hh REAL, num_1_person_hh_0_vehicles REAL, num_1_person_hh_1_vehicles REAL, num_1_person_hh_2_vehicles REAL, num_1_person_hh_3_vehicles REAL, num_1_person_hh_4_or_more_vehicles REAL, num_2_person_hh REAL, num_2_person_hh_no_vehicle REAL, num_2_person_hh_1_vehicle REAL, num_2_person_hh_2_vehicles REAL, num_2_person_hh_3_vehicles REAL, num_2_person_hh_4_or_more_vehicles REAL, num_3_person_hh REAL, num_3_person_hh_no_vehicle REAL, num_3_person_hh_1_vehicle REAL, num_3_person_hh_2_vehicles REAL, num_3_person_hh_3_vehicles REAL, num_3_person_hh_4_or_more_vehicles REAL, num_4_or_more_person_hh REAL, num_4_or_more_person_hh_no_vehicle REAL, num_4_or_more_person_hh_1_vehicle REAL, num_4_or_more_person_hh_2_vehicles REAL, num_4_or_more_person_hh_3_vehicles REAL, num_4_or_more_person_hh_4_or_more_vehicles REAL"
,"acs_internet_access": "zcta TEXT, num_hh FLOAT, num_hh_with_internet FLOAT, num_hh_with_internet_via_dialup FLOAT, num_hh_with_internet_via_cable_fiber_optic_or_dsl FLOAT, num_hh_with_internet_via_satellite_internet FLOAT, num_hh_with_internet_via_other FLOAT, num_hh_with_no_internet FLOAT"
,"acs_educational_attainment": "zcta TEXT, num_pop_25_and_older FLOAT, num_less_than_high_school FLOAT, num_at_least_high_school FLOAT, num_at_least_some_college FLOAT, num_at_least_bachelors FLOAT, num_at_least_graduate_or_professional_degree FLOAT"
,"acs_hh_income": "zcta TEXT, num_hh FLOAT, num_hh_lt_10k FLOAT, num_hh_10k_to_15k FLOAT, num_hh_15k_to_19k FLOAT, num_hh_20k_to_24k FLOAT, num_hh_25k_to_29k FLOAT, num_hh_30k_to_34k FLOAT, num_hh_35k_to_39k FLOAT, num_hh_40k_to_44k FLOAT, num_hh_45k_to_49k FLOAT, num_hh_50k_to_59k FLOAT, num_hh_60k_to_74k FLOAT, num_hh_75k_to_99k FLOAT, num_hh_100k_to_124k FLOAT, num_hh_125k_to_149k FLOAT, num_hh_150k_to_199k FLOAT, num_hh_200k_or_more FLOAT, median_hh_income FLOAT"
}

In [113]:
# Load the example questions. The file's four columns are renamed so that the
# first is 'question' and the third is 'query'; the other two are dropped.
# 'new_query' starts out empty and is filled in by the generation loop.
df = pd.read_csv('questions.csv')
df.columns = ['question', '2', 'query', '4']
df = df.drop(columns=['2', '4']).assign(new_query='')

df

Unnamed: 0,question,query,new_query
0,"Approximately, what is the typical age of home...","SELECT zcta, (2020 * num_built_2020_or_later ...",
1,areas where the number of people exceeds the n...,"SELECT zcta, total_population FROM acs_sex_by_...",
2,areas where the number of people is less than ...,"SELECT zcta, total_population FROM acs_sex_by_...",
3,areas with long commutes,"SELECT zcta, (num_60_to_89_minutes + num_90_or...",
4,areas with the highest percentage of people wi...,"SELECT zcta, num_at_least_bachelors / num_pop_...",
...,...,...,...
138,zctas where the majority of people identify as...,"SELECT zcta, num_native_hawaiian_and_other_pac...",
139,zctas where the majority of people identify as...,"SELECT zcta, num_white_alone / total_populatio...",
140,zip code areas with the highest percentage of ...,"SELECT zcta, num_90_or_more_minutes / total_co...",
141,zip code areas with the highest percentage of ...,"SELECT zcta, (num_less_than_5_minutes + num_5_...",


In [114]:
# Embed every example question once, in row order, so list position i lines up
# with row i of df. This makes one embedding API call per question.
question_embeddings = list(map(generate_embeddings, df['question']))

In [121]:
# Generate SQL for every question that does not have one yet. Each result is
# written back into df as soon as it arrives, so re-running this cell after an
# interruption skips the finished rows instead of starting over.
# NOTE(review): question_embeddings was built from these same df questions, so
# each question's closest example is presumably itself (together with its
# reference query) -- confirm that is acceptable for this evaluation.
data = []
for idx, row in df.iterrows():
    print(idx)
    print(row['question'])

    if row['new_query'] == '':
        new_query = text_to_sql(tables, DDLs, row['question'], question_embeddings, df)
        data.append(new_query)
        df.loc[idx, 'new_query'] = new_query
    else:
        print('already completed')
        data.append(row['new_query'])

df

0
Approximately, what is the typical age of homes in each ZIP code area?
already completed
1
areas where the number of people exceeds the national average
already completed
2
areas where the number of people is less than the national average
already completed
3
areas with long commutes
already completed
4
areas with the highest percentage of people with a bachelor's degree
already completed
5
areas with the highest percentage of people with a high school diploma
already completed
6
areas with the highest percentage of people with a master's degree
already completed
7
areas with the highest percentage of people with a professional degree
already completed
8
areas with the highest percentage of people with some college
already completed
9
areas with the highest percentage of people with some high school
already completed
10
areas with the most people
already completed
11
How common is it to have 3 or more income earners in a household?
already completed
12
How does the distribution of em

 SELECT zcta, num_manufacturing / total_working_population AS fraction_manufacturing FROM acs_employment_by_industry WHERE total_working_population > 0 ORDER BY fraction_manufacturing DESC
109
Where is the highest concentration of workers in the Professional, scientific, and technical services industry?
['acs_employment_by_industry', 'acs_race', 'acs_hispanic']
 SELECT zcta, num_professional_scientific_and_management_and_administrative_and_waste_management_services/total_working_population AS fraction_professional_service FROM acs_employment_by_industry WHERE total_working_population > 0 ORDER BY fraction_professional_service DESC;
110
Where is the highest concentration of workers in the Retail trade industry?
['acs_employment_by_industry', 'acs_housing']
 SELECT zcta, num_wholesale_trade / total_working_population AS fraction_wholesale_trade FROM acs_employment_by_industry WHERE total_working_population > 0 ORDER BY fraction_wholesale_trade DESC
111
Where is the highest concentration 

['acs_sex_by_age', 'acs_poverty_status', 'acs_medicare']
 SELECT zcta, (num_income_in_the_past_12_months_below_poverty_level + num_income_in_the_past_12_months_below_poverty_level_under_6_years + num_income_in_the_past_12_months_below_poverty_level_6_to_11_years + num_income_in_the_past_12_months_below_poverty_level_12_to_17_years + num_income_in_the_past_12_months_below_poverty_level_18_to_59_years + num_income_in_the_past_12_months_below_poverty_level_60_to_74_years + num_income_in_the_past_12_months_below_poverty_level_75_to_84_years + num_income_in_the_past_12_months_below_poverty_level_85_years_and_over) / total_households AS fraction_poverty FROM acs_poverty_status WHERE total_households > 0 ORDER BY fraction_poverty DESC
131
Which areas lack internet access?
['acs_internet_access']
 SELECT zcta, num_hh_with_no_internet / num_hh AS fraction_no_internet FROM acs_internet_access WHERE num_hh > 0 ORDER BY fraction_no_internet DESC
132
Which ZCTA has the largest population of people 

Unnamed: 0,question,query,new_query
0,"Approximately, what is the typical age of home...","SELECT zcta, (2020 * num_built_2020_or_later ...","SELECT zcta, (2020 * num_built_2020_or_later ..."
1,areas where the number of people exceeds the n...,"SELECT zcta, total_population FROM acs_sex_by_...","SELECT zcta, num_hh FROM acs_household_size_b..."
2,areas where the number of people is less than ...,"SELECT zcta, total_population FROM acs_sex_by_...","SELECT zcta, total_population FROM acs_sex_by..."
3,areas with long commutes,"SELECT zcta, (num_60_to_89_minutes + num_90_or...","SELECT zcta, (num_45_to_59_minutes + num_60_t..."
4,areas with the highest percentage of people wi...,"SELECT zcta, num_at_least_bachelors / num_pop_...","SELECT zcta, num_at_least_bachelors / num_pop..."
...,...,...,...
138,zctas where the majority of people identify as...,"SELECT zcta, num_native_hawaiian_and_other_pac...","SELECT zcta, num_native_hawaiian_and_other_pa..."
139,zctas where the majority of people identify as...,"SELECT zcta, num_white_alone / total_populatio...","SELECT zcta, num_not_hispanic_or_latino_white..."
140,zip code areas with the highest percentage of ...,"SELECT zcta, num_90_or_more_minutes / total_co...","SELECT zcta, num_90_or_more_minutes / total_c..."
141,zip code areas with the highest percentage of ...,"SELECT zcta, (num_less_than_5_minutes + num_5_...","SELECT zcta, (num_less_than_5_minutes + num_5..."


In [123]:
# Save the questions, their reference queries and the generated queries.
df.to_csv('questions_custom_prompts.csv')

In [84]:
# for i in range(df.shape[0]):
#     print(i)
#     print(df.iloc[i]['question'])
    
#     tmp = question_embeddings.copy()
#     tmp_df = df.copy().drop(i)
    
#     new_question_embedding = tmp[i]
#     del tmp[i]
#     print(tmp_df.iloc[find_most_similar(new_question_embedding, tmp)]['question'])

0
Approximately, what is the typical age of homes in each ZIP code area?
What is the distribution of homeownership across different ZIP code areas?
1
areas where the number of people exceeds the national average
areas where the number of people is less than the national average
2
areas where the number of people is less than the national average
areas where the number of people exceeds the national average
3
areas with long commutes
zip code areas with the highest percentage of residents with long commutes of 90 minutes or more
4
areas with the highest percentage of people with a bachelor's degree
Which areas have the highest percentage of people with a bachelor's degree?
5
areas with the highest percentage of people with a high school diploma
areas with the highest percentage of people with some high school
6
areas with the highest percentage of people with a master's degree
areas with the highest percentage of people with a bachelor's degree
7
areas with the highest percentage of peo

Where do people spend the most on electricity?
77
Where do people still use dialup internet?
Which areas lack internet access?
78
Where do people work in the educational services industry
Where am I likely to find the most people working in the Educational services industry?
79
Where do renters exceed homeowners?
Where do homeowners exceed renters?
80
Where do the agriculture majors live?
Where do the education majors live?
81
Where do the arts majors live?
Where do the humanities majors live?
82
Where do the business degree holders live that have at least 100 people?
Where do the business majors live?
83
Where do the business majors live?
Where do the education majors live?
84
Where do the computer science majors live?
Where do the social sciences majors live?
85
Where do the education majors live?
Where do the business majors live?
86
Where do the engineering majors live?
Where do the education majors live?
87
Where do the health majors live?
Where do the education majors live?
88
Wh