In [1]:
import os
import zipfile
import pandas as pd
from datetime import datetime
from app.models.engine.app import db
from app.models import ElectionDate, ElectionRace, ElectionRaceCandidate, CountyElectionCandidateResult, CountyElectionResult, State, County, Precinct, PrecinctElectionResult, PrecinctElectionCandidateResult, PrecinctResult

# Where the scraper leaves its downloaded archives (resolved against the
# kernel's working directory at the time this cell runs).
DOWNLOAD_DIR = os.path.join(os.getcwd(), 'election-results-scraper/scraped_files')

# Root of the source data, followed by one sub-directory per reporting level.
SOURCE_DIR = os.path.join(os.getcwd(), 'data/source/')
STATE_RESULTS_DIR = os.path.join(SOURCE_DIR, 'state_election_results')
COUNTY_RESULTS_DIR = os.path.join(SOURCE_DIR, 'county_election_results')
PRECINCT_RESULTS_DIR = os.path.join(SOURCE_DIR, 'precinct_election_results')


def unzipCountyResults():
    """Extract not-yet-extracted county result archives into COUNTY_RESULTS_DIR.

    A file in DOWNLOAD_DIR is treated as a county archive when the second
    '_'-separated part of its name starts with 'County'. An archive is skipped
    when a file named like the archive with '_csv.zip' replaced by '.csv'
    already exists in COUNTY_RESULTS_DIR.

    Prints the number of archives about to be extracted. Returns None.
    """
    # A fresh checkout may not have the target directory yet; listing it would
    # otherwise raise FileNotFoundError before anything is extracted.
    os.makedirs(COUNTY_RESULTS_DIR, exist_ok=True)
    currentSourceFiles = set(os.listdir(COUNTY_RESULTS_DIR))

    countyZipFiles = []
    for f in os.listdir(DOWNLOAD_DIR):
        nameParts = f.split('_')
        # Names without an underscore (e.g. '.gitkeep', 'README.md') have no
        # second part; skip them instead of failing with IndexError.
        if len(nameParts) < 2 or not nameParts[1].startswith('County'):
            continue
        if f.replace('_csv.zip', '.csv') in currentSourceFiles:
            continue
        countyZipFiles.append(f)

    print(f'Unzipping {len(countyZipFiles)} county results files...')
    for f in countyZipFiles:
        with zipfile.ZipFile(os.path.join(DOWNLOAD_DIR, f), 'r') as z:
            z.extractall(COUNTY_RESULTS_DIR)


def unzipPrecinctResults():
    """Extract not-yet-extracted precinct result archives into PRECINCT_RESULTS_DIR.

    A file in DOWNLOAD_DIR is treated as a precinct archive when the second
    '_'-separated part of its name starts with 'Precinct'. An archive is
    skipped when a file named like the archive with '_csv.zip' replaced by
    '.csv' already exists in PRECINCT_RESULTS_DIR.

    Prints the number of archives about to be extracted. Returns None.
    """
    # A fresh checkout may not have the target directory yet; listing it would
    # otherwise raise FileNotFoundError before anything is extracted.
    os.makedirs(PRECINCT_RESULTS_DIR, exist_ok=True)
    currentSourceFiles = set(os.listdir(PRECINCT_RESULTS_DIR))

    precinctZipFiles = []
    for f in os.listdir(DOWNLOAD_DIR):
        nameParts = f.split('_')
        # Names without an underscore (e.g. '.gitkeep', 'README.md') have no
        # second part; skip them instead of failing with IndexError.
        if len(nameParts) < 2 or not nameParts[1].startswith('Precinct'):
            continue
        if f.replace('_csv.zip', '.csv') in currentSourceFiles:
            continue
        precinctZipFiles.append(f)

    print(f'Unzipping {len(precinctZipFiles)} precinct results files...')
    for f in precinctZipFiles:
        with zipfile.ZipFile(os.path.join(DOWNLOAD_DIR, f), 'r') as z:
            z.extractall(PRECINCT_RESULTS_DIR)


def getCountyResultsDF():
    """Load every county results file into one DataFrame.

    Extracts any pending county archives first, then reads and concatenates
    every file found in COUNTY_RESULTS_DIR.

    Returns:
        pandas.DataFrame: the concatenated rows, with 'elec_date' parsed from
        'MM/DD/YYYY' text into datetimes and an added integer 'elec_date_id'
        column in YYYYMMDD form.
    """
    unzipCountyResults()
    source_files = [f for f in os.listdir(COUNTY_RESULTS_DIR)]
    df = pd.concat([pd.read_csv(os.path.join(COUNTY_RESULTS_DIR, f),
                                engine='python', delimiter=",", encoding='utf-8', index_col=False) for f in source_files])
    # Column-wise parsing yields the same values as a row-wise
    # datetime.strptime apply, without a Python-level call per row.
    df['elec_date'] = pd.to_datetime(df['elec_date'], format='%m/%d/%Y')
    df['elec_date_id'] = df['elec_date'].dt.strftime('%Y%m%d').astype('int64')
    return df


def getPrecinctResultsDF():
    """Load every precinct results file into one DataFrame.

    Extracts any pending precinct archives first, then reads and concatenates
    every file found in PRECINCT_RESULTS_DIR.

    Returns:
        pandas.DataFrame: the concatenated rows, with 'elec_date' parsed from
        'MM/DD/YYYY' text into datetimes and an added integer 'elec_date_id'
        column in YYYYMMDD form.
    """
    unzipPrecinctResults()
    source_files = [f for f in os.listdir(PRECINCT_RESULTS_DIR)]
    df = pd.concat([pd.read_csv(os.path.join(PRECINCT_RESULTS_DIR, f),
                                engine='python', delimiter=",", encoding='utf-8', index_col=False) for f in source_files])
    # Column-wise parsing yields the same values as a row-wise
    # datetime.strptime apply, without a Python-level call per row (this
    # frame is on the order of a million rows).
    df['elec_date'] = pd.to_datetime(df['elec_date'], format='%m/%d/%Y')
    df['elec_date_id'] = df['elec_date'].dt.strftime('%Y%m%d').astype('int64')
    return df


def dropTable(table):
    """Drop `table` from the database behind `db.engine` if it exists.

    Args:
        table: name of the table to drop. It is interpolated directly into the
            statement (identifiers cannot be bound as parameters), so it must
            come from a trusted source, never from user input.

    Returns None. The drop is committed before returning.
    """
    connection = db.engine.raw_connection()
    try:
        cursor = connection.cursor()
        try:
            command = f"DROP TABLE IF EXISTS {table};"
            cursor.execute(command)
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Release the raw connection even when the statement fails; previously
        # it was never closed at all.
        connection.close()


In [2]:
# Start from a clean slate: drop_all() followed by create_all() on the
# project's `db` object. Presumably this wipes and recreates every model
# table (destructive) - confirm before running against a database that
# holds data you want to keep.
db.drop_all()
db.create_all()


In [3]:
# Seed the single State row (id 40, Oklahoma).
ok = State()
ok.id = 40
ok.name = "Oklahoma"
ok.postal_code = "OK"
db.session.add(ok)
db.session.commit()

df_counties = pd.read_csv(os.path.join(SOURCE_DIR, 'counties/ok_counties.csv'))
df_precincts = pd.read_csv(os.path.join(
    SOURCE_DIR, 'precincts/ok_precincts.csv'))

# The precinct file references counties by the county file's `id` column, but
# counties are loaded keyed on `geoid`, so translate precinct.county_id to the
# matching geoid. A single lookup table replaces the per-row DataFrame filter;
# drop_duplicates keeps the first county per id, as the old `.iloc[0]` did.
geoid_by_county_id = df_counties.drop_duplicates('id').set_index('id')['geoid']
precinct_geoids = df_precincts['county_id'].map(geoid_by_county_id)
unmatched_county_ids = df_precincts.loc[precinct_geoids.isna(), 'county_id'].unique()
if len(unmatched_county_ids):
    # Previously this surfaced as an unexplained IndexError from `.iloc[0]`.
    raise ValueError(
        f'ok_precincts.csv has county_id values with no geoid in ok_counties.csv: {list(unmatched_county_ids)}')
df_precincts['county_id'] = precinct_geoids

df_counties = df_counties.drop(columns=['id']).rename(columns={'geoid': 'id'})
County.load_dataframe(df_counties)
Precinct.load_dataframe(df_precincts)


County.load_dataframe: 77 rows
  Dropped DF columns: Unnamed: 0, total_precincts, geometry
Precinct.load_dataframe: 1948 rows
  Dropped DF columns: Unnamed: 0


{'rows': 1948}

In [4]:

# Read the state-level result files from STATE_RESULTS_DIR (defined with the
# other path constants) rather than rebinding SOURCE_DIR: rebinding made the
# earlier county/precinct cell, which builds paths from SOURCE_DIR, fail when
# re-run after this one.
STAGING_ROOT = os.path.join(os.getcwd(), 'data/staging')

# os.listdir order is arbitrary; sort so the concatenation order (and the
# preview below) is the same on every run.
source_files = sorted(os.listdir(STATE_RESULTS_DIR))
df_source = pd.concat([pd.read_csv(os.path.join(STATE_RESULTS_DIR, f),
                                   engine='python', delimiter=",", encoding='utf-8', index_col=False) for f in source_files])

df_source.head()


Unnamed: 0,elec_date,entity_description,race_number,race_description,race_party,tot_race_prec,race_prec_reporting,cand_number,cand_name,cand_party,cand_absmail_votes,cand_early_votes,cand_elecday_votes,cand_tot_votes,race_county_owner
0,4/7/2015,"FEDERAL, STATE AND COUNTY",11001,FOR STATE SENATOR DISTRICT 11 (UNEXPIRED TERM),DEM,35.0,35.0,1,KEVIN MATTHEWS,DEM,88,73,1458,1619,
1,4/7/2015,"FEDERAL, STATE AND COUNTY",11001,FOR STATE SENATOR DISTRICT 11 (UNEXPIRED TERM),DEM,35.0,35.0,2,REGINA GOODWIN,DEM,73,95,1149,1317,
2,4/7/2015,"FEDERAL, STATE AND COUNTY",11001,FOR STATE SENATOR DISTRICT 11 (UNEXPIRED TERM),DEM,35.0,35.0,3,HEATHER NASH,DEM,17,12,457,486,
3,4/7/2015,BECKHAM COUNTY,30001,COUNTY QUESTION,,13.0,13.0,1,FOR THE PROPOSITION - YES,,41,34,1272,1347,BECKHAM
4,4/7/2015,BECKHAM COUNTY,30001,COUNTY QUESTION,,13.0,13.0,2,AGAINST THE PROPOSITION - NO,,15,15,919,949,BECKHAM


In [5]:
# import election_dates
df_dates = pd.read_csv(os.path.join(STAGING_ROOT, 'election_dates.csv'))
ElectionDate.load_dataframe(df_dates)

# Parse 'MM/DD/YYYY' text into datetimes and derive the integer YYYYMMDD key
# used to join against election dates. Column-wise parsing replaces the
# row-wise strptime apply, which raised TypeError if these two lines were run
# a second time on the already-parsed column.
df_source['elec_date'] = pd.to_datetime(df_source['elec_date'], format='%m/%d/%Y')
df_source['elec_date_id'] = df_source['elec_date'].dt.strftime('%Y%m%d').astype('int64')


ElectionDate.load_dataframe: 100 rows


In [6]:

# import election_races
# Stage the candidate-level source rows in a temp table so the race-level
# rollup (one row per race, vote columns summed over its candidates) can be
# expressed as a single GROUP BY.
df_source.to_sql('TEMP_df_source', con=db.engine, if_exists='replace')
q = """ 
    SELECT 
        race_number, 
        elec_date,
        elec_date_id,
        entity_description as race_entity_description,
        race_county_owner,
        race_description,
        race_party,
        tot_race_prec as race_tot_prec,	
        race_prec_reporting,
        SUM(cand_tot_votes) as race_tot_votes,
        SUM(cand_absmail_votes) as race_absmail_votes,
        SUM(cand_early_votes) as race_early_votes,	
        SUM(cand_elecday_votes) as race_elecday_votes,
        MAX(cand_number) as race_num_candidates
    FROM TEMP_df_source
        GROUP BY race_number, 
        entity_description,
        race_county_owner,
        elec_date, 
        elec_date_id,
        race_description,
        race_party,
        tot_race_prec,
        race_prec_reporting
     """
df_election_races = pd.read_sql(q, con=db.engine)

ElectionRace.load_dataframe(df_election_races)
# Use the shared helper instead of repeating the raw-connection boilerplate.
dropTable('TEMP_df_source')

# Read the races back from the table to preview them together with their `id`
# column, which the frame loaded above did not contain.
df_election_races = pd.read_sql(
    "SELECT id, race_number, elec_date_id, race_description FROM election_races", con=db.engine)

df_election_races.head()

ElectionRace.load_dataframe: 8384 rows
  Missing Table columns: id


Unnamed: 0,id,race_number,elec_date_id,race_description
0,1,6061,20130611,PROPOSITION BLAINE COUNTY EMS
1,2,6061,20140826,PROPOSITION OKEENE PUBLIC SCHOOLS
2,3,6141,20130402,PROPOSITION CITY OF NORMAN - PROPOSITION
3,4,6141,20140401,PROPOSITION CITY OF NORMAN - PROPOSITION
4,5,6241,20131008,PROPOSITION SPECIAL COUNTY


In [7]:
# Stage only the candidate-level columns; kept under its own name so the
# staging slice and the final result frame are not the same variable.
df_candidates_staging = df_source[['elec_date_id', 'race_number', 'race_description', 'cand_number', 'cand_name', 'cand_party', 'cand_absmail_votes',
                                   'cand_early_votes', 'cand_elecday_votes', 'cand_tot_votes']]
df_candidates_staging.to_sql(
    'TEMP_df_election_race_candidates', con=db.engine, if_exists='replace')
# Attach each candidate to its race id, flag as winner every candidate whose
# total equals the race maximum (ties flag more than one), and compute the
# candidate's share of each race-level vote total. All four shares now use
# NULLIF so a race with zero votes yields NULL rather than relying on the
# database's divide-by-zero behaviour.
q = """
    SELECT 
    r.id as election_race_id,
    t.cand_number,
    t.cand_name,
    t.cand_party,
    case when m.winning_votes = t.cand_tot_votes then TRUE
        else FALSE end as cand_is_winner,
    t.cand_absmail_votes,
    t.cand_early_votes,
    t.cand_elecday_votes, 
    t.cand_tot_votes,
    CAST(t.cand_tot_votes as float)/NULLIF(r.race_tot_votes, 0) as percent_tot_votes,
    CAST(t.cand_absmail_votes as float)/NULLIF(r.race_absmail_votes, 0) as percent_absmail_votes,
    CAST(t.cand_elecday_votes as float)/NULLIF(r.race_elecday_votes, 0) as percent_elecday_votes,
    CAST(t.cand_early_votes as float)/NULLIF(r.race_early_votes, 0) as percent_early_votes
    FROM election_races as r
    INNER JOIN TEMP_df_election_race_candidates as t
    ON r.elec_date_id = t.elec_date_id
    and r.race_number = t.race_number
    and r.race_description = t.race_description
    INNER JOIN (
        SELECT
        i.elec_date_id,
        i.race_number,
        i.race_description,
        max(i.cand_tot_votes) as winning_votes
        FROM TEMP_df_election_race_candidates as i
        GROUP BY i.elec_date_id, i.race_number, i.race_description
    ) as m
    ON t.elec_date_id = m.elec_date_id
    and t.race_number = m.race_number
    and t.race_description = m.race_description
    """
df_election_race_candidates = pd.read_sql(q, con=db.engine)
# Use the shared helper instead of repeating the raw-connection boilerplate.
dropTable('TEMP_df_election_race_candidates')
ElectionRaceCandidate.load_dataframe(df_election_race_candidates)


ElectionRaceCandidate.load_dataframe: 19231 rows
  Missing Table columns: id


{'rows': 19231}

In [8]:
# Load all county-level result files and stage them as TEMP_df_county_results
# (replacing any earlier copy) for the SQL in the following cells.
df_county_results = getCountyResultsDF()
df_county_results.to_sql(
    name='TEMP_df_county_results', con=db.engine, if_exists='replace')


Unzipping 0 county results files...


In [9]:
# Load missing county-level results to election_results
# Anti-join: keep only staged county rows whose (date, race number,
# description) has no row in election_races, then roll them up to race level.
# The description is compared with `=` - the same predicate every later join
# against election_races uses. The previous LIKE treated '%' and '_' in a
# description as wildcards (and is case-insensitive for ASCII in SQLite), so a
# race could count as "already present" here and still fail the exact-match
# joins in later cells, silently dropping its results.
q = """
    SELECT 
        t.race_number, 
        t.elec_date,
        t.elec_date_id,
        t.entity_description as race_entity_description,
        t.race_county_owner,
        t.race_description,
        t.race_party,
        t.tot_race_prec as race_tot_prec,	
        t.race_prec_reporting,
        SUM(t.cand_tot_votes) as race_tot_votes,
        SUM(t.cand_absmail_votes) as race_absmail_votes,
        SUM(t.cand_early_votes) as race_early_votes,	
        SUM(t.cand_elecday_votes) as race_elecday_votes,
        MAX(t.cand_number) as race_num_candidates
    FROM TEMP_df_county_results as t
    LEFT JOIN election_races as r
        on t.elec_date_id = r.elec_date_id
        and t.race_number = r.race_number
        and t.race_description = r.race_description
    WHERE r.id is null
    GROUP BY t.race_number, 
        t.elec_date,
        t.elec_date_id,
        t.entity_description,
        t.race_county_owner,
        t.race_description,
        t.race_party,
        t.tot_race_prec,	
        t.race_prec_reporting
    
     """
df_missing_election_races = pd.read_sql(q, con=db.engine)
ElectionRace.load_dataframe(df_missing_election_races)


ElectionRace.load_dataframe: 49 rows
  Missing Table columns: id


{'rows': 8433}

In [10]:
# Load missing county-level candidates to election_result_candidates
# Step 1: sum the staged county rows per (race, candidate) for candidates that
# have no election_race_candidates row yet, carrying the race-level totals
# along for the percentage maths in step 2.
q = """
        SELECT 
        r.id as election_race_id,
        t.cand_number,
        t.cand_name,
        t.cand_party,
        r.race_tot_votes,
        r.race_elecday_votes,
        r.race_absmail_votes,
        r.race_early_votes,
        SUM(t.cand_absmail_votes) as cand_absmail_votes,
        SUM(t.cand_early_votes) as cand_early_votes,
        SUM(t.cand_elecday_votes) as cand_elecday_votes, 
        SUM(t.cand_tot_votes) as cand_tot_votes
        FROM TEMP_df_county_results as t
        INNER JOIN election_races as r
            ON t.elec_date_id = r.elec_date_id
            and t.race_number = r.race_number
            and t.race_description = r.race_description
        WHERE NOT EXISTS (
            SELECT * FROM election_race_candidates as c 
            WHERE c.election_race_id = r.id
            and c.cand_number = t.cand_number
        )
        GROUP BY r.id,
        r.race_tot_votes,
        r.race_elecday_votes,
        r.race_absmail_votes,
        r.race_early_votes,
        t.cand_number,
        t.cand_name,
        t.cand_party
"""
df_missing_election_candidates = pd.read_sql(q, con=db.engine)
df_missing_election_candidates.to_sql('TEMP_missing_candidates',
                                      con=db.engine, if_exists='replace')
# Step 2: flag as winner every candidate whose total equals the maximum within
# its race (ties flag more than one) and compute vote shares. All four shares
# use NULLIF so a zero race total yields NULL rather than relying on the
# database's divide-by-zero behaviour.
q = """
    SELECT 
    t.election_race_id,
    t.cand_number,
    t.cand_name,
    t.cand_party,
    case when m.winning_votes = t.cand_tot_votes then TRUE
        else FALSE end as cand_is_winner,
    t.cand_absmail_votes,
    t.cand_early_votes,
    t.cand_elecday_votes, 
    t.cand_tot_votes,
    CAST(t.cand_tot_votes as float)/NULLIF(t.race_tot_votes, 0) as percent_tot_votes,
    CAST(t.cand_absmail_votes as float)/NULLIF(t.race_absmail_votes, 0) as percent_absmail_votes,
    CAST(t.cand_elecday_votes as float)/NULLIF(t.race_elecday_votes, 0) as percent_elecday_votes,
    CAST(t.cand_early_votes as float)/NULLIF(t.race_early_votes, 0) as percent_early_votes
    FROM TEMP_missing_candidates as t
    INNER JOIN (
        SELECT
        i.election_race_id,
        max(i.cand_tot_votes) as winning_votes
        FROM TEMP_missing_candidates as i
        GROUP BY i.election_race_id
    ) as m
    ON m.election_race_id = t.election_race_id
    """
df_missing_election_candidates = pd.read_sql(q, con=db.engine)
ElectionRaceCandidate.load_dataframe(df_missing_election_candidates)

ElectionRaceCandidate.load_dataframe: 98 rows
  Missing Table columns: id


{'rows': 19329}

In [11]:
# Roll the staged county rows up to one row per (county, race): vote columns
# are summed over the race's candidates, and each percent_* column is that
# county sum divided by the matching race-level total (NULL when the race
# total is 0).
# NOTE(review): the county join compares t.county with the upper-cased county
# name after replacing 'LE ' with 'LE' - presumably so "Le Flore" matches a
# source value spelled "LEFLORE"; confirm against the county results files.
q = """
    SELECT
        cn.id as county_id,
        r.id as election_race_id,
        SUM(t.cand_tot_votes) as county_tot_votes,
        SUM(t.cand_absmail_votes) as county_absmail_votes,
        SUM(t.cand_elecday_votes) as county_elecday_votes,
        SUM(t.cand_early_votes) as county_early_votes,
        CAST(SUM(t.cand_tot_votes) as float) / NULLIF(r.race_tot_votes, 0) as percent_tot_votes,
        CAST(SUM(t.cand_absmail_votes) as float) / NULLIF(r.race_absmail_votes, 0)  as percent_absmail_votes,
        CAST(SUM(t.cand_elecday_votes) as float) / NULLIF(r.race_elecday_votes, 0)  as percent_elecday_votes,
        CAST(SUM(t.cand_early_votes) as float) / NULLIF(r.race_early_votes, 0) as percent_early_votes
    FROM TEMP_df_county_results as t
    INNER JOIN election_races as r
        ON t.elec_date_id = r.elec_date_id
        and t.race_number = r.race_number
        and t.race_description = r.race_description
    INNER JOIN counties as cn
        on t.county = REPLACE(UPPER(cn.name), 'LE ', 'LE')
    GROUP BY cn.id,
        r.id,
        r.race_tot_votes,
        r.race_elecday_votes,
        r.race_absmail_votes,
        r.race_early_votes
"""
df_county_election_results = pd.read_sql(q, con=db.engine)
CountyElectionResult.load_dataframe(df_county_election_results)


CountyElectionResult.load_dataframe: 23538 rows
  Missing Table columns: id, county_tot_prec, county_prec_reporting


{'rows': 23538}

In [12]:

# Step 1: resolve every staged county row to its county_election_results id
# and election_race_candidates id, and compute the candidate's share of each
# county-level vote total (NULL when the county total is 0).
q = """
WITH results_cte
as
(
    SELECT
    cr.id as county_election_result_id
    , cand.id as election_race_candidate_id
    , t.cand_absmail_votes
    , t.cand_early_votes
    , t.cand_elecday_votes
    , t.cand_tot_votes
    , CAST(t.cand_tot_votes as float) / NULLIF(cr.county_tot_votes, 0) as percent_tot_votes
    , CAST(t.cand_absmail_votes as float) / NULLIF(cr.county_absmail_votes, 0) as percent_absmail_votes
    , CAST(t.cand_elecday_votes as float) / NULLIF(cr.county_elecday_votes, 0) as percent_elecday_votes
    , CAST(t.cand_early_votes as float) / NULLIF(cr.county_early_votes, 0) as percent_early_votes
    FROM TEMP_df_county_results as t
    INNER JOIN election_races as r
        ON t.elec_date_id = r.elec_date_id
        and t.race_number = r.race_number
        and t.race_description = r.race_description
    INNER JOIN counties as cn
        on t.county = REPLACE(UPPER(cn.name), 'LE ', 'LE')
    INNER JOIN county_election_results as cr
        ON r.id = cr.election_race_id
        and cn.id = cr.county_id
    INNER JOIN election_race_candidates as cand
        ON t.cand_number = cand.cand_number
        and r.id = cand.election_race_id
)
SELECT
    r.*
FROM results_cte as r
"""
df_county_election_candidate_results = pd.read_sql(q, con=db.engine)
# Round-trip through a temp table so the winner flag can be computed in SQL.
# to_sql also writes the DataFrame index as an `index` column, which `r.*`
# below selects back (the loader reports dropping it).
df_county_election_candidate_results.to_sql(
    'TEMP_df_county_election_candidate_results', con=db.engine, if_exists='replace')
# Step 2: within each county result, flag as winner every candidate whose
# total equals the maximum (ties flag more than one).
q = """
WITH winner_cte as (
    SELECT
        r.county_election_result_id
        , MAX(r.cand_tot_votes) as winning_votes
    FROM TEMP_df_county_election_candidate_results as r
    GROUP BY r.county_election_result_id
)
SELECT r.*,
    CASE
        when w.winning_votes = r.cand_tot_votes then TRUE
        else FALSE end as cand_is_winner
FROM TEMP_df_county_election_candidate_results as r
INNER JOIN winner_cte as w
    ON r.county_election_result_id = w.county_election_result_id
"""
df_county_election_candidate_results = pd.read_sql(q, con=db.engine)
CountyElectionCandidateResult.load_dataframe(
    df_county_election_candidate_results)


CountyElectionCandidateResult.load_dataframe: 58235 rows
  Dropped DF columns: index
  Missing Table columns: id


{'rows': 58235}

In [13]:
# Load all precinct-level result files and stage them in a temp table for the
# SQL in this and the following cells.
df_precinct_election_results = getPrecinctResultsDF()
print(f"Loaded {len(df_precinct_election_results.index)} rows for df_precinct_election_results")
df_precinct_election_results.to_sql(
    'TEMP_df_precinct_election_results', con=db.engine, if_exists='replace')

# Find precinct numbers that appear in the results but have no row in
# `precincts`, and build placeholder rows for them: zeros for the district,
# shape columns and county_id.
# NOTE(review): the join casts t.precinct to int but the selected id /
# precinct_num are not cast - confirm the loader receives the intended type.
q = """
    SELECT DISTINCT
    t.precinct as id,
    0 as ok_district_id,
    t.precinct as precinct_num,
    0 as county_id,
    0 as SHAPE_Area,
    0 as SHAPE_center_lat,
    0 as SHAPE_center_lon
    FROM TEMP_df_precinct_election_results as t
    LEFT JOIN precincts as pct
        ON CAST(t.precinct as int) = pct.id
    WHERE pct.id is null
"""
df_missing_pcts = pd.read_sql(q, con=db.engine)
# Placeholder county with id 0, matching the county_id given to the
# placeholder precincts above (presumably so their county reference resolves).
# NOTE(review): this insert is unconditional, so re-running the cell without
# resetting the database will presumably fail on a duplicate id - confirm.
unknownCounty = County()
unknownCounty.id = 0
unknownCounty.name = 'unknown'
unknownCounty.state_id = 40
unknownCounty.SHAPE_Area = 0
unknownCounty.SHAPE_center_lat = 0
unknownCounty.SHAPE_center_lon = 0
db.session.add(unknownCounty)
db.session.commit()
Precinct.load_dataframe(df_missing_pcts)



Unzipping 0 precinct results files...
Loaded 1092862 rows for df_precinct_election_results
Precinct.load_dataframe: 16 rows
  Missing Table columns: county_number, ok_commissioner_district_id, ok_house_district_id, ok_senate_district_id, us_congressional_district_id


{'rows': 1964}

In [14]:
# Roll the staged precinct rows up to one row per (precinct, race), summing
# each vote column over the race's candidates. Rows whose race has no exact
# (date, race number, description) match in election_races are dropped by the
# inner join.
q = """
    SELECT
    cast(t.precinct as int) as precinct_id
    , r.id as election_race_id
    , SUM(t.cand_tot_votes) as precinct_tot_votes
    , SUM(t.cand_absmail_votes) as precinct_absmail_votes
    , SUM(t.cand_early_votes) as precinct_early_votes
    , SUM(t.cand_elecday_votes) as precinct_elecday_votes
    FROM TEMP_df_precinct_election_results as t
    INNER JOIN election_races as r
        ON t.elec_date_id = r.elec_date_id
        and t.race_number = r.race_number
        and t.race_description = r.race_description
    GROUP BY t.precinct, r.id
"""
# Note: this rebinds df_precinct_election_results from the raw staged frame to
# the per-(precinct, race) rollup.
df_precinct_election_results = pd.read_sql(q, con=db.engine)
PrecinctElectionResult.load_dataframe(df_precinct_election_results)



PrecinctElectionResult.load_dataframe: 419896 rows
  Missing Table columns: id


{'rows': 419896}

In [15]:
# Step 1: resolve every staged precinct row to its candidate id and
# precinct_election_results id, and compute the candidate's share of each
# precinct-level vote total (NULL when the precinct total is 0).
q = """
    SELECT
    cr.id as election_race_candidate_id
    , pr.id as precinct_election_result_id
    , r.id as election_race_id
    , t.cand_tot_votes
    , t.cand_absmail_votes
    , t.cand_early_votes
    , t.cand_elecday_votes
    , CAST(t.cand_tot_votes as float)/NULLIF(pr.precinct_tot_votes, 0) as percent_tot_votes
    , CAST(t.cand_absmail_votes as float)/NULLIF(pr.precinct_absmail_votes, 0) as percent_absmail_votes
    , CAST(t.cand_early_votes as float)/NULLIF(pr.precinct_early_votes, 0) as percent_early_votes
    , CAST(t.cand_elecday_votes as float)/NULLIF(pr.precinct_elecday_votes, 0) as percent_elecday_votes
    FROM TEMP_df_precinct_election_results as t
    INNER JOIN election_races as r
        ON t.elec_date_id = r.elec_date_id
        and t.race_number = r.race_number
        and t.race_description = r.race_description
    INNER JOIN election_race_candidates as cr
        ON r.id = cr.election_race_id
        and t.cand_number = cr.cand_number
    INNER JOIN precinct_election_results as pr
        ON r.id = pr.election_race_id
        and CAST(t.precinct as int) = pr.precinct_id
"""
df_precinct_candidate_results = pd.read_sql(q, con=db.engine)
# Stage the derived rows under their own temp table name. Writing them back
# over TEMP_df_precinct_election_results destroyed the staged source rows, so
# the query above (and the per-precinct rollup cell) could not be re-run
# without re-staging about a million rows. The TEMP_ prefix keeps the new
# table covered by the final cleanup cell.
df_precinct_candidate_results.to_sql(
    'TEMP_df_precinct_candidate_results', con=db.engine, if_exists='replace')
# Step 2: within each precinct result, flag as winner every candidate whose
# total equals the maximum (ties flag more than one).
q = """
    SELECT
    t.*,
    case
        when w.winning_votes = t.cand_tot_votes then TRUE
        else FALSE end as cand_is_winner
    FROM TEMP_df_precinct_candidate_results as t
    INNER JOIN (
        SELECT
            i.precinct_election_result_id,
            MAX(i.cand_tot_votes) as winning_votes
        FROM TEMP_df_precinct_candidate_results as i
        GROUP BY i.precinct_election_result_id
    ) as w
    ON t.precinct_election_result_id = w.precinct_election_result_id
   
"""
df_precinct_candidate_results = pd.read_sql(q, con=db.engine)
PrecinctElectionCandidateResult.load_dataframe(df_precinct_candidate_results)


PrecinctElectionCandidateResult.load_dataframe: 1083476 rows
  Dropped DF columns: index, election_race_id
  Missing Table columns: id


{'rows': 1083476}

In [16]:
# Flatten the two precinct tables into one row per (precinct result,
# candidate): the precinct-level totals are repeated next to each candidate's
# own vote counts and winner flag, and the result is loaded via PrecinctResult.
q = """
    SELECT
        pr.precinct_id
        , pr.election_race_id
        , pr.precinct_tot_votes
        , pr.precinct_absmail_votes
        , pr.precinct_early_votes
        , pr.precinct_elecday_votes
        , c.election_race_candidate_id
        , c.cand_is_winner
        , c.cand_absmail_votes
        , c.cand_early_votes
        , c.cand_elecday_votes
        , c.cand_tot_votes
    FROM precinct_election_results as pr
    INNER JOIN precinct_election_candidate_results as c
        ON pr.id = c.precinct_election_result_id
"""
df_precinct_results = pd.read_sql(q, con=db.engine)
PrecinctResult.load_dataframe(df_precinct_results)


PrecinctResult.load_dataframe: 1083476 rows
  Missing Table columns: id


{'rows': 1083476}

In [17]:



# List every table currently in the SQLite database.
df_tables = pd.read_sql(
    "SELECT name FROM sqlite_master WHERE type='table';", con=db.engine)

# drop temp tables: everything staged by the earlier cells uses the TEMP_ prefix
temp_table_names = [
    name for name in df_tables['name'] if name.startswith('TEMP_')]
for temp_table_name in temp_table_names:
    dropTable(temp_table_name)

# Show the remaining tables to confirm the cleanup.
pd.read_sql(
    "SELECT name FROM sqlite_master WHERE type='table';", con=db.engine)


Unnamed: 0,name
0,election_dates
1,states
2,election_races
3,counties
4,election_race_candidates
5,precincts
6,county_election_results
7,county_election_candidate_results
8,precinct_election_results
9,precinct_results


In [18]:
# Drop the two intermediate precinct tables, candidate-level table first; the
# flattened precinct results were built from them in an earlier cell.
for intermediate_table in ('precinct_election_candidate_results',
                           'precinct_election_results'):
    dropTable(intermediate_table)