#### Zip Codes import

In [1]:
import json
import os
import sqlite3
import sys
from datetime import datetime

import logzero
import numpy as np
import pandas as pd
import yaml
from logzero import logger

In [2]:
log_path = "logs/"
log_file = "zip_code_import.log"

# The rotating file handler opens the log file directly and does not create
# missing parent folders, so make sure the directory exists first.
os.makedirs(log_path, exist_ok=True)

# Rotate at ~1 MB and keep five backups; stderr output is disabled so log
# records go to the file only.
logzero.logfile(
    os.path.join(log_path, log_file),
    maxBytes=1_000_000,
    backupCount=5,
    disableStderrLogger=True,
)
logger.info(f"{log_path}, {log_file}")

In [4]:
# Make the directory holding the project's `queries` module importable.
# Guarded so that re-running this cell does not keep adding duplicate
# entries to sys.path.
sql_module_dir = "../data/sql"
if sql_module_dir not in sys.path:
    sys.path.append(sql_module_dir)
import queries

In [5]:
# Record the interpreter's module search path in the log file.
logger.info(sys.path)

In [6]:
# Load the YAML configuration. On failure, write the traceback to the log and
# re-raise so the notebook stops at this cell with the real cause visible,
# rather than swallowing every exception type and calling exit().
try:
    with open("../source/config.yml", "r") as config_in:
        configs = yaml.load(config_in, Loader=yaml.SafeLoader)
    logger.info(configs)
except (OSError, yaml.YAMLError):
    logger.exception("config file open failure.")
    raise

# Location of the tab-separated input file.
data_path = configs["file_locations"]["data_path_usc"]
data_file = configs["file_locations"]["data_file_usc"]

# Location of the SQLite database file.
db_path = configs["file_locations"]["db_path"]
db_file = configs["file_locations"]["db_file"]

In [7]:
# Log the input-file and database locations read from the config so each run
# records which files it used.
logger.info(f"{data_path}, {data_file}")
logger.info(f"{db_path}, {db_file}")

In [8]:
# Presumably the column labels used in the source file's header row; kept for
# reference only (nothing below reads this list).
original = ["GEOID", "ALAND", "AWATER", "ALAND_SQMI", "AWATER_SQMI", "INTPTLAT", "INTPTLONG"]

# Replacement labels, listed position-for-position against `original`.
header = [
    "ZIPCODE",
    "LAND_AREA_MSQ",
    "WATER_AREA_MSQ",
    "LAND_AREA_SQMI",
    "WATER_AREA_SQMI",
    "LAT_ZC",
    "LON_ZC",
]

# Lower-case versions are what the dataframe (and the db table) will use.
names = list(map(str.lower, header))

logger.info("Dataframe and db column names")
logger.info(names)

In [9]:
# The first column (the zip code) is read with the generic object dtype so it
# is not parsed as a number; the remaining six columns are 64-bit floats.
zip_column, *numeric_columns = names[:7]
dtypes = {zip_column: object}
dtypes.update({column: np.float64 for column in numeric_columns})

In [10]:
# Read the tab-separated source file. header=0 discards the file's own header
# row and `names` supplies the replacement column labels.
# On failure, log the traceback and re-raise: continuing would leave `df_raw`
# undefined and the following cells would fail with a misleading NameError.
try:
    df_raw = pd.read_csv(data_path + data_file, sep="\t", dtype=dtypes, names=names, header=0)
    logger.info("CSV file successfully read.")
except Exception:
    logger.exception("error reading CSV file.")
    raise

In [11]:
# Display the loaded frame as the cell output; the notebook rendering shows the
# first and last rows with the middle elided.
df_raw

Unnamed: 0,zipcode,land_area_msq,water_area_msq,land_area_sqmi,water_area_sqmi,lat_zc,lon_zc
0,00601,1.666597e+08,799292.0,64.348,0.309,18.180555,-66.749961
1,00602,7.930754e+07,4428428.0,30.621,1.710,18.361945,-67.175597
2,00603,8.188720e+07,181412.0,31.617,0.070,18.455183,-67.119887
3,00606,1.095800e+08,12487.0,42.309,0.005,18.158327,-66.932928
4,00610,9.301343e+07,4172059.0,35.913,1.611,18.294032,-67.127156
...,...,...,...,...,...,...,...
33139,99923,4.249520e+07,2117.0,16.407,0.001,56.000518,-130.037474
33140,99925,1.440710e+08,34333408.0,55.626,13.256,55.550203,-132.945947
33141,99926,3.439446e+08,292859017.0,132.798,113.074,55.138352,-131.470425
33142,99927,5.896501e+08,18041593.0,227.665,6.966,56.239062,-133.457924


In [12]:
# Open the SQLite database named in the config and obtain a cursor for
# executing raw SQL statements.
db_location = db_path + db_file
conn = sqlite3.connect(db_location)
cursor = conn.cursor()

In [13]:
# Execute each table-creation statement from the `queries` module (presumably
# CREATE TABLE statements, judging by their names), committing after each one.
for ddl_statement in (queries.create_table_geo_zipcodes, queries.create_table_nsrdb):
    cursor.execute(ddl_statement)
    conn.commit()

In [14]:
# method="multi" binds one SQL parameter per cell. Without a chunksize pandas
# sends every row in a single INSERT, which exceeds SQLite's per-statement
# parameter limit for a frame of this size. 999 is the limit on older SQLite
# builds, so sizing chunks against it is safe everywhere.
SQLITE_MAX_VARIABLES = 999
rows_per_insert = max(1, SQLITE_MAX_VARIABLES // max(1, len(df_raw.columns)))

# NOTE(review): if_exists="append" adds the rows again on every re-run of the
# notebook; whether that duplicates data or fails depends on the table's
# constraints, which are defined in the `queries` module - confirm.
df_raw.to_sql(
    "geo_zipcodes",
    conn,
    if_exists="append",
    index=False,
    method="multi",
    chunksize=rows_per_insert,
)
conn.commit()

In [15]:
# Release the SQLite connection now that the import has finished.
conn.close()