
# Jupyter Notebook: Setup for O&G Impact Analysis - PostGIS Database Initialization


# In [None]:

import os
import psycopg2
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

# --- Configuration ---
# Each setting is read from an environment variable so that real credentials
# never have to be written into the notebook. When a variable is unset, the
# original placeholder value is used, so the cell behaves as before.
DB_NAME = os.environ.get("OG_IMPACT_DB_NAME", "og_impact")
DB_USER = os.environ.get("OG_IMPACT_DB_USER", "postgres")
DB_PASSWORD = os.environ.get("OG_IMPACT_DB_PASSWORD", "your_password")
DB_HOST = os.environ.get("OG_IMPACT_DB_HOST", "localhost")
# Deliberately kept as a string (as before) so existing uses keep working.
DB_PORT = os.environ.get("OG_IMPACT_DB_PORT", "5432")

# --- Build the SQLAlchemy engine ---
# The connection URL is assembled from its individual components instead of
# being formatted by hand into a single string.
connection_url = URL.create(
    drivername="postgresql+psycopg2",
    host=DB_HOST,
    port=DB_PORT,
    database=DB_NAME,
    username=DB_USER,
    password=DB_PASSWORD,
)

# echo=True makes the engine log the SQL statements it emits.
engine = create_engine(connection_url, echo=True)

# --- Define Table Creation SQL ---
# One multi-statement DDL script: enables PostGIS, then creates the counties
# table and the per-county, per-year fact tables that reference it.
# Every statement uses IF NOT EXISTS, so re-running the script is harmless.
#
# Indexing notes:
#   * No explicit index is declared on osha_injuries (county_fips, year):
#     the PRIMARY KEY on those same columns already creates a unique index,
#     so a second one would only add write and storage overhead.
#   * counties.geom gets a GiST index, since it is the only geometry column
#     and spatial predicates cannot use the primary-key index.
#
# Keep '%' and ':name' sequences out of this string: it is passed through
# sqlalchemy.text() and psycopg2, which treat them as parameter markers.
create_tables_sql = """
CREATE EXTENSION IF NOT EXISTS postgis;

CREATE TABLE IF NOT EXISTS counties (
    county_fips VARCHAR PRIMARY KEY,
    name TEXT,
    state TEXT,
    geom GEOMETRY(MULTIPOLYGON, 4326)
);

-- Spatial index so geometry lookups on counties can avoid full table scans
CREATE INDEX IF NOT EXISTS idx_counties_geom ON counties USING GIST (geom);

CREATE TABLE IF NOT EXISTS economic_indicators (
    county_fips VARCHAR REFERENCES counties(county_fips),
    year INTEGER,
    population INTEGER,
    median_income DOUBLE PRECISION,
    unemployment_rate DOUBLE PRECISION,
    net_migration INTEGER,
    personal_income DOUBLE PRECISION,
    PRIMARY KEY (county_fips, year)
);

CREATE TABLE IF NOT EXISTS qcew_employment (
    county_fips VARCHAR REFERENCES counties(county_fips),
    year INTEGER,
    naics_code VARCHAR,
    industry TEXT,
    employment_level INTEGER,
    wages DOUBLE PRECISION,
    PRIMARY KEY (county_fips, year, naics_code)
);

CREATE TABLE IF NOT EXISTS government_finance (
    county_fips VARCHAR REFERENCES counties(county_fips),
    year INTEGER,
    revenue_total DOUBLE PRECISION,
    expenditures_total DOUBLE PRECISION,
    impact_fees_collected DOUBLE PRECISION,
    severance_tax_collected DOUBLE PRECISION,
    PRIMARY KEY (county_fips, year)
);

CREATE TABLE IF NOT EXISTS environmental_data (
    county_fips VARCHAR REFERENCES counties(county_fips),
    year INTEGER,
    tri_chemical_releases DOUBLE PRECISION,
    aqs_pm25_avg DOUBLE PRECISION,
    camd_emissions DOUBLE PRECISION,
    nwis_surface_water_quality TEXT,
    fracfocus_well_count INTEGER,
    satellite_night_lights DOUBLE PRECISION,
    satellite_pm25 DOUBLE PRECISION,
    PRIMARY KEY (county_fips, year)
);

CREATE TABLE IF NOT EXISTS osha_injuries (
    county_fips VARCHAR REFERENCES counties(county_fips),
    year INTEGER,
    total_cases DOUBLE PRECISION,
    total_dafw DOUBLE PRECISION,  -- Days Away From Work
    total_djtr DOUBLE PRECISION,  -- Days of Job Transfer or Restriction
    PRIMARY KEY (county_fips, year)
);

-- Add comment for documentation
COMMENT ON TABLE osha_injuries IS 'OSHA-reported injuries in oil and gas industry by county and year';
"""

# --- Apply the schema ---
# The whole DDL script is sent as one statement inside an engine.begin()
# block, so it runs within a single transaction scope.
schema_ddl = text(create_tables_sql)
with engine.begin() as conn:
    conn.execute(schema_ddl)

print("PostGIS database and tables initialized successfully.")
