# Import & Load NENE data

### Config

In [14]:
import sys, os
import configparser
# Parse the shared settings file. The file is opened in a `with` block so the
# handle is closed as soon as parsing finishes instead of being left open.
config = configparser.ConfigParser()
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)


# Put the configured library directory first on sys.path so that modules
# stored there can be imported, and read the database engine URL/path that
# is later handed to create_engine.
sys.path.insert(0, config.get('PATHS','libs_path'))
engine_path = config.get('DATABASE','engine_path')

### Main params

In [9]:
# Base URL passed as the first argument to the download helpers.
remote_path = 'http://pacha.datawheel.us/datachile/economy/nene/analysis/2_occupational_status/'
# Local directory passed to the download/extract helpers and used as the
# root of the CSV path given to pd.read_csv.
local_path = '../data/'


### Imports

In [11]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load file

In [None]:
# Call the zip download helper for the occupational-status archive, then the
# extract helper on the local directory.
# NOTE(review): the download names "occupational_status.zip" while the
# extraction names "temp.zip" — presumably download_zip_file stores the
# archive under that name; confirm against commons.py.
download_zip_file(remote_path, local_path, "occupational_status.zip")
extract_zip_file(local_path, "temp.zip")

In [21]:
# Load the occupational-status CSV located under local_path into a DataFrame.
facts_nene = pd.read_csv(
    os.path.join(
        local_path,
        "analysis",
        "2_occupational_status",
        "occupational_status.csv",
    ),
    low_memory=True,
)

In [19]:
# Display the dtype pandas assigned to each column of the loaded CSV.
facts_nene.dtypes

year                               int64
moving_quarter                    object
region_id                          int64
age                                int64
age_range_id                       int64
sex_id                             int64
icse_id                            int64
isco_id                          float64
isic_rev3_id                     float64
isic_cl_caenes_id                float64
isced_id                         float64
occupied_id                        int64
general_economic_condition_id      int64
weight                           float64
dtype: object

In [24]:
# NOTE: an explicit astype(int) cast of the id columns was tried here and is
# left disabled. isco_id, isic_rev3_id, isic_cl_caenes_id and isced_id load
# as float64 (see the dtypes output), presumably because they contain
# missing values — TODO confirm before re-enabling an integer cast.

# Rename two columns in place before the frame is written to the database.
facts_nene.rename(
    columns={
        'general_economic_condition_id': 'general_economic_condition',
        'weight': 'weights',
    },
    inplace=True,
)

### Ingest

In [27]:
# Build the SQLAlchemy engine from the configured engine_path and wrap it in
# the project's Postgres driver.
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)

# Write the DataFrame to the "economy" schema. Per the logged statements,
# the driver drops the table if it exists, recreates it and loads the rows
# with COPY.
db.to_sql(
    facts_nene,
    'economy',
    'fact_occupational_status_nene_mobile_quarters_new',
)

DROP TABLE IF EXISTS economy.fact_occupational_status_nene_mobile_quarters_new;
CREATE TABLE "economy"."fact_occupational_status_nene_mobile_quarters_new" (
"year" INTEGER,
  "moving_quarter" TEXT,
  "region_id" INTEGER,
  "age" INTEGER,
  "age_range_id" INTEGER,
  "sex_id" INTEGER,
  "icse_id" INTEGER,
  "isco_id" REAL,
  "isic_rev3_id" REAL,
  "isic_cl_caenes_id" REAL,
  "isced_id" REAL,
  "occupied_id" INTEGER,
  "general_economic_condition" INTEGER,
  "weights" REAL
)
COPY "economy"."fact_occupational_status_nene_mobile_quarters_new" ("year","moving_quarter","region_id","age","age_range_id","sex_id","icse_id","isco_id","isic_rev3_id","isic_cl_caenes_id","isced_id","occupied_id","general_economic_condition","weights") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Indexes & FK

In [None]:
# Prefix every moving_quarter value with its row's year and an underscore.
# NOTE(review): the statement has no WHERE clause, so it is not idempotent —
# running this cell a second time without reloading the table prefixes the
# year again.
engine.execute("""
UPDATE economy.fact_occupational_status_nene_mobile_quarters_new
SET moving_quarter = concat(year,'_',moving_quarter)
""")

In [33]:
# Create an index on the fact table's region_id column.
# NOTE(review): there is no IF NOT EXISTS, so re-running this cell while the
# index already exists raises an error; the index goes away when the table
# is dropped and recreated by the ingest step.
engine.execute("""
CREATE INDEX fact_occupational_status_nene_mobile_quarters_new_region_id 
ON economy.fact_occupational_status_nene_mobile_quarters_new (region_id)
""")

<sqlalchemy.engine.result.ResultProxy at 0x7fe71d262518>

### Related dim

In [None]:
# Download the age_range_id.csv dimension file and print the inline-table
# XML that inline_table_xml builds from the download helper's return value.
d1 = download_file(remote_path, local_path, 'age_range_id.csv')
print(inline_table_xml(d1, 'age_range', 'age_range_id', 'age_range'))

In [None]:
# Download the icse_id.csv dimension file and print the inline-table XML
# that inline_table_xml builds from the download helper's return value.
d2 = download_file(remote_path, local_path, 'icse_id.csv')
print(inline_table_xml(d2, 'icse', 'icse_id', 'icse'))

In [None]:
# Download the isced_id.csv dimension file and print the inline-table XML
# that inline_table_xml builds from the download helper's return value.
d3 = download_file(remote_path, local_path, 'isced_id.csv')
print(inline_table_xml(d3, 'isced', 'isced_id', 'isced'))

In [None]:
# Download the isco_id.csv dimension file and print the inline-table XML
# that inline_table_xml builds from the download helper's return value.
d4 = download_file(remote_path, local_path, 'isco_id.csv')
print(inline_table_xml(d4, 'isco', 'isco_id', 'isco'))

In [None]:
# Download the sex_id.csv dimension file and print the inline-table XML
# that inline_table_xml builds from the download helper's return value.
d5 = download_file(remote_path, local_path, 'sex_id.csv')
print(inline_table_xml(d5, 'sex', 'sex_id', 'sex'))

In [None]:
# Download the general_economic_condition_id.csv dimension file and print
# the inline-table XML that inline_table_xml builds from it.
# NOTE(review): the fact table column was renamed to
# 'general_economic_condition' before ingest, while this call still passes
# 'general_economic_condition_id' — confirm the key name is the intended one.
d6 = download_file(remote_path, local_path, 'general_economic_condition_id.csv')
print(
    inline_table_xml(
        d6,
        'general_economic_condition',
        'general_economic_condition_id',
        'general_economic_condition',
    )
)

In [None]:
# Download the occupied_id.csv dimension file and print the inline-table XML
# that inline_table_xml builds from the download helper's return value.
d7 = download_file(remote_path, local_path, 'occupied_id.csv')
print(inline_table_xml(d7, 'occupied', 'occupied_id', 'occupied'))