# Import & Load Internet Access Survey

### Config

In [16]:
import sys
import configparser
# Read the project-wide settings file that lives two directories above this notebook.
config = configparser.ConfigParser()
# Open the file in a context manager so the handle is closed as soon as it has
# been parsed (passing a bare open(...) to read_file leaves it open).
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the directory holding the shared helper modules first on the import path,
# so the local `postgres` and `commons` modules can be imported below.
sys.path.insert(0, config.get('PATHS', 'libs_path'))
# Database connection string, later handed to create_engine.
engine_path = config.get('DATABASE', 'engine_path')

### Main params

In [17]:
# Second argument given to download_file for every CSV used in this notebook.
local_path = '../data/'
# Base URL under which the survey CSV and its lookup tables are published.
remote_path = 'http://pacha.datawheel.us/telecomunicaciones/encuesta_internet/'

### Imports

In [18]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load file

In [21]:
# Fetch the processed survey file, rename the source columns to the names used
# in the fact table, and cast every key column to an integer dtype.
df = (
    download_file(remote_path, local_path, 'seventh_internet_access_survey_processed.csv')
    .rename(columns={'zona': 'zone_id', 'region': 'region_id', 'household_weights': 'weights'})
    .astype(dict.fromkeys(
        [
            'year',
            'zone_id',
            'region_id',
            'home_access_id',
            'internet_plan_id',
            'desktop_access_id',
            'laptop_access_id',
            'tablet_access_id',
            'cellphone_access_id',
            'games_consoles_access_id',
            'tv_access_id',
        ],
        'int',
    ))
)
# Show the resulting column names as the cell output.
list(df)

['year',
 'zone_id',
 'region_id',
 'home_access_id',
 'internet_plan_id',
 'desktop_access_id',
 'laptop_access_id',
 'tablet_access_id',
 'cellphone_access_id',
 'games_consoles_access_id',
 'tv_access_id',
 'weights']

### Ingest

In [24]:
# Build the SQLAlchemy engine from the connection string read from settings.ini.
engine = create_engine(engine_path)
# Wrap the engine in the project's Postgres helper (local file postgres.py).
db = postgres.PostgresDriver(engine)
# Load df into environment.fact_internet_access_survey. Per the recorded output,
# this creates the schema if needed, drops and recreates the table, then bulk
# loads the rows with COPY.
# NOTE(review): the target schema is 'environment' although the source path is
# under telecomunicaciones -- confirm this is the intended schema.
db.to_sql(df, 'environment', 'fact_internet_access_survey')

CREATE SCHEMA IF NOT EXISTS environment;
DROP TABLE IF EXISTS environment.fact_internet_access_survey;
CREATE TABLE "environment"."fact_internet_access_survey" (
"year" INTEGER,
  "zone_id" INTEGER,
  "region_id" INTEGER,
  "home_access_id" INTEGER,
  "internet_plan_id" INTEGER,
  "desktop_access_id" INTEGER,
  "laptop_access_id" INTEGER,
  "tablet_access_id" INTEGER,
  "cellphone_access_id" INTEGER,
  "games_consoles_access_id" INTEGER,
  "tv_access_id" INTEGER,
  "weights" REAL
)
COPY "environment"."fact_internet_access_survey" ("year","zone_id","region_id","home_access_id","internet_plan_id","desktop_access_id","laptop_access_id","tablet_access_id","cellphone_access_id","games_consoles_access_id","tv_access_id","weights") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Indexes & FK

In [25]:
# Index the region key. IF NOT EXISTS (PostgreSQL 9.5+) keeps this cell
# re-runnable without first re-running the ingest cell that drops the table.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_internet_access_survey_region_id 
ON environment.fact_internet_access_survey (region_id)
""")

# Add the date key column. IF NOT EXISTS (PostgreSQL 9.6+) avoids a
# "column already exists" error when the cell is executed more than once.
engine.execute("""
ALTER TABLE environment.fact_internet_access_survey
  ADD COLUMN IF NOT EXISTS date_id INTEGER; 
""")

# Fill date_id with the id of the public.dim_date row for January 1st of each
# fact row's year.
engine.execute("""
UPDATE environment.fact_internet_access_survey
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = environment.fact_internet_access_survey.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

<sqlalchemy.engine.result.ResultProxy at 0x1106a9c50>

### Related dim zone

In [7]:
# Fetch the zone lookup table and rename its columns to the English names
# used in the fact table.
d0 = (
    download_file(remote_path, local_path, 'zona_id.csv')
    .rename(columns={'zona_id': 'zone_id', 'zona': 'zone'})
)
# Render the lookup as an InlineTable snippet for the mapping xml and print it.
zone_xml = inline_table_xml(d0, 'zone', 'zone_id', 'zone')
print(zone_xml)


<InlineTable alias="zone">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">URBANO</Value>
      <Value column="es_description">URBANO</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">RURAL</Value>
      <Value column="es_description">RURAL</Value>
    </Row>
  </Rows>
</InlineTable>
    


### About the `*_access_id` columns

List:
* home_access_id
* internet_plan_id
* desktop_access_id
* laptop_access_id
* tablet_access_id
* cellphone_access_id
* games_consoles_access_id
* tv_access_id

Every column listed above (the `*_access_id` columns and `internet_plan_id`) has the same options available, so consider creating just one shared dimension, `access`, in the mapping XML:

```xml
<InlineTable alias="access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">NO</Value>
      <Value column="es_description">NO</Value>
    </Row>
    <Row>
      <Value column="id">2</Value>
      <Value column="description">YES</Value>
      <Value column="es_description">SI</Value>
    </Row>
  </Rows>
</InlineTable>
```

In [8]:
# Fetch the home_access lookup table and print it as an InlineTable snippet
# for the mapping xml. (A stray `list(d1)` whose result was discarded has been
# removed; it produced no output because it was not the cell's last expression.)
d1 = download_file(remote_path, local_path, 'home_access_id.csv')
print(inline_table_xml(d1, 'home_access', 'home_access_id', 'home_access'))


<InlineTable alias="home_access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [9]:
# Fetch the internet_plan lookup table.
d2 = download_file(remote_path, local_path, 'internet_plan_id.csv')
# Render it as an InlineTable snippet for the mapping xml and print it.
internet_plan_xml = inline_table_xml(d2, 'internet_plan', 'internet_plan_id', 'internet_plan')
print(internet_plan_xml)


<InlineTable alias="internet_plan">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [10]:
# Fetch the desktop_access lookup table.
d3 = download_file(remote_path, local_path, 'desktop_access_id.csv')
# Render it as an InlineTable snippet for the mapping xml and print it.
desktop_access_xml = inline_table_xml(d3, 'desktop_access', 'desktop_access_id', 'desktop_access')
print(desktop_access_xml)


<InlineTable alias="desktop_access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [11]:
# Fetch the laptop_access lookup table.
d4 = download_file(remote_path, local_path, 'laptop_access_id.csv')
# Render it as an InlineTable snippet for the mapping xml and print it.
laptop_access_xml = inline_table_xml(d4, 'laptop_access', 'laptop_access_id', 'laptop_access')
print(laptop_access_xml)


<InlineTable alias="laptop_access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [12]:
# Fetch the tablet_access lookup table.
d5 = download_file(remote_path, local_path, 'tablet_access_id.csv')
# Render it as an InlineTable snippet for the mapping xml and print it.
tablet_access_xml = inline_table_xml(d5, 'tablet_access', 'tablet_access_id', 'tablet_access')
print(tablet_access_xml)


<InlineTable alias="tablet_access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [13]:
# Fetch the cellphone_access lookup table.
d6 = download_file(remote_path, local_path, 'cellphone_access_id.csv')
# Render it as an InlineTable snippet for the mapping xml and print it.
cellphone_access_xml = inline_table_xml(d6, 'cellphone_access', 'cellphone_access_id', 'cellphone_access')
print(cellphone_access_xml)


<InlineTable alias="cellphone_access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [14]:
# Fetch the games_consoles_access lookup table.
d7 = download_file(remote_path, local_path, 'games_consoles_access_id.csv')
# Render it as an InlineTable snippet for the mapping xml and print it.
games_consoles_access_xml = inline_table_xml(
    d7, 'games_consoles_access', 'games_consoles_access_id', 'games_consoles_access'
)
print(games_consoles_access_xml)


<InlineTable alias="games_consoles_access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [15]:
# Fetch the tv_access lookup table.
d8 = download_file(remote_path, local_path, 'tv_access_id.csv')
# Render it as an InlineTable snippet for the mapping xml and print it.
tv_access_xml = inline_table_xml(d8, 'tv_access', 'tv_access_id', 'tv_access')
print(tv_access_xml)


<InlineTable alias="tv_access">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">No</Value>
      <Value column="es_description">No</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Yes</Value>
      <Value column="es_description">Yes</Value>
    </Row>
  </Rows>
</InlineTable>
    
