In [7]:
import luigi

In [12]:
import os
import csv
import json
import luigi
import time
import random
import requests
import psycopg2
from luigi.contrib.postgres import CopyToTable, PostgresTarget, PostgresQuery

  """)


In [13]:

class DeleteTableProducto(PostgresQuery):
    """Luigi ``PostgresQuery`` task that removes every row from the product table.

    Connection settings are read from the ``DB_*`` environment variables once,
    when the class body executes; the literals below are the fallbacks used
    when a variable is not set.
    """

    producto = luigi.Parameter()
    host = os.environ.get('DB_HOST', '0.0.0.0:5433')
    database = os.environ.get('DB_DATABASE', 'QQP')
    user = os.environ.get('DB_USER', 'QQP')
    password = os.environ.get('DB_PASSWORD', 'q1q2p')
    port = os.environ.get('DB_PORT', 5433)
    table = os.environ.get('DB_TABLE','PRODUCTO')
    # Computed once when the class body runs: every instance created in the
    # same process shares this value, and a new process gets a different one.
    update_id = str(int(round(time.time() * 1000) * random.random()))

    @property
    def query(self):
        """Return the SQL statement that empties the configured table."""
        # Target the configured table (DB_TABLE) instead of a hard-coded name so
        # the delete always hits the same table this task is configured with.
        # With the default configuration this is still "DELETE FROM PRODUCTO;".
        return "DELETE FROM {0};".format(self.table)


class DownloadProduct(luigi.Task):
    """Download every result page for one product from the PROFECO prices API.

    Pages are requested one after another, starting at page 1, until the API
    answers with an empty ``results`` list. The accumulated records are then
    written as a single JSON array to the task's output file.

    If the API returns no records at all, no output file is written.
    """

    producto = luigi.Parameter()

    # Plain class-level settings (not luigi parameters) for the HTTP loop.
    API_URL = 'https://api.datos.gob.mx/v1/profeco.precios'
    REQUEST_TIMEOUT_SECONDS = 60
    MAX_ATTEMPTS_PER_PAGE = 5
    RETRY_WAIT_SECONDS = 2

    def requires(self):
        """Depend on ``DeleteTableProducto`` for the same product."""
        return DeleteTableProducto(self.producto)

    def run(self):
        """Fetch all pages and dump the collected records to the output file.

        Raises:
            RuntimeError: if a page keeps returning a non-200 status after
                ``MAX_ATTEMPTS_PER_PAGE`` attempts.
        """
        page = 1
        list_product = []

        while True:
            page_results = self._fetch_page(page)
            # An empty page marks the end of the result set.
            if not page_results:
                break
            list_product.extend(page_results)
            page += 1

        if list_product:
            with self.output().open('w') as json_file:
                json.dump(list_product, json_file)

    def _fetch_page(self, page):
        """Return the ``results`` list of one API page.

        A non-200 answer is retried a bounded number of times with a short
        pause in between, instead of re-requesting the same page forever.
        """
        for attempt in range(1, self.MAX_ATTEMPTS_PER_PAGE + 1):
            print("Peticion al API pagina: ", str(page))
            self.set_status_message("Peticion al API QQP, producto: {} pagina: {}".format(self.producto, str(page)))

            # The timeout keeps a stalled connection from blocking the task forever.
            response = requests.get(
                self.API_URL,
                params={'producto': self.producto, 'page': str(page)},
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            print("Respuesta del servidor", response.status_code)
            if response.status_code == 200:
                return response.json().get('results', [])

            if attempt < self.MAX_ATTEMPTS_PER_PAGE:
                time.sleep(self.RETRY_WAIT_SECONDS)

        raise RuntimeError(
            "API request for producto {!r} page {} failed after {} attempts "
            "(last status code: {})".format(
                self.producto, page, self.MAX_ATTEMPTS_PER_PAGE, response.status_code
            )
        )

    def output(self):
        """Local JSON file that holds the downloaded records for this product."""
        return luigi.LocalTarget('/tmp/qqp/{}/data.json'.format(self.producto))


class ConvertJSONToCSV(luigi.Task):
    """Convert the downloaded JSON records of a product into a ``|``-separated file.

    Two files are produced under ``/tmp/qqp/<producto>/``:

    * ``headers.csv`` - the column names (keys of the first record), stored as
      a JSON list.
    * ``data.csv`` - one row per record, with values in the same order as the
      column names. This is the task's declared output.
    """

    producto = luigi.Parameter()

    def requires(self):
        """Depend on the JSON download for the same product."""
        return DownloadProduct(self.producto)

    def run(self):
        """Write the header file and the delimited data file.

        The input is expected to be a non-empty JSON array of objects; the
        first object defines the column set.
        """
        with self.input().open('r') as json_file:
            json_product = json.load(json_file)

        print(len(json_product))
        headers = list(json_product[0].keys())

        with open('/tmp/qqp/{0}/headers.csv'.format(self.producto), 'w+') as header_file:
            json.dump(headers, header_file)

        with self.output().open('w') as csv_file:
            writer = csv.writer(csv_file, delimiter='|', quotechar='"')

            for product in json_product:
                # Project each record onto the header list so every row has the
                # same columns in the same order, even when a record is missing
                # a key, carries an extra one, or orders its keys differently.
                writer.writerow([product.get(column) for column in headers])

    def output(self):
        """Local delimited file that holds the converted records."""
        return luigi.LocalTarget('/tmp/qqp/{0}/data.csv'.format(self.producto))


class InsertDataInDataBase(CopyToTable):
    """Luigi ``CopyToTable`` task fed by the file that ``ConvertJSONToCSV`` produces.

    Connection settings come from the ``DB_*`` environment variables, read once
    when the class body executes; the literals are the fallbacks used when a
    variable is not set. Column names are taken from the ``headers.csv`` file
    stored next to the data file.
    """

    producto = luigi.Parameter()

    # Database connection / target settings.
    host = os.getenv('DB_HOST', '0.0.0.0:5433')
    database = os.getenv('DB_DATABASE', 'QQP')
    user = os.getenv('DB_USER', 'QQP')
    password = os.getenv('DB_PASSWORD', 'q1q2p')
    port = os.getenv('DB_PORT', 5433)
    table = os.getenv('DB_TABLE', 'PRODUCTO')

    # Computed once at class-definition time, so it is shared by all instances
    # in a process and differs between processes.
    update_id = str(int(round(time.time() * 1000) * random.random()))

    # Same delimiter that the data file is written with.
    column_separator = "|"

    @property
    def columns(self):
        """Column names, loaded from the JSON list stored in ``headers.csv``."""
        headers_path = '/tmp/qqp/{0}/headers.csv'.format(self.producto)
        with open(headers_path, 'r') as handle:
            column_names = json.loads(handle.read())
        return column_names

    def requires(self):
        """Depend on the CSV conversion for the same product."""
        return ConvertJSONToCSV(producto=self.producto)


class DropAggTableIfExists(PostgresQuery):
    """Luigi ``PostgresQuery`` task that drops the per-product aggregate table.

    The aggregate table is named ``agg_<producto>``, where ``<producto>`` is the
    parameter lower-cased with spaces replaced by underscores.

    Connection settings are read from the ``DB_*`` environment variables once,
    when the class body executes; the literals are the fallbacks.
    """

    producto = luigi.Parameter()
    host = os.environ.get('DB_HOST', '0.0.0.0:5433')
    database = os.environ.get('DB_DATABASE', 'QQP')
    user = os.environ.get('DB_USER', 'QQP')
    password = os.environ.get('DB_PASSWORD', 'q1q2p')
    port = os.environ.get('DB_PORT', 5433)
    table = os.environ.get('DB_TABLE','PRODUCTO')
    # Computed once when the class body runs: shared by every instance in the
    # process, different in each new process.
    update_id = str(int(round(time.time() * 1000) * random.random()))

    @property
    def query(self):
        """Return the ``DROP TABLE IF EXISTS`` statement for the aggregate table."""
        agg_table = "agg_{0}".format(self.producto.lower().replace(' ', '_'))
        # Quote the identifier (doubling any embedded double quote) so product
        # names containing punctuation cannot break or inject into the SQL.
        # The name is already lower-case, so for names that were valid unquoted
        # the quoted form refers to the same table.
        quoted_table = '"{0}"'.format(agg_table.replace('"', '""'))
        return "DROP TABLE IF EXISTS {0};".format(quoted_table)

    def requires(self):
        """Depend on the database load for the same product."""
        return InsertDataInDataBase(self.producto)


class AggretateByState(PostgresQuery):
    """Luigi ``PostgresQuery`` task that builds the per-product aggregate table.

    The query stores the average ``precio`` per ``cadenaComercial`` in a new
    table named ``agg_<producto>`` (parameter lower-cased, spaces replaced by
    underscores). Note that, despite the class name, the grouping column is
    ``cadenaComercial``, and the rows are not filtered by ``producto``: the
    whole source table is aggregated.

    Connection settings are read from the ``DB_*`` environment variables once,
    when the class body executes; the literals are the fallbacks.
    """

    producto = luigi.Parameter()
    host = os.environ.get('DB_HOST', '0.0.0.0:5433')
    database = os.environ.get('DB_DATABASE', 'QQP')
    user = os.environ.get('DB_USER', 'QQP')
    password = os.environ.get('DB_PASSWORD', 'q1q2p')
    port = os.environ.get('DB_PORT', 5433)
    table = os.environ.get('DB_TABLE','PRODUCTO')
    # Computed once when the class body runs: shared by every instance in the
    # process, different in each new process.
    update_id = str(int(round(time.time() * 1000) * random.random() ))

    @property
    def query(self):
        """Return the ``SELECT ... INTO`` statement that creates the aggregate table."""
        agg_table = "agg_{0}".format(self.producto.lower().replace(' ', '_'))
        # Quote the target identifier (doubling any embedded double quote) so
        # product names containing punctuation cannot break or inject into the
        # SQL. The name is already lower-case, so for names that were valid
        # unquoted the quoted form refers to the same table.
        quoted_table = '"{0}"'.format(agg_table.replace('"', '""'))
        # Read from the configured table (DB_TABLE), which is the table the
        # loader task writes to; with the default settings this is PRODUCTO.
        return (
            "SELECT AVG(precio), cadenaComercial INTO {0} FROM {1} "
            "GROUP BY cadenaComercial;"
        ).format(quoted_table, self.table)

    def requires(self):
        """Depend on the drop of any previous aggregate table for this product."""
        return DropAggTableIfExists(self.producto)


class StartPipeline(luigi.Task):
    """Entry-point task for the product pipeline.

    It declares a single dependency on ``AggretateByState`` for the given
    product and defines no ``run`` or ``output`` of its own.
    """

    producto = luigi.Parameter()

    def requires(self):
        """Depend on the aggregation task for the same product."""
        last_step = AggretateByState(producto=self.producto)
        return last_step


In [1]:
import botocore