In [1]:
from __future__ import print_function
from __future__ import unicode_literals
import yaml
import json
import io
import procesamiento_microdatos as pm
import pandas as pd
import unicodecsv as csv
import openpyxl as pyxl
from collections import OrderedDict

In [2]:
panel = pm.generate_panel()

In [3]:
with open("grupos.yaml") as groups_file:
    grupos = yaml.load(groups_file)
    
with open("alias.yaml") as alias_file:
    alias = yaml.load(alias_file)
    
agrupamientos_usos = {
    "Otros Conceptos de Oferta": ['Variación de Stock', 'Búnker', 'No Aprovechado', 'Ajustes'],
    "Oferta Interna": ["Producción", "Importación", "Exportación", "Pérdidas", 'Variación de Stock', 'Búnker', 'No Aprovechado', 'Ajustes'],
    "Centrales Eléctricas": ['Servicio Público', 'Autoproducción'],
    "Otros Centros de Transformación": ['Aceiteras y Destilerías', 'Coquerías', 'Carboneras', 'Altos Hornos'],
    "Centros de Transformación": ['Servicio Público', 'Autoproducción', "Plantas de Tratamiento de Gas", "Refinerías", 'Aceiteras y Destilerías', 'Coquerías', 'Carboneras', 'Altos Hornos'],
    "Consumo Final": ["Consumo No Energético", "Residencial", "Transporte", "Comercial", "Industria", "Agropecuario"],
    "Consumo": ["Consumo Propio", "Consumo No Energético", "Residencial", "Transporte", "Comercial", "Industria", "Agropecuario"]
}

agrupamientos_energias = {
    "Energías Primarias": ["Energía Hidráulica", "Energía Nuclear", "Gas Natural de Pozo", "Petróleo", "Energía Eólica", "Energía Solar", "Carbón Mineral", "Alcohol Vegetal", "Aceites Vegetales", "Leña", "Bagazo", "Otros Primarios"],
    "Otras Fuentes Primarias de Energía": ["Energía Eólica", "Energía Solar", "Carbón Mineral", "Alcohol Vegetal", "Aceites Vegetales", "Leña", "Bagazo", "Otros Primarios"],
    "Aceites y Alcoholes Vegetales": ["Alcohol Vegetal", "Aceites Vegetales"],
    "Leña, Bagazo y Otros": ["Leña", "Bagazo", "Otros Primarios"],
    "Energías Secundarias":["Energía Eléctrica", "Gas Distribuido por Redes", "Gas Licuado", "Gas Oil", "Fuel Oil", "Motonaftas", "Biodiesel","Bioetanol", "Gas de Alto Horno", "No Energético de Carbón", "Carbón Vegetal", "Carbón Residual", "Coque de Petróleo", "Gas de Refinería", "Kerosene", "No Energético", "Otras Naftas", "Coque de Carbón", "Gas de Coquería",  "Etano", "Gasolina"],
    "Derivados del Petróleo": ["Gas Oil", "Fuel Oil", "Motonaftas"],
    "Biocombustibles": ["Biodiesel","Bioetanol"],
    "Otras Fuentes Secundarias de Energía": ["Gas de Alto Horno", "No Energético de Carbón", "Carbón Vegetal", "Carbón Residual", "Coque de Petróleo", "Gas de Refinería", "Kerosene", "No Energético", "Otras Naftas", "Coque de Carbón", "Gas de Coquería",  "Etano", "Gasolina"],
    "Energías Secundarias del Petróleo": ["Carbón Residual", "Coque de Petróleo", "Gas de Refinería", "Kerosene", "No Energético", "Otras Naftas"],
    "Energías Secundarias del Carbón": ["Coque de Carbón", "Gas de Coquería"],
    "Energías Secundarias del Gas": ["Etano", "Gasolina"]

}

In [4]:
def flatten(tree, flat_list=[]):
    """Toma una jerarquía de nodos (un 'arbol') y devuelve una lista plana con todos los nodos.
    
    Un arbol es una lista, que en cada posición tiene:
        - una string (un nodo 'base', sin hijos), o
        - un diccionario de una única clave (un nodo con hijos), donde
            - la clave es el nombre del nodo, y
            - el valor es una lista con los nodos hijos.
    """
    for node in tree:
        if isinstance(node, (unicode, str)):
            flat_list.append(node)
        else:
            for key in node:
                flat_list.append(key)
                flatten(node[key], flat_list)
    return flat_list


def unalias(df, alias=alias):
    """Usa el dict 'alias' para dar nombres completos al índice y las columnas de un DataFrame"""
    return df.rename(columns=alias, index=alias)

def calcular_perdidas(data):
    for uso in data.columns.get_values():
        if uso in agrupamientos_usos["Centros de Transformación"]:
            data.loc["Pérdidas por Transformación", uso] = -sum(data[uso].dropna())
        else:
            data.loc["Pérdidas por Transformación", uso] = 0
    return data

def get_yr(yr):
    """Devuelve la data correspondiente a un año del panel de microdatos con los nombres de energías y usos completos."""
    data = unalias(panel[yr])
    data = corregir_signo_consumo(data)
    data = calcular_perdidas(data)
    return data

def corregir_signo_consumo(df):
    # Corrijo signo de rubros de consumo para que "reciban" de las distintas formas de energía
    for consumo in ["Consumo No Energético", "Residencial", "Comercial", "Transporte", "Agropecuario", "Industria"]:
        df[consumo] = -df[consumo]
    return df

def make_links(yr):
    data = get_yr(yr)
    links = list()
    # Genero links directamente desde los microdatos cuando es posible
    for row in data.index.get_values():
        for col in data.columns.get_values():
            value = data.loc[row, col]
            if value < 0:
                links.append({"source": row, "target": col, "value": abs(value)})
            elif value > 0:
                links.append({"source": col, "target": row, "value": abs(value)})
    
    # Agrego los links desde las Centrales de Transformación a "Pérdidas por Transformación"
#    for centro in agrupamientos_usos["Centros de Transformación"]:
#        links.append({"source": centro,
#                      "target": "Pérdidas por Transformación",
#                      "value": abs(sum(data[centro]))
#                     })
    return links

def get_nodos(formato="dict"):
    wb = pyxl.load_workbook("maestro-nodos.xlsx")
    ws = wb.active
    raw_data = ws.values
    
    cols = next(raw_data)
    rows = list(raw_data)
    if formato == "dict":
        return [dict(zip(cols, row)) for row in rows]
    elif formato == "df":
        return pd.DataFrame.from_records(rows, columns=cols)
    else:
        print("Formato no reconodico: {}".format(formato))

In [5]:
nodos = get_nodos("df")

In [6]:
data2015 = get_yr(2015)

In [7]:
links2015 = make_links(2015)

In [8]:
data_energias = data2015.reset_index().rename_axis({"index": "Nombre"}, 1)

In [9]:
data_energias = nodos.merge(data_energias)

In [10]:
CAMPOS_DATAJSON = {
    "ID": "id",
    "Nombre": "name",
    "ID Padre": "parent",
    "Grupo": "group",
    "Posicion": "position"
}

def generar_lista_nodos(nodos):
    return nodos.drop(
        ["Eje", "Es Base","Nombre Padre"], axis=1
    ).rename(columns=CAMPOS_DATAJSON
    ).to_dict(orient='records')


In [11]:
lista_de_nodos = generar_lista_nodos(nodos)

In [12]:
def get_id_from_nombre(nombre_nodo):
    return [n for n in lista_de_nodos if n["name"] == nombre_nodo][0].get("id")

In [13]:
def convert_names_to_ids(links_list):
    return([{"source": get_id_from_nombre(link["source"]),
             "target": get_id_from_nombre(link["target"]),
             "value": round(link["value"],2)}
            for link in links_list if round(link["value"],2)>=0.01])

In [14]:
lista_de_links_con_id = convert_names_to_ids(links2015)

In [15]:
datajson_2015 = {"nodes": lista_de_nodos, "links": lista_de_links_con_id}

In [16]:
def write_json(obj, path):
    """Escribo un objeto a un archivo JSON con codificación UTF-8."""
    obj_str = json.dumps(obj, indent=4, separators=(",", ": "),
                         ensure_ascii=False)
    with io.open(path, "w", encoding='utf-8') as target:
        target.write(obj_str)


In [17]:
nodos = get_nodos("df")
lista_de_nodos = generar_lista_nodos(nodos)
for i in panel.items:
    data = get_yr(i)
    links = make_links(i)
    lista_de_links_con_id = convert_names_to_ids(links)
    datajson = {"nodes": lista_de_nodos, "links": lista_de_links_con_id}
    write_json(datajson, "output/data_{}.json".format(i))

In [19]:
panel[2015,:,:]

Unnamed: 0,PROD,IMPO,V.STK.,EXPO,BUNK,NO AP,PERD,AJUSTE,CENT,AUTO,...,COQ,CARB,A.HOR,C.PROP,NO ENER,RESID,COMER,TRANS,AGROP,INDUS
ACEITES VEGETALES,1672.54215,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ALCOHOL VEGETAL,425.433704,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BAGAZO,804.275595,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-145.99995,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,658.275645
BIODIESEL,1604.30243,0.0,0.0,-701.52114,0.0,0.0,0.0,0.0,-38.27,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BIOETANOL,413.053076,0.0,0.0,0.0,0.0,0.0,0.0,-5.961705,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CARBON MINERAL,19.82577,1431.617458,70.786037,-8.458906,0.0,0.0,0.0,-155.4898,-524.71638,-9.90551,...,-818.814932,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.843743
CARBON RESIDUAL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CARBON VEGETAL,335.075428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,335.075428,0.0,0.0,0.0,201.045257,134.030171,0.0,0.0,0.0
COQUE DE CARBON,614.111199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,614.111199,0.0,-614.111199,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COQUE PETROLEO,908.830532,0.0,0.0,0.0,0.0,0.0,0.0,0.08592449,0.0,0.0,...,0.0,0.0,-608.916456,0.0,300.0,0.0,0.0,0.0,0.0,0.0


In [18]:
def sumar_filas_df(df, filas, nueva_fila, borrar=True):
    """Suma todas las filas con índices en la lista `filas`,
    en una nueva fila con índice `nueva_fila`. Si `borrar`, no las incluye en el df retornado. Devuelve el df"""
    df.loc[nueva_fila] = reduce(function=pd.Series.add, sequence=[df.loc[f] for f in filas], initial=0)
    if borrar:
        df = df.drop(filas, axis=0)
    return df
        
def adaptar_df_a_entidades_minem(df):
    for 

Index([                   u'Producción',                   u'Importación',
                  u'Variación de Stock',                   u'Exportación',
                              u'Búnker',                u'No Aprovechado',
                            u'Pérdidas',                       u'Ajustes',
                    u'Servicio Público',                u'Autoproducción',
       u'Plantas de Tratamiento de Gas',                    u'Refinerías',
             u'Aceiteras y Destilerías',                     u'Coquerías',
                          u'Carboneras',                  u'Altos Hornos',
                      u'Consumo Propio',         u'Consumo No Energético',
                         u'Residencial',                     u'Comercial',
                          u'Transporte',                  u'Agropecuario',
                           u'Industria'],
      dtype='object')

In [40]:
data2015.drop(["Petróleo","Etano"], axis=1)

ValueError: labels [u'Petr\xf3leo' u'Etano'] not contained in axis