In [74]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [79]:
# Sacamos los nombres de los ficheros
paths_comunidades = [path for path in os.listdir('data/comunidades/') if path.endswith('.xlsx')]

# Creamos una lista para ir añadiendo los dataframes leidos
dfs = list()

for path_comunidad in paths_comunidades:
    # Extraemos el nombre de la comunidad
    comunidad = os.path.splitext(path_comunidad)[0]

    # Leemos el fichero, reseteamos el index y le cambiamos el nombre de la columna a ID
    fichero = pd.read_excel('./data/comunidades/'+path_comunidad).reset_index(drop=True).rename(columns={'Unnamed: 0': 'ID'})

    # Descartamos las filas donde el la columna BIO no es nula.
    # Asignamos el nombre de la comunidad a una nueva columna.
    fichero_sin_bio = fichero[fichero['SEGMENTO BIO'].isna()].reset_index(drop=True).assign(COMUNIDAD=comunidad)

    # Añadimos el fichero leido a nuestra lista de dataframes
    dfs.append(fichero_sin_bio)

# Montamos el dataframe final
datos = pd.concat(dfs, ignore_index=True)

# Eliminamos la columna SEGMENTO BIO
datos.drop('SEGMENTO BIO', axis=1, inplace=True)

In [80]:
datos.shape

(9432, 166)

In [90]:
datos.head(20)

Unnamed: 0,ID,SECTOR,SECCION,CATEGORIA,FAMILIA,SEMANA 40-16,SEMANA 41-16,SEMANA 42-16,SEMANA 43-16,SEMANA 44-16,...,SEMANA 35-19,SEMANA 36-19,SEMANA 37-19,SEMANA 38-19,SEMANA 39-19,SEMANA 40-19,SEMANA 41-19,SEMANA 42-19,SEMANA 43-19,COMUNIDAD
0,TOTAL PGC,,,,,0.0,0.0,0.0,0.0,0.0,...,2853.52,2699.68,2703.08,2484.19,2853.67,2844.06,2412.15,2559.18,2647.53,Ceuta_y_Melilla
1,ALIM. Y BEBIDAS,ALIM. Y BEBIDAS,,,,0.0,0.0,0.0,0.0,0.0,...,1421.87,1364.16,1405.32,1269.16,1457.1,1477.37,1288.87,1375.99,1442.49,Ceuta_y_Melilla
2,ALIM.SECA,ALIM. Y BEBIDAS,ALIM.SECA,,,0.0,0.0,0.0,0.0,0.0,...,782.74,760.74,791.09,717.49,829.03,829.07,739.01,813.17,891.25,Ceuta_y_Melilla
3,ACEITE,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,,0.0,0.0,0.0,0.0,0.0,...,72.97,67.83,71.55,62.32,73.07,76.68,61.45,60.28,63.69,Ceuta_y_Melilla
4,GIRASOL..,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,GIRASOL..,0.0,0.0,0.0,0.0,0.0,...,20.78,20.41,20.16,18.68,21.06,22.47,16.94,17.49,19.44,Ceuta_y_Melilla
5,OLIVA,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,OLIVA,0.0,0.0,0.0,0.0,0.0,...,48.58,44.35,47.56,40.15,47.73,50.83,41.63,39.63,41.4,Ceuta_y_Melilla
6,RESTO ACEITES,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,RESTO ACEITES,0.0,0.0,0.0,0.0,0.0,...,3.61,3.06,3.84,3.49,4.29,3.39,2.88,3.17,2.85,Ceuta_y_Melilla
7,ADITIVOS COCINA,ALIM. Y BEBIDAS,ALIM.SECA,ADITIVOS COCINA,,0.0,0.0,0.0,0.0,0.0,...,15.52,14.98,16.48,15.77,16.12,16.09,14.38,14.43,17.0,Ceuta_y_Melilla
8,ADEREZOS,ALIM. Y BEBIDAS,ALIM.SECA,ADITIVOS COCINA,ADEREZOS,0.0,0.0,0.0,0.0,0.0,...,0.25,0.25,0.27,0.22,0.31,0.24,0.19,0.18,0.21,Ceuta_y_Melilla
9,ESPECIAS,ALIM. Y BEBIDAS,ALIM.SECA,ADITIVOS COCINA,ESPECIAS,0.0,0.0,0.0,0.0,0.0,...,6.47,6.14,6.45,6.15,6.57,6.62,6.22,6.01,8.11,Ceuta_y_Melilla


In [89]:
datos['COMUNIDAD'].value_counts()

Ceuta_y_Melilla       524
Catalunya             524
Canarias              524
Navarra               524
Castilla_La_Mancha    524
C_Valenciana          524
Extremadura           524
La_Rioja              524
Pais_Vasco            524
Murcia                524
Asturias              524
Castilla_y_Leon       524
Cantabria             524
Baleares              524
Madrid                524
Andalucia             524
Galicia               524
Aragon                524
TOTAL PGC               1
Name: COMUNIDAD, dtype: int64

In [82]:
datos.isnull().sum().head(20)

ID                 0
SECTOR            18
SECCION           72
CATEGORIA        270
FAMILIA         1980
SEMANA 40-16       0
SEMANA 41-16       0
SEMANA 42-16       0
SEMANA 43-16       0
SEMANA 44-16       0
SEMANA 45-16       0
SEMANA 46-16       0
SEMANA 47-16       0
SEMANA 48-16       0
SEMANA 49-16       0
SEMANA 50-16       0
SEMANA 51-16       0
SEMANA 52-16       0
SEMANA 01-17       0
SEMANA 02-17       0
dtype: int64

In [120]:
totales_comunidad = datos[datos['ID'] == 'TOTAL PGC'].set_index('COMUNIDAD').T.iloc[5:]

In [121]:
totales_comunidad

COMUNIDAD,Ceuta_y_Melilla,Asturias,Aragon,Galicia,Andalucia,Madrid,Baleares,Cantabria,Castilla_y_Leon,Murcia,Catalunya,Pais_Vasco,La_Rioja,Extremadura,C_Valenciana,Castilla_La_Mancha,Navarra,Canarias,TOTAL PGC
SEMANA 40-16,0.0,0.0,10894.0,0.0,58480.38,34775.94,14727.87,8352.53,19572.94,0.0,70327.94,33151.33,3114.32,5598.69,47716.06,13823.75,7624.01,10887.32,TOTAL PGC
SEMANA 41-16,0.0,0.0,9770.08,0.0,57141.87,33001.18,14442.84,7712.24,18342.98,0.0,68023.56,34488.84,2863.75,5366.86,46107.44,12755.0,7340.7,10534.78,TOTAL PGC
SEMANA 42-16,0.0,0.0,10033.98,0.0,57290.85,33943.81,13911.56,7835.98,18869.46,0.0,68846.55,34488.99,2869.65,5556.26,45983.02,13126.11,7701.33,10598.53,TOTAL PGC
SEMANA 43-16,0.0,0.0,10569.76,0.0,59618.23,35007.28,13613.85,8435.52,20258.06,0.0,72378.68,35815.16,3055.23,5978.39,47920.0,13695.59,7679.61,10684.07,TOTAL PGC
SEMANA 44-16,0.0,0.0,10261.46,0.0,56082.58,33989.72,12570.51,7699.21,18826.93,0.0,70959.18,35738.11,2969.88,5375.58,45731.9,13250.86,7523.9,10851.14,TOTAL PGC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SEMANA 39-19,2853.67,22446.36,27198.2,54212.46,175969.14,148466.75,32996.6,14490.55,47749.1,28428.44,171309.65,46221.68,6684.87,20165.38,121632.29,37284.35,12391.23,56434.31,TOTAL PGC
SEMANA 40-19,2844.06,22481.75,28762.09,56541.1,182950.03,154439.89,34023.32,14532.08,49217.56,29363.39,174007.15,48603.86,7170.19,20789.07,122601.75,39093.29,12988.61,60763.91,TOTAL PGC
SEMANA 41-19,2412.15,21114.67,25231.16,51945.26,168597.49,142017.02,30865.15,13237.48,44465.79,27340.09,161318.16,41283.56,6432.52,19122.6,111520.51,35726.29,11843.84,54070.5,TOTAL PGC
SEMANA 42-19,2559.18,21963.94,27756.19,54680.5,173629.17,149172.94,31354.86,14028.56,48939.5,28051.77,167443.57,49189.84,6809.92,19766.56,119289.98,37187.16,12786.32,53661.01,TOTAL PGC


In [122]:
import plotly.express as px

fig = px.line(totales_comunidad)
fig.show()