In [74]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [123]:
URL_DATOS_COMUNIDADES = 'https://drive.google.com/drive/folders/1pV3vaOJJTRe2rBfLDZxwE4U8qg9yZqnt?usp=sharing'

In [79]:
# Sacamos los nombres de los ficheros
paths_comunidades = [path for path in os.listdir('data/comunidades/') if path.endswith('.xlsx')]

# Creamos una lista para ir añadiendo los dataframes leidos
dfs = list()

for path_comunidad in paths_comunidades:
    # Extraemos el nombre de la comunidad
    comunidad = os.path.splitext(path_comunidad)[0]

    # Leemos el fichero, reseteamos el index y le cambiamos el nombre de la columna a ID
    fichero = pd.read_excel('./data/comunidades/'+path_comunidad).reset_index(drop=True).rename(columns={'Unnamed: 0': 'ID'})

    # Descartamos las filas donde el la columna BIO no es nula.
    # Asignamos el nombre de la comunidad a una nueva columna.
    fichero_sin_bio = fichero[fichero['SEGMENTO BIO'].isna()].reset_index(drop=True).assign(COMUNIDAD=comunidad)

    # Añadimos el fichero leido a nuestra lista de dataframes
    dfs.append(fichero_sin_bio)

# Montamos el dataframe final
datos = pd.concat(dfs, ignore_index=True)

# Eliminamos la columna SEGMENTO BIO
datos.drop('SEGMENTO BIO', axis=1, inplace=True)

In [80]:
datos.shape

(9432, 166)

In [90]:
datos.head(20)

Unnamed: 0,ID,SECTOR,SECCION,CATEGORIA,FAMILIA,SEMANA 40-16,SEMANA 41-16,SEMANA 42-16,SEMANA 43-16,SEMANA 44-16,...,SEMANA 35-19,SEMANA 36-19,SEMANA 37-19,SEMANA 38-19,SEMANA 39-19,SEMANA 40-19,SEMANA 41-19,SEMANA 42-19,SEMANA 43-19,COMUNIDAD
0,TOTAL PGC,,,,,0.0,0.0,0.0,0.0,0.0,...,2853.52,2699.68,2703.08,2484.19,2853.67,2844.06,2412.15,2559.18,2647.53,Ceuta_y_Melilla
1,ALIM. Y BEBIDAS,ALIM. Y BEBIDAS,,,,0.0,0.0,0.0,0.0,0.0,...,1421.87,1364.16,1405.32,1269.16,1457.1,1477.37,1288.87,1375.99,1442.49,Ceuta_y_Melilla
2,ALIM.SECA,ALIM. Y BEBIDAS,ALIM.SECA,,,0.0,0.0,0.0,0.0,0.0,...,782.74,760.74,791.09,717.49,829.03,829.07,739.01,813.17,891.25,Ceuta_y_Melilla
3,ACEITE,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,,0.0,0.0,0.0,0.0,0.0,...,72.97,67.83,71.55,62.32,73.07,76.68,61.45,60.28,63.69,Ceuta_y_Melilla
4,GIRASOL..,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,GIRASOL..,0.0,0.0,0.0,0.0,0.0,...,20.78,20.41,20.16,18.68,21.06,22.47,16.94,17.49,19.44,Ceuta_y_Melilla
5,OLIVA,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,OLIVA,0.0,0.0,0.0,0.0,0.0,...,48.58,44.35,47.56,40.15,47.73,50.83,41.63,39.63,41.4,Ceuta_y_Melilla
6,RESTO ACEITES,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,RESTO ACEITES,0.0,0.0,0.0,0.0,0.0,...,3.61,3.06,3.84,3.49,4.29,3.39,2.88,3.17,2.85,Ceuta_y_Melilla
7,ADITIVOS COCINA,ALIM. Y BEBIDAS,ALIM.SECA,ADITIVOS COCINA,,0.0,0.0,0.0,0.0,0.0,...,15.52,14.98,16.48,15.77,16.12,16.09,14.38,14.43,17.0,Ceuta_y_Melilla
8,ADEREZOS,ALIM. Y BEBIDAS,ALIM.SECA,ADITIVOS COCINA,ADEREZOS,0.0,0.0,0.0,0.0,0.0,...,0.25,0.25,0.27,0.22,0.31,0.24,0.19,0.18,0.21,Ceuta_y_Melilla
9,ESPECIAS,ALIM. Y BEBIDAS,ALIM.SECA,ADITIVOS COCINA,ESPECIAS,0.0,0.0,0.0,0.0,0.0,...,6.47,6.14,6.45,6.15,6.57,6.62,6.22,6.01,8.11,Ceuta_y_Melilla


In [82]:
datos.isnull().sum().head(20)

ID                 0
SECTOR            18
SECCION           72
CATEGORIA        270
FAMILIA         1980
SEMANA 40-16       0
SEMANA 41-16       0
SEMANA 42-16       0
SEMANA 43-16       0
SEMANA 44-16       0
SEMANA 45-16       0
SEMANA 46-16       0
SEMANA 47-16       0
SEMANA 48-16       0
SEMANA 49-16       0
SEMANA 50-16       0
SEMANA 51-16       0
SEMANA 52-16       0
SEMANA 01-17       0
SEMANA 02-17       0
dtype: int64

In [120]:
totales_comunidad = datos[datos['ID'] == 'TOTAL PGC'].set_index('COMUNIDAD').T.iloc[5:]

In [144]:
totales_comunidad.head(20)

COMUNIDAD,Ceuta_y_Melilla,Asturias,Aragon,Galicia,Andalucia,Madrid,Baleares,Cantabria,Castilla_y_Leon,Murcia,Catalunya,Pais_Vasco,La_Rioja,Extremadura,C_Valenciana,Castilla_La_Mancha,Navarra,Canarias,TOTAL PGC
SEMANA 40-16,0.0,0.0,10894.0,0.0,58480.38,34775.94,14727.87,8352.53,19572.94,0.0,70327.94,33151.33,3114.32,5598.69,47716.06,13823.75,7624.01,10887.32,TOTAL PGC
SEMANA 41-16,0.0,0.0,9770.08,0.0,57141.87,33001.18,14442.84,7712.24,18342.98,0.0,68023.56,34488.84,2863.75,5366.86,46107.44,12755.0,7340.7,10534.78,TOTAL PGC
SEMANA 42-16,0.0,0.0,10033.98,0.0,57290.85,33943.81,13911.56,7835.98,18869.46,0.0,68846.55,34488.99,2869.65,5556.26,45983.02,13126.11,7701.33,10598.53,TOTAL PGC
SEMANA 43-16,0.0,0.0,10569.76,0.0,59618.23,35007.28,13613.85,8435.52,20258.06,0.0,72378.68,35815.16,3055.23,5978.39,47920.0,13695.59,7679.61,10684.07,TOTAL PGC
SEMANA 44-16,0.0,0.0,10261.46,0.0,56082.58,33989.72,12570.51,7699.21,18826.93,0.0,70959.18,35738.11,2969.88,5375.58,45731.9,13250.86,7523.9,10851.14,TOTAL PGC
SEMANA 45-16,0.0,0.0,10237.13,0.0,55857.44,33885.31,12045.9,7804.07,19278.35,0.0,68674.24,36322.44,2874.37,5601.62,45053.48,13248.93,7571.39,10699.41,TOTAL PGC
SEMANA 46-16,0.0,0.0,9941.42,0.0,56393.82,33986.99,11664.0,7681.76,18418.48,0.0,69479.82,34770.46,2828.38,5496.96,44779.41,12943.47,7233.76,10550.83,TOTAL PGC
SEMANA 47-16,0.0,0.0,10158.05,0.0,57950.35,34714.84,11897.83,7902.93,19093.35,0.0,74072.96,35517.44,2942.42,5541.73,45908.06,13319.54,7492.23,10585.9,TOTAL PGC
SEMANA 48-16,0.0,0.0,11275.83,0.0,59667.07,35787.73,12224.21,8392.31,20954.23,0.0,75280.19,38454.08,3470.6,5874.62,47920.82,14164.32,7943.23,10849.93,TOTAL PGC
SEMANA 49-16,0.0,0.0,10846.13,0.0,59942.99,34156.61,12657.6,8352.91,19722.43,0.0,72560.67,37070.0,3199.61,7886.47,46536.78,13993.09,8110.41,10872.25,TOTAL PGC


In [141]:
import plotly.express as px

fig = px.line(totales_comunidad)
fig.show()

In [146]:
df_andalucia = pd.read_excel('data/comunidades/andalucia.xlsx')

df_andalucia

Unnamed: 0.1,Unnamed: 0,SECTOR,SECCION,CATEGORIA,FAMILIA,SEGMENTO BIO,SEMANA 40-16,SEMANA 41-16,SEMANA 42-16,SEMANA 43-16,...,SEMANA 34-19,SEMANA 35-19,SEMANA 36-19,SEMANA 37-19,SEMANA 38-19,SEMANA 39-19,SEMANA 40-19,SEMANA 41-19,SEMANA 42-19,SEMANA 43-19
0,TOTAL PGC,,,,,,58480.38,57141.87,57290.85,59618.23,...,188132.24,192093.81,189974.88,178804.61,172582.70,175969.14,182950.03,168597.49,173629.17,172746.30
1,ALIM. Y BEBIDAS,ALIM. Y BEBIDAS,,,,,33395.23,32434.20,32454.36,34180.60,...,99823.54,100268.31,98722.82,92727.81,90061.88,92561.15,96130.52,89210.10,92298.34,92697.70
2,ALIM.SECA,ALIM. Y BEBIDAS,ALIM.SECA,,,,15382.91,15192.06,15131.80,16137.47,...,42888.22,44509.88,43885.76,43117.00,42111.14,43215.28,45113.61,42072.22,45120.71,46619.83
3,ACEITE,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,,,2126.83,1998.50,1930.66,2105.13,...,3738.19,4285.95,4105.34,3929.12,3699.01,3972.54,4068.29,3644.77,3581.93,3753.63
4,GIRASOL..,ALIM. Y BEBIDAS,ALIM.SECA,ACEITE,GIRASOL..,,397.01,389.52,375.48,404.93,...,621.83,674.72,664.54,631.64,582.11,623.85,648.24,590.65,613.55,631.47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1198,RQN.AL CORTE BIO,PROD.FRESCOS,QUESOS,RESTO QUESOS NATURALES,RQN.AL CORTE,BIO,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1199,RQN.AL CORTE NO BIO,PROD.FRESCOS,QUESOS,RESTO QUESOS NATURALES,RQN.AL CORTE,NO BIO,2.36,1.98,2.08,2.04,...,81.40,83.63,84.15,84.28,79.59,77.52,80.64,74.64,79.65,77.31
1200,RQN.EMPAQUETADO,PROD.FRESCOS,QUESOS,RESTO QUESOS NATURALES,RQN.EMPAQUETADO,,80.49,76.75,74.22,77.10,...,201.93,206.61,208.32,208.11,207.77,210.93,216.81,200.43,202.98,201.14
1201,RQN.EMPAQUETADO BIO,PROD.FRESCOS,QUESOS,RESTO QUESOS NATURALES,RQN.EMPAQUETADO,BIO,0.03,0.03,0.02,0.02,...,3.24,3.60,3.93,3.64,4.47,3.15,4.49,4.42,3.97,4.18


In [131]:
diabetes = pd.read_excel('./data/enfermedades/diabetes.xls', skiprows=1)

In [132]:
diabetes

Unnamed: 0,CC.AA.,Años,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15
0,,2016,,,2017,,,2018,,,2019,,,2020,,
1,,Hombres,Mujeres,Total,Hombres,Mujeres,Total,Hombres,Mujeres,Total,Hombres,Mujeres,Total,Hombres,Mujeres,Total
2,España (ES),20.84,16.24,18.34,21.39,16.52,18.76,21.48,16.28,18.62,20.65,15.33,17.7,23.92,17.63,20.46
3,Melilla (ML),31.79,48.17,41.8,57.36,32.8,44.26,42.94,58.24,53.5,27.29,39.24,35.09,55.9,55.37,55.63
4,Ceuta (CE),68.98,39.16,50.36,54.09,25.67,35.33,45.36,16.14,27.14,21.32,19.63,20.33,55.48,42.47,47.11
5,La Rioja (RI),21.95,11.14,15.88,18.06,12.81,15.51,26.35,14.03,19.18,19.78,11.98,15.2,17.07,13.96,15.82
6,País Vasco (PV),19.31,12.32,15.19,16.48,10.57,13.03,18.3,12.78,15.14,17.64,12.94,15.07,21.08,13.12,16.65
7,"Navarra, C. Foral de (NC)",23.8,17.28,20.41,26.23,15.14,20.21,20.72,12.58,16.13,16.34,14.66,15.77,21.56,12.86,16.34
8,"Murcia, Región de (MC)",21.14,20.52,20.92,27.31,23.08,25.17,24.65,18.52,21.24,28.79,17.17,22.4,34.73,27.48,30.91
9,"Madrid, Comunidad de (MD)",13.83,9.03,10.9,11.73,9.51,10.49,12.42,8.8,10.23,9.85,7.09,8.28,11.73,8.75,10.03


In [139]:
poblacion = pd.read_excel('./data/poblacion/poblacion_comunidades.xlsx', skiprows=1)

In [140]:
poblacion

Unnamed: 0,CC.AA.,2016,2017,2018,2019,2020
0,Ceuta_y_Melilla,170545,171079,171528,171264,171278
1,La_Rioja,315794,315381,315675,316798,313914
2,Pais_Vasco,2189534,2194158,2199088,2207776,2220504
3,Navarra,640647,643234,647554,654214,661197
4,Murcia,1464847,1470273,1478509,1493898,1511251
5,Madrid,6466996,6507184,6578079,6663394,6779888
6,Galicia,2718525,2708339,2701743,2699499,2701819
7,Extremadura,1087778,1079920,1072863,1067710,1063987
8,C_Valenciana,4959968,4941509,4963703,5003769,5057353
9,Catalunya,7522596,7555830,7600065,7675217,7780479
