# Una nueva normalidad 

<img src="images/highres.jpg" alt="170" width="850"/>

In [1]:
import numpy as np
import pandas as pd
from pymongo import MongoClient

In [2]:
# Two datasets are imported:
# prov reports on the situation of the 52 provinces
# excep handles the exceptions for all the municipalities inside the province

prov = pd.read_csv("data/Provincias.csv", dtype={"CP": object})
excep = pd.read_csv("data/Excepciones.csv", dtype={"CP": object, "Provincia": object})

In [6]:
display(prov.head())
display(excep.head())

Unnamed: 0,CP,Provincia,Fase,Excepción
0,4,Almería,1,False
1,11,Cádiz,1,False
2,14,Córdoba,1,False
3,21,Huelva,1,False
4,23,Jaén,1,False


Unnamed: 0,Provincia,CP,Municipio,Fase,Territorio
0,12,"12570, 12579",Alcalà de Xivert,1,Vinaròs
1,12,12579,Alcossebre,1,Vinaròs
2,12,12579,Santa Magdalena de Pulpis,1,Vinaròs
3,12,"12530, 12580",Benicarló,1,Vinaròs
4,12,12598,Peñíscola,1,Vinaròs


## 🧹 Handling the "CP" column

In [7]:
# The dataframe index becomes the code of the province 
# with an exception

byCode = excep.set_index(["Provincia"])

byCode

Unnamed: 0_level_0,CP,Municipio,Fase,Territorio
Provincia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12,"12570, 12579",Alcalà de Xivert,1,Vinaròs
12,12579,Alcossebre,1,Vinaròs
12,12579,Santa Magdalena de Pulpis,1,Vinaròs
12,"12530, 12580",Benicarló,1,Vinaròs
12,12598,Peñíscola,1,Vinaròs
...,...,...,...,...
08,08593,Tagamanent,0,Barcelona
08,"08470, 08471",Vallgorguina,0,Barcelona
08,08188,Vallromanes,0,Barcelona
08,08455,Vilalba Sasserra,0,Barcelona


In [8]:
# Several municipalities have several Zip Codes
# The column is exploded and cleaned

def clean_CP(df):
    df["CP"] = df["CP"].apply(lambda X: X.split(","))
    df = df.explode("CP")
    df["CP"] = df["CP"].apply(lambda X: X.replace(" ", ""))
    df["CP"] = df["CP"].apply(lambda X: X.replace("\t", ""))
    return df

df = clean_CP(byCode)
df.head()

Unnamed: 0_level_0,CP,Municipio,Fase,Territorio
Provincia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12,12570,Alcalà de Xivert,1,Vinaròs
12,12579,Alcalà de Xivert,1,Vinaròs
12,12579,Alcossebre,1,Vinaròs
12,12579,Santa Magdalena de Pulpis,1,Vinaròs
12,12530,Benicarló,1,Vinaròs


## 🛠 Handling the exceptions into a dict

### I want my database to be a nested dictionary where in the case of Excepcion = True, an object with all the zip codes that have an exception is returned.

In [9]:
# A dictionary of dataframes is created
# Each dataframe will be grouped by each Zip Code

codigos = byCode.index.value_counts().index.to_list()

dfDict = {codigo : df.loc[codigo] for codigo in codigos}

print(dfDict.keys())

dict_keys(['08', '03', '46', '49', '09', '24', '12', '47', '37', '05', '34', '38', '42', '07', '40', '35'])


In [22]:
pandas__version__

Unnamed: 0_level_0,CP,Municipio,Fase,Territorio
Provincia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
08,08731,Avinyonet del Penedès,0,Barcelona
08,08734,Avinyonet del Penedès,0,Barcelona
08,08793,Avinyonet del Penedès,0,Barcelona
08,08798,Avinyonet del Penedès,0,Barcelona
08,08794,Cabanyes,0,Barcelona
...,...,...,...,...
08,08470,Vallgorguina,0,Barcelona
08,08471,Vallgorguina,0,Barcelona
08,08188,Vallromanes,0,Barcelona
08,08455,Vilalba Sasserra,0,Barcelona


In [23]:
# Each df of dfDict is converted to a dictionary of dictionaries

dictDict = {}
for i in dfDict:
    dictDict[i] = dfDict[i].to_dict("record")

print(dictDict["43"][0])

TypeError: unsupported type: <class 'str'>

In [8]:
# Trato cada diccionario de dataframes para que me devuelva la arquitectura que quiero

def parse_excepcion(dfDict, code):
    dictio ={}
    dictio[code] = dfDict[code].to_dict("record")
    d_temp = {}
    for territory in dictio[code]:
        d_temp[territory['CP']] = {'Fase':territory['Fase'],'Territorio':territory['Territorio']}

    return d_temp

In [9]:
# The initial prov df is converted to dictionary so I can change the value when "Excepcion" = True

data_dict = prov.to_dict("records")

print(data_dict[0])
print(data_dict[-1])

{'CP': '04', 'Provincia': 'Almería', 'Fase': '1', 'Excepcion': False}
{'CP': '49', 'Provincia': 'Zamora', 'Fase': 'Parcial', 'Excepcion': True}


In [10]:
# data_dict when Excepcion = True is changed
# Now it is parsed as I want it

for value in data_dict:
    if value['Excepcion']:
        prefijo = value["CP"]
        excepcion = parse_excepcion(dfDict, prefijo)
        value['Excepcion'] = excepcion
        
print(data_dict[0])
print(data_dict[-1])

{'CP': '04', 'Provincia': 'Almería', 'Fase': '1', 'Excepcion': False}
{'CP': '49', 'Provincia': 'Zamora', 'Fase': 'Parcial', 'Excepcion': {'49160': {'Fase': 1, 'Territorio': 'Carbajales'}, '49541': {'Fase': 1, 'Territorio': 'Carbajales'}, '49543': {'Fase': 1, 'Territorio': 'Carbajales'}, '49540': {'Fase': 1, 'Territorio': 'Tabara'}, '49163': {'Fase': 1, 'Territorio': 'Carbajales'}, '49146': {'Fase': 1, 'Territorio': 'Carbajales'}, '49147': {'Fase': 1, 'Territorio': 'Carbajales'}, '49542': {'Fase': 1, 'Territorio': 'Carbajales'}, '49164': {'Fase': 1, 'Territorio': 'Carbajales'}, '49165': {'Fase': 1, 'Territorio': 'Carbajales'}, '49571': {'Fase': 1, 'Territorio': 'Alta Sanabria'}, '49572': {'Fase': 1, 'Territorio': 'Alta Sanabria'}, '49570': {'Fase': 1, 'Territorio': 'Alta Sanabria'}, '49573': {'Fase': 1, 'Territorio': 'Alta Sanabria'}, '49574': {'Fase': 1, 'Territorio': 'Alta Sanabria'}, '49580': {'Fase': 1, 'Territorio': 'Alta Sanabria'}, '49582': {'Fase': 1, 'Territorio': 'Alta Sanabr

## 🌍 Connection to MongoDB server

In [24]:
# Setting up the connection to the database

client = MongoClient()

def insertRecord(dictionary):
    db = client["unanuevanormalidad"] # Creo db
    collection = db["data"] # Creo collection
    collection.insert_many(dictionary)


In [25]:
# Final dictionary is added to the database

#insertRecord(data_dict)