# Validar distribuciones de series de tiempo

In [1]:
from series_tiempo_ar.validations import get_distribution_errors
from series_tiempo_ar import TimeSeriesDataJson
import pandas as pd

In [2]:
# Location of the data.json catalog to validate; the catalog object built from
# it is what every later cell queries.
url = "https://datos.agroindustria.gob.ar/data.json"
catalog = TimeSeriesDataJson(url)

In [3]:
# Ask the catalog for its distributions, restricted to time series and
# reported by their "identifier" field, then display the result.
ts_distributions = catalog.get_distributions(
    meta_field="identifier",
    only_time_series=True,
)
ts_distributions

['fe5bb691-7f7a-46ef-9142-c18eaa218213',
 '221859a8-c51e-47c2-95ab-a8525bb2b55d',
 '6e79add0-52d6-4eaf-b715-4636259f3aed',
 '2a928aef-7f9e-4335-bf3d-a1c5eab49037',
 'f7c928d6-2cb0-4426-acf2-a11c242c813a',
 '4820c10d-f4c1-4ede-83b3-e0334cbb1e32',
 '84976af9-af2d-4759-87c3-0fb02ee7d8ec',
 '24571505-bee3-447c-8c2c-8aabd9515508',
 'f4025e36-bd7c-410e-9449-a51621957f09',
 '2507ae92-75e6-4cbd-97f0-5c65f4da9963',
 '0128a612-22e8-4152-953b-64fb75986e2e',
 'aa7aefc2-1bd8-4a9f-88a0-c0c6b76629d5',
 '1c3fb8f1-065c-4b2b-8b25-c32b1a14a241',
 '1c56b990-5f6c-4f39-ba3c-81fad06080e0',
 '2ff2b00a-0d1a-4193-b836-2f81811de464',
 'dcd6d482-11fa-4543-b449-8add44b9a736',
 'df8f7d09-797e-4e0b-9ad0-160f99edf2c0',
 'f331e15a-0627-4752-a5f5-a3016a87a45c',
 'ff0bf38f-a245-4173-882a-4ef3d61a06bc',
 '1ec33f3e-86b6-4802-a87d-91af91e26bb5']

## Series sin descripción o sin unidades (`units`)

In [4]:
# Load the time-series metadata returned by the catalog into a DataFrame.
time_series_records = catalog.get_time_series()
series = pd.DataFrame(data=time_series_records)

In [5]:
# Keep only the rows whose description is null, export them to CSV, and show
# the same rows as the cell output.
description_is_null = pd.isnull(series.description)
series_without_description = series[description_is_null]
series_without_description.to_csv(
    "series-null-description.csv",
    index=False,
    encoding="utf8",
)
series_without_description

Unnamed: 0,dataset_identifier,description,distribution_identifier,id,title,type,units


In [6]:
# Keep only the rows whose units are null, export them to CSV, and show the
# same rows as the cell output.
units_is_null = pd.isnull(series.units)
series_without_units = series[units_is_null]
series_without_units.to_csv(
    "series-null-units.csv",
    index=False,
    encoding="utf8",
)
series_without_units

Unnamed: 0,dataset_identifier,description,distribution_identifier,id,title,type,units


## Validar una distribución

In [7]:
# Validate a single distribution: the first identifier in the list. The cell
# output is whatever collection of errors the validator reports for it.
first_distribution = ts_distributions[0]
get_distribution_errors(catalog, first_distribution)

[]

## Validar todas las distribuciones

In [8]:
# Validate every time-series distribution and collect one record per
# validation error: {"distribution_id": ..., "error_msg": ...}.
errors = []
for ts_distribution in ts_distributions:
    # Guard only the validation call. A failure here means the distribution
    # itself could not be read; it is reported and the loop moves on.
    # list() forces any lazily produced errors to surface inside the try.
    try:
        distribution_errors = list(get_distribution_errors(catalog, ts_distribution))
    except Exception as e:
        print("No se pudo leer la distribución {}".format(ts_distribution))
        print(e)
        continue

    # Record building happens outside the try so that a problem with one error
    # object is never misreported as an unreadable distribution.
    for error in distribution_errors:
        errors.append({
            "distribution_id": ts_distribution,
            # Fall back to str(error) when the exception carries no args,
            # instead of raising IndexError on args[0].
            "error_msg": error.args[0] if error.args else str(error)
        })

No se pudo leer la distribución 4820c10d-f4c1-4ede-83b3-e0334cbb1e32
'indice_tiempo ' is not in list


In [9]:
# Tabulate the collected error records and display the table.
errors_table = pd.DataFrame(data=errors)
errors_table

Unnamed: 0,distribution_id,error_msg
0,221859a8-c51e-47c2-95ab-a8525bb2b55d,"superficie_ha_santa_fe tiene 1 valores, deberi..."
1,24571505-bee3-447c-8c2c-8aabd9515508,'consumo_per_capita_kg_persona_año' usa caract...
