In [1]:
import seaborn as sns
import pandas as pd
pd.set_option('display.max_colwidth',None)

In [2]:
from pathlib import Path

In [3]:
import eurostat

def load_df_raw_catalog():
    return eurostat.get_toc_df()

In [4]:
import re

from pandas.core.interchange.dataframe_protocol import DataFrame


def split_domain(code):
    match = re.match(r"^([A-Z]+)+(_)+(.*)",code)
    if match:
        return match.group(1), match.group(3)
    return code

def convert_to_datetime(series):
    cleaned = series.astype('str').str.replace(r'([+-]\d{2}):(\d{2})', r'\1:\2', regex=True)
    return pd.to_datetime(cleaned, errors='coerce', utc=True)

def convert_object_to_category(df:DataFrame, threshold :int=5):
    description = df[df.select_dtypes('object')].describe()
    unique_counts = description.loc['unique']
    for column in unique_counts[unique_counts < threshold].index.tolist():
        df[column] = df[column].astype('category')
    return df

# Load and explore data

In [10]:
df_ds = load_df_raw_catalog()

ConnectionError: HTTPSConnectionPool(host='ec.europa.eu', port=443): Max retries exceeded with url: /eurostat/api/dissemination/sdmx/2.1/dataflow/all?format=JSON&compressed=true&lang=en (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x752ba6dca660>: Failed to resolve 'ec.europa.eu' ([Errno -2] Name or service not known)"))

In [None]:
df_ds.head()

In [None]:
df_ds.dtypes

In [None]:
df_ds.describe()

## Set column data types

In [6]:
df_ds['title'] = df_ds['title'].astype('str')
df_ds['code'] = df_ds['code'].astype('str')
df_ds['last update of data'] = convert_to_datetime(df_ds['last update of data'])
df_ds['last table structure change'] = convert_to_datetime(df_ds['last table structure change']) 

NameError: name 'df_ds' is not defined

In [7]:
df_ds['data start'].unique()

NameError: name 'df_ds' is not defined

In [11]:
df_ds[['start_year','start_quater']] = df_ds['data start'].str.split('-',n=1,expand=True)
df_ds[['end_year','end_quater']] = df_ds['data end'].str.split('-',n=1,expand=True)
df_ds['start_year'] = pd.to_numeric(df_ds['start_year'], errors='coerce')
df_ds['end_year'] = pd.to_numeric(df_ds['end_year'], errors='coerce')
df_ds = df_ds.drop(columns=['data start', 'data end'])

In [12]:
unique_start = df_ds['start_year'].unique()
unique_start.sort()
unique_start

array([1947., 1949., 1950., 1952., 1953., 1954., 1959., 1960., 1962.,
       1967., 1968., 1969., 1970., 1971., 1973., 1974., 1975., 1976.,
       1977., 1978., 1979., 1980., 1981., 1982., 1983., 1984., 1985.,
       1986., 1987., 1988., 1989., 1990., 1991., 1992., 1993., 1994.,
       1995., 1996., 1997., 1998., 1999., 2000., 2001., 2002., 2003.,
       2004., 2005., 2006., 2007., 2008., 2009., 2010., 2011., 2012.,
       2013., 2014., 2015., 2016., 2017., 2018., 2019., 2020., 2021.,
       2022., 2023., 2024.,   nan])

In [13]:
unique_start = df_ds['end_year'].unique()
unique_start.sort()
unique_start

array([1983., 1984., 1991., 1995., 1996., 1997., 1998., 1999., 2000.,
       2001., 2002., 2003., 2004., 2005., 2006., 2007., 2008., 2009.,
       2010., 2011., 2012., 2013., 2014., 2015., 2016., 2017., 2018.,
       2019., 2020., 2021., 2022., 2023., 2024., 2025., 2026., 2032.,
       2050., 2100.,   nan])

In [14]:
df_ds.describe(include = 'all')

Unnamed: 0,title,code,type,last update of data,last table structure change,start_year,start_quater,end_year,end_quater
count,8066,8066,8066,8066,8066,7525.0,723,7525.0,827
unique,7345,8066,1,,,,20,,21
top,Individuals - internet activities,POST_CUBE1_X$POST_DTR_1,dataset,,,,Q1,,Q4
freq,6,1,8066,,,,359,,305
mean,,,,2022-06-23 08:44:51.618646272+00:00,2024-08-16 14:10:51.053310208+00:00,2005.721462,,2019.148704,
min,,,,2009-03-26 10:00:00+00:00,2018-12-13 12:00:00+00:00,1947.0,,1983.0,
25%,,,,2021-10-29 21:00:00+00:00,2024-01-03 22:00:00+00:00,2000.0,,2016.0,
50%,,,,2024-12-13 10:00:00+00:00,2024-10-15 09:00:00+00:00,2007.0,,2022.0,
75%,,,,2025-04-14 21:00:00+00:00,2025-03-27 22:00:00+00:00,2014.0,,2024.0,
max,,,,2025-05-06 21:00:00+00:00,2025-05-06 09:00:00+00:00,2024.0,,2100.0,


## Dataset cleanup  and relevance filtring

In [15]:
df_ds = df_ds.drop(columns='type')

In [16]:
df_title_duplicates = df_ds[df_ds.duplicated(subset='title',keep=False)]
df_title_duplicates

Unnamed: 0,title,code,last update of data,last table structure change,start_year,start_quater,end_year,end_quater
17,"Population by sex, age, migration status and degree of urbanisation",LFST_R_PGAUM,2025-04-14 21:00:00+00:00,2025-04-14 21:00:00+00:00,2021.0,,2024.0,
73,"Current depressive symptoms by sex, age and country of birth",HLTH_EHIS_MH1B$DV_2106,2023-06-16 21:00:00+00:00,2024-01-03 22:00:00+00:00,,,,
75,"Current depressive symptoms by sex, age and country of citizenship",HLTH_EHIS_MH1C,2023-06-16 21:00:00+00:00,2024-01-03 22:00:00+00:00,2014.0,,2019.0,
77,"Current depressive symptoms by sex, age and country of citizenship",HLTH_EHIS_MH1C$DV_2107,2023-06-16 21:00:00+00:00,2024-01-03 22:00:00+00:00,,,,
81,"Current depressive symptoms by sex, age and educational attainment level",HLTH_EHIS_MH1E,2023-06-16 21:00:00+00:00,2024-01-03 22:00:00+00:00,2014.0,,2019.0,
...,...,...,...,...,...,...,...,...
7950,Victims of intentional homicide and sexual exploitation by age and sex,CRIM_HOM_VAGE$DV_2603,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,
7952,Intentional homicide victims by victim-offender relationship and sex,CRIM_HOM_VREL$DV_1645,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,
7958,Police-recorded offences by offence category,CRIM_OFF_CAT,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,2008.0,,2023.0,
7959,Police-recorded offences by offence category,CRIM_OFF_CAT$DV_1401,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,


It looks like datasets with duplicates titles include at least one version that has a $DV suffix followed by number.
This indicates a derived veiw (or data variation), which is not relevant for the purpose of this project. 

Therefore, I will exclude these derived view from the list of available datasets.


In [17]:
df_ds[df_ds['code'].str.contains('\\$DV', na = False)]

Unnamed: 0,title,code,last update of data,last table structure change,start_year,start_quater,end_year,end_quater
73,"Current depressive symptoms by sex, age and country of birth",HLTH_EHIS_MH1B$DV_2106,2023-06-16 21:00:00+00:00,2024-01-03 22:00:00+00:00,,,,
77,"Current depressive symptoms by sex, age and country of citizenship",HLTH_EHIS_MH1C$DV_2107,2023-06-16 21:00:00+00:00,2024-01-03 22:00:00+00:00,,,,
85,"Current depressive symptoms by sex, age and income quintile",HLTH_EHIS_MH1I$DV_464,2023-06-16 21:00:00+00:00,2024-01-03 22:00:00+00:00,,,,
142,"Time spent on health-enhancing (non-work-related) aerobic physical activity by sex, age and educational attainment level",HLTH_EHIS_PE2E$DV_300,2022-06-01 21:00:00+00:00,2024-01-03 22:00:00+00:00,,,,
144,"Time spent on health-enhancing (non-work-related) aerobic physical activity by sex, age and educational attainment level",HLTH_EHIS_PE2E$DV_301,2022-06-01 21:00:00+00:00,2024-01-03 22:00:00+00:00,,,,
...,...,...,...,...,...,...,...,...
7949,Victims of intentional homicide and sexual exploitation by age and sex,CRIM_HOM_VAGE$DV_2321,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,
7950,Victims of intentional homicide and sexual exploitation by age and sex,CRIM_HOM_VAGE$DV_2603,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,
7952,Intentional homicide victims by victim-offender relationship and sex,CRIM_HOM_VREL$DV_1645,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,
7959,Police-recorded offences by offence category,CRIM_OFF_CAT$DV_1401,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,


In [18]:
df_ds = df_ds[~df_ds['code'].str.contains('\\$DV', na = False)]

There are two types of naming:
1. {domain}_{specific area} as example LFSA = labour force survey + Annual
2. {domain}{number}, as example TIPSII40 = International investment position (tipsii) + number

In [19]:
df_ds[['domain','ds_name']] = df_ds['code'].apply(split_domain).apply(pd.Series)

In [20]:
df_ds['domain'] = df_ds['domain'].astype('category')

Duplicate dataset titles are still present, indicating that further analysis is needed to fully resolve and filter out all redundant entries.

In [21]:
df_title_duplicates = df_ds[df_ds.duplicated(subset='title',keep=False)]
df_title_duplicates.sort_values('title')

Unnamed: 0,title,code,last update of data,last table structure change,start_year,start_quater,end_year,end_quater,domain,ds_name
2488,"Active population by sex, age and citizenship (1 000)",LFSQ_AGAN,2025-03-25 22:00:00+00:00,2025-03-14 10:00:00+00:00,1998.0,Q1,2024.0,Q4,LFSQ,AGAN
3459,"Active population by sex, age and citizenship (1 000)",LFSA_AGAN,2025-04-14 21:00:00+00:00,2025-04-14 21:00:00+00:00,1995.0,,2024.0,,LFSA,AGAN
3458,"Active population by sex, age and educational attainment level (1 000)",LFSA_AGAED,2025-04-14 21:00:00+00:00,2025-04-14 21:00:00+00:00,1983.0,,2024.0,,LFSA,AGAED
2486,"Active population by sex, age and educational attainment level (1 000)",LFSQ_AGAED,2025-03-25 22:00:00+00:00,2025-03-14 10:00:00+00:00,1998.0,Q1,2024.0,Q4,LFSQ,AGAED
3464,"Activity rates by sex, age and citizenship (%)",LFSA_ARGAN,2025-04-14 21:00:00+00:00,2025-04-14 21:00:00+00:00,1995.0,,2024.0,,LFSA,ARGAN
...,...,...,...,...,...,...,...,...,...,...
3702,"Unemployment rates by sex, age and educational attainment level (%)",LFSA_URGAED,2025-04-14 21:00:00+00:00,2025-04-14 21:00:00+00:00,1983.0,,2024.0,,LFSA,URGAED
4788,Volume of passenger transport relative to GDP,TTR00001,2024-07-25 21:00:00+00:00,2025-01-23 22:00:00+00:00,2011.0,,2022.0,,TTR00001,
4485,Volume of passenger transport relative to GDP,TRAN_HV_PSTRA,2024-07-25 21:00:00+00:00,2024-07-25 21:00:00+00:00,1990.0,,2022.0,,TRAN,HV_PSTRA
417,Water use balance,MED_EN42,2020-05-28 21:00:00+00:00,2024-01-10 22:00:00+00:00,2005.0,,2018.0,,MED,EN42


In [22]:
df_same_name = df_title_duplicates.groupby(['ds_name'])['title'].count().reset_index(name='count')
df_same_name = df_same_name[df_same_name['count']>1]
exclude_from_duplicates = set(df_title_duplicates[df_title_duplicates['ds_name'].isin(df_same_name['ds_name'])].sort_values('ds_name')['code'])
df_title_duplicates['not_duplicate'] = df_title_duplicates['code'].apply(lambda x: x in exclude_from_duplicates )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_title_duplicates['not_duplicate'] = df_title_duplicates['code'].apply(lambda x: x in exclude_from_duplicates )


By looking at docs there are 2 types of statistics annual and quater.
Because this project about storing data the both of them are going to be stored in same area but in different files.

Additionally there are derived tables that have the same title, but was not marked properly.
* Volume of passenger transport relative to GDP 	TTR00001 	- derived
* Volume of passenger transport relative to GDP 	TRAN_HV_PSTRA

But at the same time there are data sets that start with TTR and not derived

In [23]:
df_title_duplicates[df_title_duplicates['not_duplicate']==False].sort_values('title')

Unnamed: 0,title,code,last update of data,last table structure change,start_year,start_quater,end_year,end_quater,domain,ds_name,not_duplicate
6197,Aggregate replacement ratio for pensions (excluding other social benefits) by sex,TESPN070,2025-04-30 09:00:00+00:00,2025-04-30 09:00:00+00:00,2010.0,,2024.0,,TESPN070,,False
4148,Aggregate replacement ratio for pensions (excluding other social benefits) by sex,ILC_PNP3,2025-04-30 09:00:00+00:00,2025-01-27 22:00:00+00:00,2010.0,,2024.0,,ILC,PNP3,False
3861,Air transport of freight by NUTS 2 region,TGS00078,2024-08-29 09:00:00+00:00,2025-01-23 22:00:00+00:00,2011.0,,2022.0,,TGS00078,,False
4909,Air transport of freight by NUTS 2 region,TRAN_R_AVGO_NM,2024-05-05 21:00:00+00:00,2024-08-29 09:00:00+00:00,1993.0,,2022.0,,TRAN,R_AVGO_NM,False
3859,Air transport of passengers by NUTS 2 region,TGS00077,2024-08-29 09:00:00+00:00,2025-01-23 22:00:00+00:00,2011.0,,2022.0,,TGS00077,,False
...,...,...,...,...,...,...,...,...,...,...,...
5913,Unemployment rate by sex,TEILM020,2025-05-05 21:00:00+00:00,2025-05-05 21:00:00+00:00,2024.0,05,2025.0,04,TEILM020,,False
4788,Volume of passenger transport relative to GDP,TTR00001,2024-07-25 21:00:00+00:00,2025-01-23 22:00:00+00:00,2011.0,,2022.0,,TTR00001,,False
4485,Volume of passenger transport relative to GDP,TRAN_HV_PSTRA,2024-07-25 21:00:00+00:00,2024-07-25 21:00:00+00:00,1990.0,,2022.0,,TRAN,HV_PSTRA,False
417,Water use balance,MED_EN42,2020-05-28 21:00:00+00:00,2024-01-10 22:00:00+00:00,2005.0,,2018.0,,MED,EN42,False


Looks like in general all dataset with underscore in their name contains raw or structured data.

Flat codes often contains pre-aggregated or simplified data.

So if the dataset have the same title, but one of entries no underscore then it is likely to be derived.
If both have underscode both saved
If no one have underscore they should be cheched.

In [24]:
def determine_duplicate(row, df : pd.DataFrame):
    if row['not_duplicate']:
        return True
    if '_' in row['code']:
        return True
    df_same_title : pd.DataFrame = df[df['title']==row['title']]
    # 
    has_underscore = df_same_title['code'].str.contains('_').any()
    if has_underscore:
        return False
    return True
    
df_title_duplicates['not_duplicate'] = df_title_duplicates.apply(lambda row: determine_duplicate(row,df_title_duplicates),axis=1)
df_title_duplicates[df_title_duplicates['not_duplicate']].count()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_title_duplicates['not_duplicate'] = df_title_duplicates.apply(lambda row: determine_duplicate(row,df_title_duplicates),axis=1)


title                          455
code                           455
last update of data            455
last table structure change    455
start_year                     455
start_quater                    83
end_year                       455
end_quater                      83
domain                         455
ds_name                        447
not_duplicate                  455
dtype: int64

In [25]:
df_title_duplicates[df_title_duplicates['not_duplicate']==False].sort_values('title')

Unnamed: 0,title,code,last update of data,last table structure change,start_year,start_quater,end_year,end_quater,domain,ds_name,not_duplicate
6197,Aggregate replacement ratio for pensions (excluding other social benefits) by sex,TESPN070,2025-04-30 09:00:00+00:00,2025-04-30 09:00:00+00:00,2010.0,,2024.0,,TESPN070,,False
3861,Air transport of freight by NUTS 2 region,TGS00078,2024-08-29 09:00:00+00:00,2025-01-23 22:00:00+00:00,2011.0,,2022.0,,TGS00078,,False
3859,Air transport of passengers by NUTS 2 region,TGS00077,2024-08-29 09:00:00+00:00,2025-01-23 22:00:00+00:00,2011.0,,2022.0,,TGS00077,,False
3838,Animal populations by NUTS 2 region,TGS00045,2025-03-27 22:00:00+00:00,2025-03-27 22:00:00+00:00,2013.0,,2024.0,,TGS00045,,False
3871,At-risk-of-poverty rate by NUTS 2 region,TGS00103,2025-04-30 09:00:00+00:00,2025-04-30 09:00:00+00:00,2013.0,,2024.0,,TGS00103,,False
3854,Available beds in hospitals by NUTS 2 region,TGS00064,2024-09-03 21:00:00+00:00,2025-01-23 22:00:00+00:00,2011.0,,2022.0,,TGS00064,,False
4075,Building permits - annual data,TIPSHO50,2025-05-05 09:00:00+00:00,2025-04-14 21:00:00+00:00,2005.0,,2024.0,,TIPSHO50,,False
5904,Building permits - monthly data,TEIIS550,2025-05-05 09:00:00+00:00,2025-04-15 09:00:00+00:00,2024.0,04,2025.0,03,TEIIS550,,False
5714,EMU convergence criterion series - annual data,TEC00097,2025-01-15 10:00:00+00:00,2025-01-15 10:00:00+00:00,2013.0,,2024.0,,TEC00097,,False
5770,Employment expectations indicator,TEIBS030,2025-04-29 09:00:00+00:00,2025-04-29 09:00:00+00:00,2024.0,05,2025.0,04,TEIBS030,,False


In [26]:
df_ds = df_ds[~df_ds['code'].isin(df_title_duplicates[df_title_duplicates['not_duplicate']==False])]

In [27]:
df_ds.describe(include='all')

Unnamed: 0,title,code,last update of data,last table structure change,start_year,start_quater,end_year,end_quater,domain,ds_name
count,7617,7617,7617,7617,7525.0,723,7525.0,827,7617,6834
unique,7345,7617,,,,20,,21,885,6635
top,Hourly earnings by economic activity and contractual working time (enterprises with 10 employed persons or more),POST_CUBE1_X$POST_DTR_1,,,,Q1,,Q4,ILC,10_F_BS
freq,4,1,,,,359,,305,512,4
mean,,,2022-05-11 01:20:49.021399296+00:00,2024-08-14 06:00:13.101746176+00:00,2005.721462,,2019.148704,,,
min,,,2009-03-26 10:00:00+00:00,2018-12-13 12:00:00+00:00,1947.0,,1983.0,,,
25%,,,2021-04-26 21:00:00+00:00,2024-01-03 22:00:00+00:00,2000.0,,2016.0,,,
50%,,,2024-12-12 22:00:00+00:00,2024-10-15 09:00:00+00:00,2007.0,,2022.0,,,
75%,,,2025-04-14 21:00:00+00:00,2025-03-28 10:00:00+00:00,2014.0,,2024.0,,,
max,,,2025-05-06 21:00:00+00:00,2025-05-06 09:00:00+00:00,2024.0,,2100.0,,,


In [28]:
df_ds['end_year'].unique()

array([2024., 2023., 2016., 2022., 2020., 2019., 2010., 2021., 2017.,
       2018., 2008., 1999., 2002., 2004., 2012., 2015., 2013., 2011.,
       2014., 2007., 2005., 2006., 2025., 2001., 2100., 2032., 2050.,
       2000., 1996., 1998., 2003., 2009., 1991.,   nan, 1983., 1984.,
       1997., 1995., 2026.])

# Check na values

In [29]:
(df_ds.isna().sum()/len(df_ds))*100

title                           0.000000
code                            0.000000
last update of data             0.000000
last table structure change     0.000000
start_year                      1.207825
start_quater                   90.508074
end_year                        1.207825
end_quater                     89.142707
domain                          0.000000
ds_name                        10.279638
dtype: float64

In [30]:
df_ds.isna().value_counts()

title  code   last update of data  last table structure change  start_year  start_quater  end_year  end_quater  domain  ds_name
False  False  False                False                        False       True          False     True        False   False      6132
                                                                                                                        True        566
                                                                            False         False     False       False   False       516
                                                                                                                        True        207
                                                                            True          False     False       False   False       104
                                                                True        True          True      True        False   False        82
                                                        

In [31]:
def setDomain(row):
    match = re.match(r"^([a-zA-Z]+)(_|-)?(.*)",row['code'])
    if match:
        gr2 = match.group(3)
        if gr2:
            return match.group(1),gr2
        else:
            return match.group(1), None
    return row['domain'],row['ds_name']

In [32]:
df_ds[['domain','ds_name']] = df_ds.apply(lambda row: pd.Series(setDomain(row)),axis=1)

In [33]:
df_ds.isna().value_counts()

title  code   last update of data  last table structure change  start_year  start_quater  end_year  end_quater  domain  ds_name
False  False  False                False                        False       True          False     True        False   False      6698
                                                                            False         False     False       False   False       723
                                                                            True          False     False       False   False       104
                                                                True        True          True      True        False   False        92
Name: count, dtype: int64

In [34]:
df_ds[df_ds[['start_year', 'start_quater', 'end_year', 'end_quater']].isna().all(axis=1)]

Unnamed: 0,title,code,last update of data,last table structure change,start_year,start_quater,end_year,end_quater,domain,ds_name
6024,Water resources: long-term annual average,TEN00001,2024-07-05 21:00:00+00:00,2024-07-05 21:00:00+00:00,,,,,TEN,00001
6654,Renewable freshwater resources - long term annual averages,ENV_WAT_LTAA,2024-07-05 21:00:00+00:00,2023-07-31 21:00:00+00:00,,,,,ENV,WAT_LTAA
7965,EU trade since 2017 by BEC/rev.5,DS-059329,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,,DS,059329
7966,EU trade since 2002 by BEC/rev.4,DS-059328,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,,DS,059328
7967,"Extra-EU trade since 2002 by mode of transport, by HS2-4-6",DS-059334,2025-04-23 09:00:00+00:00,2025-04-23 09:00:00+00:00,,,,,DS,059334
...,...,...,...,...,...,...,...,...,...,...
8060,Prices of letter mail and parcel services (USP under direct or indirect designation),POST_CUBE1_X$POST_PRI_1,2025-04-15 12:44:56+00:00,2025-04-15 12:44:56+00:00,,,,,POST,CUBE1_X$POST_PRI_1
8061,"Postal services falling under the universal service obligation (USP under direct or indirect designation, traffic)",POST_CUBE1_X$USO701,2025-04-15 12:44:56+00:00,2025-04-15 12:44:56+00:00,,,,,POST,CUBE1_X$USO701
8063,Number of enterprises providing postal services,POST_CUBE1_X$NUM701,2025-04-15 12:44:56+00:00,2025-04-15 12:44:56+00:00,,,,,POST,CUBE1_X$NUM701
8064,Access points (USP under direct or indirect designation ),POST_CUBE1_X$POST_ACC_1,2025-04-15 12:44:56+00:00,2025-04-15 12:44:56+00:00,,,,,POST,CUBE1_X$POST_ACC_1


Most of the datasets without any data about start and end are providing some summary analitical data, so they do not manupulate time period data. 

Start and end quater are available only if data set has any data period data.