# Function to disaggregate time series, e.g. when only annual data are available but quarterly data are required

Disaggregation is performed separately for each (category, region) combination.

In [None]:
def desaggregate_data(data, category, region, gdp_data=None):
    """Spread predicted offers over finer time steps in proportion to GDP.

    The employment rows selected for ``category`` / ``region`` are aligned
    with a GDP frame on ``TIME``. Every row then inherits the most recent
    ``OFFERS_PREDICTED``, ``TRIMESTER`` and ``YEAR`` values, and the offers
    are weighted by the row's share of the GDP total of its
    (``TRIMESTER``, ``YEAR``) group.

    Args:
        data: Employment data, passed unchanged to ``select_data``.
        category: Category selector forwarded to ``select_data``.
        region: Region selector forwarded to ``select_data``.
        gdp_data: Optional frame merged with the employment rows on
            ``TIME``. When omitted, the module-level ``gdp`` frame is used,
            which is what this function always did before the parameter
            existed. NOTE(review): presumably this frame supplies the
            ``GDP`` column -- confirm against the data loading code.

    Returns:
        pandas.DataFrame: the merged frame in ``TIME`` order, with two extra
        columns: ``GDP_SUM`` (GDP total of the row's group) and
        ``OFFERS_NUM_DESAGG`` (``OFFERS_PREDICTED * GDP / GDP_SUM``).
        Rows that have no ``TRIMESTER`` / ``YEAR`` to inherit (i.e. rows
        sorted before the first employment observation) are not returned,
        because they belong to no group.
    """
    if gdp_data is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        gdp_data = gdp

    employment_subset = select_data(data, category, region)

    # Outer merge keeps every timestamp from both sources; sorting by TIME is
    # what makes the forward fill below meaningful.
    merged = (
        pd.merge(
            employment_subset,
            gdp_data,
            on='TIME',
            how='outer',
            suffixes=('_sample1', '_sample2'),
        )
        .sort_values('TIME')
        .reset_index(drop=True)
    )

    # Propagate the latest low-frequency observation onto the following rows.
    carried_columns = ['OFFERS_PREDICTED', 'TRIMESTER', 'YEAR']
    merged[carried_columns] = merged[carried_columns].ffill()

    # GDP total per group. groupby skips missing keys by default, so the inner
    # merge drops rows whose TRIMESTER/YEAR are still missing after the fill.
    group_keys = ['TRIMESTER', 'YEAR']
    gdp_totals = merged.groupby(group_keys)['GDP'].sum().rename('GDP_SUM')
    merged = merged.merge(gdp_totals, on=group_keys)

    # No forward fill of GDP_SUM here: every remaining row already carries its
    # own group's total, and filling could only borrow another group's value.
    merged['OFFERS_NUM_DESAGG'] = (
        merged['OFFERS_PREDICTED'] * (merged['GDP'] / merged['GDP_SUM'])
    )

    return merged
