In [19]:
import os

import faostat
import openpyxl
import pandas as pd

# Modeling Task
Analyze how food waste relates to each of the following three target nutrition indicators (with potential mediation by per-capita food supply):

- Dietary Energy Adequacy – The average dietary energy supply as a percentage of the population’s required energy needs.
- Prevalence of Undernourishment – The share of the population that is chronically undernourished (consuming less than the minimum dietary energy requirement).
- Prevalence of Obesity – The share of the adult population that is obese (BMI ≥ 30).
<br>Mediating Factor: Food Supply per Capita – The average food availability per person, measured in kilocalories per person per day.

Countries for Analysis
| Region/Type         | Country  | Rationale                                                  |
|---------------------|----------|-------------------------------------------------------------|
| EU (mandatory)      | Bulgaria | Transitioning food system, moderate waste issues            |
| High-income EU      | France   | Adequate diets, active waste-reduction policies             |
| High-income EU      | Germany  | High surplus, distinct obesity profile                      |
| Sub-Saharan Africa  | Nigeria  | High undernourishment, post-harvest losses                  |
| South Asia          | India    | Large undernourished population, growing obesity            |
| Latin America       | Brazil   | Rising obesity, emerging economy                            |
| North America       | USA      | Very high obesity & waste, low hunger                       |
| MENA                | Egypt    | High obesity, import-reliant, food insecurity               |
| Southeast Asia      | Vietnam  | Rapid dietary shift, changing waste patterns                |
| Latin America       | Mexico   | Obesity crisis, notable waste                               |


# Data download
Downloading the data from Faostat API

Datasets: <br>
- FBSH - Food Balances (-2013, old methodology and population)
- FBS - Food Balances (2010-)
- FS - Suite of Food Security Indicators - dependent variables are here

In [23]:
#faostat.list_datasets()

In [20]:
#faostat.list_pars('FBS'), faostat.list_pars('FBSH'), faostat.list_pars('FS')

In [21]:
#Obtain the code for required features
#faostat.get_par('FS', 'element'), faostat.get_par('FS', 'items'), faostat.get_par('FS', 'area'), faostat.get_par('FS', 'year3')

In [22]:
#faostat.get_par('FBSH', 'element'), faostat.get_par('FBSH', 'itemsagg'), faostat.get_par('FBSH', 'items'), faostat.get_par('FBSH', 'area')

In [24]:
#faostat.get_par('FBS', 'element'), faostat.get_par('FBS', 'itemsagg'), faostat.get_par('FBS', 'items'), faostat.get_par('FBS', 'area')

__Obtain the mediator variables__ <br>
_FBS (new method) and FBSH (old method)_ <br>
Items:
- 'Grand Total + (Total)': '2901',
- 'Vegetal Products + (Total)': '2903',
- 'Animal Products + (Total)': '2941'

Elements:
- 'Total Population - Both sexes': '511'
- 'Food supply quantity (kg/capita/yr)': '645',
- 'Food supply (kcal/capita/day)': '664',
- 'Protein supply quantity (g/capita/day)': '674'
- 'Fat supply quantity (g/capita/day)': '684'
- 'Losses': '2120'

_Food Security data_ <br>
Items: <br>
- 'Prevalence of undernourishment (percent)': '21004',
- 'Number of people undernourished (million)': '21001',
- 'Prevalence of obesity in the adult population (18 years and older) (percent)': '21042',
- 'Number of obese adults (18 years and older) (million)': '210420',
- 'Average dietary energy supply adequacy (percent) (3-year average)': '21010',
- 'Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day)': '220001',
- 'Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day) (3-year average)': '22000',
- 'Minimum dietary energy requirement  (kcal/cap/day)': '21056',
- 'Average dietary energy requirement (kcal/cap/day)': '21057' <br>

Elements: <br>
- 'Value': '6120'
- 'Confidence interval': '6210'

__Obtain the predictor__ <br>
Because of API limitations, Losses require a separate call; however, the API doesn't return all the food items, so we download the data manually from https://www.fao.org/faostat/en/#data . In case you want to try the API, the keys are listed below. <br>

Items:
- 'Vegetal Products + (Total)': '2903',
- 'Vegetal Products > (List)': '2903>',
- 'Animal Products + (Total)': '2941',
- 'Animal Products > (List)': '2941>'

Elements:
- Losses: 2120

In [45]:
def download_data(area):
    """Download the FBS, FBSH and FS tables from FAOSTAT for one area.

    Parameters
    ----------
    area
        FAOSTAT area code, passed through unchanged as the ``area`` query parameter.

    Returns
    -------
    tuple
        ``(data_FBS, data_FBSH, data_FS)`` exactly as returned by
        ``faostat.get_data_df`` for the 'FBS', 'FBSH' and 'FS' datasets.
    """
    # FBS (new methodology) is requested for 2010-2023.
    data_FBS = faostat.get_data_df(
        'FBS',
        pars={
            'area': area,
            'element': [511, 2120, 645, 664, 674, 684, 2300],
            'item': [2903, 2901, 2941],
            'year': list(range(2010, 2024)),
        },
        strval=False,
    )

    # FBSH (old methodology) uses the same elements/items, requested for 2000-2013.
    data_FBSH = faostat.get_data_df(
        'FBSH',
        pars={
            'area': area,
            'element': [511, 2120, 645, 664, 674, 684, 2300],
            'item': [2903, 2901, 2941],
            'year': list(range(2000, 2014)),
        },
        strval=False,
    )

    # FS 'year3' codes are the year followed by a trailing 3 (20233 ... 20003),
    # listed from the most recent down to the oldest.
    year3_codes = [year * 10 + 3 for year in range(2023, 1999, -1)]
    data_FS = faostat.get_data_df(
        'FS',
        pars={
            'area': area,
            'element': [6120, 6210],
            'item': [21004, 21001, 21042, 210420, 21010, 220001, 22000, 21056, 21057],
            'year3': year3_codes,
        },
        strval=False,
    )

    print("Data has been downloaded.")
    return data_FBS, data_FBSH, data_FS

In [47]:
# Setting area code for Germany - 79
data_FBS, data_FBSH, data_FS = download_data(area = 79)

Data has been downloaded.


In [51]:
data_FS.head(2)

Unnamed: 0,Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value
0,FS,Suite of Food Security Indicators,79,Germany,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,136
1,FS,Suite of Food Security Indicators,79,Germany,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,137


In [15]:
data_FBSH['Element'].value_counts()

Element
Food supply (kcal/capita/day)             42
Protein supply quantity (g/capita/day)    42
Fat supply quantity (g/capita/day)        42
Name: count, dtype: int64

In [16]:
data_FS.head(2)

Unnamed: 0,Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value
0,FS,Suite of Food Security Indicators,79,Germany,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,136
1,FS,Suite of Food Security Indicators,79,Germany,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,137


In [17]:
data_FBSH.head(2)

Unnamed: 0,Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value
0,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,664,Food supply (kcal/capita/day),2901,Grand Total,2000,2000,kcal/cap/d,3336.0
1,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,664,Food supply (kcal/capita/day),2901,Grand Total,2001,2001,kcal/cap/d,3363.0


In [52]:
data_FBSH.dtypes

Domain Code      object
Domain           object
Area Code         int64
Area             object
Element Code      int64
Element          object
Item Code         int64
Item             object
Year Code         int64
Year             object
Unit             object
Value           float64
dtype: object

In [53]:
data_FBSH.shape, data_FBS.shape, data_FS.shape

((126, 12), (117, 12), (206, 12))

In [21]:
data_FBSH['Year'].unique(), data_FBS['Year'].unique(), data_FS['Year'].unique()

(array(['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007',
        '2008', '2009', '2010', '2011', '2012', '2013'], dtype=object),
 array(['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
        '2018', '2019', '2020', '2021', '2022'], dtype=object),
 array(['2000-2002', '2001-2003', '2002-2004', '2003-2005', '2004-2006',
        '2005-2007', '2006-2008', '2007-2009', '2008-2010', '2009-2011',
        '2010-2012', '2011-2013', '2012-2014', '2013-2015', '2014-2016',
        '2015-2017', '2016-2018', '2017-2019', '2018-2020', '2019-2021',
        '2020-2022', '2021-2023', '2000', '2001', '2002', '2003', '2004',
        '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012',
        '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
        '2021', '2022', '2023'], dtype=object))

In [22]:
data_FBSH.isna().sum(), data_FBS.isna().sum(), data_FS.isna().sum()

(Domain Code     0
 Domain          0
 Area Code       0
 Area            0
 Element Code    0
 Element         0
 Item Code       0
 Item            0
 Year Code       0
 Year            0
 Unit            0
 Value           0
 dtype: int64,
 Domain Code     0
 Domain          0
 Area Code       0
 Area            0
 Element Code    0
 Element         0
 Item Code       0
 Item            0
 Year Code       0
 Year            0
 Unit            0
 Value           0
 dtype: int64,
 Domain Code      0
 Domain           0
 Area Code        0
 Area             0
 Element Code     0
 Element          0
 Item Code        0
 Item             0
 Year Code        0
 Year             0
 Unit             0
 Value           22
 dtype: int64)

In [55]:
data_FS.loc[data_FS.isnull().any(axis=1)].head(2)

Unnamed: 0,Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value
90,FS,Suite of Food Security Indicators,79,Germany,6132,Value,210011,Number of people undernourished (million) (3-y...,20002002,2000-2002,million No,
91,FS,Suite of Food Security Indicators,79,Germany,6132,Value,210011,Number of people undernourished (million) (3-y...,20012003,2001-2003,million No,


In [18]:
#Download the datasets
#data_FBSH.to_excel("data_FBSH_gr.xlsx", index=False)
#data_FBS.to_excel("data_FBS_gr.xlsx", index=False)
#data_FS.to_excel("data_FS_gr.xlsx", index=False)

# Data preprocessing
- Includes transposing the items/elements for easier processing
- Deduplication of data
- Calculation of Total losses per year
- Merging required columns into one dataset

## Details
- FS data has 46 unique Years (3 year + 1 year data) <br>

# Food item matching
The Losses Value is provided per food Item in FAOSTAT, so we need to sum those values to get the Total Losses per Year.
We are using two data sources because the research methodology changed over time. Thus we compare the two datasets (FBSH and FBS) and check which food Items per year don't match.

**Decision:** We found that the food Items differ between the two methodologies (FBS and FBSH). We concluded that the production of and demand for foods can change over time. Because of this, we calculate the Total Food Losses value by summing the losses of all food Items for each year, regardless of whether the Items match between the datasets.

In [24]:
# The item-level "Losses" records cannot be pulled with the API, so they are read
# from CSV exports downloaded manually from the FAOSTAT website. These two frames
# are later compared item by item (for 2011) to see which Items don't match.
#
# The export folder defaults to the original author's location; set the
# FAO_LOSSES_DIR environment variable to point at your own copy instead of
# editing the paths in this cell.
LOSSES_DIR = os.environ.get('FAO_LOSSES_DIR', '/Users/aysun/Documents/fao-nutrition/Germany data')
FBSH_losses = pd.read_csv(os.path.join(LOSSES_DIR, 'FAOSTAT_data_old_gr.csv'))  # FBSH export (old methodology)
FBS_losses = pd.read_csv(os.path.join(LOSSES_DIR, 'FAOSTAT_data_new_gr.csv'))  # FBS export (new methodology)

In [25]:
FBSH_losses['Year'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013])

In [26]:
FBSH_losses.shape, FBS_losses.shape

((518, 14), (521, 15))

In [56]:
FBS_losses.head(2)

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (FBS),Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,FBS,Food Balances (2010-),276,Germany,5123,Losses,S2511,Wheat and products,2010,2010,1000 t,638,I,Imputed value,
1,FBS,Food Balances (2010-),276,Germany,5123,Losses,S2511,Wheat and products,2011,2011,1000 t,614,I,Imputed value,


In [57]:
FBSH_losses.head(2)

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (FBS),Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,S2511,Wheat and products,2000,2000,1000 t,521,I,Imputed value
1,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,S2511,Wheat and products,2001,2001,1000 t,550,I,Imputed value


In [29]:
# Restrict both loss tables to 2011 so the item lists can be compared for one year
FBSH_losses_2011 = FBSH_losses.query("Year == 2011")
FBS_losses_2011 = FBS_losses.query("Year == 2011")

# df1 = FBSH, df2 = FBS.
# FBSH items whose name has no exact counterpart among the FBS items
items_in_df1_not_in_df2 = FBSH_losses_2011.loc[
    ~FBSH_losses_2011['Item'].isin(FBS_losses_2011['Item']),
    ['Item', 'Item Code (FBS)'],
]

# FBS items whose name has no exact counterpart among the FBSH items
items_in_df2_not_in_df1 = FBS_losses_2011.loc[
    ~FBS_losses_2011['Item'].isin(FBSH_losses_2011['Item']),
    ['Item', 'Item Code (FBS)'],
]

# Display the FBS-only items (the FBSH-only items are shown in the next cell)
items_in_df2_not_in_df1

Unnamed: 0,Item,Item Code (FBS)
14,Rice and products,S2807
71,"Cereals, other",S2520
184,Groundnuts,S2552


In [30]:
items_in_df1_not_in_df2

Unnamed: 0,Item,Item Code (FBS)
25,Rice (Milled Equivalent),S2805
95,"Cereals, Other",S2520
193,Groundnuts (Shelled Eq),S2556


# Deduplicate data
For some countries (for example Bulgaria) we noticed that some Items are duplicated within a year, for two reasons:
- The same Item has I (Imputed by receiver agency) and E (Estimated value) flag _(see below the definitions)_ - FBSH data, year 2011, S2949 Eggs, S2744	 Eggs
- Two Items with similar names, seemingly including the other - FBSH data, year 2011, S2911	Pulses, S2549	Pulses, Other and products
- **Decision**: Choose flag I, because all of the food items in FBS are with flag I, possibly the Estimated values aren't conclusive.

FAO definitions of flags I and E: <br>
<br>
__E - Estimated value__ - Observation obtained through an estimation methodology or based on the use of a limited amount of data (e.g. to produce a value at an early stage of the production stage while not all data are available). If needed, additional information can be provided through free text using the COMMENT_OBS attribute at the observation level or at a higher level (in SDMX-compliant environment). This code is also to be used when the estimation is done by a sender agency (and flagged as such). When the imputation is carried out by a receiver agency in order to replace or fill gaps in reported data series, the flag to use is I “Value imputed by a receiving agency”. <br>
__I - Value imputed by a receiving agency__ Observation imputed by a receiving agency to replace or fill gaps in reported data series. This code is intended to cover all cases where a receiving agency publishes data about a sending agency that do not come from an official source in the sender agency's reporting framework. When the estimation is done by the sender agency, the flag to use is E “Estimated value”.


In [31]:
FBSH_losses.loc[(FBSH_losses['Year'] == 2011) & (FBSH_losses['Item'] == 'Eggs')]

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (FBS),Item,Year Code,Year,Unit,Value,Flag,Flag Description
501,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,S2744,Eggs,2011,2011,1000 t,7,I,Imputed value


In [32]:
FBSH_losses['Flag'].value_counts()

Flag
I    518
Name: count, dtype: int64

In [33]:
FBS_losses['Flag'].value_counts()

Flag
I    521
Name: count, dtype: int64

In [34]:
# Order the rows by Year and Item, with Flag in descending alphabetical order,
# so that an 'I' row is placed ahead of an 'E' row for the same (Year, Item).
df_sorted_FSBH = FBSH_losses.sort_values(
    ['Year', 'Item', 'Flag'],
    ascending=[True, True, False],
)

# Keep only the leading row of every (Year, Item) pair
FBSH_losses_d = df_sorted_FSBH.loc[
    ~df_sorted_FSBH.duplicated(subset=['Year', 'Item'], keep='first')
]
FBSH_losses_d['Flag'].value_counts()

Flag
I    518
Name: count, dtype: int64

# Sum Losses across Items per Year

In [58]:
def _total_losses_by_year(losses, dedupe=False):
    """Sum the item-level 'Value' column per 'Year' into 'Total Losses (1000 t)'.

    When ``dedupe`` is true, only one row per (Year, Item) pair is kept before
    summing, so an Item reported twice in a year is not counted twice.
    """
    if dedupe:
        # Flag is sorted in descending order, which places 'I' ahead of 'E'
        # within a (Year, Item) pair; keep='first' then retains that row.
        losses = losses.sort_values(
            by=['Year', 'Item', 'Flag'], ascending=[True, True, False]
        ).drop_duplicates(subset=['Year', 'Item'], keep='first')

    totals = losses.groupby('Year')['Value'].sum().reset_index()
    return totals.rename(columns={'Value': 'Total Losses (1000 t)'})


def _pivot_balance(balance, yearly_losses):
    """Attach the yearly loss totals to a food-balance table and pivot it wide.

    Works on a copy of ``balance`` (via ``assign``), so the caller's frame keeps
    its original 'Year' dtype and columns.
    """
    merged = balance.assign(Year=balance['Year'].astype(int)).merge(
        yearly_losses[['Year', 'Total Losses (1000 t)']], on='Year', how='left'
    )

    # One output column per Element/Item combination
    merged['Element_Item'] = merged['Element'] + "_" + merged['Item']

    return merged.pivot_table(
        index=['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year', 'Total Losses (1000 t)'],
        columns='Element_Item',
        values='Value',
        aggfunc='first'
    ).reset_index()


def preprocessing(FBSH_losses_data, FBS_losses_data, data_FBS, data_FBSH, data_FS):
    """Build the wide FS, FBS and FBSH tables, with yearly total losses attached.

    Parameters
    ----------
    FBSH_losses_data : pandas.DataFrame
        Item-level losses for the old methodology. Must provide the columns
        'Year', 'Item', 'Flag' and 'Value'. Duplicate (Year, Item) rows are
        reduced to one row before summing.
    FBS_losses_data : pandas.DataFrame
        Item-level losses for the new methodology. Must provide 'Year' and
        'Value'. All rows are summed (no de-duplication is applied).
    data_FBS, data_FBSH : pandas.DataFrame
        Food-balance tables in long format, with 'Year' convertible to int.
    data_FS : pandas.DataFrame
        Food-security table in long format.

    Returns
    -------
    tuple
        ``(pivoted_FS, pivoted_FBS, pivoted_FBSH)``.

    Notes
    -----
    The function reads only its arguments (it no longer falls back on the
    notebook-level ``FBSH_losses`` / ``FBS_losses`` variables) and does not
    modify any of the frames passed in.
    """
    # Yearly loss totals for each methodology
    total_losses_per_year_FBSH = _total_losses_by_year(FBSH_losses_data, dedupe=True)
    total_losses_per_year_FBS = _total_losses_by_year(FBS_losses_data)

    # FS: one column per indicator (Item)
    pivoted_FS = data_FS.pivot_table(
        index=['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year'],
        columns='Item',
        values='Value',
        aggfunc='first'
    ).reset_index()

    # FBSH / FBS: merge the totals on Year, then one column per Element_Item
    pivoted_FBSH = _pivot_balance(data_FBSH, total_losses_per_year_FBSH)
    pivoted_FBS = _pivot_balance(data_FBS, total_losses_per_year_FBS)

    return pivoted_FS, pivoted_FBS, pivoted_FBSH

In [61]:
fs, fbs, fbsh = preprocessing(FBSH_losses, FBS_losses, data_FBS, data_FBSH, data_FS)

In [65]:
fbs.shape, fs.shape, fbsh.shape

((13, 16), (46, 14), (14, 16))

In [35]:
# Yearly loss totals: FBSH is summed from the de-duplicated rows, FBS from all rows
total_losses_per_year_FBSH = (
    FBSH_losses_d
    .groupby('Year')['Value']
    .sum()
    .reset_index()
    .rename(columns={'Value': 'Total Losses (1000 t)'})
)

total_losses_per_year_FBS = (
    FBS_losses
    .groupby('Year')['Value']
    .sum()
    .reset_index()
    .rename(columns={'Value': 'Total Losses (1000 t)'})
)

# Attach each year's total to every item-level row of the same year
FBSH_losses_sum = FBSH_losses_d.merge(total_losses_per_year_FBSH, on='Year', how='left')
FBS_losses_sum = FBS_losses.merge(total_losses_per_year_FBS, on='Year', how='left')

In [36]:
FBS_losses_sum.head(2)

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (FBS),Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note,Total Losses (1000 t)
0,FBS,Food Balances (2010-),276,Germany,5123,Losses,S2511,Wheat and products,2010,2010,1000 t,638,I,Imputed value,,3510
1,FBS,Food Balances (2010-),276,Germany,5123,Losses,S2511,Wheat and products,2011,2011,1000 t,614,I,Imputed value,,3756


In [33]:
FBSH_losses_sum.head(2)

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (FBS),Item,Year Code,Year,Unit,Value,Flag,Flag Description,Total Losses (1000 t)
0,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,S2617,Apples and products,2000,2000,1000 t,212,I,Imputed value,4021
1,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,S2615,Bananas,2000,2000,1000 t,10,I,Imputed value,4021


In [13]:
# Remove the item-level columns (ignoring any that are absent), then keep the
# first row of each year so that one row per year carries the yearly total.
FBSH_losses_sum_reduced = (
    FBSH_losses_sum
    .drop(columns=['Item', 'Item Code (FBS)', 'Value', 'Flag', 'Flag Description'], errors='ignore')
    .drop_duplicates(subset='Year')
)
FBS_losses_sum_reduced = (
    FBS_losses_sum
    .drop(columns=['Item', 'Item Code (FBS)', 'Value', 'Flag', 'Flag Description'], errors='ignore')
    .drop_duplicates(subset='Year')
)

In [14]:
FBS_losses_sum_reduced

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Year Code,Year,Unit,Note,Total Losses (1000 t)
0,FBS,Food Balances (2010-),276,Germany,5123,Losses,2010,2010,1000 t,,3510
1,FBS,Food Balances (2010-),276,Germany,5123,Losses,2011,2011,1000 t,,3756
2,FBS,Food Balances (2010-),276,Germany,5123,Losses,2012,2012,1000 t,,3748
3,FBS,Food Balances (2010-),276,Germany,5123,Losses,2013,2013,1000 t,,3797
4,FBS,Food Balances (2010-),276,Germany,5123,Losses,2014,2014,1000 t,,5075
5,FBS,Food Balances (2010-),276,Germany,5123,Losses,2015,2015,1000 t,,5120
6,FBS,Food Balances (2010-),276,Germany,5123,Losses,2016,2016,1000 t,,5498
7,FBS,Food Balances (2010-),276,Germany,5123,Losses,2017,2017,1000 t,,6159
8,FBS,Food Balances (2010-),276,Germany,5123,Losses,2018,2018,1000 t,,4421
9,FBS,Food Balances (2010-),276,Germany,5123,Losses,2019,2019,1000 t,,4790


In [36]:
FBSH_losses_sum_reduced

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Year Code,Year,Unit,Total Losses (1000 t)
0,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2000,2000,1000 t,4021
37,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2001,2001,1000 t,3651
74,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2002,2002,1000 t,3228
111,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2003,2003,1000 t,3061
148,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2004,2004,1000 t,3493
185,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2005,2005,1000 t,3334
222,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2006,2006,1000 t,3377
259,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2007,2007,1000 t,3757
296,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2008,2008,1000 t,3750
333,FBSH,"Food Balances (-2013, old methodology and popu...",276,Germany,5123,Losses,2009,2009,1000 t,3862


In [37]:
#FBSH_losses_sum.to_excel("data_FBSH_final.xlsx", index=False)
#FBS_losses_sum.to_excel("data_FBS_final.xlsx", index=False)

# Transpose data

In [28]:
# Pivot the table
# Reshape FS from long to wide: one row per area/year key, one column per Item
pivoted_FS = pd.pivot_table(
    data_FS,
    values='Value',
    index=['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year'],
    columns='Item',
    aggfunc='first',
)
pivoted_FS = pivoted_FS.reset_index()

In [29]:
pivoted_FS.head(2)

Item,Domain Code,Domain,Area Code,Area,Year Code,Year,Average dietary energy requirement (kcal/cap/day),Average dietary energy supply adequacy (percent) (3-year average),Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day),Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day) (3-year average),Minimum dietary energy requirement (kcal/cap/day),Number of obese adults (18 years and older) (million),Prevalence of obesity in the adult population (18 years and older) (percent),Prevalence of undernourishment (percent) (3-year average)
0,FS,Suite of Food Security Indicators,79,Germany,2000,2000,2542,,3423,,1957,11.8,17.8,
1,FS,Suite of Food Security Indicators,79,Germany,2001,2001,2542,,3451,,1957,12.1,18.2,


In [40]:
pivoted_FS.shape, pivoted_FS.columns

((46, 14),
 Index(['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year',
        'Average dietary energy requirement (kcal/cap/day)',
        'Average dietary energy supply adequacy (percent) (3-year average)',
        'Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day)',
        'Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day) (3-year average)',
        'Minimum dietary energy requirement  (kcal/cap/day)',
        'Number of obese adults (18 years and older) (million)',
        'Prevalence of obesity in the adult population (18 years and older) (percent)',
        'Prevalence of undernourishment (percent) (3-year average)'],
       dtype='object', name='Item'))

In [17]:
# Merge Total Losses (1000 t) to the rest of the data
# Merge Total Losses (1000 t) to the rest of the data.
# Columns added by a previous run of this cell are dropped first: merging onto a
# frame that already has 'Total Losses (1000 t)' would create suffixed
# '_x' / '_y' columns and the pivot below would no longer find its index column.
data_FBSH = (
    data_FBSH
    .drop(columns=['Total Losses (1000 t)', 'Element_Item'], errors='ignore')
    .assign(Year=lambda frame: frame['Year'].astype(int))
    .merge(FBSH_losses_sum_reduced[['Year', 'Total Losses (1000 t)']], on='Year', how='left')
)

# Create a combined column for Element + Item
data_FBSH['Element_Item'] = data_FBSH['Element'] + "_" + data_FBSH['Item']

# Pivot the table: one column per Element_Item combination
pivoted_FBSH = data_FBSH.pivot_table(
    index=['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year', 'Total Losses (1000 t)'],
    columns='Element_Item',
    values='Value',
    aggfunc='first'
).reset_index()

In [18]:
pivoted_FBSH.head(2)

Element_Item,Domain Code,Domain,Area Code,Area,Year Code,Year,Total Losses (1000 t),Fat supply quantity (g/capita/day)_Animal Products,Fat supply quantity (g/capita/day)_Grand Total,Fat supply quantity (g/capita/day)_Vegetal Products,Food supply (kcal/capita/day)_Animal Products,Food supply (kcal/capita/day)_Grand Total,Food supply (kcal/capita/day)_Vegetal Products,Protein supply quantity (g/capita/day)_Animal Products,Protein supply quantity (g/capita/day)_Grand Total,Protein supply quantity (g/capita/day)_Vegetal Products
0,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2000,2000,4021,79.72,142.8,63.08,1012.0,3336.0,2324.0,56.48,94.46,37.98
1,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2001,2001,3651,80.39,137.13,56.74,1029.0,3363.0,2334.0,58.13,97.86,39.73


In [19]:
pivoted_FBSH.isna().sum()

Element_Item
Domain Code                                                0
Domain                                                     0
Area Code                                                  0
Area                                                       0
Year Code                                                  0
Year                                                       0
Total Losses (1000 t)                                      0
Fat supply quantity (g/capita/day)_Animal Products         0
Fat supply quantity (g/capita/day)_Grand Total             0
Fat supply quantity (g/capita/day)_Vegetal Products        0
Food supply (kcal/capita/day)_Animal Products              0
Food supply (kcal/capita/day)_Grand Total                  0
Food supply (kcal/capita/day)_Vegetal Products             0
Protein supply quantity (g/capita/day)_Animal Products     0
Protein supply quantity (g/capita/day)_Grand Total         0
Protein supply quantity (g/capita/day)_Vegetal Products    0
dtype: int6

In [30]:
pivoted_FS.isna().sum()

Item
Domain Code                                                                                                            0
Domain                                                                                                                 0
Area Code                                                                                                              0
Area                                                                                                                   0
Year Code                                                                                                              0
Year                                                                                                                   0
Average dietary energy requirement (kcal/cap/day)                                                                     22
Average dietary energy supply adequacy (percent) (3-year average)                                                     24
Dietary energy supply used 

In [31]:
pivoted_FS.loc[pivoted_FS.isnull().any(axis=1)]

Item,Domain Code,Domain,Area Code,Area,Year Code,Year,Average dietary energy requirement (kcal/cap/day),Average dietary energy supply adequacy (percent) (3-year average),Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day),Dietary energy supply used in the estimation of the prevalence of undernourishment (kcal/cap/day) (3-year average),Minimum dietary energy requirement (kcal/cap/day),Number of obese adults (18 years and older) (million),Prevalence of obesity in the adult population (18 years and older) (percent),Prevalence of undernourishment (percent) (3-year average)
0,FS,Suite of Food Security Indicators,79,Germany,2000,2000,2542.0,,3423.0,,1957.0,11.8,17.8,
1,FS,Suite of Food Security Indicators,79,Germany,2001,2001,2542.0,,3451.0,,1957.0,12.1,18.2,
2,FS,Suite of Food Security Indicators,79,Germany,2002,2002,2543.0,,3517.0,,1957.0,12.3,18.6,
3,FS,Suite of Food Security Indicators,79,Germany,2003,2003,2545.0,,3480.0,,1959.0,12.6,19.0,
4,FS,Suite of Food Security Indicators,79,Germany,2004,2004,2546.0,,3503.0,,1959.0,12.9,19.3,
5,FS,Suite of Food Security Indicators,79,Germany,2005,2005,2548.0,,3544.0,,1961.0,13.1,19.6,
6,FS,Suite of Food Security Indicators,79,Germany,2006,2006,2551.0,,3562.0,,1963.0,13.3,19.9,
7,FS,Suite of Food Security Indicators,79,Germany,2007,2007,2553.0,,3627.0,,1964.0,13.5,20.1,
8,FS,Suite of Food Security Indicators,79,Germany,2008,2008,2554.0,,3617.0,,1964.0,13.6,20.3,
9,FS,Suite of Food Security Indicators,79,Germany,2009,2009,2555.0,,3614.0,,1965.0,13.7,20.4,


In [22]:
# Merge Total Losses (1000 t) to the rest of the data
# Merge Total Losses (1000 t) to the rest of the data.
# Columns added by a previous run of this cell are dropped first: merging onto a
# frame that already has 'Total Losses (1000 t)' would create suffixed
# '_x' / '_y' columns and the pivot below would no longer find its index column.
data_FBS = (
    data_FBS
    .drop(columns=['Total Losses (1000 t)', 'Element_Item'], errors='ignore')
    .assign(Year=lambda frame: frame['Year'].astype(int))
    .merge(FBS_losses_sum_reduced[['Year', 'Total Losses (1000 t)']], on='Year', how='left')
)

# Create a combined column for Element + Item
data_FBS['Element_Item'] = data_FBS['Element'] + "_" + data_FBS['Item']

# Pivot the table: one column per Element_Item combination
pivoted_FBS = data_FBS.pivot_table(
    index=['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year', 'Total Losses (1000 t)'],
    columns='Element_Item',
    values='Value',
    aggfunc='first'
).reset_index()

In [23]:
pivoted_FBS.head(2)

Element_Item,Domain Code,Domain,Area Code,Area,Year Code,Year,Total Losses (1000 t),Fat supply quantity (g/capita/day)_Animal Products,Fat supply quantity (g/capita/day)_Grand Total,Fat supply quantity (g/capita/day)_Vegetal Products,Food supply (kcal/capita/day)_Animal Products,Food supply (kcal/capita/day)_Grand Total,Food supply (kcal/capita/day)_Vegetal Products,Protein supply quantity (g/capita/day)_Animal Products,Protein supply quantity (g/capita/day)_Grand Total,Protein supply quantity (g/capita/day)_Vegetal Products
0,FBS,Food Balances (2010-),79,Germany,2010,2010,3510,82.03,147.99,65.97,1114.0,3596.0,2482.0,69.69,109.51,39.82
1,FBS,Food Balances (2010-),79,Germany,2011,2011,3756,83.13,149.14,66.01,1124.0,3627.0,2502.0,69.88,110.49,40.61


In [32]:
pivoted_FBS.shape, pivoted_FBS.columns, pivoted_FBSH.shape, pivoted_FBSH.columns, pivoted_FS.shape, pivoted_FS.columns

((13, 16),
 Index(['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year',
        'Total Losses (1000 t)',
        'Fat supply quantity (g/capita/day)_Animal Products',
        'Fat supply quantity (g/capita/day)_Grand Total',
        'Fat supply quantity (g/capita/day)_Vegetal Products',
        'Food supply (kcal/capita/day)_Animal Products',
        'Food supply (kcal/capita/day)_Grand Total',
        'Food supply (kcal/capita/day)_Vegetal Products',
        'Protein supply quantity (g/capita/day)_Animal Products',
        'Protein supply quantity (g/capita/day)_Grand Total',
        'Protein supply quantity (g/capita/day)_Vegetal Products'],
       dtype='object', name='Element_Item'),
 (14, 16),
 Index(['Domain Code', 'Domain', 'Area Code', 'Area', 'Year Code', 'Year',
        'Total Losses (1000 t)',
        'Fat supply quantity (g/capita/day)_Animal Products',
        'Fat supply quantity (g/capita/day)_Grand Total',
        'Fat supply quantity (g/capita/day)_Vegeta

In [25]:
#Download the edited datasets
#pivoted_FBSH.to_excel("data_FBSH_gr_edited.xlsx", index=False)
#pivoted_FBS.to_excel("data_FBS_gr_edited.xlsx", index=False)
#pivoted_FS.to_excel("data_FS_gr_edited.xlsx", index=False)

NameError: name 'pivoted_FS' is not defined

In [26]:
pivoted_FBSH

Element_Item,Domain Code,Domain,Area Code,Area,Year Code,Year,Total Losses (1000 t),Fat supply quantity (g/capita/day)_Animal Products,Fat supply quantity (g/capita/day)_Grand Total,Fat supply quantity (g/capita/day)_Vegetal Products,Food supply (kcal/capita/day)_Animal Products,Food supply (kcal/capita/day)_Grand Total,Food supply (kcal/capita/day)_Vegetal Products,Protein supply quantity (g/capita/day)_Animal Products,Protein supply quantity (g/capita/day)_Grand Total,Protein supply quantity (g/capita/day)_Vegetal Products
0,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2000,2000,4021,79.72,142.8,63.08,1012.0,3336.0,2324.0,56.48,94.46,37.98
1,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2001,2001,3651,80.39,137.13,56.74,1029.0,3363.0,2334.0,58.13,97.86,39.73
2,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2002,2002,3228,81.93,139.04,57.11,1048.0,3427.0,2379.0,58.1,99.0,40.89
3,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2003,2003,3061,82.33,139.19,56.85,1063.0,3390.0,2328.0,59.55,97.95,38.4
4,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2004,2004,3493,79.77,138.58,58.81,1037.0,3411.0,2375.0,58.2,96.73,38.53
5,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2005,2005,3334,80.55,138.96,58.41,1047.0,3450.0,2403.0,58.35,96.96,38.61
6,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2006,2006,3377,79.37,140.72,61.34,1045.0,3466.0,2421.0,59.7,99.18,39.48
7,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2007,2007,3757,82.33,146.0,63.67,1084.0,3527.0,2443.0,62.0,102.49,40.5
8,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2008,2008,3750,80.24,144.11,63.87,1061.0,3517.0,2456.0,61.38,101.28,39.89
9,FBSH,"Food Balances (-2013, old methodology and popu...",79,Germany,2009,2009,3862,80.52,142.81,62.29,1074.0,3515.0,2441.0,62.34,102.56,40.22


# Moving average processing

In [None]:
# TODO: placeholder — the CSV path is still an empty string, so this cell cannot
# load a file as written. Fill in the path of the dataset to be used for the
# moving-average step (presumably the merged Germany data — confirm) before running.
final_data_gr = pd.read_csv("")

In [38]:
data_FS.head(2)

Unnamed: 0,Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value
0,FS,Suite of Food Security Indicators,79,Germany,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,136
1,FS,Suite of Food Security Indicators,79,Germany,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,137


# Data exploration