# Nutrition in Germany from 1961-2013

1. Import packages, load csv file, explore csv file

In [1]:
import pandas as pd
import numpy as np

Ger_food = pd.read_csv('GER_FAOSTAT_food_consumption.csv')

In [2]:
Ger_food

Unnamed: 0,Domain Code,Domain,Country Code,Country,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2901,Grand Total,1961,1961,kcal/capita/day,2855,Fc,Calculated data
1,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2909,Sugar & Sweeteners,1961,1961,kcal/capita/day,336,Fc,Calculated data
2,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2914,Vegetable Oils,1961,1961,kcal/capita/day,252,Fc,Calculated data
3,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2918,Vegetables,1961,1961,kcal/capita/day,31,Fc,Calculated data
4,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2919,Fruits - Excluding Wine,1961,1961,kcal/capita/day,108,Fc,Calculated data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2941,Animal Products,2013,2013,kcal/capita/day,1044,Fc,Calculated data
579,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2943,Meat,2013,2013,kcal/capita/day,353,Fc,Calculated data
580,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2946,Animal fats,2013,2013,kcal/capita/day,273,Fc,Calculated data
581,FBS,Food Balance Sheets,79,Germany,664,Food supply (kcal/capita/day),2948,Milk - Excluding Butter,2013,2013,kcal/capita/day,331,Fc,Calculated data


2. Drop columns which are not needed for visualisation

In [3]:
Ger_food = Ger_food.drop(['Domain Code', 'Domain', 'Country Code', 'Country', 'Element Code', 'Element', 'Item Code', 'Year Code', 'Flag', 'Flag Description'], axis=1)

In [4]:
Ger_food

Unnamed: 0,Item,Year,Unit,Value
0,Grand Total,1961,kcal/capita/day,2855
1,Sugar & Sweeteners,1961,kcal/capita/day,336
2,Vegetable Oils,1961,kcal/capita/day,252
3,Vegetables,1961,kcal/capita/day,31
4,Fruits - Excluding Wine,1961,kcal/capita/day,108
...,...,...,...,...
578,Animal Products,2013,kcal/capita/day,1044
579,Meat,2013,kcal/capita/day,353
580,Animal fats,2013,kcal/capita/day,273
581,Milk - Excluding Butter,2013,kcal/capita/day,331


3. Look at unique values in 'Items' column and transform them to columns to only have one year per row

In [5]:
Ger_food.Item.unique()

array(['Grand Total', 'Sugar & Sweeteners', 'Vegetable Oils',
       'Vegetables', 'Fruits - Excluding Wine', 'Alcoholic Beverages',
       'Animal Products', 'Meat', 'Animal fats',
       'Milk - Excluding Butter', 'Fish, Seafood'], dtype=object)

In [6]:
Ger_food.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 4 columns):
Item     583 non-null object
Year     583 non-null int64
Unit     583 non-null object
Value    583 non-null int64
dtypes: int64(2), object(2)
memory usage: 18.3+ KB


In [7]:
Ger_food_clean = Ger_food.pivot(index='Year', columns='Item', values='Value').rename(columns={"Fruits - Excluding Wine": "Fruits", "Milk - Excluding Butter": "Milk", "Grand Total": "Total kcal Consumption"})

In [8]:
Ger_food_clean

Item,Alcoholic Beverages,Animal Products,Animal fats,"Fish, Seafood",Fruits,Total kcal Consumption,Meat,Milk,Sugar & Sweeteners,Vegetable Oils,Vegetables
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1961,162,923,346,22,108,2855,262,232,336,252,31
1962,175,932,352,23,120,2915,262,236,333,270,32
1963,187,940,357,24,125,2899,261,239,338,260,36
1964,203,943,356,24,111,2983,270,231,351,270,33
1965,208,948,354,24,116,3012,276,233,338,280,32
1966,211,945,353,23,126,3012,279,226,353,269,37
1967,212,966,359,23,129,3004,286,233,343,272,36
1968,222,990,366,23,146,3062,303,231,365,272,34
1969,235,997,364,25,135,3043,305,233,358,263,36
1970,255,1028,370,24,132,3101,322,238,376,265,40


4. Change order of columns to first display general information, like 'Total kcal consumption' and 'Animal Products'

In [9]:
cols = list(Ger_food_clean.columns.values)
cols

['Alcoholic Beverages',
 'Animal Products',
 'Animal fats',
 'Fish, Seafood',
 'Fruits',
 'Total kcal Consumption',
 'Meat',
 'Milk',
 'Sugar & Sweeteners',
 'Vegetable Oils',
 'Vegetables']

In [10]:
Ger_food_clean = Ger_food_clean[['Total kcal Consumption','Animal Products','Alcoholic Beverages','Animal fats','Fish, Seafood','Fruits','Meat','Milk','Sugar & Sweeteners','Vegetable Oils','Vegetables']]
Ger_food_clean

Item,Total kcal Consumption,Animal Products,Alcoholic Beverages,Animal fats,"Fish, Seafood",Fruits,Meat,Milk,Sugar & Sweeteners,Vegetable Oils,Vegetables
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1961,2855,923,162,346,22,108,262,232,336,252,31
1962,2915,932,175,352,23,120,262,236,333,270,32
1963,2899,940,187,357,24,125,261,239,338,260,36
1964,2983,943,203,356,24,111,270,231,351,270,33
1965,3012,948,208,354,24,116,276,233,338,280,32
1966,3012,945,211,353,23,126,279,226,353,269,37
1967,3004,966,212,359,23,129,286,233,343,272,36
1968,3062,990,222,366,23,146,303,231,365,272,34
1969,3043,997,235,364,25,135,305,233,358,263,36
1970,3101,1028,255,370,24,132,322,238,376,265,40


In [46]:
Ger_food_clean.reset_index(level=Ger_food_clean.index.names, inplace=True)

In [48]:
Ger_food_clean.columns.value_counts()

Vegetable Oils            1
Vegetables                1
Alcoholic Beverages       1
Meat                      1
Fish, Seafood             1
Total kcal Consumption    1
Sugar & Sweeteners        1
Animal fats               1
Fruits                    1
Animal Products           1
Milk                      1
Year                      1
Name: Item, dtype: int64

# % of Overweight Population in Germany from 1975-2016

In [11]:
Ger_over = pd.read_csv('GER_overweight_1975_2016.csv')

In [12]:
Ger_over

Unnamed: 0.1,Unnamed: 0,2016,2016.1,2016.2,2015,2015.1,2015.2,2014,2014.1,2014.2,...,1978.2,1977,1977.1,1977.2,1976,1976.1,1976.2,1975,1975.1,1975.2
0,,"Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...",...,"Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr...","Prevalence of overweight among adults, BMI &Gr..."
1,,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,...,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years,18+ years
2,Country,Both sexes,Male,Female,Both sexes,Male,Female,Both sexes,Male,Female,...,Female,Both sexes,Male,Female,Both sexes,Male,Female,Both sexes,Male,Female
3,Germany,62.8 [58.7-66.7],69.7 [63.7-75.2],56.2 [50.6-62.0],62.3 [58.4-66.0],69.3 [63.7-74.4],55.8 [50.5-61.1],61.9 [58.2-65.3],68.8 [63.7-73.6],55.3 [50.4-60.4],...,39.5 [34.5-44.8],41.6 [37.5-45.8],44.4 [38.3-50.6],39.2 [33.9-44.8],41.1 [36.8-45.6],43.7 [37.2-50.2],38.9 [33.3-44.8],40.6 [36.0-45.3],43.0 [36.1-49.9],38.6 [32.7-44.9]


In [13]:
Ger_over = Ger_over.drop([Ger_over.index[0] , Ger_over.index[1]])

In [14]:
Ger_over_t = Ger_over.transpose().reset_index()


Ger_over_t = Ger_over_t.rename(columns=Ger_over_t.iloc[0])
Ger_over_t.drop(Ger_over_t.index[:1], inplace=True)
Ger_over_t

Unnamed: 0.1,Unnamed: 0,Country,Germany
1,2016,Both sexes,62.8 [58.7-66.7]
2,2016.1,Male,69.7 [63.7-75.2]
3,2016.2,Female,56.2 [50.6-62.0]
4,2015,Both sexes,62.3 [58.4-66.0]
5,2015.1,Male,69.3 [63.7-74.4]
...,...,...,...
122,1976.1,Male,43.7 [37.2-50.2]
123,1976.2,Female,38.9 [33.3-44.8]
124,1975,Both sexes,40.6 [36.0-45.3]
125,1975.1,Male,43.0 [36.1-49.9]


In [15]:
Ger_over_t.reset_index(drop=True)
Ger_over_t.set_index('Unnamed: 0', inplace=True)

In [16]:
Ger_over_t = Ger_over_t.rename({"Country": "Gender", "Germany": "Percentage Overweight"}, axis=1)
Ger_over_t

Unnamed: 0_level_0,Gender,Percentage Overweight
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1
2016,Both sexes,62.8 [58.7-66.7]
2016.1,Male,69.7 [63.7-75.2]
2016.2,Female,56.2 [50.6-62.0]
2015,Both sexes,62.3 [58.4-66.0]
2015.1,Male,69.3 [63.7-74.4]
...,...,...
1976.1,Male,43.7 [37.2-50.2]
1976.2,Female,38.9 [33.3-44.8]
1975,Both sexes,40.6 [36.0-45.3]
1975.1,Male,43.0 [36.1-49.9]


In [17]:
B=Ger_over_t['Percentage Overweight'].str.replace(r"\[.*\]","")

In [18]:
Ger_over_t['Percentage Overweight']=B

In [19]:
Ger_over_t

Unnamed: 0_level_0,Gender,Percentage Overweight
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1
2016,Both sexes,62.8
2016.1,Male,69.7
2016.2,Female,56.2
2015,Both sexes,62.3
2015.1,Male,69.3
...,...,...
1976.1,Male,43.7
1976.2,Female,38.9
1975,Both sexes,40.6
1975.1,Male,43.0


In [20]:
Ger_over_t = Ger_over_t.rename_axis('Year')
Ger_over_t

Unnamed: 0_level_0,Gender,Percentage Overweight
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2016,Both sexes,62.8
2016.1,Male,69.7
2016.2,Female,56.2
2015,Both sexes,62.3
2015.1,Male,69.3
...,...,...
1976.1,Male,43.7
1976.2,Female,38.9
1975,Both sexes,40.6
1975.1,Male,43.0


In [21]:
Ger_over_t = Ger_over_t.reset_index()
Ger_over_t

Unnamed: 0,Year,Gender,Percentage Overweight
0,2016,Both sexes,62.8
1,2016.1,Male,69.7
2,2016.2,Female,56.2
3,2015,Both sexes,62.3
4,2015.1,Male,69.3
...,...,...,...
121,1976.1,Male,43.7
122,1976.2,Female,38.9
123,1975,Both sexes,40.6
124,1975.1,Male,43.0


In [22]:
Ger_over_t = Ger_over_t.pivot(index='Year', columns='Gender', values='Percentage Overweight')
Ger_over_t

Gender,Both sexes,Female,Male
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1975,40.6,,
1975.1,,,43.0
1975.2,,38.6,
1976,41.1,,
1976.1,,,43.7
...,...,...,...
2015.1,,,69.3
2015.2,,55.8,
2016,62.8,,
2016.1,,,69.7


In [23]:
del Ger_over_t['Female']
del Ger_over_t['Male']

In [26]:
Ger_over_t = Ger_over_t[pd.notnull(Ger_over_t['Both sexes'])]

In [28]:
Ger_over_t = Ger_over_t.rename(columns={"Both sexes": "Percentage Overweight"})
Ger_over_t

Gender,Percentage Overweight
Year,Unnamed: 1_level_1
1975,40.6
1976,41.1
1977,41.6
1978,42.1
1979,42.6
1980,43.1
1981,43.6
1982,44.1
1983,44.5
1984,45.0


In [49]:
Ger_over_t.reset_index(level=Ger_over_t.index.names, inplace=True)

In [56]:
c = Ger_over_t['Year'].astype('int64')

In [58]:
Ger_over_t['Year'] = c

In [59]:
Ger_over_t.dtypes

Gender
Year                      int64
Percentage Overweight    object
dtype: object

# Merge 'Ger_food_clean' and 'Ger_over_t'

In [61]:
#Ger_food_clean
#Ger_over_t

Ger_food_over = pd.merge(Ger_food_clean, Ger_over_t, how='inner', on=['Year'])

In [62]:
Ger_food_over

Unnamed: 0,Year,Total kcal Consumption,Animal Products,Alcoholic Beverages,Animal fats,"Fish, Seafood",Fruits,Meat,Milk,Sugar & Sweeteners,Vegetable Oils,Vegetables,Percentage Overweight
0,1975,3118,1046,281,348,27,127,373,220,390,269,41,40.6
1,1976,3190,1064,292,360,28,123,373,224,413,293,43,41.1
2,1977,3144,1044,289,342,26,117,377,219,391,292,45,41.6
3,1978,3258,1100,285,370,25,128,398,225,413,283,46,42.1
4,1979,3280,1119,292,377,26,127,404,233,420,279,47,42.6
5,1980,3278,1107,290,337,28,126,419,241,418,288,46,43.1
6,1981,3273,1098,300,330,27,104,419,240,424,304,49,43.6
7,1982,3337,1124,316,355,26,137,413,246,429,290,48,44.1
8,1983,3378,1184,306,396,27,119,408,270,424,285,47,44.5
9,1984,3391,1187,292,395,26,129,409,276,406,277,52,45.0
