In [1]:
import pandas as pd
import numpy as np

In [2]:
df_raw = pd.read_excel("Table Ciqual 2020_ENG_2020 07 07.xls", decimal=",")

In [3]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3186 entries, 0 to 3185
Data columns (total 76 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   alim_grp_code                                       3186 non-null   int64 
 1   alim_ssgrp_code                                     3186 non-null   int64 
 2   alim_ssssgrp_code                                   3186 non-null   int64 
 3   alim_grp_nom_eng                                    3141 non-null   object
 4   alim_ssgrp_nom_eng                                  3141 non-null   object
 5   alim_ssssgrp_nom_eng                                3141 non-null   object
 6   alim_code                                           3186 non-null   int64 
 7   alim_nom_eng                                        3186 non-null   object
 8   alim_nom_sci                                        270 non-null    object
 9   Energy, 

In [4]:
df_selected = df_raw[["alim_grp_nom_eng", "alim_ssgrp_nom_eng","alim_ssssgrp_nom_eng", "alim_nom_eng", "Energy, N x Jones' factor, with fibres (kcal/100g)", 
                      "Protein (g/100g)", "Carbohydrate (g/100g)", "Fat (g/100g)", 'Fibres (g/100g)',
                      'FA 18:2 9c,12c (n-6) (g/100g)', 'FA 18:3 c9,c12,c15 (n-3) (g/100g)', 'FA 20:5 5c,8c,11c,14c,17c (n-3) EPA (g/100g)', 'FA 22:6 4c,7c,10c,13c,16c,19c (n-3) DHA (g/100g)',
                      'Salt (g/100g)', 'Calcium (mg/100g)',
       'Chloride (mg/100g)', 'Copper (mg/100g)', 'Iron (mg/100g)',
       'Iodine (µg/100g)', 'Magnesium (mg/100g)', 'Manganese (mg/100g)',
       'Phosphorus (mg/100g)', 'Potassium (mg/100g)', 'Selenium (µg/100g)',
       'Sodium (mg/100g)', 'Zinc (mg/100g)', 'Retinol (µg/100g)',
       'Beta-carotene (µg/100g)', 'Vitamin D (µg/100g)', 'Vitamin E (mg/100g)',
       'Vitamin K1 (µg/100g)', 'Vitamin K2 (µg/100g)', 'Vitamin C (mg/100g)',
       'Vitamin B1 or Thiamin (mg/100g)', 'Vitamin B2 or Riboflavin (mg/100g)',
       'Vitamin B3 or Niacin (mg/100g)',
       'Vitamin B5 or Pantothenic acid (mg/100g)', 'Vitamin B6 (mg/100g)',
       'Vitamin B9 or Folate (µg/100g)', 'Vitamin B12 (µg/100g)']]

In [5]:
df_selected.columns

Index(['alim_grp_nom_eng', 'alim_ssgrp_nom_eng', 'alim_ssssgrp_nom_eng',
       'alim_nom_eng', 'Energy, N x Jones' factor, with fibres (kcal/100g)',
       'Protein (g/100g)', 'Carbohydrate (g/100g)', 'Fat (g/100g)',
       'Fibres (g/100g)', 'FA 18:2 9c,12c (n-6) (g/100g)',
       'FA 18:3 c9,c12,c15 (n-3) (g/100g)',
       'FA 20:5 5c,8c,11c,14c,17c (n-3) EPA (g/100g)',
       'FA 22:6 4c,7c,10c,13c,16c,19c (n-3) DHA (g/100g)', 'Salt (g/100g)',
       'Calcium (mg/100g)', 'Chloride (mg/100g)', 'Copper (mg/100g)',
       'Iron (mg/100g)', 'Iodine (µg/100g)', 'Magnesium (mg/100g)',
       'Manganese (mg/100g)', 'Phosphorus (mg/100g)', 'Potassium (mg/100g)',
       'Selenium (µg/100g)', 'Sodium (mg/100g)', 'Zinc (mg/100g)',
       'Retinol (µg/100g)', 'Beta-carotene (µg/100g)', 'Vitamin D (µg/100g)',
       'Vitamin E (mg/100g)', 'Vitamin K1 (µg/100g)', 'Vitamin K2 (µg/100g)',
       'Vitamin C (mg/100g)', 'Vitamin B1 or Thiamin (mg/100g)',
       'Vitamin B2 or Riboflavin (mg/100g)', 

In [6]:
# remove values not available at https://kwk.blv.admin.ch/naehrstofftabelle-de/nutrientsOverview.php?categoryId=18years&genderId=male
# fix data issues
# set nans and empty values to 0.0
df_reduced = df_selected.drop(["Salt (g/100g)", "Vitamin K2 (µg/100g)"], axis=1).apply(lambda x: x.str.replace(',','.').str.strip("< > + -").replace("traces", "").replace("", "0.0").replace(np.nan, "0.0"))

In [7]:
# compute Vitamin A content
df_reduced["Vitamin A (µg/100g)"] = df_reduced["Retinol (µg/100g)"].astype("float") + 1/6 * df_reduced["Beta-carotene (µg/100g)"].astype("float")
df_vitA = df_reduced.drop(["Retinol (µg/100g)", "Beta-carotene (µg/100g)"], axis=1)

In [8]:
df_final = df_vitA.rename(columns={"alim_grp_nom_eng": "Group", 
                                   "alim_ssgrp_nom_eng": "Subgroup", 
                                   "alim_ssssgrp_nom_eng": "Subsubgroup",
                                   "alim_nom_eng": "Name", 
                                   "Energy, N x Jones' factor, with fibres (kcal/100g)": "Energy (kcal/100g)", 
                                   "FA 18:2 9c,12c (n-6) (g/100g)" : "LA (g/100g)", 
                                   "FA 18:3 c9,c12,c15 (n-3) (g/100g)" : "ALA (g/100g)",
                                   "FA 20:5 5c,8c,11c,14c,17c (n-3) EPA (g/100g)" : "EPA (g/100g)",
                                   "FA 22:6 4c,7c,10c,13c,16c,19c (n-3) DHA (g/100g)" : "DHA (g/100g)"})

In [9]:
df_final.head()

Unnamed: 0,Group,Subgroup,Subsubgroup,Name,Energy (kcal/100g),Protein (g/100g),Carbohydrate (g/100g),Fat (g/100g),Fibres (g/100g),LA (g/100g),...,Vitamin K1 (µg/100g),Vitamin C (mg/100g),Vitamin B1 or Thiamin (mg/100g),Vitamin B2 or Riboflavin (mg/100g),Vitamin B3 or Niacin (mg/100g),Vitamin B5 or Pantothenic acid (mg/100g),Vitamin B6 (mg/100g),Vitamin B9 or Folate (µg/100g),Vitamin B12 (µg/100g),Vitamin A (µg/100g)
0,0.0,0.0,0.0,Desert (average),0.0,4.63,36.6,12.9,1.54,1.32,...,0.0,1.37,0.084,0.15,0.61,0.4,0.056,30.8,0.21,62.066667
1,starters and dishes,mixed salads,0.0,Prepared mixed tuna and vegetable salad. canned,0.0,9.15,7.74,4.7,2.7,1.15,...,0.0,2.75,0.04,0.053,4.45,0.16,0.29,31.0,1.45,2.0
2,starters and dishes,mixed salads,0.0,Prepared mixed meat/fish canned. salad,0.0,8.06,6.4,5.3,2.0,1.08,...,9.75,0.0,0.032,0.022,4.13,0.2,0.12,11.1,1.23,151.166667
3,starters and dishes,mixed salads,0.0,Greek-style marinated mushrooms. prepacked,0.0,2.08,3.95,3.55,2.35,0.0,...,0.0,6.67,0.056,0.21,1.84,0.88,0.088,19.6,0.018,124.5
4,starters and dishes,mixed salads,0.0,Prepared potatoes salad. home-made,0.0,2.68,9.9,8.2,1.3,0.0,...,0.0,10.0,0.077,0.06,0.89,0.53,0.14,7.0,0.0,35.0


In [10]:
df_final.to_csv("ciqual_2020.csv", header=True, index=False)

In [11]:
df_empty = df_final[0:0]

In [12]:
df_empty = df_empty.drop(["Group", "Subgroup", "Subsubgroup", "Name"], axis=1)

In [13]:
df_empty.columns

Index(['Energy (kcal/100g)', 'Protein (g/100g)', 'Carbohydrate (g/100g)',
       'Fat (g/100g)', 'Fibres (g/100g)', 'LA (g/100g)', 'ALA (g/100g)',
       'EPA (g/100g)', 'DHA (g/100g)', 'Calcium (mg/100g)',
       'Chloride (mg/100g)', 'Copper (mg/100g)', 'Iron (mg/100g)',
       'Iodine (µg/100g)', 'Magnesium (mg/100g)', 'Manganese (mg/100g)',
       'Phosphorus (mg/100g)', 'Potassium (mg/100g)', 'Selenium (µg/100g)',
       'Sodium (mg/100g)', 'Zinc (mg/100g)', 'Vitamin D (µg/100g)',
       'Vitamin E (mg/100g)', 'Vitamin K1 (µg/100g)', 'Vitamin C (mg/100g)',
       'Vitamin B1 or Thiamin (mg/100g)', 'Vitamin B2 or Riboflavin (mg/100g)',
       'Vitamin B3 or Niacin (mg/100g)',
       'Vitamin B5 or Pantothenic acid (mg/100g)', 'Vitamin B6 (mg/100g)',
       'Vitamin B9 or Folate (µg/100g)', 'Vitamin B12 (µg/100g)',
       'Vitamin A (µg/100g)'],
      dtype='object')

In [14]:
# rename columns to hold daily amount of nutrient
df_empty=df_empty.rename(columns=lambda x: x.replace("100g","d"))

In [15]:
df_empty.columns

Index(['Energy (kcal/d)', 'Protein (g/d)', 'Carbohydrate (g/d)', 'Fat (g/d)',
       'Fibres (g/d)', 'LA (g/d)', 'ALA (g/d)', 'EPA (g/d)', 'DHA (g/d)',
       'Calcium (mg/d)', 'Chloride (mg/d)', 'Copper (mg/d)', 'Iron (mg/d)',
       'Iodine (µg/d)', 'Magnesium (mg/d)', 'Manganese (mg/d)',
       'Phosphorus (mg/d)', 'Potassium (mg/d)', 'Selenium (µg/d)',
       'Sodium (mg/d)', 'Zinc (mg/d)', 'Vitamin D (µg/d)', 'Vitamin E (mg/d)',
       'Vitamin K1 (µg/d)', 'Vitamin C (mg/d)', 'Vitamin B1 or Thiamin (mg/d)',
       'Vitamin B2 or Riboflavin (mg/d)', 'Vitamin B3 or Niacin (mg/d)',
       'Vitamin B5 or Pantothenic acid (mg/d)', 'Vitamin B6 (mg/d)',
       'Vitamin B9 or Folate (µg/d)', 'Vitamin B12 (µg/d)',
       'Vitamin A (µg/d)'],
      dtype='object')

In [16]:
# taken from https://www.kalorienbedarf.de/mann/
# (male, 40 years, 80 kg, 180)
kcal = 2500
MJ = 0.0041868 * kcal

carbs_kcal_per_gr = 4
protein_kcal_per_gr = 4
fat_kcal_per_gr = 9

body_weight_kg = 80

# compute percentage of calories from protein
1.2*body_weight_kg*protein_kcal_per_gr/kcal

0.1536

In [17]:
# taken from https://kwk.blv.admin.ch/naehrstofftabelle-de/nutrientsOverview.php?categoryId=18years&genderId=male
# protein taken from https://examine.com/guides/protein-intake/
# 80kg male

df_row = pd.DataFrame([{'Energy (kcal/d)' : kcal, 
 'Protein (g/d)': 1.2*body_weight_kg, 
 'Carbohydrate (g/d)': 0.45*kcal/carbs_kcal_per_gr, 
 'Fat (g/d)': 0.35*kcal/fat_kcal_per_gr,
 'Fibres (g/d)': 30.0,
 'LA (g/d)': 0.04*kcal/fat_kcal_per_gr , 
 'ALA (g/d)': 0.005*kcal/fat_kcal_per_gr , 
 'EPA (g/d)': 0.125, 
 'DHA (g/d)': 0.125,                     
 'Calcium (mg/d)': 1000.0, 
 'Chloride (mg/d)': 3100.0, 
 'Copper (mg/d)': 1.6, 
 'Iron (mg/d)': 11.0,
 'Iodine (µg/d)': 150.0, 
 'Magnesium (mg/d)': 350.0, 
 'Manganese (mg/d)': 3.0,
 'Phosphorus (mg/d)': 550.0, 
 'Potassium (mg/d)': 3500.0, 
 'Selenium (µg/d)': 70.0,
 'Sodium (mg/d)': 2000.0, 
 'Zinc (mg/d)': 10.0, 
 'Vitamin D (µg/d)': 15.0, 
 'Vitamin E (mg/d)': 13.0,
 'Vitamin K1 (µg/d)': 70.0, 
 'Vitamin C (mg/d)': 110.0, 
 'Vitamin B1 or Thiamin (mg/d)': 0.1*MJ,
 'Vitamin B2 or Riboflavin (mg/d)': 1.6, 
 'Vitamin B3 or Niacin (mg/d)': 1.6*MJ,
 'Vitamin B5 or Pantothenic acid (mg/d)': 5.0, 
 'Vitamin B6 (mg/d)': 1.7,
 'Vitamin B9 or Folate (µg/d)': 330.0, 
 'Vitamin B12 (µg/d)': 4.0,
 'Vitamin A (µg/d)': 750.0}])

df_RDI = pd.concat([df_empty, df_row])

In [18]:
df_RDI.to_csv("rdi.csv", header=True, index=False)