## Contents




[Imports and loading data ](#01)

[Change value type](#02)

[Merge files](#03)

[Filter by protein foods](#04)

[Concatenate the data frames](#05)

[Save data](#06)






### Imports and loading data <a id='01'></a>

In [2]:
import pandas as pd

In [3]:
df1 = pd.read_excel('data/groups.xlsx')

In [4]:
df2 = pd.read_excel('data/detail_phase.xlsx')

In [5]:
df1 = df1.reindex(columns = ['Code/AGB', 'Food Group','Food Subgroup', 'LCI Name','Preparation'])

In [16]:
df2.head()

Unnamed: 0,Code AGB,Agriculture,Transformation,Emballage,Transport,Supermarché et distribution,Consommation,Total
0,11084,4.569259,1.492415,0.290252,0.384404,0.020135,0.009943,6.766407
1,11023,0.066043,0.0,0.47839,0.189268,0.015709,0.0,0.749409
2,11000,0.082554,0.0,0.0,0.173563,0.019636,0.08037,0.356122
3,11093,0.222514,0.0,0.329456,0.162968,0.019636,0.08037,0.814944
4,20995,4.569259,1.492415,0.290252,0.384404,0.020135,0.009943,6.766407


In [6]:
df1.rename(columns = {'Code/AGB' : 'Code'}, inplace=True)

In [7]:
df2.rename(columns={'Code AGB': 'Code', 'Emballage':'Packaging', 'Supermarché et distribution':'Supermarket_distribution', 
                    'Consommation': 'Consumption'}, inplace=True)

### Change value type   <a id='02'></a>

In [8]:
df1['Code']= df1['Code'].astype(int)

In [9]:
df2['Code']= df2['Code'].astype(int)

In [52]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2517 entries, 0 to 2516
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Code         2517 non-null   int32 
 1   Food Group   2517 non-null   object
 2   LCI Name     2517 non-null   object
 3   Preparation  2517 non-null   object
dtypes: int32(1), object(3)
memory usage: 69.0+ KB


In [53]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2517 entries, 0 to 2516
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Code                      2517 non-null   int32  
 1   Agriculture               2517 non-null   float64
 2   Transformation            2517 non-null   float64
 3   Packaging                 2517 non-null   float64
 4   Transport                 2517 non-null   float64
 5   Supermarket_distribution  2517 non-null   float64
 6   Consumption               2517 non-null   float64
 7   Total                     2517 non-null   float64
dtypes: float64(7), int32(1)
memory usage: 147.6 KB


### Merge the files  <a id='03'></a>

In [10]:
df = pd.merge(df1, df2, on = 'Code', how='outer')

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2518 entries, 0 to 2517
Data columns (total 11 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Code                      2518 non-null   int32  
 1   Food Group                2518 non-null   object 
 2   LCI Name                  2518 non-null   object 
 3   Preparation               2518 non-null   object 
 4   Agriculture               2517 non-null   float64
 5   Transformation            2517 non-null   float64
 6   Packaging                 2517 non-null   float64
 7   Transport                 2517 non-null   float64
 8   Supermarket_distribution  2517 non-null   float64
 9   Consumption               2517 non-null   float64
 10  Total                     2517 non-null   float64
dtypes: float64(7), int32(1), object(3)
memory usage: 206.7+ KB


In [17]:
df.head(5)

Unnamed: 0,Code,Food Group,Food Subgroup,LCI Name,Preparation,Agriculture,Transformation,Packaging,Transport,Supermarket_distribution,Consumption,Total
0,11084,culinary aids and miscellaneous ingredients,some,"Seaweed, agar, raw",No preparation,4.569259,1.492415,0.290252,0.384404,0.020135,0.009943,6.766407
1,11023,culinary aids and miscellaneous ingredients,herbs,"Garlic, powder, dried",No preparation,0.066043,0.0,0.47839,0.189268,0.015709,0.0,0.749409
2,11000,culinary aids and miscellaneous ingredients,herbs,"Garlic, fresh",No preparation,0.082554,0.0,0.0,0.173563,0.019636,0.08037,0.356122
3,11093,culinary aids and miscellaneous ingredients,herbs,"Dill, fresh",No preparation,0.222514,0.0,0.329456,0.162968,0.019636,0.08037,0.814944
4,20995,culinary aids and miscellaneous ingredients,some,"Sea lettuce (Enteromorpha sp.), dried or dehyd...",No preparation,4.569259,1.492415,0.290252,0.384404,0.020135,0.009943,6.766407


In [16]:
df.sort_values(by='Total', ascending = False).head(5)

Unnamed: 0,Code,Food Group,Food Subgroup,LCI Name,Preparation,Agriculture,Transformation,Packaging,Transport,Supermarket_distribution,Consumption,Total
1881,21501,"meat, eggs, fish",cooked meats,"Lamb cutlet, grilled",Pan,51.263481,0.085432,0.407383,0.321761,0.058419,0.191541,52.328017
1877,21509,"meat, eggs, fish",cooked meats,"Lamb, chop fillet, grilled/pan-fried",Pan,51.233349,0.085383,0.435297,0.32162,0.058385,0.19365,52.327682
1891,21520,"meat, eggs, fish",cooked meats,"Lamb, saddle, grilled/pan-fried",Pan,51.233349,0.085383,0.435297,0.32162,0.058385,0.19365,52.327682
1879,21512,"meat, eggs, fish",cooked meats,"Lamb, rib chop, grilled/pan-fried",Pan,51.233349,0.085383,0.435297,0.32162,0.058385,0.19365,52.327682
2013,21801,"meat, eggs, fish",cooked meats,"Young goat, cooked",Pan,51.233349,0.085383,0.435297,0.32162,0.058385,0.19365,52.327682


#### Clean merged file 

In [18]:
#Lowercase the columns names
df.columns = df.columns.str.lower()

In [19]:
#lowercase the entire dataframe 

df = df.applymap(lambda x: x.lower() if isinstance(x, str) else x)

In [20]:
#Replace the translation mistakes

df['preparation'] = df['preparation'].replace('four','oven')

### Filter by protein foods <a id='04'></a>

In [21]:
#Filter the raw products
meat_eggs_fish = df[(df['preparation'] =='no preparation') & (df['food group'] == 'meat, eggs, fish')]

#### Create a function to filter plant-based protein

In [22]:
#Filter food for more than one argument
def filter_veg_protein(*arg):
    
    food_name= arg[0]
    
    filter_condition = '|'.join(arg)
    
    grouped_food = df[df['lci name'].str.contains(filter_condition, case=False)]
    grouped_food['food group']=  food_name
    grouped_food.rename(columns = {'food group': 'product'}, inplace=True)
    grouped_food.sort_values(by='food subgroup', inplace=True)
    return grouped_food
    

#### Create a function to filter the animal protein

In [23]:
def filter_ani_protein(*args):   # *args syntax allow the function to accept a variable number of positional arguments
    
    # Create a filter condition using the OR (|) operator to match any of the food words
    filter_condition = '|'.join(args) 
    
    #Name of the group
    food_name = args[0]
    
    
    # Filter the DataFrame based on the multiple words in 'lci name' column
    grouped_food = meat_eggs_fish[meat_eggs_fish['lci name'].str.contains(filter_condition, case=False)]

    # Replace the 'food group' values with the input 'food' string
    grouped_food['food group'] = food_name
    grouped_food.rename(columns = {'food group' : 'product'}, inplace=True)
    grouped_food.sort_values(by='food subgroup', inplace = True)

    return grouped_food



In [24]:
#Beef
beef = filter_ani_protein('beef')
beef

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group'] = food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group' : 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace = True)


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
2397,30352,beef,deli meats,"salami, pork and beef",no preparation,5.151833,0.642781,0.098457,0.204987,0.037393,0.006688,6.142138
2194,30152,beef,deli meats,"merguez sausage, pure beef, raw",no preparation,29.803955,0.117971,0.277614,0.209765,0.037393,0.006688,30.453386
2193,30153,beef,deli meats,"merguez sausage, pork and beef, raw",no preparation,23.726495,0.515156,0.481567,0.21334,0.037393,0.006688,24.980639
2191,30154,beef,deli meats,"merguez sausage, beef, mutton and pork, raw",no preparation,31.371255,0.322086,0.378726,0.211707,0.037393,0.006688,32.327855
2189,30156,beef,deli meats,"merguez sausage, beef and mutton, raw",no preparation,38.887532,0.132261,0.277614,0.210101,0.037393,0.006688,39.551589
2008,30052,beef,deli meats,"sausage meat, pork and beef, raw",no preparation,4.36449,0.251793,0.23089,0.201298,0.036974,0.006688,5.092132
2105,6260,beef,other meat products,"burger, beef based, 15% fat, raw",no preparation,15.896356,0.276799,0.269242,0.208398,0.036974,0.006688,16.694457
1912,6231,beef,raw meats,"beef, stewing meat, raw",no preparation,27.221359,0.115697,0.275642,0.203483,0.036974,0.006688,27.859843
2379,40402,beef,raw meats,"kidney, beef, raw",no preparation,21.517062,0.052216,0.275642,0.200491,0.036974,0.006688,22.089073
2151,40202,beef,raw meats,"tongue, beef, raw",no preparation,21.517062,0.052216,0.275642,0.200491,0.036974,0.006688,22.089073


In [25]:
#Pork
pork = filter_ani_protein('pork')
pork

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group'] = food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group' : 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace = True)


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
2433,30302,pork,deli meats,"dry sausage, pure pork, superior quality",no preparation,5.098168,0.636085,0.098237,0.20333,0.036974,0.006688,6.079482
2286,28502,pork,deli meats,"pork belly, smoked, raw",no preparation,5.587654,0.276203,0.169777,0.204687,0.036974,0.006688,6.281983
2262,8300,pork,deli meats,"pork liver pã¢tã©, superior quality",no preparation,4.289173,0.536165,0.334557,0.12911,0.017321,0.006688,5.313014
2261,8305,pork,deli meats,pork liver pã¢tã©,no preparation,4.289173,0.536165,0.334557,0.12911,0.017321,0.006688,5.313014
2219,8312,pork,deli meats,"pork liver mousse, superior quality",no preparation,4.289173,0.536165,0.428418,0.208475,0.037393,0.006688,5.506312
2218,8313,pork,deli meats,pork liver mousse,no preparation,4.289173,0.536165,0.428418,0.208475,0.037393,0.006688,5.506312
2193,30153,pork,deli meats,"merguez sausage, pork and beef, raw",no preparation,23.726495,0.515156,0.481567,0.21334,0.037393,0.006688,24.980639
2191,30154,pork,deli meats,"merguez sausage, beef, mutton and pork, raw",no preparation,31.371255,0.322086,0.378726,0.211707,0.037393,0.006688,32.327855
2372,8000,pork,deli meats,"rillettes, pork",no preparation,4.289173,0.536165,0.334557,0.12911,0.017321,0.006688,5.313014
2371,8001,pork,deli meats,"rillettes, pure pork",no preparation,4.289173,0.536165,0.334557,0.12911,0.017321,0.006688,5.313014


In [26]:
#Chicken
chicken = filter_ani_protein('chicken')
chicken

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group'] = food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group' : 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace = True)


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
2140,28963,chicken,deli meats,"chicken cooked ham, in slices",no preparation,5.17823,1.079843,0.275642,0.243997,0.036974,0.006688,6.821373
2029,40054,chicken,raw meats,"heart, chicken, raw",no preparation,1.104531,0.219745,0.275642,0.200893,0.036974,0.006688,1.844473
2342,36029,chicken,raw meats,"chicken, breast, meat and skin, raw",no preparation,5.17823,1.079843,0.275642,0.243997,0.036974,0.006688,6.821373
2341,36022,chicken,raw meats,"chicken, drumstick, raw",no preparation,5.17823,1.030204,0.344553,0.291298,0.046218,0.08873,6.979233
2339,36019,chicken,raw meats,"chicken high leg, meat, raw",no preparation,5.17823,1.030204,0.344553,0.291298,0.046218,0.08873,6.979233
2337,36017,chicken,raw meats,"chicken, breast, without skin, raw",no preparation,5.17823,1.079843,0.275642,0.243997,0.036974,0.006688,6.821373
2334,36024,chicken,raw meats,"chicken, leg, meat, raw",no preparation,5.17823,1.030204,0.344553,0.291298,0.046218,0.08873,6.979233
2331,36002,chicken,raw meats,"chicken, leg, meat and skin, raw",no preparation,5.17823,1.030204,0.344553,0.291298,0.046218,0.08873,6.979233
2327,36023,chicken,raw meats,"chicken, wing, meat and skin, raw",no preparation,5.17823,1.030204,0.344553,0.291298,0.046218,0.08873,6.979233
2326,36008,chicken,raw meats,"chicken, free-range, meat and skin, raw",no preparation,5.17823,1.030204,0.344553,0.291298,0.046218,0.08873,6.979233


In [36]:
#Lamb
lamb = filter_ani_protein('lamb')
lamb

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group'] = food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group' : 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace = True)


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
1875,21514,lamb,raw meats,"lamb, neck, raw",no preparation,40.546904,0.066555,0.344553,0.253988,0.046218,0.08873,41.346947
1876,21516,lamb,raw meats,"lamb, chop fillet, raw",no preparation,40.546904,0.066555,0.344553,0.253988,0.046218,0.08873,41.346947
1878,21517,lamb,raw meats,"lamb, rib chop, raw",no preparation,40.546904,0.066555,0.344553,0.253988,0.046218,0.08873,41.346947
1880,21500,lamb,raw meats,"lamb, cutlet, raw",no preparation,40.546904,0.066555,0.322472,0.253951,0.046218,0.087058,41.323157
1882,21504,lamb,raw meats,"lamb, shoulder, raw",no preparation,32.437523,0.053244,0.275642,0.20319,0.036974,0.006688,33.013261
1883,21505,lamb,raw meats,"lamb, shoulder, lean, raw",no preparation,32.437523,0.053244,0.275642,0.20319,0.036974,0.006688,33.013261
1887,21502,lamb,raw meats,"lamb, leg, raw",no preparation,40.546904,0.066555,0.344553,0.253988,0.046218,0.08873,41.346947
1890,21515,lamb,raw meats,"lamb, saddle, raw",no preparation,40.546904,0.066555,0.344553,0.253988,0.046218,0.08873,41.346947
2001,40002,lamb,raw meats,"brain, lamb, raw",no preparation,21.06315,0.034574,0.275642,0.198287,0.036974,0.006688,21.615315
2022,40058,lamb,raw meats,"heart, lamb, raw",no preparation,21.06315,0.034574,0.275642,0.198287,0.036974,0.006688,21.615315


In [27]:
#Shrimp
shrimp = filter_ani_protein('shrimp')
shrimp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group'] = food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group' : 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace = True)


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
2047,10007,shrimp,cooked shellfish,"shrimp or prawn, cooked",no preparation,5.416518,0.0,0.612538,1.122583,0.082165,0.407783,7.641588
2045,10059,shrimp,raw shellfish,"deep water pink shrimp, raw",no preparation,7.203969,1.169571,0.612538,1.354284,0.082165,0.407783,10.830311
2046,10021,shrimp,raw shellfish,"shrimp or prawn, raw",no preparation,5.416518,0.0,0.612538,1.122583,0.082165,0.407783,7.641588
2048,10038,shrimp,raw shellfish,"shrimp, frozen, raw",no preparation,7.203969,1.169571,0.598185,1.354284,0.147055,0.449154,10.922219


In [28]:
#Fish
fish = df[df['food subgroup'] == 'raw fish']
fish.rename(columns = {'food group': 'product'}, inplace =True)
fish['product'] = 'fish'
fish

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fish.rename(columns = {'food group': 'product'}, inplace =True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fish['product'] = 'fish'


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
1894,26079,fish,raw fish,"common anchovy, raw",no preparation,0.923344,0.0,0.275642,0.905557,0.036974,0.006688,2.148205
1905,26206,fish,raw fish,"mediterranean bass, raw, farmed",no preparation,11.359006,0.0,0.275642,1.002264,0.036974,0.006688,12.680574
1906,26205,fish,raw fish,"mediterranean bass, raw, wild",no preparation,7.722585,0.0,0.275642,1.002264,0.036974,0.006688,9.044153
1907,26072,fish,raw fish,"european bass, raw",no preparation,11.359006,0.0,0.275642,1.002264,0.036974,0.006688,12.680574
1909,26075,fish,raw fish,"atlantic bass, raw",no preparation,11.359006,0.0,0.275642,1.002264,0.036974,0.006688,12.680574
...,...,...,...,...,...,...,...,...,...,...,...,...
2482,27021,fish,raw fish,"salmon trout, raw",no preparation,5.415570,0.0,0.275642,0.905557,0.036974,0.006688,6.640431
2485,26201,fish,raw fish,"turbot, raw, farmed",no preparation,11.359006,0.0,0.275642,1.002264,0.036974,0.006688,12.680574
2486,26042,fish,raw fish,"turbot, raw, wild",no preparation,7.722585,0.0,0.275642,1.002264,0.036974,0.006688,9.044153
2487,26174,fish,raw fish,"turbot, raw",no preparation,11.359006,0.0,0.275642,1.002264,0.036974,0.006688,12.680574


In [29]:
#Tofu
tofu = filter_veg_protein('tofu')
tofu

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group']=  food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group': 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace=True)


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
2423,20337,tofu,deli substitutes,plant-based sausage with tofu (vegan),pan,0.365504,0.377587,0.233504,0.222821,0.040314,0.088612,1.328341
2473,20912,tofu,meat substitutes,"tofu, smoked",pan,0.01585,0.769007,0.103216,0.167634,0.031912,0.086851,1.17447
705,20904,tofu,mixed dishes,"tofu, plain",microwave,0.083404,0.14573,0.185746,0.195438,0.037393,0.017733,0.665444


In [30]:
#Beans
beans = filter_veg_protein('kidney bean','soybean','chickpeas', 'peas, raw') 
beans.rename(columns = {'food group': 'product'}, inplace=True)
beans['product'] = 'beans'
beans

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group']=  food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group': 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
1360,1027,beans,cheeses,"plant-based spread-cheese type, with soybean, ...",no preparation,0.379477,1.700989,0.180553,0.418325,0.035574,0.006688,2.721605
1361,10291,beans,cheeses,"plant-based cheese, without soybean, prepacked...",no preparation,0.5217,2.099382,0.262661,0.434763,0.035574,0.006688,3.360768
1362,10292,beans,cheeses,"plant-based cheese, without soybean, prepacked...",no preparation,0.5217,2.112212,0.262661,0.434763,0.035574,0.006688,3.373598
907,20524,beans,legumes,"red kidney bean, canned, drained",microwave,0.485741,0.197217,0.281,0.199386,0.025346,0.011046,1.199736
908,20503,beans,legumes,"red kidney bean, cooked",microwave,0.210298,0.0,0.113903,0.056411,0.027863,0.011046,0.419522
909,20525,beans,legumes,"red kidney bean, dried",no preparation,0.490004,0.0,0.265399,0.131441,0.064923,0.0,0.951768
1113,20901,beans,nuts and oilseeds,"soybean, whole grain",no preparation,0.923293,0.007537,0.265399,0.249871,0.022707,0.0,1.468806
1016,20072,beans,vegetables,"garden peas, raw",no preparation,0.426621,0.0,0.0,0.219369,0.02019,0.0,0.666181
435,25589,beans,vegetarian dishes,"plant-based ball with wheat and/or soybean, pr...",pan,0.317053,0.921851,0.233503,0.229732,0.040314,0.088612,1.831065
498,25234,beans,vegetarian dishes,cereal patty (without soybean),pan,0.743013,0.39301,0.130234,0.151722,0.021356,0.080163,1.519498


In [31]:
#Nuts
nuts = filter_veg_protein('grains/nuts', 'cashew')
nuts.rename(columns = {'food group': 'product'}, inplace=True)
nuts['product'] = 'nuts'
nuts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group']=  food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group': 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
964,15048,nuts,nuts and oilseeds,mix of unsalted grains/nuts and dried fruit,no preparation,1.257756,0.085367,0.281544,0.258313,0.026315,0.0,1.909295
965,15049,nuts,nuts and oilseeds,mix of unsalted grains/nuts and raisins,no preparation,1.257756,0.085367,0.281544,0.258313,0.026315,0.0,1.909295
966,15018,nuts,nuts and oilseeds,mix of salted grains/nuts and raisins,no preparation,1.257756,0.085367,0.281544,0.258313,0.026315,0.0,1.909295
982,15019,nuts,nuts and oilseeds,"cashew nut, grilled, salted",no preparation,2.597034,0.194501,0.349166,0.452655,0.022707,0.0,3.616063


In [33]:
#Grains
grains = filter_veg_protein('amaranth, raw','quinoa, raw', 'wild rice, raw', 'oat, raw')
grains.rename(columns = {'food group': 'product'}, inplace=True)
grains['product'] = 'grains'
grains = grains.drop(2012, axis=0)
grains

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food['food group']=  food_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.rename(columns = {'food group': 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_food.sort_values(by='food subgroup', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
1444,9345,grains,"pasta, rice and cereals","amaranth, raw",no preparation,0.745474,0.0,0.265399,0.131441,0.016099,0.0,1.158414
1446,9310,grains,"pasta, rice and cereals","oat, raw",no preparation,0.740506,0.0,0.265399,0.131441,0.016099,0.0,1.153446
1779,93401,grains,"pasta, rice and cereals","quinoa, raw",no preparation,5.434591,0.0,0.265399,0.354844,0.016099,0.0,6.070934
1788,9108,grains,"pasta, rice and cereals","wild rice, raw",no preparation,2.158403,0.0,0.265399,0.315158,0.01882,0.0,2.75778


In [34]:
#Cheese
cheese = df[df['food subgroup']=='cheeses']
cheese.rename(columns = {'food group': 'product'}, inplace=True)
cheese['product'] = 'cheese'
cheese

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cheese.rename(columns = {'food group': 'product'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cheese['product'] = 'cheese'


Unnamed: 0,code,product,food subgroup,lci name,preparation,agriculture,transformation,packaging,transport,supermarket_distribution,consumption,total
1165,12112,cheese,cheeses,"abondance cheese, from cow's milk",no preparation,5.118224,0.405349,0.262661,0.211504,0.035574,0.006688,6.039999
1166,12761,cheese,cheeses,"asiago cheese, from cow's milk",no preparation,5.025375,0.414813,0.262661,0.211504,0.035574,0.006688,5.956614
1167,12105,cheese,cheeses,"beaufort cheese, from cow's milk",no preparation,5.249288,0.392732,0.262661,0.211504,0.035574,0.006688,6.158445
1168,12526,cheese,cheeses,"gex blue cheese, or jura blue cheese or septmo...",no preparation,4.800835,0.243360,0.262661,0.211504,0.035574,0.006688,5.560622
1172,12021,cheese,cheeses,"brie de meaux cheese, from cow's milk",no preparation,4.321164,0.270201,0.262661,0.211504,0.035574,0.006688,5.107792
...,...,...,...,...,...,...,...,...,...,...,...,...
1364,12763,cheese,cheeses,tomme cheese (pdo) from the french bauges munn...,no preparation,5.008959,0.231282,0.262661,0.211504,0.035574,0.006688,5.756667
1365,12759,cheese,cheeses,"tomme cheese, from mountain or savoy",no preparation,4.928558,0.235755,0.262661,0.211504,0.035574,0.006688,5.680739
1366,12758,cheese,cheeses,"tomme cheese, from cow's milk",no preparation,4.975453,0.233518,0.262661,0.211504,0.035574,0.006688,5.725397
1367,12760,cheese,cheeses,"tomme cheese, reduced fat, around 13% fat",no preparation,4.928558,0.235755,0.262661,0.211504,0.035574,0.006688,5.680739


### Concat the data frames  <a id='05'></a>

In [37]:
protein_products = pd.concat([beef, lamb,  pork, chicken, fish, shrimp,tofu, beans, nuts, grains, cheese ], axis= 0, ignore_index=True)

### Save data file  <a id='06'></a>

In [38]:
protein_products.to_csv('./data/protein_foods.csv', index=False)