<a href="https://colab.research.google.com/github/diegobarbo/pandasDataFrames/blob/main/3_Working_With_DataFrames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.__version__

'1.3.5'

# 1. What Is A DataFrame?

In [3]:
# some python lists
names = ['Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda']
ages = [29, 21, 45, 23, 39, 46]
married = [False, True, True, True, False, True]

In [4]:
# pandas series
ser = pd.Series(names, name='name')

In [5]:
# pandas dataframe
df = pd.DataFrame({'name': names, 'age': ages, 'married': married})


In [6]:
df

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [7]:
ser

0       Olga
1     Andrew
2      Brian
3    Telulah
4     Nicole
5      Tilda
Name: name, dtype: object

In [8]:
ser.iloc[2]

'Brian'

In [9]:
df.iloc[2,0]

'Brian'

In [10]:
df.iloc[2]

name       Brian
age           45
married     True
Name: 2, dtype: object

In [11]:
# .ndim

In [12]:
ser.ndim

1

In [13]:
df.ndim

2

In [14]:
# .shape

In [15]:
ser.shape

(6,)

In [16]:
df.shape

(6, 3)

In [17]:
# word of the lecture: collate - to collect in a given (or proper) order

In [18]:
ser

0       Olga
1     Andrew
2      Brian
3    Telulah
4     Nicole
5      Tilda
Name: name, dtype: object

In [19]:
df.name

0       Olga
1     Andrew
2      Brian
3    Telulah
4     Nicole
5      Tilda
Name: name, dtype: object

In [20]:
df.dtypes

name       object
age         int64
married      bool
dtype: object

# 2. Creating A DataFrame

In [21]:
df

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [22]:
names

['Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda']

In [23]:
ages

[29, 21, 45, 23, 39, 46]

In [24]:
married

[False, True, True, True, False, True]

In [25]:
pd.DataFrame({'name': names, 'age': ages, 'married': married})

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


# 3. BONUS - Four More Ways To Build DataFrames

In [26]:
# 1 - dict of tuples
tuple_names = tuple(names)
tuple_ages = tuple(ages)
tuple_married = tuple(married)

In [27]:
tuple_names

('Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda')

In [28]:
names

['Olga', 'Andrew', 'Brian', 'Telulah', 'Nicole', 'Tilda']

In [29]:
# Build a DataFrame from a dict whose values are tuples instead of lists.
# NOTE(review): the column key here is 'ages' (plural), while df above uses
# 'age' — confirm which spelling is intended.
pd.DataFrame({
    'name': tuple_names,
    'ages': tuple_ages,
    'married': tuple_married
})

Unnamed: 0,name,ages,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [30]:
# 2 - dict of series

In [31]:
series_names = pd.Series(names)

In [32]:
series_ages = pd.Series(ages)

In [33]:
series_married = pd.Series(married)

In [34]:
pd.DataFrame({'name': series_names,
              'ages': series_ages,
              'married': series_married})

Unnamed: 0,name,ages,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [35]:
pd.DataFrame({'names': ['Olga', 'Andrew']})

Unnamed: 0,names
0,Olga
1,Andrew


In [36]:
# 3 - dict of dicts: the keys of the inner dict (0, 1) become the row labels
pd.DataFrame({'names': {0:'Olga', 1:'Andrew'}})

Unnamed: 0,names
0,Olga
1,Andrew


In [37]:
# the enumerate function

In [38]:
enumerate(names)

<enumerate at 0x7ff20118fd20>

In [39]:
list(enumerate(names))

[(0, 'Olga'),
 (1, 'Andrew'),
 (2, 'Brian'),
 (3, 'Telulah'),
 (4, 'Nicole'),
 (5, 'Tilda')]

In [40]:
dict_names = {k:v for k,v in enumerate(names)}

In [41]:
dict_names

{0: 'Olga', 1: 'Andrew', 2: 'Brian', 3: 'Telulah', 4: 'Nicole', 5: 'Tilda'}

In [42]:
dict_ages = {k:v for k,v in enumerate(ages)}
dict_married = {k:v for k,v in enumerate(married)}

In [43]:
def convert_list_to_dict(l):
    """Return a dict mapping each position in ``l`` to the element at that position.

    Parameters
    ----------
    l : iterable
        Values to index; positions start at 0.

    Returns
    -------
    dict
        ``{0: first, 1: second, ...}`` in iteration order; an empty input
        gives ``{}``.
    """
    # dict() consumes the (index, value) pairs produced by enumerate() directly,
    # so no identity comprehension is needed.
    return dict(enumerate(l))

In [44]:
dict_names = convert_list_to_dict(names)

In [45]:
dict_names

{0: 'Olga', 1: 'Andrew', 2: 'Brian', 3: 'Telulah', 4: 'Nicole', 5: 'Tilda'}

In [46]:
dict_ages = convert_list_to_dict(ages)
dict_married = convert_list_to_dict(married)

In [47]:
dict_ages

{0: 29, 1: 21, 2: 45, 3: 23, 4: 39, 5: 46}

In [48]:
dict_married

{0: False, 1: True, 2: True, 3: True, 4: False, 5: True}

In [49]:
pd.DataFrame({'name': dict_names,
              'ages': dict_ages,
              'married': dict_married})

Unnamed: 0,name,ages,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


In [50]:
# 4 - list of dicts

In [51]:
pd.DataFrame([{
    'name': 'Olga',
    'age': 29,
    'married': False
}])

Unnamed: 0,name,age,married
0,Olga,29,False


In [52]:
# the zip function

In [53]:
list(zip(names, ages, married))

[('Olga', 29, False),
 ('Andrew', 21, True),
 ('Brian', 45, True),
 ('Telulah', 23, True),
 ('Nicole', 39, False),
 ('Tilda', 46, True)]

In [54]:
# Build one dict per row. The loop targets are singular (age, is_married) so
# they do not shadow the `ages` and `married` lists that are being zipped.
rowwise = [
    {'name': name, 'age': age, 'married': is_married}
    for name, age, is_married in zip(names, ages, married)
]

In [55]:
pd.DataFrame(rowwise)

Unnamed: 0,name,age,married
0,Olga,29,False
1,Andrew,21,True
2,Brian,45,True
3,Telulah,23,True
4,Nicole,39,False
5,Tilda,46,True


# 4. The info() Method

In [56]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [57]:
df.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Columns: 3 entries, name to married
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [58]:
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [59]:
df.info(max_cols=2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Columns: 3 entries, name to married
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [60]:
df.info(memory_usage=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)

In [61]:
df.info(memory_usage=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [62]:
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 557.0 bytes


# 5. Reading In Nutrition Data

In [63]:
dataurl = 'https://andybek.com/pandas-nutrition'

In [64]:
nutrition = pd.read_csv(dataurl)

In [65]:
nutrition.head(10)

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
5,5,"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
6,6,"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
7,7,"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
8,8,"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
9,9,Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [66]:
nutrition.info(verbose=False, memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Columns: 77 entries, Unnamed: 0 to water
dtypes: int64(3), object(74)
memory usage: 39.2 MB


# 6. Some Cleanup: Removing The Duplicated Index

In [67]:
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [68]:
nutrition['Unnamed: 0']

0          0
1          1
2          2
3          3
4          4
        ... 
8784    8784
8785    8785
8786    8786
8787    8787
8788    8788
Name: Unnamed: 0, Length: 8789, dtype: int64

In [69]:
# drop() returns a new DataFrame without the column; nutrition itself is not
# modified (a later nutrition.head() still shows 'Unnamed: 0').
nutrition.drop('Unnamed: 0', axis=1)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [70]:
# set_index() returns a new DataFrame indexed by 'Unnamed: 0'; the result is
# only displayed here, so nutrition keeps its original index and columns.
nutrition.set_index('Unnamed: 0')

Unnamed: 0_level_0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [71]:
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [72]:
nutrition = pd.read_csv(dataurl, index_col=[0])

In [73]:
nutrition.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


# 7. The sample() Method

In [74]:
nutrition.sample()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
2851,"Egg custards, prepared with 2% milk, dry mix",100 g,112,2.8g,1.5g,49mg,87.00 mg,13.5 mg,8.00 mcg,0.00 mcg,...,2.83 g,1.475 g,0.896 g,0.201 g,49.00 mg,0.0 g,1.00 g,0.00 mg,0.00 mg,74.44 g


In [75]:
nutrition.sample(random_state=12)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
3713,"Thuringer, pork, beef, summer sausage, cervelat",100 g,362,30g,12g,74mg,1300.00 mg,78.9 mg,2.00 mcg,0.00 mcg,...,30.43 g,11.510 g,12.970 g,1.200 g,74.00 mg,0.0 g,3.63 g,0.00 mg,0.00 mg,45.18 g


In [76]:
nutrition.sample(n=3)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
1286,"Guavas, raw, strawberry",100 g,69,0.6g,0.2g,0,37.00 mg,0,0,0,...,0.60 g,0.172 g,0.055 g,0.253 g,0.00 mg,0,0.80 g,0,0,80.66 g
38,"Currants, dried, zante",100 g,283,0.3g,,0,8.00 mg,10.6 mg,10.00 mcg,0.00 mcg,...,0.27 g,0.028 g,0.047 g,0.180 g,0.00 mg,0.0 g,2.36 g,0.00 mg,0.00 mg,19.21 g
2417,"Apples, unheated, unsweetened, frozen",100 g,48,0.3g,0.1g,0,3.00 mg,0,1.00 mcg,0.00 mcg,...,0.32 g,0.053 g,0.013 g,0.095 g,0.00 mg,0,0.24 g,0,0,86.85 g


In [77]:
nutrition.sample(frac=0.01)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
5838,"Beverages, Ready-to-Drink, Nutritional Shake, ...",100 g,105,2.5g,0.4g,2mg,84.00 mg,32.4 mg,42.00 mcg,42.00 mcg,...,2.53 g,0.422 g,0.844 g,1.266 g,2.00 mg,0.0 g,0.60 g,0.00 mg,0.00 mg,76.20 g
8700,"Beef, raw, choice, trimmed to 1/8"" fat, separa...",100 g,260,21g,9.3g,70mg,57.00 mg,44.1 mg,3.00 mcg,0.00 mcg,...,20.96 g,9.265 g,9.929 g,0.921 g,70.00 mg,0.0 g,0.84 g,0.00 mg,0.00 mg,60.35 g
531,"Apricots, raw",100 g,48,0.4g,,0,1.00 mg,2.8 mg,9.00 mcg,0.00 mcg,...,0.39 g,0.027 g,0.170 g,0.077 g,0.00 mg,0.0 g,0.75 g,0.00 mg,0.00 mg,86.35 g
5655,"Beans, solids and liquids, no salt added, cann...",100 g,15,0.1g,,0,14.00 mg,8.3 mg,18.00 mcg,0.00 mcg,...,0.10 g,0.023 g,0.004 g,0.052 g,0.00 mg,0.0 g,0.92 g,0.00 mg,0.00 mg,94.68 g
2219,"Candies, milk chocolate coated raisins",100 g,390,15g,10g,3mg,36.00 mg,26.3 mg,8.00 mcg,0.00 mcg,...,14.80 g,10.272 g,3.134 g,0.571 g,3.00 mg,0.0 g,1.50 g,10.00 mg,122.00 mg,11.20 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1313,"Hearts of palm, canned",100 g,28,0.6g,0.1g,0,426.00 mg,0,39.00 mcg,0.00 mcg,...,0.62 g,0.130 g,0.103 g,0.202 g,0.00 mg,0,2.04 g,0,0,90.20 g
6206,"Fast foods, prepared with garlic and parmesan ...",100 g,343,13g,3g,7mg,539.00 mg,7.5 mg,147.00 mcg,123.00 mcg,...,12.88 g,2.966 g,2.778 g,6.160 g,7.00 mg,0.0 g,2.05 g,0.00 mg,0.00 mg,28.38 g
7869,"Beef, broiled, cooked, select, trimmed to 1/8""...",100 g,194,7.8g,3g,83mg,62.00 mg,110.7 mg,10.00 mcg,0.00 mcg,...,7.76 g,2.953 g,3.096 g,0.277 g,83.00 mg,0.0 g,1.20 g,0.00 mg,0.00 mg,62.67 g
4718,"Alcoholic beverage, BUDWEISER SELECT, light, beer",100 g,28,0g,,0,0,0,0,0,...,0.00 g,0,0,0,0.00 mg,3.4 g,0.08 g,0,0,95.30 g


In [78]:
nutrition.shape[0]

8789

In [79]:
nutrition.shape[0] * 0.01

87.89

# 8. BONUS - Sampling With Replacement Or Weights

In [80]:
# with or without replacement

In [81]:
nutrition.sample(n=3, replace=True)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
1720,"Coriander (cilantro) leaves, raw",100 g,23,0.5g,,0,46.00 mg,12.8 mg,62.00 mcg,0.00 mcg,...,0.52 g,0.014 g,0.275 g,0.040 g,0.00 mg,0.0 g,1.47 g,0.00 mg,0.00 mg,92.21 g
7467,"Cereals, dry, golden brown maple, Instant Oatm...",100 g,356,5.1g,1.1g,0,220.00 mg,0,0,0,...,5.09 g,1.100 g,1.890 g,2.090 g,0.00 mg,0,1.29 g,0,0,8.60 g
8173,"Beef, grilled, cooked, select, trimmed to 0"" f...",100 g,166,5.2g,1.7g,75mg,60.00 mg,91.6 mg,8.00 mcg,0.00 mcg,...,5.17 g,1.716 g,2.006 g,0.286 g,75.00 mg,0.0 g,1.20 g,0.00 mg,0.00 mg,65.76 g


In [82]:
# weighted sampling

In [83]:
weights = pd.Series(data=[10, 10, 10, 1, 2], index=[7, 17, 29, 5, 6])

In [84]:
weights

7     10
17    10
29    10
5      1
6      2
dtype: int64

In [85]:
# A Series passed as weights is aligned with nutrition on index labels; rows
# whose label is absent from the Series get weight 0, so only rows 5, 6, 7, 17
# and 29 can be drawn here. The weights do not need to sum to 1.
nutrition.sample(n=3, weights=weights)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
7,"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
29,"Nuts, dried, pine nuts",100 g,673,68g,4.9g,0,2.00 mg,55.8 mg,34.00 mcg,0.00 mcg,...,68.37 g,4.899 g,18.764 g,34.071 g,0.00 mg,0.0 g,2.59 g,0.00 mg,0.00 mg,2.28 g
17,"Peppers, raw, jalapeno",100 g,29,0.4g,0.1g,0,3.00 mg,7.5 mg,27.00 mcg,0.00 mcg,...,0.37 g,0.092 g,0.029 g,0.112 g,0.00 mg,0.0 g,0.53 g,0.00 mg,0.00 mg,91.69 g


# 9. BONUS - How Are Random Numbers Generated?

In [86]:
nutrition.sample()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
1495,"Cereals ready-to-eat, KASHI GOLEAN",100 g,311,2.2g,0.4g,0,177.00 mg,35.2 mg,107.00 mcg,0.00 mcg,...,2.20 g,0.400 g,0.300 g,0.690 g,0.00 mg,0.0 g,2.70 g,0.00 mg,0.00 mg,3.00 g


In [87]:
nutrition.sample(random_state=19)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
4176,"Babyfood, apple and sweet potato, fruit and ve...",100 g,64,0.2g,,0,3.00 mg,4.4 mg,2.00 mcg,0.00 mcg,...,0.22 g,0.041 g,0.008 g,0.079 g,0.00 mg,0.0 g,0.30 g,0.00 mg,0.00 mg,84.00 g


# 10. DataFrame Axes

In [88]:
nutrition.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [89]:
# .axes - returns a Python list of two pandas Index objects: one characterizing the rows and the other the columns.
nutrition.axes

[Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
             ...
             8779, 8780, 8781, 8782, 8783, 8784, 8785, 8786, 8787, 8788],
            dtype='int64', length=8789),
 Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
        'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
        'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
        'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
        'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
        'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
        'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
        'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
        'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
        'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
        'methionine', 'phenylalanine', 'proline', 'ser

In [90]:
nutrition.axes[0]

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            8779, 8780, 8781, 8782, 8783, 8784, 8785, 8786, 8787, 8788],
           dtype='int64', length=8789)

In [91]:
nutrition.axes[0][3]

3

In [92]:
nutrition.index[3]

3

In [93]:
nutrition.axes[1]

Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'mon

In [94]:
nutrition.axes[1][69]

'polyunsaturated_fatty_acids'

In [95]:
nutrition.columns[69]

'polyunsaturated_fatty_acids'

In [96]:
# the axis param

In [97]:
nutrition.dropna(axis=0)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
5,"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
6,"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [98]:
# 0 = "rows";
# 1 = "columns"

# 11. Changing The Index

In [99]:
# our DataFrame currently has an integer-based index

In [100]:
nutrition.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [101]:
# the index holds 64-bit integers; the pandas version used here (1.3.5) represents it as an Int64Index
nutrition.index

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            8779, 8780, 8781, 8782, 8783, 8784, 8785, 8786, 8787, 8788],
           dtype='int64', length=8789)

In [102]:
type(nutrition.index)

pandas.core.indexes.numeric.Int64Index

In [103]:
pd.RangeIndex(start=0, stop=8789, step=1)

RangeIndex(start=0, stop=8789, step=1)

In [104]:
# Replace the integer index with an equivalent RangeIndex. The stop value is
# taken from the frame's own length rather than a hard-coded 8789, so the cell
# keeps working if the dataset changes size.
nutrition.index = pd.RangeIndex(start=0, stop=len(nutrition), step=1)

In [105]:
type(nutrition.index)

pandas.core.indexes.range.RangeIndex

In [106]:
nutrition.set_index('name')

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,6.422 mg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,7.680 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,6.550 mg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,6.720 mg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [107]:
# Make 'name' the row index of nutrition itself (in place) so later cells can
# look foods up by label. The guard makes the cell safe to re-run: once 'name'
# is the index it is no longer a column, and a second set_index('name') would
# raise KeyError.
if nutrition.index.name != 'name':
    nutrition.set_index('name', inplace=True)

In [108]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [109]:
# Index by a different column. set_index replaces the current index (the food
# names) and, with the default drop=True, removes 'folic_acid' from the columns.
# Nothing is assigned, so nutrition itself is left unchanged.
nutrition.set_index('folic_acid')

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,niacin,pantothenic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
folic_acid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.00 mcg,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.000 mg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
0.00 mcg,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,1.167 mg,0.863 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
0.00 mcg,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.649 mg,0.281 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
0,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,3.363 mg,0.942 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
0.00 mcg,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.063 mg,0.224 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.00 mcg,100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,6.422 mg,0.356 mg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
0.00 mcg,100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,7.680 mg,0.580 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
0.00 mcg,100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,6.550 mg,0.520 mg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
0.00 mcg,100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,6.720 mg,0.355 mg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [110]:
# drop=False keeps 'folic_acid' as a regular column in addition to using it as
# the index (compare the column list with the previous cell's output).
nutrition.set_index('folic_acid', drop=False)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
folic_acid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.00 mcg,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
0.00 mcg,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
0.00 mcg,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
0,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
0.00 mcg,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.00 mcg,100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,6.422 mg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
0.00 mcg,100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,7.680 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
0.00 mcg,100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,6.550 mg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
0.00 mcg,100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,6.720 mg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [111]:
# append=True adds 'folic_acid' as a second index level after the existing
# 'name' index instead of replacing it, giving a two-level (name, folic_acid) index.
nutrition.set_index('folic_acid', drop=False, append=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,folic_acid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Cornstarch,0.00 mcg,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",0.00 mcg,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",0.00 mcg,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",0,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",0.00 mcg,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",0.00 mcg,100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,6.422 mg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",0.00 mcg,100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,7.680 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",0.00 mcg,100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,6.550 mg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",0.00 mcg,100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,6.720 mg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [112]:
nutrition.set_index('folic_acid', drop=False, append=True).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,folic_acid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Cornstarch,0.00 mcg,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",0.00 mcg,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",0.00 mcg,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",0,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",0.00 mcg,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [113]:
# verify_integrity controls whether the new index is checked for duplicate keys.
# False (the pandas default) skips that check, so the output here is the same as
# the previous cell's.
nutrition.set_index('folic_acid', drop=False, append=True, verify_integrity=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,folic_acid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Cornstarch,0.00 mcg,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",0.00 mcg,100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",0.00 mcg,100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",0,100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",0.00 mcg,100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [114]:
nutrition.calories.value_counts()

884    78
47     45
56     43
0      39
50     38
       ..
549     1
643     1
556     1
584     1
605     1
Name: calories, Length: 671, dtype: int64

In [115]:
# 'calories' contains many repeated values (see value_counts above), so this
# produces a non-unique index. verify_integrity=False allows that; with
# verify_integrity=True pandas would raise on the duplicate keys instead.
nutrition.set_index('calories', verify_integrity=False)

Unnamed: 0_level_0,serving_size,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
calories,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
381,100 g,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
691,100 g,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,0.863 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
25,100 g,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,0.281 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
367,100 g,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,0.942 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
144,100 g,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,0.224 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,100 g,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,6.422 mg,0.356 mg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
206,100 g,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,7.680 mg,0.580 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
277,100 g,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,6.550 mg,0.520 mg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
121,100 g,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,6.720 mg,0.355 mg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


# 12. Extracting From DataFrames By Label

In [116]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [117]:
nutrition.loc['Eggplant, raw']

serving_size       100 g
calories              25
total_fat           0.2g
saturated_fat        NaN
cholesterol            0
                  ...   
alcohol            0.0 g
ash               0.66 g
caffeine         0.00 mg
theobromine      0.00 mg
water            92.30 g
Name: Eggplant, raw, Length: 75, dtype: object

In [118]:
type(nutrition.loc['Eggplant, raw'])

pandas.core.series.Series

In [119]:
# Two-step lookup: .loc['Eggplant, raw'] first returns the whole row as a Series,
# then ['calories'] picks one entry out of that Series. The next cell gets the
# same value with a single .loc call.
nutrition.loc['Eggplant, raw']['calories']

25

In [120]:
nutrition.loc['Eggplant, raw', 'calories']

25

In [121]:
# Label-based slices include BOTH endpoints: rows from 'Eggplant, raw' through
# 'Sherbet, orange', columns from 'calories' through 'cholesterol'.
nutrition.loc['Eggplant, raw':'Sherbet, orange', 'calories':'cholesterol']

Unnamed: 0_level_0,calories,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Eggplant, raw",25,0.2g,,0
"Teff, uncooked",367,2.4g,0.4g,0
"Sherbet, orange",144,2g,1.2g,1mg


In [122]:
# Passing lists of labels (even a one-item list) to .loc returns a DataFrame
# holding exactly the requested rows and columns.
nutrition.loc[['Raspberries, raw'], ['protein', 'vitamin_b6']]

Unnamed: 0_level_0,protein,vitamin_b6
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Raspberries, raw",1.20 g,0.055 mg


In [123]:
# Two foods by three nutrients, both selected with lists of labels.
nutrition.loc[
    ['Raspberries, raw', 'Blackberries, raw'],
    ['protein', 'vitamin_b6', 'water'],
]

Unnamed: 0_level_0,protein,vitamin_b6,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Raspberries, raw",1.20 g,0.055 mg,85.75 g
"Blackberries, raw",1.39 g,0.030 mg,88.15 g


# 13. DataFrame Extraction By Position

In [124]:
nutrition.head(10)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,0.507 mg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,5.960 mg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,0.00 mcg,0.630 mg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [125]:
nutrition.iloc[3]

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
alcohol               0
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
Name: Teff, uncooked, Length: 75, dtype: object

In [126]:
nutrition.iloc[3, :]

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
alcohol               0
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
Name: Teff, uncooked, Length: 75, dtype: object

In [127]:
nutrition.iloc[[4,6,9], :]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [128]:
nutrition.iloc[[4,6,9]]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [129]:
nutrition.iloc[[4,6,9], 2]

name
Sherbet, orange         2g
Taro leaves, raw      0.7g
Vegetarian fillets     18g
Name: total_fat, dtype: object

In [130]:
# Mix a list of row positions with a column slice. Positional slices exclude the
# stop value: 2:5 selects the columns at positions 2, 3 and 4.
nutrition.iloc[[4,6,9], 2:5]

Unnamed: 0_level_0,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Sherbet, orange",2g,1.2g,1mg
"Taro leaves, raw",0.7g,0.2g,0
Vegetarian fillets,18g,2.8g,0


In [131]:
# boolean masks

In [132]:
# .iloc also accepts boolean masks: one flag per row and one per column.
# Here every even position (0, 2, 4, ...) is kept on both axes. The mask lengths
# come from nutrition.shape instead of the hard-coded 8789 and 75, so the masks
# always match the frame; `i % 2 == 0` is already a bool, so no
# `True if ... else False` wrapper is needed.
nutrition.iloc[
    [i % 2 == 0 for i in range(nutrition.shape[0])],
    [i % 2 == 0 for i in range(nutrition.shape[1])]
]

Unnamed: 0_level_0,serving_size,total_fat,cholesterol,choline,folic_acid,pantothenic_acid,thiamin,vitamin_a_rae,carotene_beta,lutein_zeaxanthin,...,carbohydrate,sugars,galactose,lactose,sucrose,saturated_fatty_acids,polyunsaturated_fatty_acids,alcohol,caffeine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,0.1g,0,0.4 mg,0.00 mcg,0.000 mg,0.000 mg,0.00 mcg,0.00 mcg,0.00 mcg,...,91.27 g,0.00 g,0,0,0,0.009 g,0.025 g,0.0 g,0.00 mg,8.32 g
"Eggplant, raw",100 g,0.2g,0,6.9 mg,0.00 mcg,0.281 mg,0.039 mg,1.00 mcg,14.00 mcg,36.00 mcg,...,5.88 g,3.53 g,0,0,0.26 g,0.034 g,0.076 g,0.0 g,0.00 mg,92.30 g
"Sherbet, orange",100 g,2g,1mg,7.7 mg,0.00 mcg,0.224 mg,0.027 mg,12.00 mcg,1.00 mcg,7.00 mcg,...,30.40 g,24.32 g,0,0,0,1.160 g,0.080 g,0.0 g,0.00 mg,66.10 g
"Taro leaves, raw",100 g,0.7g,0,12.8 mg,0.00 mcg,0.084 mg,0.209 mg,241.00 mcg,2895.00 mcg,1932.00 mcg,...,6.70 g,3.01 g,0,0,0,0.151 g,0.307 g,0.0 g,0.00 mg,85.66 g
"Cheese, camembert",100 g,24g,72mg,15.4 mg,0.00 mcg,1.364 mg,0.028 mg,241.00 mcg,12.00 mcg,0.00 mcg,...,0.46 g,0.46 g,0,0,0,15.259 g,0.724 g,0.0 g,0.00 mg,51.80 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, select, trimmed to 1/8"" fat, separable lean only, lip-on, boneless, rib eye steak/roast",100 g,6.4g,70mg,49.4 mg,0.00 mcg,0.530 mg,0.100 mg,2.00 mcg,0.00 mcg,0.00 mcg,...,0.00 g,0.00 g,0,0,0,2.313 g,0.396 g,0.0 g,0.00 mg,70.89 g
"Oil, uses similar to 95 degree hard butter, confection fat, palm kernel (hydrogenated), industrial",100 g,100g,0,0.2 mg,0.00 mcg,0.000 mg,0.000 mg,0.00 mcg,0.00 mcg,0.00 mcg,...,0.00 g,0.00 g,0,0,0,93.701 g,0.000 g,0.0 g,0.00 mg,0.05 g
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100 g,3.5g,62mg,64.5 mg,0.00 mcg,0.356 mg,0.063 mg,3.00 mcg,0.00 mcg,0.00 mcg,...,0.00 g,0.00 g,0,0,0,1.353 g,0.244 g,0.0 g,0.00 mg,72.51 g
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100 g,23g,78mg,0,0.00 mcg,0.520 mg,0.130 mg,0.00 mcg,0,0,...,0.00 g,0,0,0,0,11.570 g,0.980 g,0,0,59.80 g


In [133]:
# Keep every other row and every other column (even positions only) using
# boolean masks. The mask lengths are derived from nutrition.shape rather than
# the hard-coded 8789 and 75, so they cannot drift out of sync with the data.
new_nutr = nutrition.iloc[
    [i % 2 == 0 for i in range(nutrition.shape[0])],
    [i % 2 == 0 for i in range(nutrition.shape[1])]
]

In [134]:
nutrition.shape # x rows and y cols

(8789, 75)

In [135]:
new_nutr.shape # ceil(x/2) rows and ceil(y/2) cols: even positions 0, 2, 4, ... were kept, so odd counts round up

(4395, 38)

In [136]:
nutrition.head(10)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,0.507 mg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,5.960 mg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,0.00 mcg,0.630 mg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [137]:
nutrition.iloc[9,1]

290

# 14. Single Value Access With .at And .iat

In [138]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [139]:
nutrition.loc['Nuts, pecans', 'calories']

691

In [140]:
nutrition.iloc[1, 1]

691

In [141]:
nutrition.at['Nuts, pecans', 'calories']

691

In [142]:
nutrition.iat[1, 1]

691

In [143]:
%timeit nutrition.loc['Nuts, pecans', 'calories']

The slowest run took 4.44 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 5: 22.6 µs per loop


In [144]:
%timeit nutrition.at['Nuts, pecans', 'calories']

The slowest run took 7.98 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 5: 8.78 µs per loop


In [145]:
# when needing single value access, prefer .at and .iat over .loc and .iloc

# 15. BONUS - The get_loc() Method

In [146]:
# column label: 'vitamin_k'
# index position: 2

In [147]:
# word of the lecture: pari passu -> kinda equal; on equal footing

In [148]:
# approach #1 - get label from position

In [149]:
nutrition.index

Index(['Cornstarch', 'Nuts, pecans', 'Eggplant, raw', 'Teff, uncooked',
       'Sherbet, orange', 'Cauliflower, raw', 'Taro leaves, raw',
       'Lamb, raw, ground', 'Cheese, camembert', 'Vegetarian fillets',
       ...
       'Beef, braised, cooked, all grades, trimmed to 1/8" fat, separable lean and fat, flat half, brisket',
       'Beef, raw, select, trimmed to 1/8" fat, separable lean only, lip-on, boneless, rib eye steak/roast',
       'Beef, raw, choice, trimmed to 1/8" fat, separable lean only, lip-on, boneless, rib eye steak/roast',
       'Oil, uses similar to 95 degree hard butter, confection fat, palm kernel (hydrogenated), industrial',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round steak, round',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round',
       'Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand',
       'Lamb, raw

In [150]:
nutrition.index[2]

'Eggplant, raw'

In [151]:
index_label = nutrition.index[2]

In [152]:
column_label = 'vitamin_k'

In [153]:
nutrition.loc[index_label, column_label]

'3.5 mcg'

In [154]:
nutrition.at[index_label, column_label]

'3.5 mcg'

In [155]:
# approach #2 - get int location from label

In [156]:
nutrition.columns.get_loc('vitamin_k')

26

In [157]:
# look the integer position up from the label instead of hard-coding 26,
# so the value stays correct if the column order ever changes
column_loc = nutrition.columns.get_loc('vitamin_k')

In [158]:
index_loc = 2

In [159]:
nutrition.iloc[index_loc, column_loc]

'3.5 mcg'

In [160]:
nutrition.iat[index_loc, column_loc]

'3.5 mcg'

# 16. Skill Challenge

1. Randomly select 10 food items and assign the resulting dataframe to a new variable called nutr_mini.

In [161]:
nutrition.sample(10)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Nuts, with salt added, without peanuts, oil roasted, mixed nuts",100 g,615,56g,9.1g,0,306.00 mg,51.2 mg,56.00 mcg,0.00 mcg,1.964 mg,...,56.17 g,9.087 g,33.137 g,11.449 g,0.00 mg,0.0 g,2.89 g,0.00 mg,0.00 mg,3.15 g
"Babyfood, strained, without ascorbic acid, prunes with tapioca, fruit",100 g,69,0.1g,,0,5.00 mg,1.1 mg,0.00 mcg,0.00 mcg,0.525 mg,...,0.10 g,0.008 g,0.066 g,0.022 g,0.00 mg,0.0 g,0.50 g,0.00 mg,0.00 mg,80.30 g
"Fish, raw (may have been previously frozen), Pacific, cod",100 g,69,0.4g,0.1g,47mg,303.00 mg,65.0 mg,7.00 mcg,0.00 mcg,1.095 mg,...,0.41 g,0.085 g,0.073 g,0.164 g,47.00 mg,0.0 g,1.46 g,0.00 mg,0.00 mg,83.95 g
"Fish, raw, bluefish",100 g,124,4.2g,0.9g,59mg,60.00 mg,0,2.00 mcg,0.00 mcg,5.950 mg,...,4.24 g,0.915 g,1.793 g,1.060 g,59.00 mg,0.0 g,1.04 g,0,0,70.86 g
"Beverages, FULL THROTTLE, Energy drink",100 g,46,0.1g,,0,35.00 mg,0.3 mg,0.00 mcg,0.00 mcg,1.667 mg,...,0.08 g,0.000 g,0.000 g,0.000 g,0.00 mg,0.0 g,0.28 g,33.00 mg,0.00 mg,87.30 g
"Vitasoy USA Organic Nasoya, Tofu Plus Extra Firm",100 g,92,4.9g,0.6g,0,7.00 mg,0,0,0,0,...,4.90 g,0.600 g,1.300 g,3.000 g,0.00 mg,0,1.30 g,0,0,81.80 g
"Fish, dry heat, cooked, king, mackerel",100 g,134,2.6g,0.5g,68mg,203.00 mg,0,9.00 mcg,0.00 mcg,10.462 mg,...,2.56 g,0.465 g,0.979 g,0.589 g,68.00 mg,0,1.64 g,0,0,69.04 g
"Veal, raw, separable lean only, rib",100 g,120,3.9g,1.2g,83mg,95.00 mg,0,13.00 mcg,0.00 mcg,7.050 mg,...,3.89 g,1.170 g,1.250 g,0.400 g,83.00 mg,0,1.02 g,0,0,75.18 g
"Beverages, low calorie, with high vitamin C with other added vitamins, powder, fruit-flavored drink",100 g,227,0.2g,,0,14.00 mg,0.1 mg,5.00 mcg,0.00 mcg,80.000 mg,...,0.16 g,0.010 g,0.038 g,0.034 g,0.00 mg,0.0 g,7.27 g,0.00 mg,0.00 mg,1.32 g
"Beverages, no caffeine, lemon-lime soda, carbonated",100 g,41,0g,,0,10.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.015 mg,...,0.00 g,0.000 g,0.000 g,0.000 g,0.00 mg,0.0 g,0.00 g,0.00 mg,0.00 mg,89.49 g


In [162]:
# draw 10 random rows (food items); no random_state is passed, so each run
# picks a different sample
nutr_mini = nutrition.sample(10)  # axis=0 (rows) is the default

In [163]:
nutr_mini.shape

(10, 75)

In [164]:
# axis=1 samples columns instead of rows: 10 random columns, all rows kept.
# NOTE: this rebinds nutr_mini, replacing the row sample taken above
nutr_mini = nutrition.sample(10, axis=1)

In [165]:
nutr_mini.shape

(8789, 10)

2. From nutr_mini, extract the total_fat and cholesterol columns for all rows.

In [166]:
# re-create the 10-row sample: nutr_mini was last bound to a column sample,
# which is not guaranteed to contain total_fat / cholesterol
nutr_mini = nutrition.sample(10)  # axis=0 (rows) is the default

In [167]:
# a list of labels inside [] selects those columns for every row
nutr_mini[['total_fat', 'cholesterol']]

Unnamed: 0_level_0,total_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Nuts, dried, pine nuts",68g,0
"McDONALD'S, Grilled, RANCH SNACK WRAP",10g,0
"Crustaceans, raw, northern, lobster",0.8g,127mg
"Rice mix, unprepared, flavored, white and wild",0.9g,0
"Frankfurter, heated, meat",24g,73mg
"Soup, single brand, condensed, canned, vegetable beef",0.8g,5mg
"Fish, raw, Atlantic, wolffish",2.4g,46mg
"Beef, broiled, cooked, choice, trimmed to 0"" fat, separable lean only, mock tender steak, chuck",5.7g,94mg
"Beans, raw, sprouted, mature seeds, navy",0.7g,0
"Lamb, raw, testes, imported, New Zealand",2.4g,393mg


3. Extract all the columns from vitamin_b12 to the end, for the first, second and third rows.

In [168]:
nutr_mini.columns.get_loc('vitamin_b12')

20

In [169]:
b12_loc = nutr_mini.columns.get_loc('vitamin_b12')

In [170]:
# first three rows, every column from vitamin_b12 through the last one
nutr_mini.iloc[:3, b12_loc:]

Unnamed: 0_level_0,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Nuts, dried, pine nuts",0.00 mcg,0.094 mg,0.8 mg,0.00 IU,9.33 mg,9.33 mg,53.9 mcg,16.00 mg,1.324 mg,5.53 mg,...,68.37 g,4.899 g,18.764 g,34.071 g,0.00 mg,0.0 g,2.59 g,0.00 mg,0.00 mg,2.28 g
"McDONALD'S, Grilled, RANCH SNACK WRAP",0.17 mcg,0.194 mg,0.4 mg,0,0,0,0,93.00 mg,0.062 mg,1.45 mg,...,10.31 g,3.363 g,2.313 g,3.415 g,0,0,2.31 g,0,0,55.17 g
"Crustaceans, raw, northern, lobster",1.25 mcg,0.104 mg,0.0 mg,1.00 IU,0.87 mg,0.87 mg,0.0 mcg,84.00 mg,1.349 mg,0.26 mg,...,0.75 g,0.181 g,0.220 g,0.296 g,127.00 mg,0.0 g,1.88 g,0.00 mg,0.00 mg,80.95 g


4. Get the calories for the third food in nutr_mini using an attribute-based approach that is faster than .loc or .iloc.

In [171]:
nutr_mini.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Nuts, dried, pine nuts",100 g,673,68g,4.9g,0,2.00 mg,55.8 mg,34.00 mcg,0.00 mcg,4.387 mg,...,68.37 g,4.899 g,18.764 g,34.071 g,0.00 mg,0.0 g,2.59 g,0.00 mg,0.00 mg,2.28 g
"McDONALD'S, Grilled, RANCH SNACK WRAP",100 g,222,10g,3.4g,0,577.00 mg,0,45.00 mcg,0,5.875 mg,...,10.31 g,3.363 g,2.313 g,3.415 g,0,0,2.31 g,0,0,55.17 g
"Crustaceans, raw, northern, lobster",100 g,77,0.8g,0.2g,127mg,423.00 mg,70.3 mg,10.00 mcg,0.00 mcg,1.591 mg,...,0.75 g,0.181 g,0.220 g,0.296 g,127.00 mg,0.0 g,1.88 g,0.00 mg,0.00 mg,80.95 g
"Rice mix, unprepared, flavored, white and wild",100 g,355,0.9g,,0,1140.00 mg,39.9 mg,175.00 mcg,6.00 mcg,7.018 mg,...,0.88 g,0.000 g,0.180 g,0.570 g,0.00 mg,0.0 g,4.93 g,0.00 mg,0.00 mg,7.53 g
"Frankfurter, heated, meat",100 g,278,24g,7.2g,73mg,1013.00 mg,0,6.00 mcg,0.00 mcg,2.654 mg,...,24.31 g,7.220 g,10.743 g,3.963 g,73.00 mg,0.0 g,3.19 g,0.00 mg,0.00 mg,57.82 g


In [172]:
# .iat is the fast positional accessor for a single value; the column
# position is resolved from the 'calories' label rather than relying on the
# magic number 1, so this keeps working if the column order changes
nutr_mini.iat[2, nutr_mini.columns.get_loc('calories')]

77

# 17. More Cleanup: Going Numeric

In [173]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [174]:
nutrition.total_fat.head()

name
Cornstarch         0.1g
Nuts, pecans        72g
Eggplant, raw      0.2g
Teff, uncooked     2.4g
Sherbet, orange      2g
Name: total_fat, dtype: object

In [175]:
nutrition.total_fat.sum()

'0.1g72g0.2g2.4g2g0.3g0.7g23g24g18g0g0.4g0.1g7.2g15g4.5g16g0.4g0.9g9.2g0.5g1.7g5.9g33g15g0g0g3g0.4g68g1.5g0.2g11g0.8g16g0.2g0.5g50g0.3g0.6g3g16g3.1g11g2.4g0g0.3g0.5g99g0.1g5.3g5.3g0.4g5.3g0.2g0.3g6.4g22g1g14g10g17g14g4.3g22g36g0.7g100g27g2.8g5.8g14g0.7g1.5g11g34g0.2g0.9g4.6g0g0.4g19g0.2g10g28g6.7g6.7g0.3g0.3g44g37g14g50g0.9g0.7g0.1g0.7g8g10g19g9.2g0g0.2g8.6g1.3g14g0.4g0.2g8.2g5.2g3.3g2.1g0.1g80g0.5g1.4g0.4g3.3g0.2g0.7g0.1g9.4g0.3g0.6g29g0.2g1.4g0.2g0.4g1.2g1.8g0g0.9g0.2g1.4g0.8g1.5g9.8g0g13g16g19g7.4g0.2g5.2g29g0.3g9.9g22g14g15g4.1g0.5g3.5g15g20g32g1g81g0.2g1.6g0g17g22g7.1g7.4g0.2g8.7g1.4g34g0.3g6.3g30g8.1g0.2g0.1g3.7g0.6g0.3g3.9g0.3g1.2g29g14g26g1.1g2g13g0g9g22g3.7g100g0.1g2.1g2.1g0.3g0.2g6.8g8.1g8.3g2.4g0.2g23g6.7g1g0.3g0.2g0.1g12g17g0.1g100g3.4g0.2g6.7g1.5g22g0g1g25g34g3.6g100g7.3g9g1.6g0.4g0.3g18g0.4g9.5g1.4g11g3.1g1.5g0g2.4g3.6g0g15g1.2g6.6g0.7g4.2g15g0.1g0.5g0.2g0.1g3.5g3g0.1g0.4g0.1g0.3g100g31g2.8g9.7g2.3g11g1.7g0.1g1.7g0.5g4.7g0.5g0.2g8.6g18g25g0.5g0.5g26g7.7g0.1g0.1g100g1.1g16

In [176]:
nutrition.total_fat.max()

'9g'

In [177]:
nutrition.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Data columns (total 75 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   serving_size                 8789 non-null   object
 1   calories                     8789 non-null   int64 
 2   total_fat                    8789 non-null   object
 3   saturated_fat                7199 non-null   object
 4   cholesterol                  8789 non-null   object
 5   sodium                       8789 non-null   object
 6   choline                      8789 non-null   object
 7   folate                       8789 non-null   object
 8   folic_acid                   8789 non-null   object
 9   niacin                       8789 non-null   object
 10  pantothenic_acid             8789 non-null   object
 11  riboflavin                   8789 non-null   obje

In [178]:
nutrition.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Columns: 75 entries, serving_size to water
dtypes: int64(2), object(73)
memory usage: 5.3+ MB


# 18. The astype() Method

In [179]:
# toy frame for the astype() demo: age holds ints, weight holds floats and
# height holds numbers stored as strings
df = pd.DataFrame(
    dict(
        age=[12, 13, 14, 16],
        weight=[41.1, 34.5, 83.2, 90.1],
        height=['1.72', '1.74', '1.91', '1.54'],
    )
)

In [180]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     4 non-null      int64  
 1   weight  4 non-null      float64
 2   height  4 non-null      object 
dtypes: float64(1), int64(1), object(1)
memory usage: 224.0+ bytes


In [181]:
df

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [182]:
df.astype(float)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [183]:
df

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [184]:
# astype() returns a new frame and leaves the original untouched, so the
# result has to be assigned back to keep the conversion
df = df.astype(float)

In [185]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     4 non-null      float64
 1   weight  4 non-null      float64
 2   height  4 non-null      float64
dtypes: float64(3)
memory usage: 224.0 bytes


In [186]:
# a dict casts only the listed columns; the result is not assigned here,
# so df itself keeps its float64 'age' column
df.astype({'age': int})

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [187]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     4 non-null      float64
 1   weight  4 non-null      float64
 2   height  4 non-null      float64
dtypes: float64(3)
memory usage: 224.0 bytes


In [188]:
# NumPy dtypes are accepted as targets too, e.g. a 16-bit integer for 'age'
df.astype({'age': np.int16})

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [189]:
nutrition.iloc[:4, :]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g


# 19. DataFrame replace() + A Glimpse At Regex

In [190]:
nutrition.iloc[:6 , :1]

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100 g
"Nuts, pecans",100 g
"Eggplant, raw",100 g
"Teff, uncooked",100 g
"Sherbet, orange",100 g
"Cauliflower, raw",100 g


In [191]:
nutrition.iloc[:6 , :1].info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Cornstarch to Cauliflower, raw
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   serving_size  6 non-null      object
dtypes: object(1)
memory usage: 96.0+ bytes


In [192]:
# first six rows of the first column only (serving_size); slicing with ':1'
# rather than a scalar keeps the result a one-column DataFrame
dfm = nutrition.iloc[:6 , :1]

In [193]:
dfm.replace(to_replace='100 g', value=100)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [194]:
dfm.replace(to_replace='100 g', value=100).info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Cornstarch to Cauliflower, raw
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   serving_size  6 non-null      int64
dtypes: int64(1)
memory usage: 96.0+ bytes


In [195]:
# the keyword names are optional: to_replace and value can be passed positionally
dfm.replace('100 g', 100)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [196]:
# regex -> regular expressions

In [197]:
# remove a whitespace character followed by 'g' (the ' g' unit suffix);
# the pattern is a raw string so that \s reaches the regex engine without
# relying on Python's deprecated handling of unknown string escapes
dfm.replace(r'\sg', '', regex=True)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [198]:
# the unit suffix is stripped, but the remaining values are still strings
# (dtype object); raw string avoids the invalid '\s' string-escape warning
dfm.replace(r'\sg', '', regex=True).info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Cornstarch to Cauliflower, raw
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   serving_size  6 non-null      object
dtypes: object(1)
memory usage: 96.0+ bytes


In [199]:
# strip the ' g' suffix, then cast the remaining digit strings to integers;
# raw string avoids the invalid '\s' string-escape warning
dfm.replace(r'\sg', '', regex=True).astype(int)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [200]:
nutrition.head(10)

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,0.507 mg,...,0.28 g,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,5.960 mg,...,23.41 g,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g
"Cheese, camembert",100 g,300,24g,15g,72mg,842.00 mg,15.4 mg,62.00 mcg,0.00 mcg,0.630 mg,...,24.26 g,15.259 g,7.023 g,0.724 g,72.00 mg,0.0 g,3.68 g,0.00 mg,0.00 mg,51.80 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


# 20. Part I: Collecting The Units

In [201]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [202]:
# cast every cell to str so the regex can be applied to all columns, then
# delete every non-letter character: what remains is just the unit text
# (e.g. 'g', 'mg', 'mcg'), or '' for values that carry no unit.
# NOTE: missing values become the literal string 'nan' after astype(str)
units = nutrition.astype(str).replace('[^a-zA-Z]', '', regex=True)

In [203]:
nutrition.sample(20, axis=1).head()

Unnamed: 0_level_0,leucine,vitamin_a,aspartic_acid,potassium,lactose,glucose,valine,alanine,vitamin_e,histidine,phenylalanine,monounsaturated_fatty_acids,sucrose,proline,galactose,vitamin_b6,fat,folic_acid,glycine,vitamin_a_rae
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Cornstarch,0.036 g,0.00 IU,0.020 g,3.00 mg,0,0,0.014 g,0.019 g,0.00 mg,0.008 g,0.013 g,0.016 g,0,0.024 g,0,0.000 mg,0.05 g,0.00 mcg,0.009 g,0.00 mcg
"Nuts, pecans",0.598 g,56.00 IU,0.929 g,410.00 mg,0.00 g,0.04 g,0.411 g,0.397 g,1.40 mg,0.262 g,0.426 g,40.801 g,3.90 g,0.363 g,0,0.210 mg,71.97 g,0.00 mcg,0.453 g,3.00 mcg
"Eggplant, raw",0.064 g,23.00 IU,0.164 g,229.00 mg,0,1.58 g,0.053 g,0.051 g,0.30 mg,0.023 g,0.043 g,0.016 g,0.26 g,0.043 g,0,0.084 mg,0.18 g,0.00 mcg,0.041 g,1.00 mcg
"Teff, uncooked",1.068 g,9.00 IU,0.820 g,427.00 mg,0.00 g,0.73 g,0.686 g,0.747 g,0.08 mg,0.301 g,0.698 g,0.589 g,0.62 g,0.664 g,0.00 g,0.482 mg,2.38 g,0,0.477 g,0.00 mcg
"Sherbet, orange",0,46.00 IU,0,96.00 mg,0,0,0,0,0.01 mg,0,0,0.530 g,0,0,0,0.023 mg,2.00 g,0.00 mcg,0,12.00 mcg


In [204]:
units.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,g,,g,,,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g
"Nuts, pecans",g,,g,g,,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g
"Eggplant, raw",g,,g,,,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g
"Teff, uncooked",g,,g,g,,mg,mg,,,mg,...,g,g,g,g,,,g,,,g
"Sherbet, orange",g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [205]:
units.saturated_fat

name
Cornstarch                                                                                            nan
Nuts, pecans                                                                                            g
Eggplant, raw                                                                                         nan
Teff, uncooked                                                                                          g
Sherbet, orange                                                                                         g
                                                                                                     ... 
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round      g
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand      g
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand      g
Beef, raw, all grades, trimmed to 0" fat,

In [206]:
units.saturated_fat.value_counts()

g      7199
nan    1590
Name: saturated_fat, dtype: int64

In [207]:
units.saturated_fat.mode()

0    g
dtype: object

In [208]:
units.mode()

Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [209]:
# collapse to the most frequent unit per column (a single-row frame here);
# this rebinds units from the full per-cell frame to that summary
units = units.mode()

# 21. The rename() Method

In [210]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [211]:
df.rename(index={0: 'Pikachu'})

Unnamed: 0,age,weight,height
Pikachu,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [212]:
df.rename(index={0: 'Pikachu', 1: 'Andy'})

Unnamed: 0,age,weight,height
Pikachu,12.0,41.1,1.72
Andy,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [213]:
df.rename(columns={'weight': 'Weight (kg)'})

Unnamed: 0,age,Weight (kg),height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [214]:
df.rename(columns={'weight': 'Weight (kg)'}, index={0: 'Pikachu'})

Unnamed: 0,age,Weight (kg),height
Pikachu,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [215]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [216]:
df.rename(mapper={'height': 'Height (m)'}, axis=1)

Unnamed: 0,age,weight,Height (m)
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [217]:
# the axis parameter also accepts string aliases: 'index' or 'rows' instead of 0, and 'columns' instead of 1
df.rename(mapper={'height': 'Height (m)'}, axis='columns')

Unnamed: 0,age,weight,Height (m)
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [218]:
df.axes[1]

Index(['age', 'weight', 'height'], dtype='object')

In [219]:
df.axes[0]

RangeIndex(start=0, stop=4, step=1)

# 22. DataFrame dropna()

In [220]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,83.2,1.91
3,16.0,90.1,1.54


In [221]:
# knock out a single cell (row 2, 'weight') to get a partially missing row
df.loc[2, 'weight'] = np.nan

In [222]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,13.0,34.5,1.74
2,14.0,,1.91
3,16.0,90.1,1.54


In [223]:
# blank out every column of row 1 to get a fully missing row
df.loc[1, :] = np.nan

In [224]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,,,
2,14.0,,1.91
3,16.0,90.1,1.54


In [225]:
df.dropna()

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [226]:
df.dropna(how='any', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [227]:
df.dropna(how='all', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
2,14.0,,1.91
3,16.0,90.1,1.54


In [228]:
# thresh: the minimum number of non-NaN values a row (or column) needs in order to be kept

In [229]:
df.dropna(thresh=3, axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [230]:
df.dropna(how='any', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [231]:
df.dropna(thresh=df.shape[1], axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [232]:
df.shape

(4, 3)

In [233]:
df.dropna(axis=1)

0
1
2
3


In [234]:
df.dropna(how='any', axis=1)

0
1
2
3


In [235]:
df.dropna(how='all', axis=1)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,,,
2,14.0,,1.91
3,16.0,90.1,1.54


In [236]:
df.dropna(axis=1, thresh=3)

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


In [237]:
#df remains unchanged
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,,,
2,14.0,,1.91
3,16.0,90.1,1.54


In [238]:
# dropna() is one of those methods that support the 'inplace' parameter; with inplace=True, df itself is modified
df.dropna(axis=1, thresh=3, inplace=True)

In [239]:
df

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


# 23. BONUS - dropna() With Subset

In [240]:
df

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


In [241]:
# add a column whose only missing value is in row 2, to contrast with the
# NaNs that age/height have in row 1
df['gender'] = ['M', 'F', np.nan, 'F']

In [242]:
df

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
2,14.0,1.91,
3,16.0,1.54,F


In [243]:
df.dropna()

Unnamed: 0,age,height,gender
0,12.0,1.72,M
3,16.0,1.54,F


In [244]:
df.dropna(axis=0, how='any')

Unnamed: 0,age,height,gender
0,12.0,1.72,M
3,16.0,1.54,F


In [245]:
# the subset param

In [246]:
df.dropna(axis=0, how='any', subset=['gender'])

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
3,16.0,1.54,F


In [247]:
df.dropna(axis=0, how='any', subset=['age'])

Unnamed: 0,age,height,gender
0,12.0,1.72,M
2,14.0,1.91,
3,16.0,1.54,F


In [248]:
df.dropna(axis=1, how='any')

0
1
2
3


In [249]:
df

Unnamed: 0,age,height,gender
0,12.0,1.72,M
1,,,F
2,14.0,1.91,
3,16.0,1.54,F


In [250]:
# with axis=1, subset lists row labels: a column is dropped only if it has
# a NaN in row 0 or row 2 (NaNs in other rows are ignored)
df.dropna(axis=1, how='any', subset=[0, 2])

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


In [251]:
# word of the lecture: orthogonal -> at a right angle

# 24. Part II: Merging Units With Column Names

In [252]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [253]:
units

Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [254]:
# the rename method

In [255]:
### DETOUR: dataframe iteration

In [256]:
# Iterating over a DataFrame walks its column labels, so spell that out.
for column_name in units.columns:
  print(column_name)

serving_size
calories
total_fat
saturated_fat
cholesterol
sodium
choline
folate
folic_acid
niacin
pantothenic_acid
riboflavin
thiamin
vitamin_a
vitamin_a_rae
carotene_alpha
carotene_beta
cryptoxanthin_beta
lutein_zeaxanthin
lucopene
vitamin_b12
vitamin_b6
vitamin_c
vitamin_d
vitamin_e
tocopherol_alpha
vitamin_k
calcium
copper
irom
magnesium
manganese
phosphorous
potassium
selenium
zink
protein
alanine
arginine
aspartic_acid
cystine
glutamic_acid
glycine
histidine
hydroxyproline
isoleucine
leucine
lysine
methionine
phenylalanine
proline
serine
threonine
tryptophan
tyrosine
valine
carbohydrate
fiber
sugars
fructose
galactose
glucose
lactose
maltose
sucrose
fat
saturated_fatty_acids
monounsaturated_fatty_acids
polyunsaturated_fatty_acids
fatty_acids_total_trans
alcohol
ash
caffeine
theobromine
water


In [257]:
# DataFrame.items() yields (column label, column Series) pairs.
for column_name, column in units.items():
  print(column_name, column)

serving_size 0    g
Name: serving_size, dtype: object
calories 0    
Name: calories, dtype: object
total_fat 0    g
Name: total_fat, dtype: object
saturated_fat 0    g
Name: saturated_fat, dtype: object
cholesterol 0    mg
Name: cholesterol, dtype: object
sodium 0    mg
Name: sodium, dtype: object
choline 0    mg
Name: choline, dtype: object
folate 0    mcg
Name: folate, dtype: object
folic_acid 0    mcg
Name: folic_acid, dtype: object
niacin 0    mg
Name: niacin, dtype: object
pantothenic_acid 0    mg
Name: pantothenic_acid, dtype: object
riboflavin 0    mg
Name: riboflavin, dtype: object
thiamin 0    mg
Name: thiamin, dtype: object
vitamin_a 0    IU
Name: vitamin_a, dtype: object
vitamin_a_rae 0    mcg
Name: vitamin_a_rae, dtype: object
carotene_alpha 0    mcg
Name: carotene_alpha, dtype: object
carotene_beta 0    mcg
Name: carotene_beta, dtype: object
cryptoxanthin_beta 0    mcg
Name: cryptoxanthin_beta, dtype: object
lutein_zeaxanthin 0    mcg
Name: lutein_zeaxanthin, dtype: object

In [258]:
# Walk the row labelled 0 as (column label, unit) pairs.
for column_name, unit in units.loc[0].items():
  print(column_name, unit)

serving_size g
calories 
total_fat g
saturated_fat g
cholesterol mg
sodium mg
choline mg
folate mcg
folic_acid mcg
niacin mg
pantothenic_acid mg
riboflavin mg
thiamin mg
vitamin_a IU
vitamin_a_rae mcg
carotene_alpha mcg
carotene_beta mcg
cryptoxanthin_beta mcg
lutein_zeaxanthin mcg
lucopene 
vitamin_b12 mcg
vitamin_b6 mg
vitamin_c mg
vitamin_d IU
vitamin_e mg
tocopherol_alpha mg
vitamin_k mcg
calcium mg
copper mg
irom mg
magnesium mg
manganese mg
phosphorous mg
potassium mg
selenium mcg
zink mg
protein g
alanine g
arginine g
aspartic_acid g
cystine g
glutamic_acid g
glycine g
histidine g
hydroxyproline 
isoleucine g
leucine g
lysine g
methionine g
phenylalanine g
proline g
serine g
threonine g
tryptophan g
tyrosine g
valine g
carbohydrate g
fiber g
sugars g
fructose 
galactose 
glucose 
lactose 
maltose 
sucrose 
fat g
saturated_fatty_acids g
monounsaturated_fatty_acids g
polyunsaturated_fatty_acids g
fatty_acids_total_trans mg
alcohol g
ash g
caffeine mg
theobromine mg
water g


In [259]:
# Only the unit values of the row labelled 0 are needed here.
for unit in units.loc[0]:
  print(unit)

g

g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg

mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g

g
g
g
g
g
g
g
g
g
g
g
g
g
g






g
g
g
g
mg
g
g
mg
mg
g


In [260]:
units.replace('', np.nan).dropna(axis=1)

Unnamed: 0,serving_size,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,g,g,g,mg,mg,mg,mcg,mcg,mg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [261]:
units = (
    units
    .replace('', np.nan)      # blank unit string -> NaN
    .dropna(axis='columns')   # discard every column whose unit is missing
)

In [262]:
# After the cleanup, print the unit stored in the row labelled 0 for each column.
for unit in units.loc[0]:
  print(unit)

g
g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg
mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
mg
g
g
mg
mg
g


In [263]:
# target shape of the rename mapper:
# {
#     'serving_size': 'serving_size_g',
#     'total_fat': 'total_fat_g',
#     ...
# }

In [264]:
# Row 0 as a plain dict: column label -> unit string.
mapper = units.loc[0].to_dict()

In [265]:
mapper

{'alanine': 'g',
 'alcohol': 'g',
 'arginine': 'g',
 'ash': 'g',
 'aspartic_acid': 'g',
 'caffeine': 'mg',
 'calcium': 'mg',
 'carbohydrate': 'g',
 'carotene_alpha': 'mcg',
 'carotene_beta': 'mcg',
 'cholesterol': 'mg',
 'choline': 'mg',
 'copper': 'mg',
 'cryptoxanthin_beta': 'mcg',
 'cystine': 'g',
 'fat': 'g',
 'fatty_acids_total_trans': 'mg',
 'fiber': 'g',
 'folate': 'mcg',
 'folic_acid': 'mcg',
 'glutamic_acid': 'g',
 'glycine': 'g',
 'histidine': 'g',
 'irom': 'mg',
 'isoleucine': 'g',
 'leucine': 'g',
 'lutein_zeaxanthin': 'mcg',
 'lysine': 'g',
 'magnesium': 'mg',
 'manganese': 'mg',
 'methionine': 'g',
 'monounsaturated_fatty_acids': 'g',
 'niacin': 'mg',
 'pantothenic_acid': 'mg',
 'phenylalanine': 'g',
 'phosphorous': 'mg',
 'polyunsaturated_fatty_acids': 'g',
 'potassium': 'mg',
 'proline': 'g',
 'protein': 'g',
 'riboflavin': 'mg',
 'saturated_fat': 'g',
 'saturated_fatty_acids': 'g',
 'selenium': 'mcg',
 'serine': 'g',
 'serving_size': 'g',
 'sodium': 'mg',
 'sugars': 'g

In [266]:
# Map each column label to "<label>_<unit>", e.g. 'total_fat' -> 'total_fat_g'.
mapper = {
    column_name: "_".join((column_name, unit))
    for column_name, unit in units.loc[0].items()
}

In [267]:
mapper

{'alanine': 'alanine_g',
 'alcohol': 'alcohol_g',
 'arginine': 'arginine_g',
 'ash': 'ash_g',
 'aspartic_acid': 'aspartic_acid_g',
 'caffeine': 'caffeine_mg',
 'calcium': 'calcium_mg',
 'carbohydrate': 'carbohydrate_g',
 'carotene_alpha': 'carotene_alpha_mcg',
 'carotene_beta': 'carotene_beta_mcg',
 'cholesterol': 'cholesterol_mg',
 'choline': 'choline_mg',
 'copper': 'copper_mg',
 'cryptoxanthin_beta': 'cryptoxanthin_beta_mcg',
 'cystine': 'cystine_g',
 'fat': 'fat_g',
 'fatty_acids_total_trans': 'fatty_acids_total_trans_mg',
 'fiber': 'fiber_g',
 'folate': 'folate_mcg',
 'folic_acid': 'folic_acid_mcg',
 'glutamic_acid': 'glutamic_acid_g',
 'glycine': 'glycine_g',
 'histidine': 'histidine_g',
 'irom': 'irom_mg',
 'isoleucine': 'isoleucine_g',
 'leucine': 'leucine_g',
 'lutein_zeaxanthin': 'lutein_zeaxanthin_mcg',
 'lysine': 'lysine_g',
 'magnesium': 'magnesium_mg',
 'manganese': 'manganese_mg',
 'methionine': 'methionine_g',
 'monounsaturated_fatty_acids': 'monounsaturated_fatty_acids

In [268]:
nutrition.rename(columns=mapper)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,6.422 mg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,7.680 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,6.550 mg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,6.720 mg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [269]:
# Rebind instead of mutating in place: rename() returns a new frame, which
# keeps the step chainable and avoids silently changing an object that
# earlier cells already displayed. Columns absent from `mapper`
# (e.g. 'calories') keep their original names.
nutrition = nutrition.rename(columns=mapper)

# 25. Part III: Removing Units From Values

In [270]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [271]:
# Strip every ASCII letter from the string cells (the unit suffixes such as
# 'g', 'mg', 'mcg') so only the numeric text is left. Reassign rather than
# use inplace=True so the cell produces a new frame instead of mutating one
# that earlier cells already displayed.
nutrition = nutrition.replace('[a-zA-Z]', '', regex=True)

In [272]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100,381,0.1,,0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100,25,0.2,,0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [273]:
nutrition.dtypes

serving_size_g     object
calories            int64
total_fat_g        object
saturated_fat_g    object
cholesterol_mg     object
                    ...  
alcohol_g          object
ash_g              object
caffeine_mg        object
theobromine_mg     object
water_g            object
Length: 75, dtype: object

In [274]:
nutrition.dtypes.value_counts()

object    73
int64      2
dtype: int64

In [275]:
# With the unit letters removed, convert every column to float64.
nutrition = nutrition.astype('float64')

In [276]:
nutrition.total_fat_g.sum()

92784.20000000001

In [277]:
nutrition.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Columns: 75 entries, serving_size_g to water_g
dtypes: float64(75)
memory usage: 5.3+ MB


# 26. Filtering in 2D

In [278]:
nutrition.shape

(8789, 75)

In [279]:
nutrition.head(3)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3


In [280]:
nutrition.filter(like="Octopus", axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0


In [281]:
nutrition.filter(like="octopus", axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [282]:
# filter with regex

In [283]:
nutrition.filter(regex='octopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [284]:
nutrition.filter(regex='[Oo]ctopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [285]:
nutrition.filter(regex='(?i)octopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [286]:
# filter along both dimensions

In [287]:
# Rows: index labels matching 'octopus' regardless of case.
# Columns: an explicit list of labels, in the order given.
(
    nutrition
    .filter(regex='(?i)octopus', axis=0)
    .filter(items=['cholesterol_mg', 'serving_size_g', 'calories'], axis=1)
)

Unnamed: 0_level_0,cholesterol_mg,serving_size_g,calories
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopus (Alaska Native),41.0,100.0,56.0
"Mollusks, raw, common, octopus",48.0,100.0,82.0
"Mollusks, moist heat, cooked, common, octopus",96.0,100.0,164.0


In [288]:
# Same selection as the double filter, but the columns are picked with .loc.
(
    nutrition
    .filter(regex='(?i)octopus', axis=0)
    .loc[:, ['cholesterol_mg', 'serving_size_g', 'calories']]
)

Unnamed: 0_level_0,cholesterol_mg,serving_size_g,calories
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopus (Alaska Native),41.0,100.0,56.0
"Mollusks, raw, common, octopus",48.0,100.0,82.0
"Mollusks, moist heat, cooked, common, octopus",96.0,100.0,164.0


# 27. DataFrame Sorting

In [289]:
nutrition.head(3)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3


In [290]:
nutrition.vitamin_b12_mcg

name
Cornstarch                                                                                            0.00
Nuts, pecans                                                                                          0.00
Eggplant, raw                                                                                         0.00
Teff, uncooked                                                                                        0.00
Sherbet, orange                                                                                       0.13
                                                                                                      ... 
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    1.64
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    2.95
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    2.42
Beef, raw, all grades, trimmed t

In [291]:
type(nutrition.vitamin_b12_mcg)

pandas.core.series.Series

In [292]:
nutrition.vitamin_b12_mcg.sort_values()

name
Cornstarch                                                                           0.00
Apricots, stewed, sulfured, dehydrated (low-moisture)                                0.00
Cocoa, processed with alkali, unsweetened, dry powder                                0.00
Tomato products, with herbs and cheese, sauce, canned                                0.00
Mothbeans, without salt, boiled, cooked, mature seeds                                0.00
                                                                                    ...  
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Lamb, raw, liver, variety meats and by-products                                     90.05
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Name:

In [293]:
nutrition.sort_values(by=['calories'], ascending=False)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Fat, mutton tallow",100.0,902.0,100.0,47.0,102.0,0.0,79.8,0.0,0.0,0.00,...,100.0,47.300,40.600,7.800,102.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, salmon",100.0,902.0,100.0,20.0,485.0,0.0,0.0,0.0,0.0,0.00,...,100.0,19.872,29.037,40.324,485.0,0.0,0.00,0.0,0.0,0.00
Lard,100.0,902.0,100.0,39.0,95.0,0.0,49.7,0.0,0.0,0.00,...,100.0,39.200,45.100,11.200,95.0,0.0,0.00,0.0,0.0,0.00
"Fat, beef tallow",100.0,902.0,100.0,50.0,109.0,0.0,79.8,0.0,0.0,0.00,...,100.0,49.800,41.800,4.000,109.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, cod liver",100.0,902.0,100.0,23.0,570.0,0.0,0.0,0.0,0.0,0.00,...,100.0,22.608,46.711,22.541,570.0,0.0,0.00,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, decaffeinated, brewed, green, tea",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.000,0.000,0.000,0.0,0.0,0.00,0.0,0.0,99.93
"Beverages, caffeine free, cola, ZEVIA",100.0,0.0,0.0,,0.0,6.0,0.0,0.0,0.0,0.00,...,0.0,0.000,0.000,0.000,0.0,0.0,0.01,0.0,0.0,98.87
"Carbonated beverage, without caffeine, with sodium saccharin, other than cola or pepper, low calorie",100.0,0.0,0.0,,0.0,16.0,0.0,0.0,0.0,0.00,...,0.0,0.000,0.000,0.000,0.0,0.0,0.10,0.0,0.0,99.80
"Beverages, unsweetened, ready to drink, green, tea",100.0,0.0,0.0,,0.0,7.0,0.0,0.0,0.0,0.00,...,0.0,0.000,0.000,0.000,0.0,0.0,0.12,12.0,0.0,99.88


In [294]:
nutrition.sort_values(by=['cholesterol_mg', 'sodium_mg'], ascending=False)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Veal, braised, cooked, brain, variety meats and by-products",100.0,136.0,9.6,2.2,3100.0,156.0,0.0,3.0,0.0,2.430,...,9.63,2.180,1.740,1.490,3100.0,0.0,1.40,0.0,0.0,76.89
"Beef, simmered, cooked, brain, variety meats and by-products",100.0,151.0,11.0,2.4,3100.0,108.0,490.9,5.0,0.0,3.620,...,10.53,2.394,1.882,1.632,3100.0,0.0,1.46,0.0,0.0,74.86
"Beef, raw, brain, variety meats and by-products",100.0,143.0,10.0,2.3,3010.0,126.0,0.0,3.0,0.0,3.550,...,10.30,2.300,1.890,1.586,3010.0,0.0,1.51,0.0,0.0,76.29
"Lamb, soaked and fried, cooked, brains, imported, New Zealand",100.0,154.0,11.0,1.4,2559.0,101.0,0.0,0.0,0.0,2.995,...,10.92,1.365,4.168,0.999,2559.0,0.0,3.39,0.0,0.0,73.11
"Pork, braised, cooked, brain, variety meats and by-products, fresh",100.0,138.0,9.5,2.2,2552.0,91.0,0.0,4.0,0.0,3.330,...,9.51,2.150,1.720,1.470,2552.0,0.0,1.40,0.0,0.0,75.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Oil, principal uses icings and fillings, palm, soy (partially hydrogenated ), industrial",100.0,884.0,100.0,28.0,0.0,0.0,0.2,0.0,0.0,0.000,...,100.00,28.421,59.715,7.095,0.0,0.0,0.00,0.0,0.0,0.00
"Oil, pourable clear fry, soy (partially hydrogenated ) and soy (winterized), industrial",100.0,884.0,100.0,15.0,0.0,0.0,0.2,0.0,0.0,0.000,...,100.00,15.341,34.630,45.228,0.0,0.0,0.00,0.0,0.0,0.00
"Beverages, fortified, Revive Fruit Punch, Glaceau Vitamin Water, The COCA-COLA company",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,3.384,...,0.00,0.000,0.000,0.000,0.0,0.0,0.15,0.0,0.0,99.17
"Oil, principal use as a tortilla shortening, soy (partially hydrogenated) and cottonseed, industrial",100.0,884.0,100.0,26.0,0.0,0.0,0.2,0.0,0.0,0.000,...,100.00,25.883,59.133,10.305,0.0,0.0,0.00,0.0,0.0,0.00


In [295]:
nutrition.sort_values(by=['cholesterol_mg', 'sodium_mg'], ascending=[False, True])

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Beef, simmered, cooked, brain, variety meats and by-products",100.0,151.0,11.0,2.4,3100.0,108.0,490.9,5.0,0.0,3.620,...,10.53,2.394,1.882,1.632,3100.0,0.0,1.46,0.0,0.0,74.86
"Veal, braised, cooked, brain, variety meats and by-products",100.0,136.0,9.6,2.2,3100.0,156.0,0.0,3.0,0.0,2.430,...,9.63,2.180,1.740,1.490,3100.0,0.0,1.40,0.0,0.0,76.89
"Beef, raw, brain, variety meats and by-products",100.0,143.0,10.0,2.3,3010.0,126.0,0.0,3.0,0.0,3.550,...,10.30,2.300,1.890,1.586,3010.0,0.0,1.51,0.0,0.0,76.29
"Lamb, soaked and fried, cooked, brains, imported, New Zealand",100.0,154.0,11.0,1.4,2559.0,101.0,0.0,0.0,0.0,2.995,...,10.92,1.365,4.168,0.999,2559.0,0.0,3.39,0.0,0.0,73.11
"Pork, braised, cooked, brain, variety meats and by-products, fresh",100.0,138.0,9.5,2.2,2552.0,91.0,0.0,4.0,0.0,3.330,...,9.51,2.150,1.720,1.470,2552.0,0.0,1.40,0.0,0.0,75.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Leavening agents, sodium aluminum sulfate, double-acting, baking powder",100.0,53.0,0.0,,0.0,10600.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,67.30,0.0,0.0,5.00
"Seasoning mix, coriander & annatto, sazon, dry",100.0,0.0,0.0,,0.0,17000.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,99.80,0.0,0.0,0.20
"Desserts, unsweetened, tablets, rennin",100.0,84.0,0.1,,0.0,26050.0,0.0,0.0,0.0,0.000,...,0.10,0.041,0.038,0.007,0.0,0.0,72.50,0.0,0.0,6.50
"Leavening agents, baking soda",100.0,0.0,0.0,,0.0,27360.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,36.90,0.0,0.0,0.20


In [296]:
# brain composition detour

In [297]:
nutrition.loc['Beef, simmered, cooked, brain, variety meats and by-products']

serving_size_g      100.00
calories            151.00
total_fat_g          11.00
saturated_fat_g       2.40
cholesterol_mg     3100.00
                    ...   
alcohol_g             0.00
ash_g                 1.46
caffeine_mg           0.00
theobromine_mg        0.00
water_g              74.86
Name: Beef, simmered, cooked, brain, variety meats and by-products, Length: 75, dtype: float64

In [298]:
nutrition.loc['Beef, simmered, cooked, brain, variety meats and by-products'].filter(like='_g')

serving_size_g                   100.000
total_fat_g                       11.000
saturated_fat_g                    2.400
protein_g                         11.670
alanine_g                          0.000
arginine_g                         0.000
aspartic_acid_g                    0.000
cystine_g                          0.000
glutamic_acid_g                    0.000
glycine_g                          0.000
histidine_g                        0.000
isoleucine_g                       0.000
leucine_g                          0.000
lysine_g                           0.000
methionine_g                       0.000
phenylalanine_g                    0.000
proline_g                          0.000
serine_g                           0.000
threonine_g                        0.000
tryptophan_g                       0.000
tyrosine_g                         0.000
valine_g                           0.000
carbohydrate_g                     1.480
fiber_g                            0.000
sugars_g        

In [299]:
# For the simmered beef brain entry, keep only the labels containing '_g'
# and list them from the largest value to the smallest.
(
    nutrition
    .loc['Beef, simmered, cooked, brain, variety meats and by-products']
    .filter(like='_g')
    .sort_values(ascending=False)
)

serving_size_g                   100.000
water_g                           74.860
protein_g                         11.670
total_fat_g                       11.000
fat_g                             10.530
saturated_fat_g                    2.400
saturated_fatty_acids_g            2.394
monounsaturated_fatty_acids_g      1.882
polyunsaturated_fatty_acids_g      1.632
carbohydrate_g                     1.480
ash_g                              1.460
threonine_g                        0.000
alcohol_g                          0.000
sugars_g                           0.000
fiber_g                            0.000
valine_g                           0.000
tyrosine_g                         0.000
tryptophan_g                       0.000
serine_g                           0.000
phenylalanine_g                    0.000
methionine_g                       0.000
lysine_g                           0.000
leucine_g                          0.000
isoleucine_g                       0.000
histidine_g     

# 28. Using Series between() With DataFrames

In [300]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [301]:
nutrition.calories.head(10)

name
Cornstarch            381.0
Nuts, pecans          691.0
Eggplant, raw          25.0
Teff, uncooked        367.0
Sherbet, orange       144.0
Cauliflower, raw       25.0
Taro leaves, raw       42.0
Lamb, raw, ground     282.0
Cheese, camembert     300.0
Vegetarian fillets    290.0
Name: calories, dtype: float64

In [302]:
nutrition.calories.between(20, 60)

name
Cornstarch                                                                                            False
Nuts, pecans                                                                                          False
Eggplant, raw                                                                                          True
Teff, uncooked                                                                                        False
Sherbet, orange                                                                                       False
                                                                                                      ...  
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    False
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Beef, raw, all grades, 

In [303]:
nutrition.calories.between(20, 60).shape

(8789,)

In [304]:
nutrition.calories.shape

(8789,)

In [305]:
nutrition.shape

(8789, 75)

In [306]:
# Use the boolean Series returned by between() as a row mask on the full frame.
nutrition.loc[nutrition['calories'].between(20, 60)]

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.30
"Cauliflower, raw",100.0,25.0,0.3,0.1,0.0,30.0,44.3,57.0,0.0,0.507,...,0.28,0.130,0.034,0.031,0.0,0.0,0.76,0.0,0.0,92.07
"Taro leaves, raw",100.0,42.0,0.7,0.2,0.0,3.0,12.8,126.0,0.0,1.513,...,0.74,0.151,0.060,0.307,0.0,0.0,1.92,0.0,0.0,85.66
"PACE, Picante Sauce",100.0,25.0,0.0,,0.0,781.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,3.85,0.0,0.0,89.90
"Mango nectar, canned",100.0,51.0,0.1,,0.0,5.0,1.5,7.0,0.0,0.080,...,0.06,0.014,0.022,0.011,0.0,0.0,0.08,0.0,0.0,86.63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, added calcium, high vitamin C, greater than 3% fruit juice, reduced sugar, Fruit flavored drink",100.0,29.0,0.4,,0.0,25.0,0.1,2.0,0.0,0.000,...,0.37,0.000,0.000,0.000,0.0,0.0,0.06,0.0,0.0,93.00
"Ruby Red grapefruit juice blend (grapefruit, grape, apple), with added vitamin C, bottled, OCEAN SPRAY",100.0,44.0,0.1,,0.0,8.0,0.0,0.0,0.0,0.000,...,0.10,0.008,0.008,0.014,0.0,0.0,0.27,0.0,0.0,88.60
"Beverages, prepared with water, frozen concentrate, with juice and pulp, breakfast type, Orange drink",100.0,45.0,0.0,,0.0,10.0,0.0,0.0,0.0,0.253,...,0.00,0.001,0.001,0.001,0.0,0.0,0.48,0.0,0.0,88.08
"Apple juice, diluted with 3 volume water without added ascorbic acid, unsweetened, frozen concentrate",100.0,47.0,0.1,,0.0,7.0,1.8,0.0,0.0,0.038,...,0.10,0.018,0.002,0.031,0.0,0.0,0.32,0.0,0.0,87.90


In [307]:
# Draw 4 of the rows whose calories fall between 20 and 60.
# random_state pins the draw so the displayed rows are the same after a
# kernel restart; change or drop it to see a different sample.
nutrition[nutrition.calories.between(20, 60)].sample(4, random_state=42)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Babyfood, strained, peas, vegetables",100.0,50.0,0.4,0.1,0.0,5.0,32.2,28.0,0.0,1.119,...,0.43,0.07,0.05,0.2,0.0,0.0,0.42,0.0,0.0,87.52
"Sweet potato leaves, with salt, steamed, cooked",100.0,35.0,0.3,0.1,0.0,249.0,21.0,49.0,0.0,1.003,...,0.34,0.065,0.012,0.134,0.0,0.0,0.89,0.0,0.0,89.2
"Beverages, assorted fruit flavors, sweetened, bottles, Water with added vitamins and minerals",100.0,22.0,0.0,,0.0,0.0,0.0,8.0,8.0,0.844,...,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,94.44
"Kohlrabi, raw",100.0,27.0,0.1,,0.0,20.0,12.3,16.0,0.0,0.4,...,0.1,0.013,0.007,0.048,0.0,0.0,1.0,0.0,0.0,91.0


# 29. BONUS - Min, Max and Idx[MinMax], And Good Foods

In [308]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [309]:
nutrition.min()

serving_size_g     100.0
calories             0.0
total_fat_g          0.0
saturated_fat_g      0.1
cholesterol_mg       0.0
                   ...  
alcohol_g            0.0
ash_g                0.0
caffeine_mg          0.0
theobromine_mg       0.0
water_g              0.0
Length: 75, dtype: float64

In [310]:
nutrition.min(axis=1)

name
Cornstarch                                                                                            0.0
Nuts, pecans                                                                                          0.0
Eggplant, raw                                                                                         0.0
Teff, uncooked                                                                                        0.0
Sherbet, orange                                                                                       0.0
                                                                                                     ... 
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    0.0
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    0.0
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    0.0
Beef, raw, all grades, trimmed to 0" fat,

In [311]:
nutrition.max(axis=1)

name
Cornstarch                                                                                            381.0
Nuts, pecans                                                                                          691.0
Eggplant, raw                                                                                         229.0
Teff, uncooked                                                                                        429.0
Sherbet, orange                                                                                       144.0
                                                                                                      ...  
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    311.0
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    246.0
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    277.0
Beef, raw, all grades, 

In [312]:
# what food has the most potassium?

In [313]:
nutrition.potassium_mg.max()

16500.0

In [314]:
nutrition.potassium_mg.idxmax()

'Leavening agents, cream of tartar'

In [315]:
nutrition.potassium_mg.sort_values(ascending=False).head(10)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Beverages, powder, unsweetened, instant, tea               6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [316]:
# targeting foods with a potassium-to-sodium ratio of about 16 (potassium_mg / sodium_mg)

In [317]:
# Potassium-to-sodium ratio per food, highest first.
# Zeros in either column are swapped for 1 before dividing, so a zero sodium
# value never ends up as the divisor.
K_to_NA = (
    nutrition['potassium_mg'].replace(0, 1)
    .div(nutrition['sodium_mg'].replace(0, 1))
    .sort_values(ascending=False)
)

In [318]:
K_to_NA.head(10)

name
Peanut flour, low fat                                           1358.0
Nuts, raw, pistachio nuts                                       1025.0
Beverages, reduced calorie, with whitener, instant, coffee       909.0
Soybeans, raw, mature seeds                                      898.5
Soy meal, raw, defatted                                          830.0
Babyfood, dry, with bananas, rice, cereal                        769.0
Nuts, without salt added, dry roasted, hazelnuts or filberts     755.0
Soy protein concentrate, produced by alcohol extraction          734.0
Nuts, almonds                                                    733.0
Nuts, full fat, acorn flour                                      712.0
dtype: float64

In [319]:
# Ten foods drawn from those whose ratio lies between 14 and 18.
# random_state pins the draw so re-running the notebook shows the same rows.
K_to_NA[K_to_NA.between(14, 18)].sample(10, random_state=42)

name
Fish, raw, wild, rainbow, trout                                                                                     15.516129
Nuts, toasted, dried (desiccated), coconut meat                                                                     14.972973
Beans, without salt, drained, boiled, cooked, frozen, yellow, snap                                                  14.000000
Cereals, without salt, cooked with water (includes boiling and microwaving), unenriched, regular and quick, oats    17.500000
Spices, dried, dill weed                                                                                            15.903846
Onions, raw, spring or scallions (includes tops and bulb)                                                           17.250000
Spices, white, pepper                                                                                               14.600000
Apricots, solids and liquids, without skin, water pack, canned                                                   

# 30. DataFrame nlargest() And nsmallest()

In [320]:
nutrition.potassium_mg.sort_values(ascending=False).head(10)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Beverages, powder, unsweetened, instant, tea               6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [321]:
nutrition.nlargest(10, columns='potassium_mg')

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Leavening agents, cream of tartar",100.0,258.0,0.0,,0.0,52.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,36.8,0.0,0.0,1.7
"Leavening agents, low-sodium, baking powder",100.0,97.0,0.4,0.1,0.0,90.0,0.0,0.0,0.0,0.0,...,0.4,0.073,0.006,0.121,0.0,0.0,46.4,0.0,0.0,6.2
"Parsley, freeze-dried",100.0,271.0,5.2,,0.0,391.0,0.0,194.0,0.0,10.4,...,5.2,0.0,0.0,0.0,0.0,0.0,19.12,0.0,0.0,2.0
"Beverages, powder, unsweetened, instant, tea",100.0,315.0,0.0,,0.0,72.0,118.3,103.0,0.0,10.8,...,0.0,0.0,0.0,0.0,0.0,0.0,16.04,5714.0,71.0,5.09
"Beverages, unsweetened, decaffeinated, instant, tea",100.0,315.0,0.0,,0.0,72.0,118.3,103.0,0.0,10.8,...,0.0,0.0,0.0,0.0,0.0,0.0,16.04,169.0,11.0,5.09
"Spices, dried, chervil",100.0,237.0,3.9,0.2,0.0,83.0,0.0,274.0,0.0,5.4,...,3.9,0.169,1.399,1.8,0.0,0.0,16.6,0.0,0.0,7.2
"Spices, dried, coriander leaf",100.0,279.0,4.8,0.1,0.0,211.0,97.1,274.0,0.0,10.707,...,4.78,0.115,2.232,0.328,0.0,0.0,14.08,0.0,0.0,7.3
"Celery flakes, dried",100.0,319.0,2.1,0.6,0.0,1435.0,122.3,107.0,0.0,4.64,...,2.1,0.555,0.405,1.035,0.0,0.0,13.9,0.0,0.0,9.0
"Beverages, powder, regular, instant, coffee",100.0,353.0,0.5,0.2,0.0,37.0,101.9,0.0,0.0,28.173,...,0.5,0.197,0.041,0.196,0.0,0.0,8.8,3142.0,0.0,3.1
"Beverages, half the caffeine, regular, instant, coffee",100.0,352.0,0.5,0.2,0.0,37.0,101.9,0.0,0.0,28.173,...,0.5,0.197,0.041,0.196,0.0,0.0,8.8,1571.0,0.0,3.1


In [322]:
nutrition.nlargest(10, columns='potassium_mg').potassium_mg

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, powder, unsweetened, instant, tea               6040.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [323]:
nutrition.potassium_mg.nlargest(10)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, powder, unsweetened, instant, tea               6040.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium_mg, dtype: float64

In [324]:
nutrition.nsmallest(10, 'sodium_mg')

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"PACE, Green Taco Sauce",100.0,25.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.05,0.0,0.0,90.7
"Oil, soybean lecithin",100.0,763.0,100.0,15.0,0.0,0.0,350.0,0.0,0.0,0.0,...,100.0,15.005,10.977,45.318,0.0,0.0,0.0,0.0,0.0,0.0
"Syrup, fruit flavored",100.0,261.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,34.82
"Oranges, Florida, raw",100.0,46.0,0.2,,0.0,0.0,8.4,17.0,0.0,0.4,...,0.21,0.025,0.039,0.042,0.0,0.0,0.41,0.0,0.0,87.14
"Cherries, raw, sweet",100.0,63.0,0.2,,0.0,0.0,6.1,4.0,0.0,0.154,...,0.2,0.038,0.047,0.052,0.0,0.0,0.48,0.0,0.0,82.25
"Peaches, raw, yellow",100.0,39.0,0.3,,0.0,0.0,6.1,4.0,0.0,0.806,...,0.25,0.019,0.067,0.086,0.0,0.0,0.43,0.0,0.0,88.87
"Oil, corn and canola",100.0,884.0,100.0,8.0,0.0,0.0,0.2,0.0,0.0,0.0,...,100.0,8.026,58.537,29.113,0.0,0.0,0.0,0.0,0.0,0.0
"Game meat, raw, bear",100.0,161.0,8.3,,0.0,0.0,0.0,0.0,0.0,3.2,...,8.3,0.0,0.0,0.0,0.0,0.0,0.7,0.0,0.0,71.2
"Oil, ucuhuba butter",100.0,884.0,100.0,85.0,0.0,0.0,0.0,0.0,0.0,0.0,...,100.0,85.2,6.7,2.9,0.0,0.0,0.0,0.0,0.0,0.0


In [325]:
# Ten rows with the smallest sodium_mg; the columns are passed in priority
# order (sodium_mg, then calories, then folate_mcg).
nutrition.nsmallest(n=10, columns=['sodium_mg', 'calories', 'folate_mcg'])

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Beverages, well, tap, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,99.9
"Water, NAYA, non-carbonated, bottled",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,100.0
"Beverages, decaffeinated, brewed, green, tea",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.93
"Beverages, EVIAN, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,99.97
"Beverages, CALISTOGA, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,100.0
"Beverages, DANNON, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.98
"Sweetener, herbal extract powder from Stevia leaf",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Beverages, CRYSTAL GEYSER, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,100.0
"Babyfood, without added fluoride., GERBER, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.9
"Beverages, AQUAFINA, PEPSI, non-carbonated, bottled, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.97


# 31. Skill Challenge

1. Find the 10 foods that have the most Vitamin B12. What do they have in common?

In [326]:
nutrition.loc[:, 'vitamin_b12_mcg']

name
Cornstarch                                                                                            0.00
Nuts, pecans                                                                                          0.00
Eggplant, raw                                                                                         0.00
Teff, uncooked                                                                                        0.00
Sherbet, orange                                                                                       0.13
                                                                                                      ... 
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    1.64
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    2.95
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    2.42
Beef, raw, all grades, trimmed t

In [327]:
nutrition.loc[:, 'vitamin_b12_mcg'].nlargest(10)

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

In [328]:
nutrition.vitamin_b12_mcg.nlargest(10)

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

In [329]:
nutrition.nlargest(n=10, columns='vitamin_b12_mcg').vitamin_b12_mcg

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

In [330]:
# Same top-10 ranking via a full descending sort of the frame, then the
# first ten values of the B12 column.
nutrition.sort_values('vitamin_b12_mcg', ascending=False)['vitamin_b12_mcg'].head(10)

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12_mcg, dtype: float64

2. Isolate the foods in the dataset that contain, or are based on, eggplant. Which of them has the most sodium?

In [331]:
nutrition.filter(regex='(?i)eggplant', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Eggplant, pickled",100.0,49.0,0.7,0.1,0.0,1674.0,11.9,20.0,0.0,0.66,...,0.7,0.14,0.063,0.294,0.0,0.0,1.73,0.0,0.0,86.9
"Eggplant, with salt, drained, boiled, cooked",100.0,33.0,0.2,,0.0,239.0,9.4,14.0,0.0,0.6,...,0.23,0.044,0.02,0.093,0.0,0.0,1.13,0.0,0.0,89.67
"Eggplant, without salt, drained, boiled, cooked",100.0,35.0,0.2,,0.0,1.0,9.4,14.0,0.0,0.6,...,0.23,0.044,0.02,0.093,0.0,0.0,0.54,0.0,0.0,89.67


In [332]:
# Case-insensitive match on the row labels, then the single largest sodium value.
nutrition.filter(regex='(?i)eggplant', axis='index')['sodium_mg'].nlargest(1)

name
Eggplant, pickled    1674.0
Name: sodium_mg, dtype: float64

3. Select a slice of the dataframe that contains 4 random rows and 2 random columns.

In [333]:
# First sample 4 rows, then sample 2 columns from that 4-row frame.
nutrition.sample(n=4).sample(n=2, axis='columns')

Unnamed: 0_level_0,vitamin_e_mg,lucopene
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Soup, vegetable broth, SWANSON",0.0,0.0
"Rice and vermicelli mix, prepared with 80% margarine, chicken flavor",0.27,0.0
"Beef, raw, 80% lean meat / 20% fat, ground",0.17,0.0
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, chuck eye steak",0.15,0.0


# 32. Another Skill Challenge

1. Remove all the food items that contain at least one NaN. Do this in a way that modifies the dataframe, i.e. the changes stick. How many food items remain after the exclusions?

In [334]:
# (rows, columns) of the dataframe before any rows are removed.
nutrition.shape

(8789, 75)

In [336]:
# Drop every row (food item) that has at least one missing value.
# inplace=True is deliberate: the exercise asks for the change to stick,
# so `nutrition` itself is mutated and every later cell sees the reduced frame.
nutrition.dropna(how='any', axis='index', inplace=True)

In [337]:
# (rows, columns) after the in-place dropna; the row count is the answer
# to "how many food items remain".
nutrition.shape

(7199, 75)

2. From the remaining records, isolate those that have between 20 and 40 mg of Vitamin C per 100 g serving. Of these foods, which one is the least caloric, i.e. has the minimum calories?

In [338]:
# Rows whose vitamin C content lies in [20, 40] mg
# (Series.between includes both endpoints by default).
nutrition.loc[nutrition['vitamin_c_mg'].between(20, 40)]

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Broccoli, raw, chinese",100.0,30.0,0.8,0.1,0.0,7.0,26.5,104.0,0.0,0.459,...,0.76,0.116,0.053,0.347,0.0,0.0,0.83,0.0,0.0,92.55
"Broccoli raab, cooked",100.0,33.0,0.5,0.1,0.0,56.0,33.6,71.0,0.0,2.015,...,0.52,0.057,0.030,0.150,0.0,0.0,1.11,0.0,0.0,91.41
"Horseradish, prepared",100.0,48.0,0.7,0.1,0.0,420.0,6.5,57.0,0.0,0.386,...,0.69,0.090,0.130,0.339,0.0,0.0,1.76,0.0,0.0,85.08
"Spices, white, pepper",100.0,296.0,2.1,0.6,0.0,5.0,0.0,10.0,0.0,0.212,...,2.12,0.626,0.789,0.616,0.0,0.0,1.59,0.0,0.0,11.42
"Dandelion greens, raw",100.0,45.0,0.7,0.2,0.0,76.0,35.3,27.0,0.0,0.806,...,0.70,0.170,0.014,0.306,0.0,0.0,1.80,0.0,0.0,85.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, fortified, ready to drink, milk and soy based, chocolate drink",100.0,101.0,1.7,0.4,4.0,63.0,1.6,42.0,42.0,1.688,...,1.69,0.422,1.154,0.057,4.0,0.0,0.97,0.0,13.0,75.82
"Cereals ready-to-eat, Peanut Butter, Multi Grain CHEERIOS, GENERAL MILLS",100.0,390.0,6.2,0.9,0.0,447.0,0.0,714.0,0.0,17.890,...,6.19,0.890,2.700,2.290,0.0,0.0,3.09,0.0,0.0,2.00
"Infant formula, with ARA and DHA, ready-to-feed, ADVANCE, NATURAL CARE, SIMILAC, ABBOTT NUTRITION",100.0,78.0,4.2,2.5,2.0,34.0,6.0,29.0,29.0,3.902,...,4.24,2.544,0.402,0.842,2.0,0.0,0.75,0.0,0.0,85.07
"Cereals ready-to-eat, KELLOGG'S SMART START Strong Heart Antioxidants Cereal, KELLOGG",100.0,371.0,1.5,0.3,0.0,398.0,14.9,800.0,781.0,40.000,...,1.50,0.300,0.100,0.500,0.0,0.0,1.70,0.0,0.0,2.50


In [339]:
# Of the foods with 20-40 mg of vitamin C, return the one with the fewest calories.
(
    nutrition
    .loc[nutrition['vitamin_c_mg'].between(20, 40)]
    .calories
    .nsmallest(n=1)
)

name
Asparagus, with salt, drained, boiled, cooked, frozen    18.0
Name: calories, dtype: float64

3. How many food items in the dataframe have Vitamin C levels of between 2 and 3 standard deviations (inclusive) above the mean?

In [340]:
# Mean vitamin C content across the remaining (NaN-free) food items.
m = nutrition['vitamin_c_mg'].mean()

In [341]:
# Display the mean vitamin C value stored in `m`.
m

5.553368523406037

In [342]:
# Lower bound of the band: mean plus two standard deviations.
mp2sd = m + 2 * nutrition['vitamin_c_mg'].std()

In [343]:
# Upper bound of the band: mean plus three standard deviations.
mp3sd = m + 3 * nutrition['vitamin_c_mg'].std()

In [344]:
# Show the mean and the two band edges (mean + 2 SD, mean + 3 SD) side by side.
print(m, mp2sd, mp3sd)

5.553368523406037 97.7621389681903 143.86652419058245


In [345]:
# Count the foods whose vitamin C lies between mean + 2 SD and mean + 3 SD
# (both ends inclusive); the first element of the shape is the answer.
nutrition.loc[nutrition['vitamin_c_mg'].between(mp2sd, mp3sd)].shape

(17, 75)

In [348]:
# Keep the foods in the 2-3 SD band so the result can be inspected below.
result_set = nutrition.loc[nutrition['vitamin_c_mg'].between(mp2sd, mp3sd)]

In [349]:
# Summary statistics for every numeric column of the selected foods.
result_set.describe()

Unnamed: 0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,fat_g,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g
count,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,...,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0
mean,100.0,254.588235,4.129412,0.964706,3.411765,267.588235,32.994118,139.882353,72.058824,7.618235,...,4.112941,0.946882,1.768353,0.954471,3.411765,0.0,3.882941,3.705882,41.588235,31.429412
std,0.0,140.672074,4.693581,1.134648,10.718319,215.914062,66.390535,150.379887,151.563712,10.817866,...,4.663148,1.136581,2.854251,1.288771,10.718319,0.0,3.49288,12.726767,147.915119,36.400353
min,100.0,29.0,0.4,0.1,0.0,3.0,0.0,0.0,0.0,0.0,...,0.37,0.062,0.029,0.093,0.0,0.0,0.53,0.0,0.0,2.0
25%,100.0,146.0,0.8,0.1,0.0,56.0,0.0,23.0,0.0,1.0,...,0.79,0.092,0.063,0.124,0.0,0.0,1.0,0.0,0.0,5.89
50%,100.0,302.0,3.0,0.7,0.0,317.0,7.5,120.0,0.0,3.397,...,3.0,0.672,0.704,0.338,0.0,0.0,2.84,0.0,0.0,10.2
75%,100.0,371.0,5.1,1.4,0.0,429.0,24.5,180.0,0.0,9.943,...,5.1,1.36,1.724,1.07,0.0,0.0,5.53,0.0,0.0,62.0
max,100.0,422.0,14.0,4.5,44.0,717.0,266.9,500.0,500.0,38.442,...,14.08,4.499,9.11,4.804,44.0,0.0,11.36,52.0,608.0,91.69


In [350]:
# Sanity check: min and max of vitamin C should fall within [mp2sd, mp3sd].
result_set['vitamin_c_mg'].describe()

count     17.000000
mean     119.617647
std        9.190772
min      101.800000
25%      118.000000
50%      120.000000
75%      120.000000
max      138.000000
Name: vitamin_c_mg, dtype: float64

In [351]:
# Number of foods in the selection, read from the length of the vitamin C column.
result_set['vitamin_c_mg'].shape

(17,)