In [2]:
import numpy as np
import pandas as pd

In [3]:
# Series are 1D, but DataFrames are 2D

In [4]:
# Each column in a DataFrame is a series

In [5]:
# Unlike Series, DataFrames can be heterogeneous (each column may have its own dtype)

## Creating DataFrames

In [6]:
names = ['Olga','Andrew','Brian','Telulah','Nicole','Tilda']

In [7]:
ages = [29, 21, 43, 23, 39, 46]

In [8]:
married = [False, True, True, True, False, True]

In [9]:
df = pd.DataFrame({'name': names, 'age': ages, 'married': married})

## info() method

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [11]:
# verbose = False will not show column level details
df.info(verbose = False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Columns: 3 entries, name to married
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [12]:
# memory_usage = False will not show memory usage
df.info(memory_usage = False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)

In [13]:
df.info(memory_usage = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 230.0+ bytes


In [14]:
df.info(memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     6 non-null      object
 1   age      6 non-null      int64 
 2   married  6 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 557.0 bytes


In [15]:
df.describe()

Unnamed: 0,age
count,6.0
mean,33.5
std,10.616026
min,21.0
25%,24.5
50%,34.0
75%,42.0
max,46.0


## Reading nutrition data

In [16]:
dataurl = 'https://andybek.com/pandas-nutrition'

In [17]:
nutrition = pd.read_csv(dataurl)

In [18]:
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [19]:
nutrition.info(verbose = False, memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Columns: 77 entries, Unnamed: 0 to water
dtypes: int64(3), object(74)
memory usage: 39.2 MB


In [20]:
# Approach 1: drop the redundant 'Unnamed: 0' column.
# The result is only displayed, not assigned, so `nutrition` itself is left unchanged.
nutrition.drop('Unnamed: 0', axis=1)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [21]:
# Approach 2: promote the 'Unnamed: 0' column to the row index.
# The result is only displayed, not assigned, so `nutrition` itself is left unchanged.
nutrition.set_index('Unnamed: 0')

Unnamed: 0_level_0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [22]:
# Approach 3: re-read the CSV and use its first column as the index (index_col=[0])


In [23]:
nutrition = pd.read_csv(dataurl, index_col=[0])

In [24]:
nutrition.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


## sample() method

In [25]:
# Returns random sample from the dataset
nutrition.sample()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
3905,"Frozen novelties, CREAMSICLE Pops, Sugar Free",100 g,49,2.3g,2g,0,6.00 mg,0,0,0,...,2.33 g,2.000 g,0,0,0.00 mg,0,0.44 g,0,0,82.38 g


In [26]:
# Three samples
nutrition.sample(n=3)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
1890,"Vitasoy USA Azumaya, Firm Tofu",100 g,80,4.2g,0.5g,0,0,0,0,0,...,4.20 g,0.500 g,1.200 g,2.500 g,0.00 mg,0,1.20 g,0,0,84.00 g
4992,"CAMPBELL'S Red and White, condensed, Golden Mu...",100 g,65,2.8g,0.8g,0,524.00 mg,0,0,0,...,2.82 g,0.806 g,0.403 g,1.210 g,0.00 mg,0,0.72 g,0,0,87.20 g
2607,"Swamp cabbage, raw, (skunk cabbage)",100 g,19,0.2g,,0,113.00 mg,0,57.00 mcg,0.00 mcg,...,0.20 g,0,0,0,0.00 mg,0,1.60 g,0,0,92.47 g


In [27]:
# Getting a 0.1% sample of the dataframe
nutrition.sample(frac=0.001)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
7989,"Lamb, roasted, cooked, choice, trimmed to 1/4""...",100 g,202,9.8g,3.7g,87mg,66.00 mg,0,25.00 mcg,0.00 mcg,...,9.76 g,3.720 g,3.950 g,0.860 g,87.00 mg,0,1.35 g,0,0,62.76 g
3502,"Crackers, LA MODERNA RIKIS CREAM CRACKERS, cream",100 g,464,20g,2.6g,0,752.00 mg,0,0,0,...,19.50 g,2.634 g,14.468 g,1.265 g,0,0,3.95 g,0,0,4.48 g
7432,"Beans, with salt, drained, boiled, cooked, spr...",100 g,20,0.3g,,0,287.00 mg,0,29.00 mcg,0.00 mcg,...,0.32 g,0.039 g,0.024 g,0.185 g,0.00 mg,0,0.93 g,0,0,93.39 g
6063,"Veal, braised, cooked, separable lean only, le...",100 g,203,5.1g,1.9g,135mg,67.00 mg,0,18.00 mcg,0.00 mcg,...,5.09 g,1.920 g,1.840 g,0.410 g,135.00 mg,0,1.97 g,0,0,56.18 g
8555,"Beef, grilled, cooked, choice, trimmed to 1/8""...",100 g,217,12g,4.6g,80mg,60.00 mg,57.7 mg,7.00 mcg,0.00 mcg,...,11.97 g,4.645 g,5.433 g,0.554 g,80.00 mg,0.0 g,1.01 g,0.00 mg,0.00 mg,60.57 g
5296,"Snacks, COMBOS Snacks Cheddar Cheese Pretzel, ...",100 g,463,17g,9.7g,5mg,1117.00 mg,21.0 mg,8.00 mcg,0.00 mcg,...,16.92 g,9.656 g,3.806 g,1.354 g,5.00 mg,0.0 g,5.01 g,0.00 mg,0.00 mg,1.67 g
3604,"Turkey, raw, meat only, drumstick, retail parts",100 g,118,4g,1.1g,79mg,87.00 mg,75.6 mg,6.00 mcg,0,...,3.97 g,1.073 g,1.163 g,1.072 g,79.00 mg,0.0 g,0.91 g,0.00 mg,0.00 mg,75.67 g
8158,"Beef, braised, cooked, separable lean only, br...",100 g,265,16g,5.5g,79mg,39.00 mg,0,0,0,...,16.37 g,5.527 g,6.997 g,0.509 g,79.00 mg,0.0 g,0.93 g,0.00 mg,0.00 mg,54.88 g
2335,"Fish, smoked, mixed species, whitefish",100 g,108,0.9g,0.2g,33mg,1019.00 mg,95.0 mg,7.00 mcg,0.00 mcg,...,0.93 g,0.228 g,0.278 g,0.293 g,33.00 mg,0.0 g,5.06 g,0.00 mg,0.00 mg,70.83 g


In [28]:
nutrition.shape

(8789, 76)

In [29]:
nutrition.axes

[Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
             ...
             8779, 8780, 8781, 8782, 8783, 8784, 8785, 8786, 8787, 8788],
            dtype='int64', length=8789),
 Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
        'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
        'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
        'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
        'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
        'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
        'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
        'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
        'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
        'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
        'methionine', 'phenylalanine', 'proline', 'ser

In [30]:
nutrition.axes[0][3]

3

In [31]:
nutrition.axes[1][3]

'total_fat'

In [32]:
nutrition.index

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            8779, 8780, 8781, 8782, 8783, 8784, 8785, 8786, 8787, 8788],
           dtype='int64', length=8789)

In [33]:
# Replace the row index with a fresh 0..n-1 RangeIndex. The stop value is taken
# from the frame's own length instead of a hard-coded row count, so the cell
# keeps working if the dataset gains or loses rows.
nutrition.index = pd.RangeIndex(start=0, stop=len(nutrition), step=1)

In [34]:
type(nutrition.index)

pandas.core.indexes.range.RangeIndex

In [35]:
# From here on, rows are labelled by food name instead of by position.
nutrition = nutrition.set_index('name')

In [36]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [37]:
# verify_integrity is used to maintain uniqueness of index
nutrition.set_index('calories', verify_integrity=True)

ValueError: Index has duplicate keys: Int64Index([ 25, 316, 424, 282, 322, 103, 147, 108,  28,  16,
            ...
            580, 524, 596, 547, 639, 583, 586, 880, 548, 714],
           dtype='int64', name='calories', length=600)

## Extracting data from DataFrame

In [38]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [39]:
# Extraction by labels - loc[]

In [40]:
nutrition.loc['Eggplant, raw']

serving_size       100 g
calories              25
total_fat           0.2g
saturated_fat        NaN
cholesterol            0
                  ...   
alcohol            0.0 g
ash               0.66 g
caffeine         0.00 mg
theobromine      0.00 mg
water            92.30 g
Name: Eggplant, raw, Length: 75, dtype: object

In [41]:
# Rank every food by its calories (lowest first) and store the result as a new
# 'rank' column. The column is added to `nutrition` itself, so it is still
# present in every later cell.
nutrition['rank'] = nutrition['calories'].rank()  # ascending=True is the default
# Display the foods ordered by that rank.
nutrition.sort_values('rank')

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Beverages, POLAND SPRING, bottled, water",100 g,0,0g,,0,1.00 mg,0,0.00 mcg,0.00 mcg,0.000 mg,...,0.000 g,0.000 g,0.000 g,0.00 mg,0,0.00 g,0,0,100.00 g,20.0
"Beverages, EVIAN, non-carbonated, bottled, water",100 g,0,0g,,0,0,0,0,0,0,...,0,0,0,0,0,0.05 g,0,0,99.97 g,20.0
"Beverages, cola, ZEVIA",100 g,0,0g,,0,6.00 mg,0.0 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.000 g,0.000 g,0.000 g,0.00 mg,0.0 g,0.00 g,13.00 mg,0.00 mg,98.36 g,20.0
"Beverages, unsweetened, ready to drink, green, tea",100 g,0,0g,,0,7.00 mg,0.0 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.000 g,0.000 g,0.000 g,0.00 mg,0.0 g,0.12 g,12.00 mg,0.00 mg,99.88 g,20.0
"Beverages,,Gerolsteiner naturally sparkling mineral water, GEROLSTEINER BRUNNEN GmbH & Co. KG",100 g,0,0g,,0,13.00 mg,0.0 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.000 g,0.000 g,0.000 g,0.00 mg,0.0 g,0.05 g,0.00 mg,0.00 mg,99.95 g,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Fat, beef tallow",100 g,902,100g,50g,109mg,0.00 mg,79.8 mg,0.00 mcg,0.00 mcg,0.000 mg,...,49.800 g,41.800 g,4.000 g,109.00 mg,0.0 g,0.00 g,0.00 mg,0.00 mg,0.00 g,8785.0
"Fish oil, sardine",100 g,902,100g,30g,710mg,0.00 mg,0,0.00 mcg,0.00 mcg,0.000 mg,...,29.892 g,33.841 g,31.867 g,710.00 mg,0,0.00 g,0,0,0.00 g,8785.0
"Fish oil, menhaden",100 g,902,100g,30g,521mg,0.00 mg,0,0.00 mcg,0.00 mcg,0.000 mg,...,30.427 g,26.694 g,34.197 g,521.00 mg,0,0.00 g,0,0,0.00 g,8785.0
"Fish oil, fully hydrogenated, menhaden",100 g,902,100g,96g,500mg,0.00 mg,0,0.00 mcg,0.00 mcg,0.000 mg,...,95.600 g,0.000 g,0.000 g,500.00 mg,0,0.00 g,0,0,0.00 g,8785.0


In [42]:
nutrition.loc['Eggplant, raw']['calories']

25

In [43]:
nutrition.loc['Eggplant, raw','calories']

25

In [44]:
nutrition.loc['Eggplant, raw':'Sherbet, orange', 'calories':'cholesterol']

Unnamed: 0_level_0,calories,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Eggplant, raw",25,0.2g,,0
"Teff, uncooked",367,2.4g,0.4g,0
"Sherbet, orange",144,2g,1.2g,1mg


In [45]:
nutrition.loc[
    ['Raspberries, raw'],
    ['protein', 'vitamin_b6']
]

Unnamed: 0_level_0,protein,vitamin_b6
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Raspberries, raw",1.20 g,0.055 mg


In [46]:
# Extraction by positions - iloc[]

In [47]:
nutrition.iloc[3] # 4th item

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
rank             7018.5
Name: Teff, uncooked, Length: 76, dtype: object

In [48]:
nutrition.iloc['Raspberries, raw']

TypeError: Cannot index by location index with a non-integer key

In [49]:
nutrition.iloc[3, :] # 3 indicates the 4th row, colon indicates that we want all the columns

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
rank             7018.5
Name: Teff, uncooked, Length: 76, dtype: object

In [50]:
nutrition.iloc[[4,6,9], :]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g,3436.0
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g,889.5
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g,6077.5


In [51]:
nutrition.iloc[[4,6,9], 2:5]

Unnamed: 0_level_0,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Sherbet, orange",2g,1.2g,1mg
"Taro leaves, raw",0.7g,0.2g,0
Vegetarian fillets,18g,2.8g,0


In [52]:
# boolean masks

In [53]:
# Select every other row and every other column using boolean masks.
# iloc requires each mask to be exactly as long as the axis it filters, so the
# lengths are derived from the frame's current shape rather than hard-coded.
nutrition.iloc[
    [i % 2 == 0 for i in range(nutrition.shape[0])],
    [i % 2 == 0 for i in range(nutrition.shape[1])]
]

## Single value access - .at and .iat

In [54]:
nutrition.loc['Nuts, pecans','calories']

691

In [55]:
nutrition.iloc[1,1]

691

In [56]:
# .at[] and .iat[] are faster for single value access

In [57]:
nutrition.at['Nuts, pecans','calories']

691

In [58]:
nutrition.iat[1,1]

691

## More cleanup

In [60]:
nutrition.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Columns: 76 entries, serving_size to rank
dtypes: float64(1), int64(2), object(73)
memory usage: 5.4+ MB


## astype() method

In [62]:
# Small demo frame: age is integer, weight is float, and height is deliberately
# stored as strings so that the dtype conversions below have something to do.
df = pd.DataFrame(
    dict(
        age=[12, 13, 14, 16],
        weight=[41.1, 34.5, 83.2, 90.1],
        height=['1.72', '1.74', '1.91', '1.54'],
    )
)

In [63]:
df.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Columns: 3 entries, age to height
dtypes: float64(1), int64(1), object(1)
memory usage: 224.0+ bytes


In [68]:
# Change everything to float
df = df.astype(float)

In [69]:
df.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Columns: 3 entries, age to height
dtypes: float64(3)
memory usage: 224.0 bytes


In [72]:
df = df.astype({'age': int})
df

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [74]:
type(df.iloc[0,0])

numpy.int64

## replace() method + regex

In [84]:
dfm = nutrition.iloc[:6, :1]

In [85]:
dfm.replace('100 g', 100)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [86]:
dfm

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100 g
"Nuts, pecans",100 g
"Eggplant, raw",100 g
"Teff, uncooked",100 g
"Sherbet, orange",100 g
"Cauliflower, raw",100 g


In [87]:
# regex = regular expression

In [89]:
# Remove the " g" suffix (one whitespace character followed by 'g') and convert
# what is left to int. The pattern is a raw string so that \s reaches the regex
# engine as written instead of being parsed as an invalid Python string escape.
dfm.replace(r'\sg', '', regex=True).astype(int)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


## Isolating the units

In [90]:
nutrition.sample(20, axis=1).head()

Unnamed: 0_level_0,serving_size,potassium,glucose,serine,ash,vitamin_a,vitamin_a_rae,protein,proline,lucopene,arginine,theobromine,cystine,magnesium,aspartic_acid,folate,lactose,cryptoxanthin_beta,choline,thiamin
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Cornstarch,100 g,3.00 mg,0,0.012 g,0.09 g,0.00 IU,0.00 mcg,0.26 g,0.024 g,0,0.012 g,0.00 mg,0.006 g,3.00 mg,0.020 g,0.00 mcg,0,0.00 mcg,0.4 mg,0.000 mg
"Nuts, pecans",100 g,410.00 mg,0.04 g,0.474 g,1.49 g,56.00 IU,3.00 mcg,9.17 g,0.363 g,0,1.177 g,0.00 mg,0.152 g,121.00 mg,0.929 g,22.00 mcg,0.00 g,9.00 mcg,40.5 mg,0.660 mg
"Eggplant, raw",100 g,229.00 mg,1.58 g,0.042 g,0.66 g,23.00 IU,1.00 mcg,0.98 g,0.043 g,0,0.057 g,0.00 mg,0.006 g,14.00 mg,0.164 g,22.00 mcg,0,0.00 mcg,6.9 mg,0.039 mg
"Teff, uncooked",100 g,427.00 mg,0.73 g,0.622 g,2.37 g,9.00 IU,0.00 mcg,13.30 g,0.664 g,0,0.517 g,0,0.236 g,184.00 mg,0.820 g,0,0.00 g,0.00 mcg,13.1 mg,0.390 mg
"Sherbet, orange",100 g,96.00 mg,0,0,0.40 g,46.00 IU,12.00 mcg,1.10 g,0,0,0,0.00 mg,0,8.00 mg,0,4.00 mcg,0,5.00 mcg,7.7 mg,0.027 mg


In [93]:
# Build a frame of units: turn every cell into a string, then delete every
# character that is not an ASCII letter, leaving only unit text such as 'g' or 'mg'.
# NOTE(review): astype(str) presumably turns missing values into the literal
# string 'nan', which consists of letters and would survive this filter — confirm.
units = nutrition.astype(str).replace('[^a-zA-Z]','',regex=True)

In [94]:
units.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,g,,g,,,mg,mg,mcg,mcg,mg,...,g,g,g,mg,g,g,mg,mg,g,
"Nuts, pecans",g,,g,g,,mg,mg,mcg,mcg,mg,...,g,g,g,mg,g,g,mg,mg,g,
"Eggplant, raw",g,,g,,,mg,mg,mcg,mcg,mg,...,g,g,g,mg,g,g,mg,mg,g,
"Teff, uncooked",g,,g,g,,mg,mg,,,mg,...,g,g,g,,,g,,,g,
"Sherbet, orange",g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,mg,g,g,mg,mg,g,


In [100]:
units.saturated_fat.mode()

0    g
Name: saturated_fat, dtype: object

In [101]:
units.mode()

Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,rank
0,g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,mg,g,g,mg,mg,g,


In [102]:
units = units.mode()

## rename() method

In [103]:
df

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [104]:
df.rename(index={0:'Pikachu'})

Unnamed: 0,age,weight,height
Pikachu,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [105]:
df.rename(index={0:'Pikachu', 1:'Abhishek'})

Unnamed: 0,age,weight,height
Pikachu,12,41.1,1.72
Abhishek,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [107]:
df.rename(columns={'weight':'Weight (kg)'})

Unnamed: 0,age,Weight (kg),height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


## dropna() method

In [108]:
# Excludes null values; on a DataFrame it drops the rows (or columns) that contain them

In [109]:
df

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,83.2,1.91
3,16,90.1,1.54


In [112]:
df.loc[2, 'weight'] = np.nan

In [113]:
df

Unnamed: 0,age,weight,height
0,12,41.1,1.72
1,13,34.5,1.74
2,14,,1.91
3,16,90.1,1.54


In [114]:
df.loc[1, :] = np.nan

In [115]:
df

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,,,
2,14.0,,1.91
3,16.0,90.1,1.54


In [116]:
df.dropna()

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [117]:
df.dropna(axis=1) #Drops the columns if even a single NaN present

0
1
2
3


In [119]:
df.dropna(axis=0) #Drops the rows if even a single NaN present

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [120]:
df.dropna(how='any', axis=0) # how sets the condition under which a row is dropped; 'any' (the default) drops it if at least one value is NaN

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [123]:
df.dropna(how='all', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
2,14.0,,1.91
3,16.0,90.1,1.54


In [124]:
df.dropna(how='all', axis=1)

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
1,,,
2,14.0,,1.91
3,16.0,90.1,1.54


In [125]:
# thresh parameter

In [126]:
df.dropna(thresh=3, axis=0) # Keep only rows that have at least three non-NaN values

Unnamed: 0,age,weight,height
0,12.0,41.1,1.72
3,16.0,90.1,1.54


In [128]:
df.dropna(thresh=3, axis=1) # Keep only columns that have at least three non-NaN values

Unnamed: 0,age,height
0,12.0,1.72
1,,
2,14.0,1.91
3,16.0,1.54


## Merging units with column names

In [131]:
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g,7266.5
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g,8640.0
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g,432.5
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g,7018.5
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g,3436.0


In [132]:
units

Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,rank
0,g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,mg,g,g,mg,mg,g,


In [137]:
# Show the unit stored in row 0 for every column of `units`
for column in units.columns:
    print(units.at[0, column])

g

g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg

mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g

g
g
g
g
g
g
g
g
g
g
g
g
g
g






g
g
g
g
mg
g
g
mg
mg
g



In [139]:
# Columns without a unit hold an empty string; turn those into NaN and drop them
units = units.replace({'': np.nan}).dropna(axis='columns')

In [140]:
# Print the row-0 unit of each column still present in `units`
for column in units.columns:
    print(units.at[0, column])

g
g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg
mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
mg
g
g
mg
mg
g


In [141]:
# Target: a dict that maps each column name to "<name>_<unit>", for example
# {
#     'serving_size': 'serving_size_g',
#     'total_fat': 'total_fat_g'
# }

In [146]:
# Pair each column of `units` with its unit (the value in row 0) as "<column>_<unit>"
mapper = {column: '_'.join((column, unit)) for column, unit in units.loc[0].items()}

In [147]:
mapper

{'serving_size': 'serving_size_g',
 'total_fat': 'total_fat_g',
 'saturated_fat': 'saturated_fat_g',
 'cholesterol': 'cholesterol_mg',
 'sodium': 'sodium_mg',
 'choline': 'choline_mg',
 'folate': 'folate_mcg',
 'folic_acid': 'folic_acid_mcg',
 'niacin': 'niacin_mg',
 'pantothenic_acid': 'pantothenic_acid_mg',
 'riboflavin': 'riboflavin_mg',
 'thiamin': 'thiamin_mg',
 'vitamin_a': 'vitamin_a_IU',
 'vitamin_a_rae': 'vitamin_a_rae_mcg',
 'carotene_alpha': 'carotene_alpha_mcg',
 'carotene_beta': 'carotene_beta_mcg',
 'cryptoxanthin_beta': 'cryptoxanthin_beta_mcg',
 'lutein_zeaxanthin': 'lutein_zeaxanthin_mcg',
 'vitamin_b12': 'vitamin_b12_mcg',
 'vitamin_b6': 'vitamin_b6_mg',
 'vitamin_c': 'vitamin_c_mg',
 'vitamin_d': 'vitamin_d_IU',
 'vitamin_e': 'vitamin_e_mg',
 'tocopherol_alpha': 'tocopherol_alpha_mg',
 'vitamin_k': 'vitamin_k_mcg',
 'calcium': 'calcium_mg',
 'copper': 'copper_mg',
 'irom': 'irom_mg',
 'magnesium': 'magnesium_mg',
 'manganese': 'manganese_mg',
 'phosphorous': 'phospho

In [149]:
# Append the unit suffix to every column listed in `mapper`; columns that are
# not keys of `mapper` (e.g. calories, rank) keep their names. The result is
# reassigned instead of using inplace=True, so the call returns the renamed
# frame and can be chained.
nutrition = nutrition.rename(columns=mapper)

## Removing units from values

In [150]:
# Remove every ASCII letter from the string cells so that only the numeric text
# of values such as "9.00 mg" remains. The result is reassigned instead of
# using inplace=True, and the pattern is written as a raw string.
nutrition = nutrition.replace(r'[a-zA-Z]', '', regex=True)

In [151]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100,381,0.1,,0,9.0,0.4,0.0,0.0,0.0,...,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32,7266.5
"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,1.167,...,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52,8640.0
"Eggplant, raw",100,25,0.2,,0,2.0,6.9,22.0,0.0,0.649,...,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3,432.5
"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,3.363,...,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82,7018.5
"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,0.063,...,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1,3436.0


In [154]:
nutrition.dtypes

serving_size_g      object
calories             int64
total_fat_g         object
saturated_fat_g     object
cholesterol_mg      object
                    ...   
ash_g               object
caffeine_mg         object
theobromine_mg      object
water_g             object
rank               float64
Length: 76, dtype: object

In [155]:
# With the unit letters stripped, convert every column to 64-bit floats
nutrition = nutrition.astype('float64')

In [156]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32,7266.5
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52,8640.0
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3,432.5
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82,7018.5
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1,3436.0


In [160]:
nutrition.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Columns: 76 entries, serving_size_g to rank
dtypes: float64(76)
memory usage: 5.4+ MB


## Filtering in 2D

In [161]:
nutrition.shape

(8789, 76)

In [165]:
# like= keeps the row labels (axis=0) that contain the given substring; the match
# is case-sensitive, so labels spelled with a lowercase "octopus" are not returned
nutrition.filter(like='Octopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0,1341.0


In [166]:
# filtering with regex

In [169]:
nutrition.filter(regex='octopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25,1995.5
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5,3886.5


In [170]:
nutrition.filter(regex='[oO]ctopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0,1341.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25,1995.5
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5,3886.5


In [173]:
# The inline flag (?i) makes the regex case-insensitive, so both "Octopus" and "octopus" match
nutrition.filter(regex='(?i)octopus', axis=0)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0,1341.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25,1995.5
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5,3886.5


In [178]:
nutrition.filter(regex='(?i)octopus', axis=0).iloc[:,1:3]

Unnamed: 0_level_0,calories,total_fat_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Octopus (Alaska Native),56.0,0.8
"Mollusks, raw, common, octopus",82.0,1.0
"Mollusks, moist heat, cooked, common, octopus",164.0,2.1


## DataFrame sorting

In [179]:
nutrition.head()

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.0,...,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32,7266.5
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52,8640.0
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3,432.5
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82,7018.5
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1,3436.0


In [183]:
nutrition.calories.sort_values(ascending=False)

name
Fat, mutton tallow                                                                                      902.0
Fish oil, salmon                                                                                        902.0
Lard                                                                                                    902.0
Fat, beef tallow                                                                                        902.0
Fish oil, cod liver                                                                                     902.0
                                                                                                        ...  
Beverages, decaffeinated, brewed, green, tea                                                              0.0
Beverages, caffeine free, cola, ZEVIA                                                                     0.0
Carbonated beverage, without caffeine, with sodium saccharin, other than cola or pepper, low calorie      0.0
Bever

In [184]:
nutrition.sort_values(by = ['calories'], ascending=False)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Fat, mutton tallow",100.0,902.0,100.0,47.0,102.0,0.0,79.8,0.0,0.0,0.00,...,47.300,40.600,7.800,102.0,0.0,0.00,0.0,0.0,0.00,8785.0
"Fish oil, salmon",100.0,902.0,100.0,20.0,485.0,0.0,0.0,0.0,0.0,0.00,...,19.872,29.037,40.324,485.0,0.0,0.00,0.0,0.0,0.00,8785.0
Lard,100.0,902.0,100.0,39.0,95.0,0.0,49.7,0.0,0.0,0.00,...,39.200,45.100,11.200,95.0,0.0,0.00,0.0,0.0,0.00,8785.0
"Fat, beef tallow",100.0,902.0,100.0,50.0,109.0,0.0,79.8,0.0,0.0,0.00,...,49.800,41.800,4.000,109.0,0.0,0.00,0.0,0.0,0.00,8785.0
"Fish oil, cod liver",100.0,902.0,100.0,23.0,570.0,0.0,0.0,0.0,0.0,0.00,...,22.608,46.711,22.541,570.0,0.0,0.00,0.0,0.0,0.00,8785.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, decaffeinated, brewed, green, tea",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.03,...,0.000,0.000,0.000,0.0,0.0,0.00,0.0,0.0,99.93,20.0
"Beverages, caffeine free, cola, ZEVIA",100.0,0.0,0.0,,0.0,6.0,0.0,0.0,0.0,0.00,...,0.000,0.000,0.000,0.0,0.0,0.01,0.0,0.0,98.87,20.0
"Carbonated beverage, without caffeine, with sodium saccharin, other than cola or pepper, low calorie",100.0,0.0,0.0,,0.0,16.0,0.0,0.0,0.0,0.00,...,0.000,0.000,0.000,0.0,0.0,0.10,0.0,0.0,99.80,20.0
"Beverages, unsweetened, ready to drink, green, tea",100.0,0.0,0.0,,0.0,7.0,0.0,0.0,0.0,0.00,...,0.000,0.000,0.000,0.0,0.0,0.12,12.0,0.0,99.88,20.0


In [186]:
# Highest cholesterol first; rows with equal cholesterol are ordered by ascending sodium
nutrition.sort_values(by = ['cholesterol_mg','sodium_mg'], ascending=[False, True])

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Beef, simmered, cooked, brain, variety meats and by-products",100.0,151.0,11.0,2.4,3100.0,108.0,490.9,5.0,0.0,3.620,...,2.394,1.882,1.632,3100.0,0.0,1.46,0.0,0.0,74.86,3595.5
"Veal, braised, cooked, brain, variety meats and by-products",100.0,136.0,9.6,2.2,3100.0,156.0,0.0,3.0,0.0,2.430,...,2.180,1.740,1.490,3100.0,0.0,1.40,0.0,0.0,76.89,3255.5
"Beef, raw, brain, variety meats and by-products",100.0,143.0,10.0,2.3,3010.0,126.0,0.0,3.0,0.0,3.550,...,2.300,1.890,1.586,3010.0,0.0,1.51,0.0,0.0,76.29,3407.0
"Lamb, soaked and fried, cooked, brains, imported, New Zealand",100.0,154.0,11.0,1.4,2559.0,101.0,0.0,0.0,0.0,2.995,...,1.365,4.168,0.999,2559.0,0.0,3.39,0.0,0.0,73.11,3651.0
"Pork, braised, cooked, brain, variety meats and by-products, fresh",100.0,138.0,9.5,2.2,2552.0,91.0,0.0,4.0,0.0,3.330,...,2.150,1.720,1.470,2552.0,0.0,1.40,0.0,0.0,75.88,3294.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Leavening agents, sodium aluminum sulfate, double-acting, baking powder",100.0,53.0,0.0,,0.0,10600.0,0.0,0.0,0.0,0.000,...,0.000,0.000,0.000,0.0,0.0,67.30,0.0,0.0,5.00,1263.0
"Seasoning mix, coriander & annatto, sazon, dry",100.0,0.0,0.0,,0.0,17000.0,0.0,0.0,0.0,0.000,...,0.000,0.000,0.000,0.0,0.0,99.80,0.0,0.0,0.20,20.0
"Desserts, unsweetened, tablets, rennin",100.0,84.0,0.1,,0.0,26050.0,0.0,0.0,0.0,0.000,...,0.041,0.038,0.007,0.0,0.0,72.50,0.0,0.0,6.50,2048.0
"Leavening agents, baking soda",100.0,0.0,0.0,,0.0,27360.0,0.0,0.0,0.0,0.000,...,0.000,0.000,0.000,0.0,0.0,36.90,0.0,0.0,0.20,20.0


In [189]:
# Values of one food for the columns whose label contains '_g', largest first
(
    nutrition
    .loc['Veal, braised, cooked, brain, variety meats and by-products']
    .filter(like='_g')
    .sort_values(ascending=False)
)

serving_size_g                   100.000
water_g                           76.890
protein_g                         11.480
fat_g                              9.630
total_fat_g                        9.600
saturated_fat_g                    2.200
saturated_fatty_acids_g            2.180
monounsaturated_fatty_acids_g      1.740
polyunsaturated_fatty_acids_g      1.490
ash_g                              1.400
glutamic_acid_g                    1.373
aspartic_acid_g                    0.974
leucine_g                          0.886
lysine_g                           0.711
arginine_g                         0.629
phenylalanine_g                    0.604
alanine_g                          0.591
serine_g                           0.589
threonine_g                        0.568
valine_g                           0.546
glycine_g                          0.504
proline_g                          0.474
isoleucine_g                       0.467
tyrosine_g                         0.445
histidine_g     

## between() method

In [192]:
# between() includes both endpoints by default, so foods with exactly 20 or 60 calories are True
nutrition.calories.between(20, 60)

name
Cornstarch                                                                                            False
Nuts, pecans                                                                                          False
Eggplant, raw                                                                                          True
Teff, uncooked                                                                                        False
Sherbet, orange                                                                                       False
                                                                                                      ...  
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    False
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Beef, raw, all grades, 

In [194]:
# Four random foods with 20-60 calories (bounds inclusive); random_state pins
# the draw so re-running the notebook shows the same rows
nutrition[nutrition.calories.between(20, 60)].sample(4, random_state=42)

Unnamed: 0_level_0,serving_size_g,calories,total_fat_g,saturated_fat_g,cholesterol_mg,sodium_mg,choline_mg,folate_mcg,folic_acid_mcg,niacin_mg,...,saturated_fatty_acids_g,monounsaturated_fatty_acids_g,polyunsaturated_fatty_acids_g,fatty_acids_total_trans_mg,alcohol_g,ash_g,caffeine_mg,theobromine_mg,water_g,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Apples, without skin, raw",100.0,48.0,0.1,,0.0,0.0,3.4,0.0,0.0,0.091,...,0.021,0.005,0.037,0.0,0.0,0.17,0.0,0.0,86.67,1091.0
"Fruit salad, solids and liquids, light syrup, canned, (peach and pear and apricot and pineapple and cherry)",100.0,58.0,0.1,,0.0,6.0,0.0,3.0,0.0,0.365,...,0.009,0.012,0.028,0.0,0.0,0.21,0.0,0.0,84.24,1400.5
CAMPBELL'S Homestyle Italian-Style Wedding Soup,100.0,49.0,1.4,0.6,6.0,322.0,0.0,0.0,0.0,0.0,...,0.612,0.0,0.0,6.0,0.0,0.6,0.0,0.0,89.2,1125.5
"Babyfood, strained, green beans and turkey",100.0,51.0,1.5,0.5,11.0,11.0,0.0,29.0,0.0,1.14,...,0.5,0.27,0.42,11.0,0.0,1.55,0.0,0.0,87.5,1199.5


## min, max, idxmin, idxmax

In [196]:
nutrition.min(axis=0)

serving_size_g     100.0
calories             0.0
total_fat_g          0.0
saturated_fat_g      0.1
cholesterol_mg       0.0
                   ...  
ash_g                0.0
caffeine_mg          0.0
theobromine_mg       0.0
water_g              0.0
rank                20.0
Length: 76, dtype: float64

In [197]:
# Which food has the most potassium?

In [198]:
nutrition.potassium_mg.max()

16500.0

In [199]:
nutrition.potassium_mg.idxmax()

'Leavening agents, cream of tartar'

In [200]:
nutrition.potassium_mg.sort_values(ascending=False)

name
Leavening agents, cream of tartar                              16500.0
Leavening agents, low-sodium, baking powder                    10100.0
Parsley, freeze-dried                                           6300.0
Beverages, unsweetened, decaffeinated, instant, tea             6040.0
Beverages, powder, unsweetened, instant, tea                    6040.0
                                                                ...   
CAMPBELL'S CHUNKY Soups, Beef with White and Wild Rice Soup        0.0
Alcoholic beverage, Pinot Gris (Grigio), white, table, wine        0.0
Cloudberries, raw (Alaska Native)                                  0.0
Oil, all purpose, soy ( partially hydrogenated), industrial        0.0
Oil, sunflower, mid-oleic, industrial                              0.0
Name: potassium_mg, Length: 8789, dtype: float64

In [201]:
# Target: foods whose potassium-to-sodium ratio is about 16

In [202]:
# Potassium-to-sodium ratio per food, highest first; zeros in either column are
# swapped for 1 beforehand so the division never hits a zero denominator
K_to_Na = (
    nutrition['potassium_mg'].replace(0, 1)
    .div(nutrition['sodium_mg'].replace(0, 1))
    .sort_values(ascending=False)
)

In [203]:
K_to_Na.head(10)

name
Peanut flour, low fat                                           1358.0
Nuts, raw, pistachio nuts                                       1025.0
Beverages, reduced calorie, with whitener, instant, coffee       909.0
Soybeans, raw, mature seeds                                      898.5
Soy meal, raw, defatted                                          830.0
Babyfood, dry, with bananas, rice, cereal                        769.0
Nuts, without salt added, dry roasted, hazelnuts or filberts     755.0
Soy protein concentrate, produced by alcohol extraction          734.0
Nuts, almonds                                                    733.0
Nuts, full fat, acorn flour                                      712.0
dtype: float64

In [205]:
# Ten random foods whose ratio lies between 14 and 18 (bounds inclusive);
# random_state pins the draw so re-running the notebook shows the same rows
K_to_Na[K_to_Na.between(14,18)].sample(10, random_state=42)

name
Finger snacks, apple and cinnamon, GERBER GRADUATE PUFFS                                                              15.076923
Tofu, prepared with calcium sulfate, firm, raw                                                                        16.928571
Turnip greens, unprepared, frozen                                                                                     15.333333
Salsify, without salt, drained, boiled, cooked                                                                        17.687500
Fruit cocktail, solids and liquids, extra heavy syrup, canned, (peach and pineapple and pear and grape and cherry)    14.333333
Fish, dry heat, cooked, spot                                                                                          17.189189
Alcoholic beverages, higher alcohol, beer                                                                             15.500000
Babyfood, strained, green beans and turkey                                                         