In [1]:
# 1. Import the NumPy and pandas libraries using the following commands in the Jupyter notebook:

import numpy as np
import pandas as pd

In [2]:
# 2. Read the RadNet dataset from the EPA (U.S. Environmental Protection Agency), available from
# the Socrata project, from the file RadNet_Laboratory_Analysis.csv. The path is relative, so the
# CSV file must be in the notebook's working directory.

df = pd.read_csv("RadNet_Laboratory_Analysis.csv")

In [3]:
# 3. Start by selecting a single column using the ['<name of the column>'] notation. Use the State
# column and show its first rows with .head():

df['State'].head()

0    ID
1    ID
2    AK
3    AK
4    AK
Name: State, dtype: object

In [4]:
# 4. Now filter the rows by a column's values: the comparison builds a boolean mask, and indexing
# with it keeps only the rows where the State column equals "MN":

df[df.State == "MN"]

Unnamed: 0,State,Location,Date Posted,Date Collected,Sample Type,Unit,Ba-140,Co-60,Cs-134,Cs-136,Cs-137,I-131,I-132,I-133,Te-129,Te-129m,Te-132
367,MN,St. Paul,04/08/2011,03/28/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
368,MN,St. Paul,04/22/2011,04/13/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,0.16,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
380,MN,Welch,04/08/2011,03/29/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
381,MN,Welch,06/01/2011,04/14/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
555,MN,St. Paul,04/04/2011,03/22/2011,Precipitation,pCi/l,Non-detect,Non-detect,Non-detect,,Non-detect,32.3,Non-detect,Non-detect,,,Non-detect
556,MN,St. Paul,04/10/2011,03/29/2011,Precipitation,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,16,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
557,MN,Welch,04/04/2011,03/17/2011,Precipitation,pCi/l,Non-detect,Non-detect,Non-detect,,Non-detect,Non-detect,Non-detect,Non-detect,,,Non-detect
558,MN,Welch/510,04/13/2011,04/04/2011,Precipitation,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,9.1,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect


In [5]:
# 5. Filter on more than one column at once. Each comparison is wrapped in parentheses and the
# two are joined with & (element-wise AND); here a Sample Type condition is added to the State one:

df[
    (df["State"] == "CA")
    & (df["Sample Type"] == "Drinking Water")
]

Unnamed: 0,State,Location,Date Posted,Date Collected,Sample Type,Unit,Ba-140,Co-60,Cs-134,Cs-136,Cs-137,I-131,I-132,I-133,Te-129,Te-129m,Te-132
305,CA,Los Angeles,04/10/2011,04/04/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,0.39,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
306,CA,Los Angeles,06/01/2011,04/12/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,0.18,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
356,CA,Richmond,04/09/2011,03/29/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect
357,CA,Richmond,06/01/2011,04/13/2011,Drinking Water,pCi/l,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect,Non-detect


In [6]:
# 6. Next, keep only the rows for the MN state and then pick the I-131 isotope column from the
# filtered result:

df[df.State == "MN"]["I-131"]

367    Non-detect
368          0.16
380    Non-detect
381    Non-detect
555          32.3
556            16
557    Non-detect
558           9.1
Name: I-131, dtype: object

In [7]:
# 7. The .loc indexer does the same more easily: the row filter (State equals "MN") and the column
# label ("I-131") are passed together in a single .loc call. Note that df_rad is only a second
# name for df, not a copy.

df_rad = df
df_rad.loc[df_rad["State"] == "MN", "I-131"]

367    Non-detect
368          0.16
380    Non-detect
381    Non-detect
555          32.3
556            16
557    Non-detect
558           9.1
Name: I-131, dtype: object

In [8]:
# Note
# The result of the previous .loc filter is a Series and not a DataFrame. This depends on the
# operation and selection performed on the DataFrame and is not caused only by .loc. Because a
# DataFrame can be understood as a 2D combination of Series, selecting one column returns a Series.
# To make a selection and still get a DataFrame back, use double brackets (a list of column names), e.g.:


df[['I-132']].head()

Unnamed: 0,I-132
0,Non-detect
1,Non-detect
2,Non-detect
3,Non-detect
4,Non-detect
