In [1]:
import pandas as pd


In [2]:
# Load the earthquake data from a CSV file (path is relative to the working directory) into a DataFrame:
df = pd.read_csv('earthquakes.csv')

In [3]:
# Grab an entire column as a Series using attribute notation
# (only possible when the column name is a valid Python identifier):
df.mag

0       4.90
1       0.68
2       1.90
3       2.15
4       2.00
        ... 
6741    3.00
6742    0.54
6743    0.28
6744    1.07
6745    0.08
Name: mag, Length: 6746, dtype: float64

In [4]:
# Grab the same column using dictionary-style (bracket) syntax, which works for any column name:
df['mag']

0       4.90
1       0.68
2       1.90
3       2.15
4       2.00
        ... 
6741    3.00
6742    0.54
6743    0.28
6744    1.07
6745    0.08
Name: mag, Length: 6746, dtype: float64

In [5]:
# Select multiple columns by passing a list of column names; the result is a DataFrame:
df[['mag', 'title']]

Unnamed: 0,mag,title
0,4.90,"M 4.9 - 191 km ESE of Ust’-Kamchatsk Staryy, R..."
1,0.68,"M 0.7 - 7 km WNW of Cobb, CA"
2,1.90,"M 1.9 - 20 km W of Point MacKenzie, Alaska"
3,2.15,"M 2.2 - 4 km W of Magalia, CA"
4,2.00,"M 2.0 - 33 km WNW of Petersville, Alaska"
...,...,...
6741,3.00,"M 3.0 - 26 km NNE of Chase, Alaska"
6742,0.54,"M 0.5 - 9 km ENE of San Martin, CA"
6743,0.28,"M 0.3 - 8 km NNW of The Geysers, CA"
6744,1.07,"M 1.1 - 20 km E of Little Lake, CA"


In [6]:
# Select columns by building the list of names with a list comprehension.
# Each column name is a plain Python string, so str.startswith() does the matching:
df[
    ['title', 'time']
    + [col for col in df.columns if col.startswith('mag')]
]

Unnamed: 0,title,time,mag,magType
0,"M 4.9 - 191 km ESE of Ust’-Kamchatsk Staryy, R...",1731369384328,4.90,mb
1,"M 0.7 - 7 km WNW of Cobb, CA",1731367157080,0.68,md
2,"M 1.9 - 20 km W of Point MacKenzie, Alaska",1731366482588,1.90,ml
3,"M 2.2 - 4 km W of Magalia, CA",1731366324690,2.15,md
4,"M 2.0 - 33 km WNW of Petersville, Alaska",1731365892605,2.00,ml
...,...,...,...,...
6741,"M 3.0 - 26 km NNE of Chase, Alaska",1729126041870,3.00,ml
6742,"M 0.5 - 9 km ENE of San Martin, CA",1729125111440,0.54,md
6743,"M 0.3 - 8 km NNW of The Geysers, CA",1729124975210,0.28,md
6744,"M 1.1 - 20 km E of Little Lake, CA",1729124731770,1.07,ml


In [7]:
# Breaking down this example:
# 1 - the list comprehension keeps only the column names that start with 'mag':
[col for col in df.columns if col.startswith('mag')]

['mag', 'magType']

In [8]:
# 2 - assemble the full list: the fixed names followed by the 'mag*' matches.
# (Parentheses let the expression span several lines without a backslash.)
(
    ['title', 'time']
    + [name for name in df.columns if name.startswith('mag')]
)

['title', 'time', 'mag', 'magType']

In [9]:
# 3 - use the assembled list as the column selection:
df[
    ['title', 'time']
    + [col for col in df.columns if col.startswith('mag')]
]

Unnamed: 0,title,time,mag,magType
0,"M 4.9 - 191 km ESE of Ust’-Kamchatsk Staryy, R...",1731369384328,4.90,mb
1,"M 0.7 - 7 km WNW of Cobb, CA",1731367157080,0.68,md
2,"M 1.9 - 20 km W of Point MacKenzie, Alaska",1731366482588,1.90,ml
3,"M 2.2 - 4 km W of Magalia, CA",1731366324690,2.15,md
4,"M 2.0 - 33 km WNW of Petersville, Alaska",1731365892605,2.00,ml
...,...,...,...,...
6741,"M 3.0 - 26 km NNE of Chase, Alaska",1729126041870,3.00,ml
6742,"M 0.5 - 9 km ENE of San Martin, CA",1729125111440,0.54,md
6743,"M 0.3 - 8 km NNW of The Geysers, CA",1729124975210,0.28,md
6744,"M 1.1 - 20 km E of Little Lake, CA",1729124731770,1.07,ml


In [10]:
# Slice rows by position (inclusive of the start, exclusive of the stop), so this returns rows 100-102:
df[100:103]

Unnamed: 0,mag,place,time,updated,tz,url,detail,felt,cdi,mmi,...,ids,sources,types,nst,dmin,rms,gap,magType,type,title
100,2.7,"55 km S of Whites City, New Mexico",1731322030800,1731336673174,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,1.0,3.1,,...,",tx2024weiy,us7000nr53,",",tx,us,",",dyfi,origin,phase-data,",33.0,0.1,0.1,68.0,ml,earthquake,"M 2.7 - 55 km S of Whites City, New Mexico"
101,0.7,"22 km ESE of Anza, CA",1731321593400,1731339106987,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,",ci40791567,",",ci,",",nearby-cities,origin,phase-data,scitech-link,",43.0,0.09866,0.17,81.0,ml,earthquake,"M 0.7 - 22 km ESE of Anza, CA"
102,1.36,"3 km SSE of Muscoy, CA",1731321042860,1731347588310,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,",ci40791559,",",ci,",",focal-mechanism,nearby-cities,origin,phase-da...",95.0,0.02822,0.17,27.0,ml,earthquake,"M 1.4 - 3 km SSE of Muscoy, CA"


In [11]:
# Select columns and rows together by chaining a column selection with a row slice:
df[['title', 'time']][100:153]

Unnamed: 0,title,time
100,"M 2.7 - 55 km S of Whites City, New Mexico",1731322030800
101,"M 0.7 - 22 km ESE of Anza, CA",1731321593400
102,"M 1.4 - 3 km SSE of Muscoy, CA",1731321042860
103,"M 1.7 - 101 km WNW of Skwentna, Alaska",1731320673300
104,"M 1.3 - 30 km W of Ester, Alaska",1731320229522
105,"M 5.6 - Maug Islands region, Northern Mariana ...",1731320218633
106,"M 4.9 - 111 km NW of Tual, Indonesia",1731319701798
107,"M 0.9 - 11 km ESE of Indian Springs, Nevada",1731319539733
108,"M 0.4 - 11 km SE of Anza, CA",1731318947920
109,"M 1.8 - 9 km SW of Morgan Hill, CA",1731318940210


In [12]:
# For selection, the order of the two steps doesn't matter; both spellings give the same result:
df[100:103][['title', 'time']].equals(
    df[['title', 'time']][100:103]
)

True

In [13]:
# Slice rows first, then pick a single column by name (the result is a Series):
df[100:113]['title']

100           M 2.7 - 55 km S of Whites City, New Mexico
101                        M 0.7 - 22 km ESE of Anza, CA
102                       M 1.4 - 3 km SSE of Muscoy, CA
103               M 1.7 - 101 km WNW of Skwentna, Alaska
104                     M 1.3 - 30 km W of Ester, Alaska
105    M 5.6 - Maug Islands region, Northern Mariana ...
106                 M 4.9 - 111 km NW of Tual, Indonesia
107          M 0.9 - 11 km ESE of Indian Springs, Nevada
108                         M 0.4 - 11 km SE of Anza, CA
109                   M 1.8 - 9 km SW of Morgan Hill, CA
110                     M 1.3 - 25 km E of Forsan, Texas
111              M 1.2 - 86 km SE of King Salmon, Alaska
112           M 2.3 - 55 km S of Whites City, New Mexico
Name: title, dtype: object

In [14]:
# Using loc to update values: select rows 110 through 112 (loc includes both endpoints)
# of the title column, lowercase them, and assign the result back to the same cells.
# Note that this modifies df in place.
df.loc[110:112, 'title'] = df.loc[110:112, 'title'].str.lower()
# Display the updated values:
df.loc[110:112, 'title']

110              m 1.3 - 25 km e of forsan, texas
111       m 1.2 - 86 km se of king salmon, alaska
112    m 2.3 - 55 km s of whites city, new mexico
Name: title, dtype: object

In [15]:
# With loc, a bare colon selects all rows (here, for the title column):
df.loc[:, 'title']

0       M 4.9 - 191 km ESE of Ust’-Kamchatsk Staryy, R...
1                            M 0.7 - 7 km WNW of Cobb, CA
2              M 1.9 - 20 km W of Point MacKenzie, Alaska
3                           M 2.2 - 4 km W of Magalia, CA
4                M 2.0 - 33 km WNW of Petersville, Alaska
                              ...                        
6741                   M 3.0 - 26 km NNE of Chase, Alaska
6742                   M 0.5 - 9 km ENE of San Martin, CA
6743                  M 0.3 - 8 km NNW of The Geysers, CA
6744                   M 1.1 - 20 km E of Little Lake, CA
6745                 M 0.1 - 77 km W of Salamatof, Alaska
Name: title, Length: 6746, dtype: object

In [16]:
# loc slices by label and includes both endpoints, so this returns rows 10 through 15:
df.loc[10:15, ['title', 'mag']]

Unnamed: 0,title,mag
10,"M 4.9 - 265 km WNW of Houma, Tonga",4.9
11,"M 1.6 - 22 km N of Sutton-Alpine, Alaska",1.6
12,"M 1.8 - 5 km N of Fontana, CA",1.78
13,"M 2.0 - 11 km SSE of Fern Forest, Hawaii",2.02
14,"M 2.1 - 56 km SE of Pope-Vannoy Landing, Alaska",2.1
15,"M 1.6 - 70 km ENE of Chase, Alaska",1.6


In [17]:
# Integer-location indexing with iloc: rows and columns are given by position.
# As with Python list slicing, the stop position (15) is excluded:
df.iloc[10:15, [19, 8]]

Unnamed: 0,nst,cdi
10,103.0,
11,,
12,84.0,
13,50.0,
14,,


In [18]:
# iloc accepts slice syntax for both rows and columns (stop positions are excluded):
df.iloc[10:15, 8:10]

Unnamed: 0,cdi,mmi
10,,
11,,
12,,
13,,
14,,


In [19]:
# The same selection can be written with loc. Unlike iloc, loc slices by label and
# includes both endpoints, so row positions 10:15 become the labels 10:14, and
# column positions 8:10 become the labels 'cdi':'mmi':
df.iloc[10:15, 8:10].equals(df.loc[10:14, 'cdi':'mmi'])

True

In [20]:
# The take() method selects rows by position (essentially iloc without having to specify the columns);
# here it picks the 2nd and 4th rows of the loc selection:
df.loc[10:14, 'mag':'magType'].take([1, 3])

Unnamed: 0,mag,place,time,updated,tz,url,detail,felt,cdi,mmi,...,net,code,ids,sources,types,nst,dmin,rms,gap,magType
11,1.6,"22 km N of Sutton-Alpine, Alaska",1731362578073,1731362792104,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,ak,024eiyxr9r,",ak024eiyxr9r,",",ak,",",origin,phase-data,",,,0.55,,ml
13,2.02,"11 km SSE of Fern Forest, Hawaii",1731361832310,1731362010240,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,hv,74528247,",hv74528247,",",hv,",",origin,phase-data,",50.0,0.02308,0.37,148.0,ml


In [21]:
# To look up scalar values, use the faster at[] (label-based) and iat[] (position-based):
df.at[10, 'mag']

4.9

In [22]:
# iat[] looks up a single value by integer row and column position:
df.iat[10, 8]

nan

In [23]:
# We can filter dataframes with a Boolean mask; a comparison on a column
# produces one (a Series of True/False values, one per row):
df.mag > 2

0        True
1       False
2       False
3        True
4       False
        ...  
6741     True
6742    False
6743    False
6744    False
6745    False
Name: mag, Length: 6746, dtype: bool

In [24]:
# To use a mask for selection, simply place it inside the brackets
# (no earthquake in this data has a magnitude of 7 or more, so the result is empty):
df[df.mag >= 7]

Unnamed: 0,mag,place,time,updated,tz,url,detail,felt,cdi,mmi,...,ids,sources,types,nst,dmin,rms,gap,magType,type,title


In [25]:

# With loc, a Boolean mask selects the rows while a list of names selects the columns:
df.loc[
    df.mag >= 7.0,
    ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']
]

Unnamed: 0,alert,mag,magType,title,tsunami,type


In [26]:
# Combine conditions with & (AND); each condition must be wrapped in parentheses:
df.loc[
    (df.tsunami == 1) & (df.alert == 'red'),
    ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']
]

Unnamed: 0,alert,mag,magType,title,tsunami,type


In [27]:
# Combine conditions with | (OR) to keep rows that satisfy at least one of them:
df.loc[
    (df.tsunami == 1) | (df.alert == 'red'),
    ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']
]

Unnamed: 0,alert,mag,magType,title,tsunami,type
228,yellow,6.8,mww,"M 6.8 - 40 km SSW of Bartolomé Masó, Cuba",1,earthquake
1812,green,5.8,mww,"M 5.8 - 81 km ESE of Adak, Alaska",1,earthquake
2813,green,6.0,mww,"M 6.0 - 279 km W of Bandon, Oregon",1,earthquake
4688,green,4.36,mw,"M 4.4 - 5 km SSW of Petrolia, CA",1,earthquake


In [28]:
# Select all earthquakes with the string 'Alaska' in the place column and a non-null value in the
# alert column. To get non-nulls, we can use the isnull() method with the negation operator (~)
# or, as here, the notnull() method:
df.loc[
    (df.place.str.contains('Alaska')) & (df.alert.notnull()),
    ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']
]

Unnamed: 0,alert,mag,magType,title,tsunami,type
1812,green,5.8,mww,"M 5.8 - 81 km ESE of Adak, Alaska",1,earthquake


In [29]:
# str.contains() treats its pattern as a regular expression by default. Here we select
# earthquakes with a magnitude above 3.8 whose place contains 'CA' or ends in 'California':
df.loc[
    (df.place.str.contains(r'CA|California$')) & (df.mag > 3.8),
    ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']
]

Unnamed: 0,alert,mag,magType,title,tsunami,type
4058,,3.92,mw,"M 3.9 - 127 km WNW of Petrolia, CA",0,earthquake
4061,green,4.15,mw,"M 4.2 - 118 km WNW of Petrolia, CA",0,earthquake
4272,green,4.54,ml,"M 4.5 - 10 km SE of Stovepipe Wells, CA",0,earthquake
4348,green,4.72,mw,"M 4.7 - 5 km E of Stovepipe Wells, CA",0,earthquake
4366,green,4.17,mw,"M 4.2 - 4 km E of Stovepipe Wells, CA",0,earthquake
4686,green,3.92,mw,"M 3.9 - 5 km SSW of Petrolia, CA",0,earthquake
4688,green,4.36,mw,"M 4.4 - 5 km SSW of Petrolia, CA",1,earthquake
4696,green,4.0,mw,"M 4.0 - 5 km SSW of Petrolia, CA",0,earthquake


In [30]:
# Use between() to select values in a range (both endpoints are included by default):
df.loc[
    df.mag.between(6.5, 7.5),
    ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']
]

Unnamed: 0,alert,mag,magType,title,tsunami,type
228,yellow,6.8,mww,"M 6.8 - 40 km SSW of Bartolomé Masó, Cuba",1,earthquake


In [31]:
# Use the isin() method to check for membership in a list of values:
df.loc[
    df.alert.isin(['orange', 'red']),
    ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']
]

Unnamed: 0,alert,mag,magType,title,tsunami,type


In [32]:
# idxmin() and idxmax() return the index labels of the minimum and maximum values of a column;
# these labels can then be passed to loc to select the entire rows where they occur:
[df.mag.idxmin(), df.mag.idxmax()]

[5771, 228]

In [33]:
df.loc[(df.place.str.contains(r'CA | Calfornia$')) & 
(df.mag > 3.8), 
['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type


In [34]:
# str.contains() is case-sensitive by default, so lowercase 'alaska' does not match 'Alaska':
df.place.str.contains('alaska')

0       False
1       False
2       False
3       False
4       False
        ...  
6741    False
6742    False
6743    False
6744    False
6745    False
Name: place, Length: 6746, dtype: bool

In [35]:
# Using the filter() method, we can grab columns of a dataframe by passing a list of names to items:
df.filter(items=['mag', 'magType']).head()

Unnamed: 0,mag,magType
0,4.9,mb
1,0.68,md
2,1.9,ml
3,2.15,md
4,2.0,ml


In [36]:
# We can also grab all the columns whose names contain a given substring with the like parameter:
df.filter(like='mag').head()

Unnamed: 0,mag,magType
0,4.9,mb
1,0.68,md
2,1.9,ml
3,2.15,md
4,2.0,ml


In [37]:
# We can even use regular expressions; here, we select any columns whose names start with t:
df.filter(regex=r'^t').head()

Unnamed: 0,time,tz,tsunami,types,type,title
0,1731369384328,,0,",internal-moment-tensor,origin,phase-data,",earthquake,"M 4.9 - 191 km ESE of Ust’-Kamchatsk Staryy, R..."
1,1731367157080,,0,",nearby-cities,origin,phase-data,scitech-link,",earthquake,"M 0.7 - 7 km WNW of Cobb, CA"
2,1731366482588,,0,",origin,phase-data,",earthquake,"M 1.9 - 20 km W of Point MacKenzie, Alaska"
3,1731366324690,,0,",nearby-cities,origin,phase-data,scitech-link,",earthquake,"M 2.2 - 4 km W of Magalia, CA"
4,1731365892605,,0,",origin,phase-data,",earthquake,"M 2.0 - 33 km WNW of Petersville, Alaska"


In [38]:
# filter() can also be applied along the rows by passing axis=0. Here the place column
# becomes the index, rows whose index label contains 'Japan' are kept, and the columns
# are then narrowed to the three of interest:
(
    df
    .set_index('place')
    .filter(like='Japan', axis=0)
    .filter(items=['mag', 'magType', 'title'])
    .head()
)

Unnamed: 0_level_0,mag,magType,title
place,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Izu Islands, Japan region",4.9,mb,"M 4.9 - Izu Islands, Japan region"
"Volcano Islands, Japan region",4.7,mb,"M 4.7 - Volcano Islands, Japan region"
"Volcano Islands, Japan region",4.9,mb,"M 4.9 - Volcano Islands, Japan region"
"47 km SSE of Kushima, Japan",4.5,mb,"M 4.5 - 47 km SSE of Kushima, Japan"
"Volcano Islands, Japan region",4.9,mb,"M 4.9 - Volcano Islands, Japan region"


In [39]:
# filter() also works on Series objects, where it is applied to the index:
df.set_index('place').title.filter(like='Japan').head()

place
Izu Islands, Japan region            M 4.9 - Izu Islands, Japan region
Volcano Islands, Japan region    M 4.7 - Volcano Islands, Japan region
Volcano Islands, Japan region    M 4.9 - Volcano Islands, Japan region
47 km SSE of Kushima, Japan        M 4.5 - 47 km SSE of Kushima, Japan
Volcano Islands, Japan region    M 4.9 - Volcano Islands, Japan region
Name: title, dtype: object