In [1]:
import numpy as np
import pandas as pd
import datetime as dt

In [2]:
data = np.genfromtxt('example_data.csv', delimiter=';', names=True, dtype=None, encoding='UTF')
data

array([('2018-10-13 11:10:23.560', '262km NW of Ozernovskiy, Russia', 'mww', 6.7, 'green', 1),
       ('2018-10-13 04:34:15.580', '25km E of Bitung, Indonesia', 'mww', 5.2, 'green', 0),
       ('2018-10-13 00:13:46.220', '42km WNW of Sola, Vanuatu', 'mww', 5.7, 'green', 0),
       ('2018-10-12 21:09:49.240', '13km E of Nueva Concepcion, Guatemala', 'mww', 5.7, 'green', 0),
       ('2018-10-12 02:52:03.620', '128km SE of Kimbe, Papua New Guinea', 'mww', 5.6, 'green', 1)],
      dtype=[('time', '<U23'), ('place', '<U37'), ('magType', '<U3'), ('mag', '<f8'), ('alert', '<U5'), ('tsunami', '<i4')])

In [3]:
data.shape

(5,)

In [4]:
data.dtype

dtype([('time', '<U23'), ('place', '<U37'), ('magType', '<U3'), ('mag', '<f8'), ('alert', '<U5'), ('tsunami', '<i4')])

In [5]:
# making an array where the keys are the column names and the values are numpy arrays of the data
array_dict = {}

for i, col in enumerate(data.dtype.names):
  array_dict[col] = np.array([row[i] for row in data])

array_dict

{'time': array(['2018-10-13 11:10:23.560', '2018-10-13 04:34:15.580',
        '2018-10-13 00:13:46.220', '2018-10-12 21:09:49.240',
        '2018-10-12 02:52:03.620'], dtype='<U23'),
 'place': array(['262km NW of Ozernovskiy, Russia', '25km E of Bitung, Indonesia',
        '42km WNW of Sola, Vanuatu',
        '13km E of Nueva Concepcion, Guatemala',
        '128km SE of Kimbe, Papua New Guinea'], dtype='<U37'),
 'magType': array(['mww', 'mww', 'mww', 'mww', 'mww'], dtype='<U3'),
 'mag': array([6.7, 5.2, 5.7, 5.7, 5.6]),
 'alert': array(['green', 'green', 'green', 'green', 'green'], dtype='<U5'),
 'tsunami': array([1, 0, 0, 0, 1])}

In [6]:
np.array([value[array_dict['mag'].argmax()] for key, value in array_dict.items()])

array(['2018-10-13 11:10:23.560', '262km NW of Ozernovskiy, Russia',
       'mww', '6.7', 'green', '1'], dtype='<U32')

## Series

In [7]:
place = pd.Series(array_dict['place'], name='place')
place

0          262km NW of Ozernovskiy, Russia
1              25km E of Bitung, Indonesia
2                42km WNW of Sola, Vanuatu
3    13km E of Nueva Concepcion, Guatemala
4      128km SE of Kimbe, Papua New Guinea
Name: place, dtype: object

## Index

In [8]:
place_index = place.index
place_index

RangeIndex(start=0, stop=5, step=1)

In [9]:
place_index.values

array([0, 1, 2, 3, 4], dtype=int64)

## Dataframe

In [10]:
df = pd.DataFrame(array_dict)
df

Unnamed: 0,time,place,magType,mag,alert,tsunami
0,2018-10-13 11:10:23.560,"262km NW of Ozernovskiy, Russia",mww,6.7,green,1
1,2018-10-13 04:34:15.580,"25km E of Bitung, Indonesia",mww,5.2,green,0
2,2018-10-13 00:13:46.220,"42km WNW of Sola, Vanuatu",mww,5.7,green,0
3,2018-10-12 21:09:49.240,"13km E of Nueva Concepcion, Guatemala",mww,5.7,green,0
4,2018-10-12 02:52:03.620,"128km SE of Kimbe, Papua New Guinea",mww,5.6,green,1


In [11]:
df.dtypes

time        object
place       object
magType     object
mag        float64
alert       object
tsunami      int32
dtype: object

In [12]:
df + df

Unnamed: 0,time,place,magType,mag,alert,tsunami
0,2018-10-13 11:10:23.5602018-10-13 11:10:23.560,"262km NW of Ozernovskiy, Russia262km NW of Oze...",mwwmww,13.4,greengreen,2
1,2018-10-13 04:34:15.5802018-10-13 04:34:15.580,"25km E of Bitung, Indonesia25km E of Bitung, I...",mwwmww,10.4,greengreen,0
2,2018-10-13 00:13:46.2202018-10-13 00:13:46.220,"42km WNW of Sola, Vanuatu42km WNW of Sola, Van...",mwwmww,11.4,greengreen,0
3,2018-10-12 21:09:49.2402018-10-12 21:09:49.240,"13km E of Nueva Concepcion, Guatemala13km E of...",mwwmww,11.4,greengreen,0
4,2018-10-12 02:52:03.6202018-10-12 02:52:03.620,"128km SE of Kimbe, Papua New Guinea128km SE of...",mwwmww,11.2,greengreen,2


In [13]:
np.random.seed(0)
pd.DataFrame({'random': np.random.rand(5), 'text':['hot', 'warm', 'cool', 'cold', None], 'truth': [np.random.choice([True, False]) for _ in range(5)]},
              index=pd.date_range(end=dt.date(2019, 4, 21), freq='1D', periods=5, name='date'))

Unnamed: 0_level_0,random,text,truth
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-17,0.548814,hot,False
2019-04-18,0.715189,warm,True
2019-04-19,0.602763,cool,True
2019-04-20,0.544883,cold,False
2019-04-21,0.423655,,True


In [14]:
pd.DataFrame([{'mag': 5.2, 'place': 'California'},
              {'mag': 1.2, 'place': 'Alaska'},
              {'mag': 0.2, 'place': 'California'},])

Unnamed: 0,mag,place
0,5.2,California
1,1.2,Alaska
2,0.2,California


In [15]:
list_of_tuples = [(n, n ** 2, n ** 3) for n in range(5)]
list_of_tuples

[(0, 0, 0), (1, 1, 1), (2, 4, 8), (3, 9, 27), (4, 16, 64)]

In [16]:
pd.DataFrame(list_of_tuples, columns=['n', 'n_squared', 'n_cubed'])

Unnamed: 0,n,n_squared,n_cubed
0,0,0,0
1,1,1,1
2,2,4,8
3,3,9,27
4,4,16,64


In [17]:
earthquakes = pd.read_csv('earthquakes.csv')
earthquakes.head()

Unnamed: 0,alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,...,sources,status,time,title,tsunami,type,types,tz,updated,url
0,,,37389218,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.008693,,85.0,",ci37389218,",1.35,ml,...,",ci,",automatic,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475395144,https://earthquake.usgs.gov/earthquakes/eventp...
1,,,37389202,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02003,,79.0,",ci37389202,",1.29,ml,...,",ci,",automatic,1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475253925,https://earthquake.usgs.gov/earthquakes/eventp...
2,,4.4,37389194,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02137,28.0,21.0,",ci37389194,",3.42,ml,...,",ci,",automatic,1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,earthquake,",dyfi,focal-mechanism,geoserve,nearby-cities,o...",-480.0,1539536756176,https://earthquake.usgs.gov/earthquakes/eventp...
3,,,37389186,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02618,,39.0,",ci37389186,",0.44,ml,...,",ci,",automatic,1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475196167,https://earthquake.usgs.gov/earthquakes/eventp...
4,,,73096941,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.07799,,192.0,",nc73096941,",2.16,md,...,",nc,",automatic,1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,scit...",-480.0,1539477547926,https://earthquake.usgs.gov/earthquakes/eventp...


In [18]:
import sqlite3

In [19]:
# reading csv into a database
with sqlite3.connect('quakes.db') as conn:
  pd.read_csv('tsunamis.csv').to_sql('tsunamis', conn, index=False, if_exists='replace')

In [20]:
# querying database
with sqlite3.connect('quakes.db') as conn:
  tsunamis = pd.read_sql('SELECT * FROM tsunamis', conn)

In [21]:
tsunamis.head()

Unnamed: 0,alert,type,title,place,magType,mag,time
0,,earthquake,"M 5.0 - 165km NNW of Flying Fish Cove, Christm...","165km NNW of Flying Fish Cove, Christmas Island",mww,5.0,1539459504090
1,green,earthquake,"M 6.7 - 262km NW of Ozernovskiy, Russia","262km NW of Ozernovskiy, Russia",mww,6.7,1539429023560
2,green,earthquake,"M 5.6 - 128km SE of Kimbe, Papua New Guinea","128km SE of Kimbe, Papua New Guinea",mww,5.6,1539312723620
3,green,earthquake,"M 6.5 - 148km S of Severo-Kuril'sk, Russia","148km S of Severo-Kuril'sk, Russia",mww,6.5,1539213362130
4,green,earthquake,"M 6.2 - 94km SW of Kokopo, Papua New Guinea","94km SW of Kokopo, Papua New Guinea",mww,6.2,1539208835130


## From API

In [22]:
import requests

In [23]:
# taking data from web source
yesterday = dt.date.today() - dt.timedelta(days=1)
api = 'https://earthquake.usgs.gov/fdsnws/event/1/query'
payload = {'format': 'geojson', 'starttime': yesterday - dt.timedelta(days=26), 'endtime': yesterday}
response = requests.get(api, params=payload)

In [24]:
response.status_code

200

In [25]:
# isolating JSON payload from the HTTP response stored in 'response' variable and then look at the keys to view the main sections of the resulting data
earthquake_json = response.json()
earthquake_json.keys()

dict_keys(['type', 'metadata', 'features', 'bbox'])

In [26]:
# metadata tells us some information about our request
earthquake_json['metadata']

{'generated': 1643915926000,
 'url': 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-01-07&endtime=2022-02-02',
 'title': 'USGS Earthquakes',
 'status': 200,
 'api': '1.13.2',
 'count': 8892}

In [27]:
type(earthquake_json['features'])

list

In [28]:
earthquake_json['features'][0]

{'type': 'Feature',
 'properties': {'mag': 1.95,
  'place': '1 km WNW of Cole, Oklahoma',
  'time': 1643759312549,
  'updated': 1643819696878,
  'tz': None,
  'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/ok2022cgls',
  'detail': 'https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ok2022cgls&format=geojson',
  'felt': 2,
  'cdi': 2.7,
  'mmi': None,
  'alert': None,
  'status': 'reviewed',
  'tsunami': 0,
  'sig': 59,
  'net': 'ok',
  'code': '2022cgls',
  'ids': ',ok2022cgls,',
  'sources': ',ok,',
  'types': ',dyfi,origin,phase-data,',
  'nst': 55,
  'dmin': 0.06298763968,
  'rms': 0.27,
  'gap': 38,
  'magType': 'ml',
  'type': 'earthquake',
  'title': 'M 2.0 - 1 km WNW of Cole, Oklahoma'},
 'geometry': {'type': 'Point',
  'coordinates': [-97.58533333, 35.10766667, 6.78]},
 'id': 'ok2022cgls'}

In [29]:
# pulling properties out of the json

data = [quake['properties'] for quake in earthquake_json['features']]

In [30]:
df = pd.DataFrame(data)
df.head()

Unnamed: 0,mag,place,time,updated,tz,url,detail,felt,cdi,mmi,...,ids,sources,types,nst,dmin,rms,gap,magType,type,title
0,1.95,"1 km WNW of Cole, Oklahoma",1643759312549,1643819696878,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,2.0,2.7,,...,",ok2022cgls,",",ok,",",dyfi,origin,phase-data,",55.0,0.062988,0.27,38.0,ml,earthquake,"M 2.0 - 1 km WNW of Cole, Oklahoma"
1,0.4,"16 km NNW of Sutcliffe, Nevada",1643758895187,1643769518163,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,",nn00832999,",",nn,",",origin,phase-data,",4.0,0.093,0.182,165.98,ml,earthquake,"M 0.4 - 16 km NNW of Sutcliffe, Nevada"
2,1.49,"7km W of Palomar Observatory, CA",1643758877630,1643764377670,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,",ci39930231,",",ci,",",focal-mechanism,nearby-cities,origin,phase-da...",49.0,0.066,0.19,30.0,ml,earthquake,"M 1.5 - 7km W of Palomar Observatory, CA"
3,1.47,Puerto Rico region,1643758273200,1643759611930,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,",pr71332143,",",pr,",",origin,phase-data,",3.0,,0.05,256.0,md,earthquake,M 1.5 - Puerto Rico region
4,2.05,"4 km S of Indios, Puerto Rico",1643758241850,1643759100610,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,",pr71332138,",",pr,",",origin,phase-data,",15.0,,0.07,170.0,md,earthquake,"M 2.1 - 4 km S of Indios, Puerto Rico"


## Inspecting Dataframe object

In [31]:
# check if there is data in dataframe
earthquakes.empty

False

In [32]:
earthquakes.shape

(9332, 26)

In [33]:
earthquakes.head()

Unnamed: 0,alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,...,sources,status,time,title,tsunami,type,types,tz,updated,url
0,,,37389218,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.008693,,85.0,",ci37389218,",1.35,ml,...,",ci,",automatic,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475395144,https://earthquake.usgs.gov/earthquakes/eventp...
1,,,37389202,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02003,,79.0,",ci37389202,",1.29,ml,...,",ci,",automatic,1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475253925,https://earthquake.usgs.gov/earthquakes/eventp...
2,,4.4,37389194,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02137,28.0,21.0,",ci37389194,",3.42,ml,...,",ci,",automatic,1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,earthquake,",dyfi,focal-mechanism,geoserve,nearby-cities,o...",-480.0,1539536756176,https://earthquake.usgs.gov/earthquakes/eventp...
3,,,37389186,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02618,,39.0,",ci37389186,",0.44,ml,...,",ci,",automatic,1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475196167,https://earthquake.usgs.gov/earthquakes/eventp...
4,,,73096941,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.07799,,192.0,",nc73096941,",2.16,md,...,",nc,",automatic,1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,scit...",-480.0,1539477547926,https://earthquake.usgs.gov/earthquakes/eventp...


In [34]:
earthquakes.columns

Index(['alert', 'cdi', 'code', 'detail', 'dmin', 'felt', 'gap', 'ids', 'mag',
       'magType', 'mmi', 'net', 'nst', 'place', 'rms', 'sig', 'sources',
       'status', 'time', 'title', 'tsunami', 'type', 'types', 'tz', 'updated',
       'url'],
      dtype='object')

In [35]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8892 entries, 0 to 8891
Data columns (total 26 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   mag      8891 non-null   float64
 1   place    8857 non-null   object 
 2   time     8892 non-null   int64  
 3   updated  8892 non-null   int64  
 4   tz       0 non-null      object 
 5   url      8892 non-null   object 
 6   detail   8892 non-null   object 
 7   felt     670 non-null    float64
 8   cdi      670 non-null    float64
 9   mmi      145 non-null    float64
 10  alert    58 non-null     object 
 11  status   8892 non-null   object 
 12  tsunami  8892 non-null   int64  
 13  sig      8892 non-null   int64  
 14  net      8892 non-null   object 
 15  code     8892 non-null   object 
 16  ids      8892 non-null   object 
 17  sources  8892 non-null   object 
 18  types    8892 non-null   object 
 19  nst      6680 non-null   float64
 20  dmin     6355 non-null   float64
 21  rms      8892 

## Describing and summarizing data

In [36]:
earthquakes.describe()

Unnamed: 0,cdi,dmin,felt,gap,mag,mmi,nst,rms,sig,time,tsunami,tz,updated
count,329.0,6139.0,329.0,6164.0,9331.0,93.0,5364.0,9332.0,9332.0,9332.0,9332.0,9331.0,9332.0
mean,2.754711,0.544925,12.31003,121.506588,1.497345,3.651398,19.053878,0.362122,56.899914,1538284000000.0,0.006537,-451.99014,1538537000000.0
std,1.010637,2.214305,48.954944,72.962363,1.203347,1.790523,15.492315,0.317784,91.872163,608030600.0,0.080589,231.752571,656413500.0
min,0.0,0.000648,0.0,12.0,-1.26,0.0,0.0,0.0,0.0,1537229000000.0,0.0,-720.0,1537230000000.0
25%,2.0,0.020425,1.0,66.1425,0.72,2.68,8.0,0.119675,8.0,1537793000000.0,0.0,-540.0,1537996000000.0
50%,2.7,0.05905,2.0,105.0,1.3,3.72,15.0,0.21,26.0,1538245000000.0,0.0,-480.0,1538621000000.0
75%,3.3,0.17725,5.0,159.0,1.9,4.57,25.0,0.59,56.0,1538766000000.0,0.0,-480.0,1539110000000.0
max,8.4,53.737,580.0,355.91,7.5,9.12,172.0,1.91,2015.0,1539475000000.0,1.0,720.0,1539537000000.0


In [37]:
# different percentiles
earthquakes.describe(percentiles=[0.05, 0.95])

Unnamed: 0,cdi,dmin,felt,gap,mag,mmi,nst,rms,sig,time,tsunami,tz,updated
count,329.0,6139.0,329.0,6164.0,9331.0,93.0,5364.0,9332.0,9332.0,9332.0,9332.0,9331.0,9332.0
mean,2.754711,0.544925,12.31003,121.506588,1.497345,3.651398,19.053878,0.362122,56.899914,1538284000000.0,0.006537,-451.99014,1538537000000.0
std,1.010637,2.214305,48.954944,72.962363,1.203347,1.790523,15.492315,0.317784,91.872163,608030600.0,0.080589,231.752571,656413500.0
min,0.0,0.000648,0.0,12.0,-1.26,0.0,0.0,0.0,0.0,1537229000000.0,0.0,-720.0,1537230000000.0
5%,2.0,0.005491,1.0,35.0,-0.04,0.0,4.0,0.03,0.0,1537344000000.0,0.0,-600.0,1537387000000.0
50%,2.7,0.05905,2.0,105.0,1.3,3.72,15.0,0.21,26.0,1538245000000.0,0.0,-480.0,1538621000000.0
95%,4.3,2.6789,40.2,276.0,4.4,6.38,49.0,0.96,298.0,1539319000000.0,0.0,-60.0,1539400000000.0
max,8.4,53.737,580.0,355.91,7.5,9.12,172.0,1.91,2015.0,1539475000000.0,1.0,720.0,1539537000000.0


In [38]:
# information on the columns of the object type
earthquakes.describe(include=np.object)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  earthquakes.describe(include=np.object)


Unnamed: 0,alert,code,detail,ids,magType,net,place,sources,status,title,type,types,url
count,59,9332,9332,9332,9331,9332,9332,9332,9332,9332,9332,9332,9332
unique,2,9332,9332,9332,10,14,5433,52,2,7807,5,42,9332
top,green,20273987,https://earthquake.usgs.gov/fdsnws/event/1/que...,",us1000h5gh,",ml,ak,"10km NE of Aguanga, CA",",ak,",reviewed,"M 0.4 - 10km NE of Aguanga, CA",earthquake,",geoserve,origin,phase-data,",https://earthquake.usgs.gov/earthquakes/eventp...
freq,58,1,1,1,6803,3166,306,2981,7797,55,9081,5301,1


## Grabbing subsets of data

### Selection

In [39]:
earthquakes.mag

0       1.35
1       1.29
2       3.42
3       0.44
4       2.16
        ... 
9327    0.62
9328    1.00
9329    2.40
9330    1.10
9331    0.66
Name: mag, Length: 9332, dtype: float64

In [40]:
earthquakes[['mag', 'title']]

Unnamed: 0,mag,title
0,1.35,"M 1.4 - 9km NE of Aguanga, CA"
1,1.29,"M 1.3 - 9km NE of Aguanga, CA"
2,3.42,"M 3.4 - 8km NE of Aguanga, CA"
3,0.44,"M 0.4 - 9km NE of Aguanga, CA"
4,2.16,"M 2.2 - 10km NW of Avenal, CA"
...,...,...
9327,0.62,"M 0.6 - 9km ENE of Mammoth Lakes, CA"
9328,1.00,"M 1.0 - 3km W of Julian, CA"
9329,2.40,"M 2.4 - 35km NNE of Hatillo, Puerto Rico"
9330,1.10,"M 1.1 - 9km NE of Aguanga, CA"


In [41]:
# select all of the columns that start with mag, along with the title and time columns

earthquakes[['title', 'time'] + [col for col in earthquakes.columns if col.startswith('mag')]]

Unnamed: 0,title,time,mag,magType
0,"M 1.4 - 9km NE of Aguanga, CA",1539475168010,1.35,ml
1,"M 1.3 - 9km NE of Aguanga, CA",1539475129610,1.29,ml
2,"M 3.4 - 8km NE of Aguanga, CA",1539475062610,3.42,ml
3,"M 0.4 - 9km NE of Aguanga, CA",1539474978070,0.44,ml
4,"M 2.2 - 10km NW of Avenal, CA",1539474716050,2.16,md
...,...,...,...,...
9327,"M 0.6 - 9km ENE of Mammoth Lakes, CA",1537230228060,0.62,md
9328,"M 1.0 - 3km W of Julian, CA",1537230135130,1.00,ml
9329,"M 2.4 - 35km NNE of Hatillo, Puerto Rico",1537229908180,2.40,md
9330,"M 1.1 - 9km NE of Aguanga, CA",1537229545350,1.10,ml


### Slicing

In [42]:
earthquakes[100:103]

Unnamed: 0,alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,...,sources,status,time,title,tsunami,type,types,tz,updated,url
100,,,20280310,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,",ak20280310,",1.2,ml,...,",ak,",automatic,1539435449480,"M 1.2 - 25km NW of Ester, Alaska",0,earthquake,",geoserve,origin,",-540.0,1539443551010,https://earthquake.usgs.gov/earthquakes/eventp...
101,,,73096756,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.01355,,185.0,",nc73096756,",0.59,md,...,",nc,",automatic,1539435391320,"M 0.6 - 8km ESE of Mammoth Lakes, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,scit...",-480.0,1539439802162,https://earthquake.usgs.gov/earthquakes/eventp...
102,,,37388730,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02987,,39.0,",ci37388730,",1.33,ml,...,",ci,",automatic,1539435293090,"M 1.3 - 8km ENE of Aguanga, CA",0,earthquake,",focal-mechanism,geoserve,nearby-cities,origin...",-480.0,1539435940470,https://earthquake.usgs.gov/earthquakes/eventp...


In [43]:
earthquakes[['title', 'time']][100:103]

Unnamed: 0,title,time
100,"M 1.2 - 25km NW of Ester, Alaska",1539435449480
101,"M 0.6 - 8km ESE of Mammoth Lakes, CA",1539435391320
102,"M 1.3 - 8km ENE of Aguanga, CA",1539435293090


### Indexing

In [44]:
earthquakes[110:113]['title'] = earthquakes[110:113]['title'].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  earthquakes[110:113]['title'] = earthquakes[110:113]['title'].str.lower()


In [45]:
# use loc and iloc to subset dataframe and avoid the previous notification

earthquakes.loc[110:112, 'title'] = earthquakes.loc[110:112, 'title'].str.lower()
earthquakes.loc[110:112, 'title']

110               m 1.1 - 35km s of ester, alaska
111    m 1.9 - 93km wnw of arctic village, alaska
112      m 0.9 - 20km wsw of smith valley, nevada
Name: title, dtype: object

In [46]:
# grabbing all rows of the title column
earthquakes.loc[:, 'title']

0                  M 1.4 - 9km NE of Aguanga, CA
1                  M 1.3 - 9km NE of Aguanga, CA
2                  M 3.4 - 8km NE of Aguanga, CA
3                  M 0.4 - 9km NE of Aguanga, CA
4                  M 2.2 - 10km NW of Avenal, CA
                          ...                   
9327        M 0.6 - 9km ENE of Mammoth Lakes, CA
9328                 M 1.0 - 3km W of Julian, CA
9329    M 2.4 - 35km NNE of Hatillo, Puerto Rico
9330               M 1.1 - 9km NE of Aguanga, CA
9331               M 0.7 - 9km NE of Aguanga, CA
Name: title, Length: 9332, dtype: object

In [47]:
# selecting multiple rows and columns
earthquakes.loc[10:15, ['title', 'mag']]

Unnamed: 0,title,mag
10,"M 0.5 - 10km NE of Aguanga, CA",0.5
11,"M 2.8 - 53km SE of Punta Cana, Dominican Republic",2.77
12,"M 0.5 - 9km NE of Aguanga, CA",0.5
13,"M 4.5 - 120km SSW of Banda Aceh, Indonesia",4.5
14,"M 2.1 - 14km NW of Parkfield, CA",2.13
15,"M 2.0 - 156km WNW of Haines Junction, Canada",2.0


In [48]:
# using loc[], slicing can be done on the column names

earthquakes.iloc[10:15, 8:10]

Unnamed: 0,mag,magType
10,0.5,ml
11,2.77,md
12,0.5,ml
13,4.5,mb
14,2.13,md


In [49]:
earthquakes.loc[10:14, 'mag':'magType']

Unnamed: 0,mag,magType
10,0.5,ml
11,2.77,md
12,0.5,ml
13,4.5,mb
14,2.13,md


In [50]:
# looking up scalar values with at[] and iat[]
earthquakes.at[10, 'mag']

0.5

In [51]:
earthquakes.iat[10, 8]

0.5

### Filtering

In [52]:
earthquakes[earthquakes.mag >= 7.0]

Unnamed: 0,alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,...,sources,status,time,title,tsunami,type,types,tz,updated,url
837,green,4.1,1000haa3,https://earthquake.usgs.gov/fdsnws/event/1/que...,1.763,3.0,14.0,",us1000haa3,pt18283003,at00pgehsk,",7.0,mww,...,",us,pt,at,",reviewed,1539204500290,"M 7.0 - 117km E of Kimbe, Papua New Guinea",1,earthquake,",dyfi,finite-fault,general-text,geoserve,groun...",600.0,1539378744253,https://earthquake.usgs.gov/earthquakes/eventp...
5263,red,8.4,1000h3p4,https://earthquake.usgs.gov/fdsnws/event/1/que...,1.589,18.0,27.0,",us1000h3p4,us1000h4p4,",7.5,mww,...,",us,us,",reviewed,1538128963480,"M 7.5 - 78km N of Palu, Indonesia",1,earthquake,",dyfi,finite-fault,general-text,geoserve,groun...",480.0,1539123134531,https://earthquake.usgs.gov/earthquakes/eventp...


In [53]:
earthquakes.loc[earthquakes.mag >= 7.0, ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
837,green,7.0,mww,"M 7.0 - 117km E of Kimbe, Papua New Guinea",1,earthquake
5263,red,7.5,mww,"M 7.5 - 78km N of Palu, Indonesia",1,earthquake


In [54]:
# multiple criteries
earthquakes.loc[(earthquakes.tsunami == 1) & (earthquakes.alert == 'red'), ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
5263,red,7.5,mww,"M 7.5 - 78km N of Palu, Indonesia",1,earthquake


In [55]:
earthquakes.loc[(earthquakes.tsunami == 1) | (earthquakes.alert == 'red'), ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
36,,5.0,mww,"M 5.0 - 165km NNW of Flying Fish Cove, Christm...",1,earthquake
118,green,6.7,mww,"M 6.7 - 262km NW of Ozernovskiy, Russia",1,earthquake
501,green,5.6,mww,"M 5.6 - 128km SE of Kimbe, Papua New Guinea",1,earthquake
799,green,6.5,mww,"M 6.5 - 148km S of Severo-Kuril'sk, Russia",1,earthquake
816,green,6.2,mww,"M 6.2 - 94km SW of Kokopo, Papua New Guinea",1,earthquake
...,...,...,...,...,...,...
8561,,5.4,mb,"M 5.4 - 228km S of Taron, Papua New Guinea",1,earthquake
8624,,5.1,mb,"M 5.1 - 278km SE of Pondaguitan, Philippines",1,earthquake
9133,green,5.1,ml,"M 5.1 - 64km SSW of Kaktovik, Alaska",1,earthquake
9175,,5.2,mb,"M 5.2 - 126km N of Dili, East Timor",1,earthquake


In [56]:
earthquakes.loc[(earthquakes.place.str.contains('Alaska')) & (earthquakes.alert.notnull()), ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
1015,green,5.0,ml,"M 5.0 - 61km SSW of Chignik Lake, Alaska",1,earthquake
1273,green,4.0,ml,"M 4.0 - 71km SW of Kaktovik, Alaska",1,earthquake
1795,green,4.0,ml,"M 4.0 - 60km WNW of Valdez, Alaska",1,earthquake
2752,green,4.0,ml,"M 4.0 - 67km SSW of Kaktovik, Alaska",1,earthquake
3260,green,3.9,ml,"M 3.9 - 44km N of North Nenana, Alaska",0,earthquake
4101,green,4.2,ml,"M 4.2 - 131km NNW of Arctic Village, Alaska",0,earthquake
6897,green,3.8,ml,"M 3.8 - 80km SSW of Kaktovik, Alaska",0,earthquake
8524,green,3.8,ml,"M 3.8 - 69km SSW of Kaktovik, Alaska",0,earthquake
9133,green,5.1,ml,"M 5.1 - 64km SSW of Kaktovik, Alaska",1,earthquake


In [57]:
# Using Regex to select all earthquakes in California that have magnitudes at least 3.8
# Select entries in the place column that end in CA or California because the data isn't consistent

earthquakes.loc[(earthquakes.place.str.contains(r'CA|California$')) & (earthquakes.mag > 3.8), ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
1465,green,3.83,mw,"M 3.8 - 109km WNW of Trinidad, CA",0,earthquake
2414,green,3.83,mw,"M 3.8 - 5km SW of Tres Pinos, CA",1,earthquake


In [58]:
# Between method
earthquakes.loc[earthquakes.mag.between(6.5, 7.5), ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
118,green,6.7,mww,"M 6.7 - 262km NW of Ozernovskiy, Russia",1,earthquake
799,green,6.5,mww,"M 6.5 - 148km S of Severo-Kuril'sk, Russia",1,earthquake
837,green,7.0,mww,"M 7.0 - 117km E of Kimbe, Papua New Guinea",1,earthquake
4363,green,6.7,mww,"M 6.7 - 263km NNE of Ndoi Island, Fiji",1,earthquake
5263,red,7.5,mww,"M 7.5 - 78km N of Palu, Indonesia",1,earthquake


In [59]:
# isin method
earthquakes.loc[earthquakes.alert.isin(['orange', 'red']), ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
5263,red,7.5,mww,"M 7.5 - 78km N of Palu, Indonesia",1,earthquake


In [60]:
# Using idxmin and idxmax for the index of minimum and maximum
[earthquakes.mag.idxmin(), earthquakes.mag.idxmax()]

[2409, 5263]

In [61]:
earthquakes.loc[[earthquakes.mag.idxmin(), earthquakes.mag.idxmax()], ['alert', 'mag', 'magType', 'title', 'tsunami', 'type']]

Unnamed: 0,alert,mag,magType,title,tsunami,type
2409,,-1.26,ml,"M -1.3 - 41km ENE of Adak, Alaska",0,earthquake
5263,red,7.5,mww,"M 7.5 - 78km N of Palu, Indonesia",1,earthquake


## Adding and removing data

In [62]:
earthquakes = pd.read_csv('earthquakes.csv', usecols=['time', 'title', 'place', 'magType', 'mag', 'alert', 'tsunami'])
earthquakes.head()

Unnamed: 0,alert,mag,magType,place,time,title,tsunami
0,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0
1,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0
2,,3.42,ml,"8km NE of Aguanga, CA",1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0
3,,0.44,ml,"9km NE of Aguanga, CA",1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0
4,,2.16,md,"10km NW of Avenal, CA",1539474716050,"M 2.2 - 10km NW of Avenal, CA",0


### Creating new data

In [63]:
earthquakes['ones'] = 1
earthquakes.head()

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones
0,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,1
1,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,1
2,,3.42,ml,"8km NE of Aguanga, CA",1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,1
3,,0.44,ml,"9km NE of Aguanga, CA",1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,1
4,,2.16,md,"10km NW of Avenal, CA",1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,1


In [64]:
earthquakes['mag_negative'] = earthquakes.mag < 0
earthquakes.head()

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones,mag_negative
0,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,1,False
1,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,1,False
2,,3.42,ml,"8km NE of Aguanga, CA",1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,1,False
3,,0.44,ml,"9km NE of Aguanga, CA",1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,1,False
4,,2.16,md,"10km NW of Avenal, CA",1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,1,False


In [65]:
# extracting everything in place column after the comma using regex to check inconsistencies in data
earthquakes.place.str.extract(r', (.*$)')[0].sort_values().unique()

array(['Afghanistan', 'Alaska', 'Argentina', 'Arizona', 'Arkansas',
       'Australia', 'Azerbaijan', 'B.C., MX', 'Barbuda', 'Bolivia',
       'Bonaire, Saint Eustatius and Saba ', 'British Virgin Islands',
       'Burma', 'CA', 'California', 'Canada', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Colorado', 'Costa Rica',
       'Dominican Republic', 'East Timor', 'Ecuador', 'Ecuador region',
       'El Salvador', 'Fiji', 'Greece', 'Greenland', 'Guam', 'Guatemala',
       'Haiti', 'Hawaii', 'Honduras', 'Idaho', 'Illinois', 'India',
       'Indonesia', 'Iran', 'Iraq', 'Italy', 'Jamaica', 'Japan', 'Kansas',
       'Kentucky', 'Kyrgyzstan', 'Martinique', 'Mauritius', 'Mayotte',
       'Mexico', 'Missouri', 'Montana', 'NV', 'Nevada', 'New Caledonia',
       'New Hampshire', 'New Mexico', 'New Zealand', 'Nicaragua',
       'North Carolina', 'Northern Mariana Islands', 'Oklahoma', 'Oregon',
       'Pakistan', 'Papua New Guinea', 'Peru', 'Philippines',
       'Puerto Rico', 'Roman

In [66]:
# using replace() method to replace some patterns
earthquakes['parsed_place'] = earthquakes.place.str.replace(r'.* of ', '' # remove something of something
            ).str.replace(r'the ', '' # remove things starting with 'the'
            ).str.replace(r'CA$', 'California' # fix California
            ).str.replace(r'NV$', 'Nevada' # fix Nevada
            ).str.replace(r'MX$', 'Mexico' # fix Mexico
            ).str.replace(r' region$', '' # chop off endings with 'region'
            ).str.replace(r'northern ', '' # remove northern
            ).str.replace(r'Fiji Islands', 'Fiji' # line up the Fiji places
            ).str.replace(r'^.*, ', '' # remove anything else extraneous from beginning
            ).str.strip() # remove any extra spaces


In [67]:
earthquakes.parsed_place.sort_values().unique()

array(['Afghanistan', 'Alaska', 'Argentina', 'Arizona', 'Arkansas',
       'Ascension Island', 'Australia', 'Azerbaijan', 'Balleny Islands',
       'Barbuda', 'Bolivia', 'British Virgin Islands', 'Burma',
       'California', 'Canada', 'Carlsberg Ridge',
       'Central East Pacific Rise', 'Central Mid-Atlantic Ridge', 'Chile',
       'China', 'Christmas Island', 'Colombia', 'Colorado', 'Costa Rica',
       'Dominican Republic', 'East Timor', 'Ecuador', 'El Salvador',
       'Fiji', 'Greece', 'Greenland', 'Guam', 'Guatemala', 'Haiti',
       'Hawaii', 'Honduras', 'Idaho', 'Illinois', 'India',
       'Indian Ocean Triple Junction', 'Indonesia', 'Iran', 'Iraq',
       'Italy', 'Jamaica', 'Japan', 'Kansas', 'Kentucky',
       'Kermadec Islands', 'Kuril Islands', 'Kyrgyzstan', 'Martinique',
       'Mauritius', 'Mayotte', 'Mexico', 'Mid-Indian Ridge', 'Missouri',
       'Montana', 'Nevada', 'New Caledonia', 'New Hampshire',
       'New Mexico', 'New Zealand', 'Nicaragua', 'North Carolina',


In [68]:
# Making many new columns at once using assign() method

earthquakes.assign(in_ca=earthquakes.parsed_place.str.endswith('California'), in_alaska=earthquakes.parsed_place.str.endswith('Alaska')).head()

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones,mag_negative,parsed_place,in_ca,in_alaska
0,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,1,False,California,True,False
1,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,1,False,California,True,False
2,,3.42,ml,"8km NE of Aguanga, CA",1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,1,False,California,True,False
3,,0.44,ml,"9km NE of Aguanga, CA",1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,1,False,California,True,False
4,,2.16,md,"10km NW of Avenal, CA",1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,1,False,California,True,False


In [69]:
# Using lambda in assign method
earthquakes.assign(abs_mag=lambda x: x.mag.abs())

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones,mag_negative,parsed_place,abs_mag
0,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,1,False,California,1.35
1,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,1,False,California,1.29
2,,3.42,ml,"8km NE of Aguanga, CA",1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,1,False,California,3.42
3,,0.44,ml,"9km NE of Aguanga, CA",1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,1,False,California,0.44
4,,2.16,md,"10km NW of Avenal, CA",1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,1,False,California,2.16
...,...,...,...,...,...,...,...,...,...,...,...
9327,,0.62,md,"9km ENE of Mammoth Lakes, CA",1537230228060,"M 0.6 - 9km ENE of Mammoth Lakes, CA",0,1,False,California,0.62
9328,,1.00,ml,"3km W of Julian, CA",1537230135130,"M 1.0 - 3km W of Julian, CA",0,1,False,California,1.00
9329,,2.40,md,"35km NNE of Hatillo, Puerto Rico",1537229908180,"M 2.4 - 35km NNE of Hatillo, Puerto Rico",0,1,False,Puerto Rico,2.40
9330,,1.10,ml,"9km NE of Aguanga, CA",1537229545350,"M 1.1 - 9km NE of Aguanga, CA",0,1,False,California,1.10


In [70]:
# Using concat method
tsunami = earthquakes[earthquakes.tsunami == 1]
no_tsunami = earthquakes[earthquakes.tsunami == 0]

In [71]:
tsunami.shape, no_tsunami.shape

((61, 10), (9271, 10))

In [72]:
pd.concat([tsunami, no_tsunami]).shape # 61 rows + 9271 rows

(9332, 10)

In [73]:
# Previous result is the same as running append() method on the dataframe
tsunami.append(no_tsunami).shape

(9332, 10)

In [74]:
# getting some of the ignored columns using concatenate along the columns (axis=1)
additional_columns = pd.read_csv('earthquakes.csv', usecols=['tz', 'felt', 'ids'])

In [75]:
pd.concat([earthquakes.head(2), additional_columns.head(2)], axis=1)

# concat() method uses the index to determine how to concatenate values

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones,mag_negative,parsed_place,felt,ids,tz
0,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,1,False,California,,",ci37389218,",-480.0
1,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,1,False,California,,",ci37389202,",-480.0


In [76]:
# if they don't align, this will generate additional rows
additional_columns = pd.read_csv('earthquakes.csv', usecols=['tz', 'felt', 'ids', 'time'], index_col='time')

In [77]:
pd.concat([earthquakes.head(2), additional_columns.head(2)], axis=1)

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones,mag_negative,parsed_place,felt,ids,tz
0,,1.35,ml,"9km NE of Aguanga, CA",1539475000000.0,"M 1.4 - 9km NE of Aguanga, CA",0.0,1.0,False,California,,,
1,,1.29,ml,"9km NE of Aguanga, CA",1539475000000.0,"M 1.3 - 9km NE of Aguanga, CA",0.0,1.0,False,California,,,
1539475129610,,,,,,,,,,,,",ci37389202,",-480.0
1539475168010,,,,,,,,,,,,",ci37389218,",-480.0


In [78]:
# concatenating two dataframes but one has an additional column
# the join parameter specifies how to handle any overlap in column names or in row names
pd.concat([tsunami.head(2), no_tsunami.head(2).assign(type='earthquake')], join='inner')

# type column from no_tsunami dataframe doesn't show up because it wasn't present in the tsunami dataframe

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones,mag_negative,parsed_place
36,,5.0,mww,"165km NNW of Flying Fish Cove, Christmas Island",1539459504090,"M 5.0 - 165km NNW of Flying Fish Cove, Christm...",1,1,False,Christmas Island
118,green,6.7,mww,"262km NW of Ozernovskiy, Russia",1539429023560,"M 6.7 - 262km NW of Ozernovskiy, Russia",1,1,False,Russia
0,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,1,False,California
1,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,1,False,California


In [79]:
# if the index is not meaningful, we can pass ignore_index to get sequential values in the index
pd.concat([tsunami.head(2), no_tsunami.head(2).assign(type='earthquake')], join='inner', ignore_index=True)

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,ones,mag_negative,parsed_place
0,,5.0,mww,"165km NNW of Flying Fish Cove, Christmas Island",1539459504090,"M 5.0 - 165km NNW of Flying Fish Cove, Christm...",1,1,False,Christmas Island
1,green,6.7,mww,"262km NW of Ozernovskiy, Russia",1539429023560,"M 6.7 - 262km NW of Ozernovskiy, Russia",1,1,False,Russia
2,,1.35,ml,"9km NE of Aguanga, CA",1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,1,False,California
3,,1.29,ml,"9km NE of Aguanga, CA",1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,1,False,California


### Deleting unwanted data

In [80]:
del earthquakes['ones']
earthquakes.columns

Index(['alert', 'mag', 'magType', 'place', 'time', 'title', 'tsunami',
       'mag_negative', 'parsed_place'],
      dtype='object')

In [82]:
try:
  del earthquakes['ones']
except KeyError:
  print('No such column')

No such column


In [83]:
# using pop() method
mag_negative = earthquakes.pop('mag_negative')
earthquakes.columns

Index(['alert', 'mag', 'magType', 'place', 'time', 'title', 'tsunami',
       'parsed_place'],
      dtype='object')

In [84]:
# we are left with a boolean mask that used to be column in earthquakes
mag_negative.value_counts()

False    8841
True      491
Name: mag_negative, dtype: int64

In [85]:
# this can be used to filter earthquakes without needing to be a column
earthquakes[mag_negative].head()

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,parsed_place
39,,-0.1,ml,"6km NW of Lemmon Valley, Nevada",1539458844506,"M -0.1 - 6km NW of Lemmon Valley, Nevada",0,Nevada
49,,-0.1,ml,"6km NW of Lemmon Valley, Nevada",1539455017464,"M -0.1 - 6km NW of Lemmon Valley, Nevada",0,Nevada
135,,-0.4,ml,"10km SSE of Beatty, Nevada",1539422175717,"M -0.4 - 10km SSE of Beatty, Nevada",0,Nevada
161,,-0.02,md,"20km SSE of Ronan, Montana",1539412475360,"M -0.0 - 20km SSE of Ronan, Montana",0,Montana
198,,-0.2,ml,"60km N of Pahrump, Nevada",1539398340822,"M -0.2 - 60km N of Pahrump, Nevada",0,Nevada


In [87]:
# using drop() method to remove multiple rows or columns 
earthquakes.drop([0, 1]).head(2)

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,parsed_place
2,,3.42,ml,"8km NE of Aguanga, CA",1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,California
3,,0.44,ml,"9km NE of Aguanga, CA",1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,California


In [88]:
# if we want to drop columns, we can pass list of columns in the columns argument
earthquakes.drop(columns=[col for col in earthquakes.columns if col not in ['alert', 'mag', 'title', 'time', 'tsunami']]).head()

Unnamed: 0,alert,mag,time,title,tsunami
0,,1.35,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0
1,,1.29,1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0
2,,3.42,1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0
3,,0.44,1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0
4,,2.16,1539474716050,"M 2.2 - 10km NW of Avenal, CA",0


In [90]:
# we can also use axis=1 argument
earthquakes.drop([col for col in earthquakes.columns if col not in ['alert', 'mag', 'title', 'time', 'tsunami']], axis=1).head()

Unnamed: 0,alert,mag,time,title,tsunami
0,,1.35,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0
1,,1.29,1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0
2,,3.42,1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0
3,,0.44,1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0
4,,2.16,1539474716050,"M 2.2 - 10km NW of Avenal, CA",0


## Further reading

Styling DataFrames: https:/​/​pandas.​pydata.​org/​pandas-​docs/​stable/
style.​html

The pandas ecosystem: https:/​/​pandas.​pydata.​org/​pandas-​docs/​stable/
ecosystem.​html