In [1]:
import datetime
import sqlite3
from contextlib import closing

import numpy as np
import pandas as pd

In [2]:
# Build a named Series out of five uniform random draws
np.random.seed(0)  # pin the global seed so that reruns give the same values
pd.Series(data=np.random.rand(5), name='random')

0    0.548814
1    0.715189
2    0.602763
3    0.544883
4    0.423655
Name: random, dtype: float64

In [3]:
# Creating a DataFrame from a Series
# Five evenly spaced values from 0 to 10 (endpoints included) become a
# single-column frame.
pd.Series(np.linspace(start=0, stop=10, num=5)).to_frame()

Unnamed: 0,0
0,0.0
1,2.5
2,5.0
3,7.5
4,10.0


In [4]:
# Creating a DataFrame from Python Data Structures
# From a dictionary of list-like structures: each key becomes a column label.
np.random.seed(0)  # set the seed so the result is reproducible
mixed_types_df = pd.DataFrame(
    {
        'random': np.random.rand(5),
        # None (not the string 'None') marks the last entry as a missing value
        'text': ['hot', 'warm', 'cool', 'cold', None],
        'truth': [np.random.choice([True, False]) for _ in range(5)],
    },
    # Five consecutive daily timestamps ending on 2024-11-12
    index=pd.date_range(
        end=datetime.date(2024, 11, 12),
        freq='1D',
        periods=5,
        name='date',
    ),
)
mixed_types_df

Unnamed: 0_level_0,random,text,truth
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-11-08,0.548814,hot,False
2024-11-09,0.715189,warm,True
2024-11-10,0.602763,cool,True
2024-11-11,0.544883,cold,False
2024-11-12,0.423655,,True


In [5]:
# From a list of dictionaries
# Every dictionary is one row; its keys supply the column labels.
pd.DataFrame(
    data=[
        dict(mag=5.2, place='Edinburge'),
        dict(mag=6.7, place='McAllen'),
        dict(mag=7.7, place='SanAntonio'),
    ]
)

Unnamed: 0,mag,place
0,5.2,Edinburge
1,6.7,McAllen
2,7.7,SanAntonio


In [6]:
# From a list of tuples
# Passing such a list to pd.DataFrame is equivalent to using
# pd.DataFrame.from_records(). Each tuple holds n, n squared and n cubed
# for n = 0..6.
list_of_tupples = [tuple(n ** power for power in (1, 2, 3)) for n in range(7)]
list_of_tupples

[(0, 0, 0),
 (1, 1, 1),
 (2, 4, 8),
 (3, 9, 27),
 (4, 16, 64),
 (5, 25, 125),
 (6, 36, 216)]

In [7]:
# Turn each tuple into a row and label the three columns explicitly.
pd.DataFrame(
    data=list_of_tupples,
    columns=['S', 'S_squared', 'S_cubed'],
)

Unnamed: 0,S,S_squared,S_cubed
0,0,0,0
1,1,1,1
2,2,4,8
3,3,9,27
4,4,16,64
5,5,25,125
6,6,36,216


In [8]:
from array import array

In [9]:
# From a NumPy array
# Each inner list is one row of the 5x3 array; columns are labelled explicitly.
# NOTE(review): rows 1 and 2 (8/9/10 and 12/14/18) are not squares or cubes of
# V despite the column labels; confirm the sample values are intentional.
pd.DataFrame(
    data=np.array(
        [
            [0, 0, 0],
            [8, 9, 10],
            [12, 14, 18],
            [3, 9, 27],
            [4, 16, 64],
        ]
    ),
    columns=['V', 'V_squared', 'V_cubed'],
)

Unnamed: 0,V,V_squared,V_cubed
0,0,0,0
1,8,9,10
2,12,14,18
3,3,9,27
4,4,16,64


In [10]:
# Creating a DataFrame by Reading in a CSV File
# IPython lets a notebook cell run a shell command by prefixing it with `!`.
# Count the lines in the file before loading it.
!wc -l earthquakes.csv

9333 earthquakes.csv


In [11]:
# Peek at the first two lines: the header row plus the first data record.
!head -n 2 earthquakes.csv

alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,mmi,net,nst,place,rms,sig,sources,status,time,title,tsunami,type,types,tz,updated,url
,,37389218,https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ci37389218&format=geojson,0.008693,,85.0,",ci37389218,",1.35,ml,,ci,26.0,"9km NE of Aguanga, CA",0.19,28,",ci,",automatic,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475395144,https://earthquake.usgs.gov/earthquakes/eventpage/ci37389218


In [12]:
# Show the last two lines of the file.
!tail -n 2 earthquakes.csv

,,38063959,https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ci38063959&format=geojson,0.01865,,61.0,",ci38063959,",1.1,ml,,ci,27.0,"9km NE of Aguanga, CA",0.1,19,",ci,",reviewed,1537229545350,"M 1.1 - 9km NE of Aguanga, CA",0,earthquake,",focal-mechanism,geoserve,nearby-cities,origin,phase-data,scitech-link,",-480.0,1537230211640,https://earthquake.usgs.gov/earthquakes/eventpage/ci38063959
,,38063935,https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ci38063935&format=geojson,0.01698,,39.0,",ci38063935,",0.66,ml,,ci,24.0,"9km NE of Aguanga, CA",0.1,7,",ci,",reviewed,1537228864470,"M 0.7 - 9km NE of Aguanga, CA",0,earthquake,",focal-mechanism,geoserve,nearby-cities,origin,phase-data,scitech-link,",-480.0,1537305830770,https://earthquake.usgs.gov/earthquakes/eventpage/ci38063935


In [13]:
# Column count: split the first line on commas, print its number of fields
# (NF), then exit so that only that line is examined. A plain comma split does
# not understand quoted fields, so this is only reliable for a header row
# without quoted commas.
!awk -F',' '{print NF; exit}' earthquakes.csv

26


In [14]:
headers = !head -n 1 data/earthquakes.csv
len(headers[0].split(','))

1

In [15]:
# Parse the earthquake CSV into a DataFrame, then display it.
df = pd.read_csv(filepath_or_buffer='earthquakes.csv')
df

Unnamed: 0,alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,...,sources,status,time,title,tsunami,type,types,tz,updated,url
0,,,37389218,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.008693,,85.0,",ci37389218,",1.35,ml,...,",ci,",automatic,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475395144,https://earthquake.usgs.gov/earthquakes/eventp...
1,,,37389202,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.020030,,79.0,",ci37389202,",1.29,ml,...,",ci,",automatic,1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475253925,https://earthquake.usgs.gov/earthquakes/eventp...
2,,4.4,37389194,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.021370,28.0,21.0,",ci37389194,",3.42,ml,...,",ci,",automatic,1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,earthquake,",dyfi,focal-mechanism,geoserve,nearby-cities,o...",-480.0,1539536756176,https://earthquake.usgs.gov/earthquakes/eventp...
3,,,37389186,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.026180,,39.0,",ci37389186,",0.44,ml,...,",ci,",automatic,1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475196167,https://earthquake.usgs.gov/earthquakes/eventp...
4,,,73096941,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.077990,,192.0,",nc73096941,",2.16,md,...,",nc,",automatic,1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,scit...",-480.0,1539477547926,https://earthquake.usgs.gov/earthquakes/eventp...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9327,,,73086771,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.018060,,185.0,",nc73086771,",0.62,md,...,",nc,",reviewed,1537230228060,"M 0.6 - 9km ENE of Mammoth Lakes, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1537285598315,https://earthquake.usgs.gov/earthquakes/eventp...
9328,,,38063967,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.030410,,50.0,",ci38063967,",1.00,ml,...,",ci,",reviewed,1537230135130,"M 1.0 - 3km W of Julian, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,scit...",-480.0,1537276800970,https://earthquake.usgs.gov/earthquakes/eventp...
9329,,,2018261000,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.452600,,276.0,",pr2018261000,",2.40,md,...,",pr,",reviewed,1537229908180,"M 2.4 - 35km NNE of Hatillo, Puerto Rico",0,earthquake,",geoserve,origin,phase-data,",-240.0,1537243777410,https://earthquake.usgs.gov/earthquakes/eventp...
9330,,,38063959,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.018650,,61.0,",ci38063959,",1.10,ml,...,",ci,",reviewed,1537229545350,"M 1.1 - 9km NE of Aguanga, CA",0,earthquake,",focal-mechanism,geoserve,nearby-cities,origin...",-480.0,1537230211640,https://earthquake.usgs.gov/earthquakes/eventp...


In [16]:
# Writing a DataFrame to a CSV File
# index=False leaves the row index out of the written file.
df.to_csv(path_or_buf='output.csv', index=False)

In [17]:
# Writing a DataFrame to a Database
# Load tsunamis.csv and store it as the `tsunamis` table in quakes.db,
# replacing the table if it already exists.
# A sqlite3 connection used as a context manager only commits or rolls back
# the transaction; it never closes the connection. closing() is layered on top
# so the database file is released when the block exits.
with closing(sqlite3.connect('quakes.db')) as connection, connection:
    pd.read_csv('tsunamis.csv').to_sql(
        'tsunamis', connection, index=False, if_exists='replace'
    )

In [18]:
# Creating a DataFrame by Querying a Database
# Read every row of the `tsunamis` table in quakes.db into a DataFrame.
# closing() guarantees the connection is closed once the query has run; the
# sqlite3 connection's own context manager would leave it open.
with closing(sqlite3.connect('quakes.db')) as connection:
    tsunamis = pd.read_sql('SELECT * FROM tsunamis', connection)

tsunamis.head()

Unnamed: 0,alert,type,title,place,magType,mag,time
0,,earthquake,"M 5.0 - 165km NNW of Flying Fish Cove, Christm...","165km NNW of Flying Fish Cove, Christmas Island",mww,5.0,1539459504090
1,green,earthquake,"M 6.7 - 262km NW of Ozernovskiy, Russia","262km NW of Ozernovskiy, Russia",mww,6.7,1539429023560
2,green,earthquake,"M 5.6 - 128km SE of Kimbe, Papua New Guinea","128km SE of Kimbe, Papua New Guinea",mww,5.6,1539312723620
3,green,earthquake,"M 6.5 - 148km S of Severo-Kuril'sk, Russia","148km S of Severo-Kuril'sk, Russia",mww,6.5,1539213362130
4,green,earthquake,"M 6.2 - 94km SW of Kokopo, Papua New Guinea","94km SW of Kokopo, Papua New Guinea",mww,6.2,1539208835130
