In [3]:
import numpy as np

data = np.genfromtxt('data/example_data.csv', 
                     delimiter=';',
                     names=True, 
                     dtype=None, 
                     encoding='UTF')
data

array([('2018-10-13 11:10:23.560', '262km NW of Ozernovskiy, Russia', 'mww', 6.7, 'green', 1),
       ('2018-10-13 04:34:15.580', '25km E of Bitung, Indonesia', 'mww', 5.2, 'green', 0),
       ('2018-10-13 00:13:46.220', '42km WNW of Sola, Vanuatu', 'mww', 5.7, 'green', 0),
       ('2018-10-12 21:09:49.240', '13km E of Nueva Concepcion, Guatemala', 'mww', 5.7, 'green', 0),
       ('2018-10-12 02:52:03.620', '128km SE of Kimbe, Papua New Guinea', 'mww', 5.6, 'green', 1)],
      dtype=[('time', '<U23'), ('place', '<U37'), ('magType', '<U3'), ('mag', '<f8'), ('alert', '<U5'), ('tsunami', '<i4')])

In [6]:
data.shape

(5,)

In [8]:
data.dtype

dtype([('time', '<U23'), ('place', '<U37'), ('magType', '<U3'), ('mag', '<f8'), ('alert', '<U5'), ('tsunami', '<i4')])

In [9]:
%%timeit
max([row[3] for row in data])

4.04 µs ± 201 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
array_dict = {}
for i, col in enumerate(data.dtype.names):
    array_dict[col] = np.array([row[i] for row in data])

array_dict


{'time': array(['2018-10-13 11:10:23.560', '2018-10-13 04:34:15.580',
        '2018-10-13 00:13:46.220', '2018-10-12 21:09:49.240',
        '2018-10-12 02:52:03.620'], dtype='<U23'),
 'place': array(['262km NW of Ozernovskiy, Russia', '25km E of Bitung, Indonesia',
        '42km WNW of Sola, Vanuatu',
        '13km E of Nueva Concepcion, Guatemala',
        '128km SE of Kimbe, Papua New Guinea'], dtype='<U37'),
 'magType': array(['mww', 'mww', 'mww', 'mww', 'mww'], dtype='<U3'),
 'mag': array([6.7, 5.2, 5.7, 5.7, 5.6]),
 'alert': array(['green', 'green', 'green', 'green', 'green'], dtype='<U5'),
 'tsunami': array([1, 0, 0, 0, 1])}

In [12]:
%%timeit
array_dict['mag'].max()

2.01 µs ± 43.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [13]:
np.array([
    value[array_dict['mag'].argmax()] \
    for key, value in array_dict.items()
])

array(['2018-10-13 11:10:23.560', '262km NW of Ozernovskiy, Russia',
       'mww', '6.7', 'green', '1'], dtype='<U31')

In [15]:
import pandas as pd
place = pd.Series(array_dict['place'], name='place')
place

0          262km NW of Ozernovskiy, Russia
1              25km E of Bitung, Indonesia
2                42km WNW of Sola, Vanuatu
3    13km E of Nueva Concepcion, Guatemala
4      128km SE of Kimbe, Papua New Guinea
Name: place, dtype: object

In [17]:
place_index = place.index
place_index

RangeIndex(start=0, stop=5, step=1)

In [18]:
place_index.values

array([0, 1, 2, 3, 4], dtype=int64)

In [22]:
np.array([1, 1, 1]) + np.array([-1, 0, 1])

array([0, 1, 2])

In [23]:
pd.Series(np.linspace(0, 10, num=5))\
+ pd.Series(np.linspace(0, 10, num=5), index=pd.Index([1, 2, 3, 4, 5]))

0     NaN
1     2.5
2     7.5
3    12.5
4    17.5
5     NaN
dtype: float64

In [25]:
df = pd.DataFrame(array_dict) 
df

Unnamed: 0,time,place,magType,mag,alert,tsunami
0,2018-10-13 11:10:23.560,"262km NW of Ozernovskiy, Russia",mww,6.7,green,1
1,2018-10-13 04:34:15.580,"25km E of Bitung, Indonesia",mww,5.2,green,0
2,2018-10-13 00:13:46.220,"42km WNW of Sola, Vanuatu",mww,5.7,green,0
3,2018-10-12 21:09:49.240,"13km E of Nueva Concepcion, Guatemala",mww,5.7,green,0
4,2018-10-12 02:52:03.620,"128km SE of Kimbe, Papua New Guinea",mww,5.6,green,1


In [27]:
df.dtypes

time        object
place       object
magType     object
mag        float64
alert       object
tsunami      int32
dtype: object

In [28]:
df.values

array([['2018-10-13 11:10:23.560', '262km NW of Ozernovskiy, Russia',
        'mww', 6.7, 'green', 1],
       ['2018-10-13 04:34:15.580', '25km E of Bitung, Indonesia', 'mww',
        5.2, 'green', 0],
       ['2018-10-13 00:13:46.220', '42km WNW of Sola, Vanuatu', 'mww',
        5.7, 'green', 0],
       ['2018-10-12 21:09:49.240',
        '13km E of Nueva Concepcion, Guatemala', 'mww', 5.7, 'green', 0],
       ['2018-10-12 02:52:03.620', '128km SE of Kimbe, Papua New Guinea',
        'mww', 5.6, 'green', 1]], dtype=object)

In [29]:
df.columns

Index(['time', 'place', 'magType', 'mag', 'alert', 'tsunami'], dtype='object')

In [30]:
df + df

Unnamed: 0,time,place,magType,mag,alert,tsunami
0,2018-10-13 11:10:23.5602018-10-13 11:10:23.560,"262km NW of Ozernovskiy, Russia262km NW of Oze...",mwwmww,13.4,greengreen,2
1,2018-10-13 04:34:15.5802018-10-13 04:34:15.580,"25km E of Bitung, Indonesia25km E of Bitung, I...",mwwmww,10.4,greengreen,0
2,2018-10-13 00:13:46.2202018-10-13 00:13:46.220,"42km WNW of Sola, Vanuatu42km WNW of Sola, Van...",mwwmww,11.4,greengreen,0
3,2018-10-12 21:09:49.2402018-10-12 21:09:49.240,"13km E of Nueva Concepcion, Guatemala13km E of...",mwwmww,11.4,greengreen,0
4,2018-10-12 02:52:03.6202018-10-12 02:52:03.620,"128km SE of Kimbe, Papua New Guinea128km SE of...",mwwmww,11.2,greengreen,2


In [31]:
import datetime

In [33]:
np.random.seed(0)
pd.Series(np.random.rand(5), name='random')

0    0.548814
1    0.715189
2    0.602763
3    0.544883
4    0.423655
Name: random, dtype: float64

In [36]:
np.random.seed(0)
pd.DataFrame(
    {
        'random': np.random.rand(5),
        'text': ['hot', 'warm', 'cool', 'cold', None],
        'truth': [np.random.choice([True, False]) 
                  for _ in range(5)]
    }, 
    index=pd.date_range(
        end=datetime.date(2019, 4, 21),
        freq='1D',
        periods=5, 
        name='date'
    )
)

Unnamed: 0_level_0,random,text,truth
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-17,0.548814,hot,False
2019-04-18,0.715189,warm,True
2019-04-19,0.602763,cool,True
2019-04-20,0.544883,cold,False
2019-04-21,0.423655,,True


In [38]:
pd.DataFrame([
         {'mag' : 5.2, 'place' : 'California'},
         {'mag' : 1.2, 'place' : 'Alaska'},
         {'mag' : 0.2, 'place' : 'California'},
])

Unnamed: 0,mag,place
0,5.2,California
1,1.2,Alaska
2,0.2,California


In [43]:
df = pd.read_csv('data/earthquakes.csv')
df.head()

Unnamed: 0,alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,...,sources,status,time,title,tsunami,type,types,tz,updated,url
0,,,37389218,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.008693,,85.0,",ci37389218,",1.35,ml,...,",ci,",automatic,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475395144,https://earthquake.usgs.gov/earthquakes/eventp...
1,,,37389202,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02003,,79.0,",ci37389202,",1.29,ml,...,",ci,",automatic,1539475129610,"M 1.3 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475253925,https://earthquake.usgs.gov/earthquakes/eventp...
2,,4.4,37389194,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02137,28.0,21.0,",ci37389194,",3.42,ml,...,",ci,",automatic,1539475062610,"M 3.4 - 8km NE of Aguanga, CA",0,earthquake,",dyfi,focal-mechanism,geoserve,nearby-cities,o...",-480.0,1539536756176,https://earthquake.usgs.gov/earthquakes/eventp...
3,,,37389186,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.02618,,39.0,",ci37389186,",0.44,ml,...,",ci,",automatic,1539474978070,"M 0.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475196167,https://earthquake.usgs.gov/earthquakes/eventp...
4,,,73096941,https://earthquake.usgs.gov/fdsnws/event/1/que...,0.07799,,192.0,",nc73096941,",2.16,md,...,",nc,",automatic,1539474716050,"M 2.2 - 10km NW of Avenal, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,scit...",-480.0,1539477547926,https://earthquake.usgs.gov/earthquakes/eventp...


In [49]:
import sqlite3
with sqlite3.connect('data/quakes.db') as connection:
    tsunamis = pd.read_sql('SELECT * FROM tsunamis', connection)

tsunamis.head()

Unnamed: 0,alert,type,title,place,magType,mag,time
0,,earthquake,"M 5.0 - 165km NNW of Flying Fish Cove, Christm...","165km NNW of Flying Fish Cove, Christmas Island",mww,5.0,1539459504090
1,green,earthquake,"M 6.7 - 262km NW of Ozernovskiy, Russia","262km NW of Ozernovskiy, Russia",mww,6.7,1539429023560
2,green,earthquake,"M 5.6 - 128km SE of Kimbe, Papua New Guinea","128km SE of Kimbe, Papua New Guinea",mww,5.6,1539312723620
3,green,earthquake,"M 6.5 - 148km S of Severo-Kuril'sk, Russia","148km S of Severo-Kuril'sk, Russia",mww,6.5,1539213362130
4,green,earthquake,"M 6.2 - 94km SW of Kokopo, Papua New Guinea","94km SW of Kokopo, Papua New Guinea",mww,6.2,1539208835130


In [77]:
pd.read_csv("data/epidemic.csv",skiprows=[0, 2], header = 1)

Unnamed: 0,Country,Population,Cases
0,Afghanistan,389465464,1412
1,Albania,28461651,103
2,Algeria,43851044,1949


In [80]:
df = pd.read_csv('http://data-fairfaxcountygis.opendata.arcgis.com/datasets/533db4de13ae4d729884b5ec41d65034_0.csv')
df.inspection()

AttributeError: 'DataFrame' object has no attribute 'inspection'

In [86]:
df.tail()

Unnamed: 0,LASTUPDATE,OBJECTID,PRECINCTID,NAME,COUNTY,LASTEDITOR,PRECINCTID_1,NAME_1,TOWNSHIP,WINPARTY,...,CANDIDATE4,PARTY4,NUMVOTES4,PERCVOTE4,CANDIDATE5,PARTY5,NUMVOTES5,PERCVOTE5,Shape__Area,Shape__Length
240,2016/11/17 10:58:50+00,241,201,ARMSTRONG,Fairfax,stulad,201,ARMSTRONG,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),Stein / Baraka (G),17,0.005461,McMullin / Johnson (I),McMullin / Johnson (I),51,0.016383,29668470.0,25881.328984
241,2016/11/17 10:58:50+00,242,233,NORTH POINT,Fairfax,stulad,233,NORTH POINT,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),Stein / Baraka (G),17,0.00567,McMullin / Johnson (I),McMullin / Johnson (I),29,0.009673,48270390.0,36216.367462
242,2016/11/17 10:58:50+00,243,735,ROTONDA,Fairfax,stulad,735,ROTONDA,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),Stein / Baraka (G),21,0.020369,McMullin / Johnson (I),McMullin / Johnson (I),14,0.013579,7376918.0,12615.457685
243,1970/01/01 00:00:00+00,1000,99999,ABSENTEE,Fairfax,,99999,ABSENTEE,Fairfax County,,...,Stein / Baraka (G),Stein / Baraka (G),726,,McMullin / Johnson (I),McMullin / Johnson (I),1880,,,
244,1970/01/01 00:00:00+00,1001,99998,PROVISIONAL,Fairfax,,99998,PROVISIONAL,Fairfax County,,...,Stein / Baraka (G),Stein / Baraka (G),5,,McMullin / Johnson (I),McMullin / Johnson (I),17,,,


In [93]:
df['check'] = df['OBJECTID'] > 241
df

Unnamed: 0,LASTUPDATE,OBJECTID,PRECINCTID,NAME,COUNTY,LASTEDITOR,PRECINCTID_1,NAME_1,TOWNSHIP,WINPARTY,...,PARTY4,NUMVOTES4,PERCVOTE4,CANDIDATE5,PARTY5,NUMVOTES5,PERCVOTE5,Shape__Area,Shape__Length,check
0,2016/11/17 10:58:50+00,241,623,ALBAN,Fairfax,stulad,623,ALBAN,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),21,0.008812,McMullin / Johnson (I),McMullin / Johnson (I),37,0.015527,6.141624e+07,44406.211569,False
1,2016/11/17 10:58:50+00,241,626,SARATOGA,Fairfax,stulad,626,SARATOGA,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),23,0.007877,McMullin / Johnson (I),McMullin / Johnson (I),41,0.014041,2.881603e+07,28082.575577,False
2,2016/11/17 10:58:50+00,241,421,KINGSTOWNE,Fairfax,stulad,421,KINGSTOWNE,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),18,0.005929,McMullin / Johnson (I),McMullin / Johnson (I),41,0.013505,2.253961e+07,23110.064289,False
3,2016/11/17 10:58:50+00,241,426,GREENSPRING,Fairfax,stulad,426,GREENSPRING,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),1,0.000666,McMullin / Johnson (I),McMullin / Johnson (I),17,0.011326,3.962441e+06,10051.875337,False
4,2016/11/17 10:58:50+00,241,840,WEST SPRINGFIELD,Fairfax,stulad,840,WEST SPRINGFIELD,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),17,0.006612,McMullin / Johnson (I),McMullin / Johnson (I),55,0.021392,2.786030e+07,32432.609862,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240,2016/11/17 10:58:50+00,241,201,ARMSTRONG,Fairfax,stulad,201,ARMSTRONG,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),17,0.005461,McMullin / Johnson (I),McMullin / Johnson (I),51,0.016383,2.966847e+07,25881.328984,False
241,2016/11/17 10:58:50+00,241,233,NORTH POINT,Fairfax,stulad,233,NORTH POINT,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),17,0.005670,McMullin / Johnson (I),McMullin / Johnson (I),29,0.009673,4.827039e+07,36216.367462,False
242,2016/11/17 10:58:50+00,241,735,ROTONDA,Fairfax,stulad,735,ROTONDA,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),21,0.020369,McMullin / Johnson (I),McMullin / Johnson (I),14,0.013579,7.376918e+06,12615.457685,False
243,1970/01/01 00:00:00+00,241,99999,ABSENTEE,Fairfax,,99999,ABSENTEE,Fairfax County,,...,Stein / Baraka (G),726,,McMullin / Johnson (I),McMullin / Johnson (I),1880,,,,False


In [98]:
df.drop([1], inplace= True)
df.head()

Unnamed: 0,LASTUPDATE,OBJECTID,PRECINCTID,NAME,COUNTY,LASTEDITOR,PRECINCTID_1,NAME_1,TOWNSHIP,WINPARTY,...,PARTY4,NUMVOTES4,PERCVOTE4,CANDIDATE5,PARTY5,NUMVOTES5,PERCVOTE5,Shape__Area,Shape__Length,check
2,2016/11/17 10:58:50+00,241,421,KINGSTOWNE,Fairfax,stulad,421,KINGSTOWNE,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),18,0.005929,McMullin / Johnson (I),McMullin / Johnson (I),41,0.013505,22539610.0,23110.064289,False
3,2016/11/17 10:58:50+00,241,426,GREENSPRING,Fairfax,stulad,426,GREENSPRING,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),1,0.000666,McMullin / Johnson (I),McMullin / Johnson (I),17,0.011326,3962441.0,10051.875337,False
4,2016/11/17 10:58:50+00,241,840,WEST SPRINGFIELD,Fairfax,stulad,840,WEST SPRINGFIELD,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),17,0.006612,McMullin / Johnson (I),McMullin / Johnson (I),55,0.021392,27860300.0,32432.609862,False
5,2016/11/17 10:58:50+00,241,825,CHERRY RUN,Fairfax,stulad,825,CHERRY RUN,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),7,0.002902,McMullin / Johnson (I),McMullin / Johnson (I),41,0.016998,24459970.0,24305.884623,False
6,2016/11/17 10:58:50+00,241,812,VALLEY,Fairfax,stulad,812,VALLEY,Fairfax County,Clinton / Kaine (D),...,Stein / Baraka (G),10,0.004186,McMullin / Johnson (I),McMullin / Johnson (I),40,0.016743,24091910.0,28773.685005,False
