# Series
## Creating

In [1]:
import pandas as pd

In [2]:
s = pd.Series(['foo', 'bar', 'baz'])

In [3]:
s

0    foo
1    bar
2    baz
dtype: object

In [4]:
s.index

RangeIndex(start=0, stop=3, step=1)

In [5]:
s.values

array(['foo', 'bar', 'baz'], dtype=object)

In [6]:
s = pd.Series([4, 5, 6], list('abc'))

In [7]:
s

a    4
b    5
c    6
dtype: int64

In [8]:
s = pd.Series([4, 5, 6], index=list('abc'))

In [9]:
s

a    4
b    5
c    6
dtype: int64

In [10]:
s = pd.Series({'a': 4, 'b': 5, 'c':6})

In [11]:
s

a    4
b    5
c    6
dtype: int64

In [12]:
s = pd.Series({'a': 4, 'b': 5, 'c':6}, ['a', 'b'])

In [13]:
s

a    4
b    5
dtype: int64

In [14]:
s = pd.Series({'a': 4}, ['a', 'b'])

In [15]:
s


a    4.0
b    NaN
dtype: float64

In [16]:
s

a    4.0
b    NaN
dtype: float64

In [17]:

s = pd.Series({'a': 4, 'b': 5, 'c':6} )

In [18]:
s['a']

4

In [19]:
s[['b', 'a', 'a']]

b    5
a    4
a    4
dtype: int64

In [20]:

s_dup = s[['b', 'a', 'a']]

In [21]:
s_dup['a']

a    4
a    4
dtype: int64

In [22]:
s

a    4
b    5
c    6
dtype: int64

In [23]:
s[1]

5

In [24]:
s = pd.Series({'a': 4, 'b': 5, 'c':6, 'd':7, 'e':8} )

In [25]:
s

a    4
b    5
c    6
d    7
e    8
dtype: int64

In [26]:
s[:2]

a    4
b    5
dtype: int64

In [27]:
s['a':'c']

a    4
b    5
c    6
dtype: int64

In [28]:
s

a    4
b    5
c    6
d    7
e    8
dtype: int64

In [29]:
s + 1

a    5
b    6
c    7
d    8
e    9
dtype: int64

In [30]:
s * 2

a     8
b    10
c    12
d    14
e    16
dtype: int64

In [31]:
s1 = pd.Series([1, 2], ['a', 'b'])

In [32]:
s1

a    1
b    2
dtype: int64

In [33]:
s2 = pd.Series([8, 9], ['b', 'a'])

In [34]:
s2

b    8
a    9
dtype: int64

In [35]:
s1 + s2

a    10
b    10
dtype: int64

In [36]:
s1

a    1
b    2
dtype: int64

In [37]:
s2 = pd.Series([8], ['b'])

In [38]:
s1 + s2

a     NaN
b    10.0
dtype: float64

In [39]:
s

a    4
b    5
c    6
d    7
e    8
dtype: int64

In [40]:
s > 5

a    False
b    False
c     True
d     True
e     True
dtype: bool

In [41]:
s1 = pd.Series([1, 2, 'foo'])

In [43]:
s

a    4
b    5
c    6
d    7
e    8
dtype: int64

In [44]:
s[[True, False, False, False, True]]

a    4
e    8
dtype: int64

In [45]:
s = pd.Series([5, 6, 7, 8], index=['A', 'B', 'C', 'D'])

In [46]:
s % 2

A    1
B    0
C    1
D    0
dtype: int64

In [47]:
s % 2 == 1

A     True
B    False
C     True
D    False
dtype: bool

In [48]:
s[s % 2 == 1]

A    5
C    7
dtype: int64

# DataFrames

## Creating

In [50]:
df = pd.DataFrame([[1, 2], [3, 4]])

In [51]:
df

Unnamed: 0,0,1
0,1,2
1,3,4


In [52]:
df = pd.DataFrame([[1, 2], [3, 4]], columns=['cA', 'cB'])

In [53]:
df

Unnamed: 0,cA,cB
0,1,2
1,3,4


In [55]:
df = pd.DataFrame([[1, 2], [3, 4]], ['row1', 'row2'], columns=['cA', 'cB'])

In [56]:
df

Unnamed: 0,cA,cB
row1,1,2
row2,3,4


In [57]:
d = pd.DataFrame({
    "colA": {'r1': 6, 'r2': 7},
    "colB": {'r1': 8, 'r2': 9}
})

In [58]:
d

Unnamed: 0,colA,colB
r1,6,8
r2,7,9


In [59]:
d = pd.DataFrame({
    "colA": {'r1': 6, 'r2': 7},
    "colB": {'r1': 8 }
})

In [60]:
d

Unnamed: 0,colA,colB
r1,6,8.0
r2,7,


In [61]:
d

Unnamed: 0,colA,colB
r1,6,8.0
r2,7,


In [62]:
d.values

array([[ 6.,  8.],
       [ 7., nan]])

In [63]:
d.index

Index(['r1', 'r2'], dtype='object')

In [64]:
d.columns

Index(['colA', 'colB'], dtype='object')

In [65]:
pd.DataFrame([[4, 5, 6], [7, 8, 9]],
    columns=['foo', 'bar', 'baz'])

Unnamed: 0,foo,bar,baz
0,4,5,6
1,7,8,9


In [66]:
d = pd.DataFrame([[4, 5, 6], [7, 8, 9]],
    columns=['foo', 'bar', 'baz'])

In [67]:
d['bar']

0    5
1    8
Name: bar, dtype: int64

In [68]:
type(d['bar'])

pandas.core.series.Series

In [69]:
d[['bar', 'bar', 'baz']]

Unnamed: 0,bar,bar.1,baz
0,5,5,6
1,8,8,9


In [70]:

type(d[['bar', 'bar', 'baz']])

pandas.core.frame.DataFrame

In [71]:
d

Unnamed: 0,foo,bar,baz
0,4,5,6
1,7,8,9


In [73]:
# d[['foo', 'bar', 'qux'] # key error!

SyntaxError: unexpected EOF while parsing (<ipython-input-73-e94194fa7352>, line 1)

In [74]:
d = pd.DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]])

In [75]:
d

Unnamed: 0,0,1
0,1,2
1,3,4
2,5,6
3,7,8


In [76]:
d[:2]

Unnamed: 0,0,1
0,1,2
1,3,4


In [77]:
d = pd.DataFrame({"cA": {'r1': 1, 'r2': 2, 'r3': 3},
                  "cB": {'r1': 4, 'r2': 5, 'r3': 6},
                  "cC": {'r1': 7, 'r2': 8, 'r3': 9}})

In [78]:
d

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,2,5,8
r3,3,6,9


In [79]:
d[[True, False, True]]

Unnamed: 0,cA,cB,cC
r1,1,4,7
r3,3,6,9


In [81]:
# d[[True, True]] # ValueError

In [82]:
d

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,2,5,8
r3,3,6,9


### give me only the rows where cA is less than 3

In [84]:
d['cA']

r1    1
r2    2
r3    3
Name: cA, dtype: int64

In [85]:
d['cA'] < 3

r1     True
r2     True
r3    False
Name: cA, dtype: bool

In [86]:
d[d['cA'] < 3]

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,2,5,8


In [87]:
d

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,2,5,8
r3,3,6,9


In [88]:
d['cA'] = 100

In [89]:
d

Unnamed: 0,cA,cB,cC
r1,100,4,7
r2,100,5,8
r3,100,6,9


In [90]:
d

Unnamed: 0,cA,cB,cC
r1,100,4,7
r2,100,5,8
r3,100,6,9


In [91]:
d['cA'] = [1, 2, 3]

In [92]:
d

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,2,5,8
r3,3,6,9


In [93]:
df = pd.DataFrame([[4, 5, 6], [7, 8, 9]],
    index=['r1', 'r2'],
    columns=['foo', 'bar', 'baz'])

In [94]:
df

Unnamed: 0,foo,bar,baz
r1,4,5,6
r2,7,8,9


In [95]:
df['foo'] = pd.Series([100, 200], ['r1', 'r3'])

In [96]:
df

Unnamed: 0,foo,bar,baz
r1,100.0,5,6
r2,,8,9


In [98]:
# df['foo'] = [12] 

In [99]:
d = pd.DataFrame({"cA": {'r1': 1, 'r2': 2, 'r3': 3},
                  "cB": {'r1': 4, 'r2': 5, 'r3': 6},
                  "cC": {'r1': 7, 'r2': 8, 'r3': 9}})
d[d['cA'] > 1] = 0

In [100]:
d

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,0,0,0
r3,0,0,0


In [101]:
d

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,0,0,0
r3,0,0,0


In [102]:
d['cD'] = [1, 2, 3]

In [103]:
d

Unnamed: 0,cA,cB,cC,cD
r1,1,4,7,1
r2,0,0,0,2
r3,0,0,0,3


In [104]:
df = pd.DataFrame([[4, 5, 6], [7, 8, 9]],
    columns=['foo', 'bar', 'baz'])

In [106]:
#df.drop('baz')
df

Unnamed: 0,foo,bar,baz
0,4,5,6
1,7,8,9


In [107]:
import numpy as np

In [109]:
arr = np.arange(12).reshape((3, 4))

In [110]:
arr.shape

(3, 4)

In [111]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [112]:
df

Unnamed: 0,foo,bar,baz
0,4,5,6
1,7,8,9


In [114]:
df.values

array([[4, 5, 6],
       [7, 8, 9]])

In [116]:
df.drop('baz', axis=1)

Unnamed: 0,foo,bar
0,4,5
1,7,8


In [117]:
del df['bar']

In [118]:
df

Unnamed: 0,foo,baz
0,4,6
1,7,9


In [120]:
d = pd.DataFrame({"cA": {'r1': 1, 'r2': 2, 'r3': 3},
                  "cB": {'r1': 4, 'r2': 5, 'r3': 6},
                  "cC": {'r1': 7, 'r2': 8, 'r3': 9}})

In [121]:
d

Unnamed: 0,cA,cB,cC
r1,1,4,7
r2,2,5,8
r3,3,6,9


In [125]:
d.loc['r1':'r2', 'cC']

r1    7
r2    8
Name: cC, dtype: int64

In [126]:
pd.read_csv("https://data.cityofnewyork.us/api/views/rsgh-akpg/rows.csv?accessType=DOWNLOAD")

Unnamed: 0,UniqueID,DateOfBite,Species,Breed,Age,Gender,SpayNeuter,Borough,ZipCode
0,8140,December 02 2015,DOG,Pug,7,F,True,Staten Island,
1,1,January 27 2015,DOG,Jack Russ,11,M,False,Brooklyn,11217
2,2,January 25 2015,DOG,"Mastiff, Bull",,U,False,Brooklyn,
3,3,January 20 2015,DOG,PIT BULL/GOLDEN RETRIVE X,,U,False,Brooklyn,11236
4,4,December 26 2014,DOG,Doberman Pinscher,,M,False,Brooklyn,11204
5,5,January 29 2015,DOG,Pit Bull,,U,False,Brooklyn,
6,6,January 31 2015,DOG,Chihuahua Crossbreed,3,M,False,Brooklyn,
7,7,January 31 2015,DOG,German Shepherd Crossbreed,,F,False,Brooklyn,11220
8,8,January 23 2015,DOG,German Shepherd,4,M,False,Brooklyn,11229
9,9,February 04 2015,DOG,Pomeranian,1,F,False,Brooklyn,11216
