In [2]:
import numpy as np
import pandas as pd

In [3]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s

a   -2.342959
b   -1.380177
c   -0.070481
d    0.761640
e   -0.083464
dtype: float64

In [4]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [5]:
# No provided indexes- so default

d = pd.Series(np.random.randn(5))
d

0   -0.438851
1    1.252719
2   -0.319492
3   -0.209894
4    1.091605
dtype: float64

In [6]:
d.index

RangeIndex(start=0, stop=5, step=1)

In [7]:
d = {'b' : 1, 'a' : 0, 'c' : 2}

In [8]:
pd.Series(d)

b    1
a    0
c    2
dtype: int64

In [9]:
d = {'a' : 0., 'b' : 1., 'c' : 2.}

In [10]:
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [11]:
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [12]:
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [13]:
s[0]

-2.342959490629094

In [14]:
s[:3]

a   -2.342959
b   -1.380177
c   -0.070481
dtype: float64

In [15]:
s[s > s.median()]

c   -0.070481
d    0.761640
dtype: float64

In [16]:
s[[4, 3, 1]]

e   -0.083464
d    0.761640
b   -1.380177
dtype: float64

In [17]:
np.exp(s)

a    0.096043
b    0.251534
c    0.931945
d    2.141786
e    0.919924
dtype: float64

In [18]:
s['a']

-2.342959490629094

In [19]:
s['e'] = 12.

In [20]:
s

a    -2.342959
b    -1.380177
c    -0.070481
d     0.761640
e    12.000000
dtype: float64

In [21]:
'e' in s

True

In [22]:
'f' in s

False

In [23]:
s['f']

KeyError: 'f'

In [24]:
s.get('f')

In [25]:
s.get('f', np.nan)

nan

In [26]:
s + s

a    -4.685919
b    -2.760354
c    -0.140962
d     1.523280
e    24.000000
dtype: float64

In [27]:
s * 2

a    -4.685919
b    -2.760354
c    -0.140962
d     1.523280
e    24.000000
dtype: float64

In [28]:
np.exp(s)

a         0.096043
b         0.251534
c         0.931945
d         2.141786
e    162754.791419
dtype: float64

In [29]:
s[1:]

b    -1.380177
c    -0.070481
d     0.761640
e    12.000000
dtype: float64

In [30]:
s[:-1]

a   -2.342959
b   -1.380177
c   -0.070481
d    0.761640
dtype: float64

In [31]:
# A and e not defined which is why you see the Nans
s[1:] + s[:-1]

a         NaN
b   -2.760354
c   -0.140962
d    1.523280
e         NaN
dtype: float64

In [32]:
s = pd.Series(np.random.randn(5), name='something')

In [33]:
s

0    0.682992
1   -0.007713
2    0.590717
3   -2.250118
4   -0.300470
Name: something, dtype: float64

In [34]:
s.name

'something'

In [35]:
s2 = s.rename("different")

In [36]:
s2.name

'different'

In [37]:
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
   ....:      'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}

In [38]:
df = pd.DataFrame(d)

In [39]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [40]:
# So now the entries in the dict will only show up which have d, b and a as indices
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [41]:
# We don't have a three column
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [42]:
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}

In [43]:
pd.DataFrame(d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [44]:
# Now we're just giving it an index- as we formerly did not have an index
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [45]:
data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')])
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [46]:
data[:] = [(1, 2., 'Hello'), (2, 3., "World")]

In [47]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [48]:
# Now we're just giving it an index
pd.DataFrame(data, index=['first', 'second'])

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [49]:
pd.DataFrame(data, columns=['C', 'A', 'B'])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


In [50]:
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]

In [51]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [52]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [53]:
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
   ...:                           'foo', 'bar', 'foo', 'foo'],
   ...:                    'B' : ['one', 'one', 'two', 'three',
   ...:                           'two', 'two', 'one', 'three'],
   ...:                    'C' : np.random.randn(8),
   ...:                    'D' : np.random.randn(8)})
   ...: 

In [54]:
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.130886,-0.002058
1,bar,one,-0.74759,0.280769
2,foo,two,-0.357793,0.064128
3,bar,three,-0.05519,-0.136363
4,foo,two,-0.438044,-0.323922
5,bar,two,-0.949941,0.168522
6,foo,one,-0.154298,-0.667907
7,foo,three,-1.329419,0.988001


In [55]:
grouped = df.groupby('A')
grouped

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x7f614c81b8d0>

In [56]:
grouped = df.groupby(['A', 'B'])
grouped

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x7f614c81b5f8>

In [57]:
lst = [1, 2, 3, 1, 2, 3]

In [58]:
s = pd.Series([1, 2, 3, 10, 20, 30], lst)

In [59]:
grouped = s.groupby(level=0)

In [60]:
grouped.first()

1    1
2    2
3    3
dtype: int64

In [61]:
grouped.last()

1    10
2    20
3    30
dtype: int64

In [62]:
grouped.sum()

1    11
2    22
3    33
dtype: int64

In [63]:
data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')])
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [64]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")]
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [65]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [66]:
pd.DataFrame(data, index=['first', 'second'])

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [67]:
pd.DataFrame(data, columns=['C', 'A', 'B'])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


In [68]:
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]

In [69]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [70]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [71]:
pd.DataFrame(data2, columns=['a', 'b'])

Unnamed: 0,a,b
0,1,2
1,5,10


In [72]:
pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
   ....:               ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
   ....:               ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
   ....:               ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
   ....:               ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})
   ....: 

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


In [73]:
pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]))

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [74]:
pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]),
   ....:                        orient='index', columns=['one', 'two', 'three'])
   ....: 

Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


In [75]:
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [76]:
pd.DataFrame.from_records(data, index='C')

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,3.0


In [77]:
a = pd.DataFrame([2, 1, 1, np.nan], index=['a', 'b', 'c', 'd'],
...                  columns=['one'])
a

Unnamed: 0,one
a,2.0
b,1.0
c,1.0
d,


In [78]:
b = pd.DataFrame(dict(one=[1, np.nan, 1, np.nan],
...                       two=[3, 2, np.nan, 2]),
...                  index=['a', 'b', 'd', 'e'])
b

Unnamed: 0,one,two
a,1.0,3.0
b,,2.0
d,1.0,
e,,2.0


In [79]:
a.sub(b, fill_value=0)

Unnamed: 0,one,two
a,1.0,-3.0
b,1.0,-2.0
c,1.0,
d,-1.0,
e,,-2.0


In [80]:
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]

In [81]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [82]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [83]:
pd.DataFrame(data2, columns=['a', 'b'])

Unnamed: 0,a,b
0,1,2
1,5,10


In [84]:
pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
   ....:               ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
   ....:               ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
   ....:               ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
   ....:               ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})
   ....: 

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


In [85]:
pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]))

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [86]:
# Orienting by index- puts A, B as the indices rather than columns like in the above example
pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]), orient='index', columns=['one', 'two', 'three'])

Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


In [87]:
data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')])
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [88]:
pd.DataFrame.from_records(data, index='C')

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'',0,0.0
b'',0,0.0


In [89]:
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.130886,-0.002058
1,bar,one,-0.74759,0.280769
2,foo,two,-0.357793,0.064128
3,bar,three,-0.05519,-0.136363
4,foo,two,-0.438044,-0.323922
5,bar,two,-0.949941,0.168522
6,foo,one,-0.154298,-0.667907
7,foo,three,-1.329419,0.988001


In [90]:
df['one']


KeyError: 'one'

In [91]:
df['three'] = df['one'] * df['two']
df['flag'] = df['one'] > 2
df['three']

KeyError: 'one'

In [92]:
whos

Variable   Type             Data/Info
-------------------------------------
a          DataFrame           one\na  2.0\nb  1.0\nc  1.0\nd  NaN
b          DataFrame           one  two\na  1.0  3.0\<...>nd  1.0  NaN\ne  NaN  2.0
d          dict             n=2
data       ndarray          2: 2 elems, type `[('A', '<i4'), ('B', '<f4'), ('C', 'S10')]`, 36 bytes
data2      list             n=2
df         DataFrame             A      B         C  <...>three -1.329419  0.988001
grouped    SeriesGroupBy    <pandas.core.groupby.grou<...>object at 0x7f614c81b128>
lst        list             n=6
np         module           <module 'numpy' from '/us<...>kages/numpy/__init__.py'>
pd         module           <module 'pandas' from '/u<...>ages/pandas/__init__.py'>
s          Series           1     1\n2     2\n3     3<...>20\n3    30\ndtype: int64
s2         Series           0    0.682992\n1   -0.007<...>different, dtype: float64


In [94]:
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.130886,-0.002058
1,bar,one,-0.74759,0.280769
2,foo,two,-0.357793,0.064128
3,bar,three,-0.05519,-0.136363
4,foo,two,-0.438044,-0.323922
5,bar,two,-0.949941,0.168522
6,foo,one,-0.154298,-0.667907
7,foo,three,-1.329419,0.988001


In [97]:
# Index into the dataframe by index
df.loc[0]

A           foo
B           one
C     -0.130886
D   -0.00205757
Name: 0, dtype: object

In [98]:
# Index into the dataframe by index- except now the index is an integer
df.iloc[0]

A           foo
B           one
C     -0.130886
D   -0.00205757
Name: 0, dtype: object

In [100]:
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.130886,-0.002058
1,bar,one,-0.74759,0.280769
2,foo,two,-0.357793,0.064128
3,bar,three,-0.05519,-0.136363
4,foo,two,-0.438044,-0.323922
5,bar,two,-0.949941,0.168522
6,foo,one,-0.154298,-0.667907
7,foo,three,-1.329419,0.988001


In [108]:
df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

In [104]:
# Keys correspond to columns, indices are above, values are in a list
d = {'one': [1.0, 2.0, 3.0, 4.0], 'two': [4.0, 3.0, 2.0, 1.0]}

In [105]:
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [109]:
np.asarray(df)

array([[1., 4.],
       [2., 3.],
       [3., 2.],
       [4., 1.]])

In [110]:
np.exp(df)

Unnamed: 0,one,two
a,2.718282,54.59815
b,7.389056,20.085537
c,20.085537,7.389056
d,54.59815,2.718282


In [111]:
df.T.dot(df)

Unnamed: 0,one,two
one,30.0,20.0
two,20.0,30.0
