In [1]:
import numpy as np
import pandas as pd

In [3]:
# Build an 8x3 frame of standard-normal samples, then normalise the
# column labels to lower case via the vectorised string accessor.
df = pd.DataFrame(np.random.randn(8, 3), columns=list('ABC'))
df.columns = df.columns.str.lower()
df

Unnamed: 0,a,b,c
0,-1.024287,-0.194671,1.365165
1,-1.114807,0.845638,-1.798821
2,-0.301628,-0.578813,-0.672559
3,-1.761237,0.636967,-0.518934
4,-0.53172,0.471739,0.236758
5,-1.438147,-0.452157,-0.320618
6,-0.537378,-0.627539,0.666639
7,1.136647,-1.910191,-1.873423


In [4]:
# Access the array object backing column 'a' (bracket lookup instead of
# attribute lookup; both select the same column).
df['a'].array

<PandasArray>
[-1.0242869926106277, -1.1148074782730535, -0.3016284692414169,
  -1.761236566097275, -0.5317199743543193, -1.4381465139243166,
 -0.5373777325433596,  1.1366472501784164]
Length: 8, dtype: float64

In [5]:
# Draw 50 random integers from 0 up to (but not including) 7.
data = np.random.randint(low=0, high=7, size=50)
data

array([2, 1, 3, 2, 3, 6, 1, 0, 4, 1, 0, 6, 1, 4, 3, 1, 2, 1, 6, 5, 5, 3,
       3, 4, 6, 1, 3, 3, 3, 1, 1, 4, 0, 5, 0, 3, 3, 1, 2, 1, 2, 3, 2, 3,
       4, 0, 2, 6, 0, 4])

In [7]:
# Wrap the random draws in a Series and tally how often each value occurs
# (most frequent first, which is the default ordering).
s = pd.Series(data=data)
s.value_counts(sort=True)

3    12
1    11
2     7
4     6
0     6
6     5
5     3
dtype: int64

In [8]:
# 3 and 6 each appear three times, so mode() returns both values.
s5 = pd.Series(data=[1, 1, 3, 3, 3, 5, 5, 6, 6, 6])
s5.mode()

0    3
1    6
dtype: int64

In [10]:
# Two columns of random integers; mode() is computed per column.
df5 = pd.DataFrame({
    "A": np.random.randint(0, 7, size=50),
    "B": np.random.randint(-10, 15, size=50),
})
df5.mode()

Unnamed: 0,A,B
0,3,-4


In [11]:
# Five standard-normal samples labelled 'a' through 'e'.
s = pd.Series(np.random.randn(5), index=list('abcde'))
s

a    0.692895
b    0.744609
c   -1.000412
d   -0.718385
e   -0.792689
dtype: float64

In [12]:
# Reorder/select labels; 'f' is not present in s, so it comes back as NaN.
s.reindex(index=['e', 'b', 'f', 'd'])

e   -0.792689
b    0.744609
f         NaN
d   -0.718385
dtype: float64

In [13]:
# Three columns with partially overlapping labels: the frame's index is the
# union ('a'..'d') and positions without data are filled with NaN.
df = pd.DataFrame({
    'one': pd.Series(np.random.randn(3), index=list('abc')),
    'two': pd.Series(np.random.rand(4), index=list('abcd')),
    'three': pd.Series(np.random.randn(3), index=list('bcd')),
})
# Reindex rows and columns in a single call.
df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one'])

Unnamed: 0,three,two,one
c,0.494027,0.320893,1.44954
f,,,
b,0.82287,0.405397,-0.378162


In [14]:
# Axis-style reindex: give the labels and name the axis they apply to.
df.reindex(labels=['c', 'f', 'b'], axis='index')

Unnamed: 0,one,two,three
c,1.44954,0.320893,0.494027
f,,,
b,-0.378162,0.405397,0.82287


In [15]:
# Display the current contents of `df`; the reindex calls above returned new
# frames and did not modify it.
df

Unnamed: 0,one,two,three
a,-0.503534,0.209234,
b,-0.378162,0.405397,0.82287
c,1.44954,0.320893,0.494027
d,,0.063488,0.039752


In [17]:
# Remove rows 'a' and 'd' (returns a new frame; df itself is unchanged).
df.drop(index=['a', 'd'])

Unnamed: 0,one,two,three
b,-0.378162,0.405397,0.82287
c,1.44954,0.320893,0.494027


In [18]:
# Remove column 'one' (returns a new frame; df itself is unchanged).
df.drop(columns=['one'])

Unnamed: 0,two,three
a,0.209234,
b,0.405397,0.82287
c,0.320893,0.494027
d,0.063488,0.039752


In [19]:
# Display the Series `s` again before relabelling it.
s

a    0.692895
b    0.744609
c   -1.000412
d   -0.718385
e   -0.792689
dtype: float64

In [20]:
# Apply str.upper to every index label; the values are untouched.
s.rename(index=str.upper)

A    0.692895
B    0.744609
C   -1.000412
D   -0.718385
E   -0.792689
dtype: float64

In [21]:
# Relabel selected rows and columns via mappings; labels that are not
# listed ('c', 'three') keep their original names.
df.rename(
    index={'a': 'apple', 'b': 'banana', 'd': 'durian'},
    columns={'one': 'foo', 'two': 'bar'},
)

Unnamed: 0,foo,bar,three
apple,-0.503534,0.209234,
banana,-0.378162,0.405397,0.82287
c,1.44954,0.320893,0.494027
durian,,0.063488,0.039752


In [22]:
# Four consecutive daily timestamps starting 2013-01-01 09:10:12
# (timezone-naive), held in a Series with a default integer index.
s = pd.Series(pd.date_range(start='20130101 09:10:12', periods=4, freq='D'))
s

0   2013-01-01 09:10:12
1   2013-01-02 09:10:12
2   2013-01-03 09:10:12
3   2013-01-04 09:10:12
dtype: datetime64[ns]

In [23]:
# The .dt accessor exposes datetime components element-wise; here the hour
# of each timestamp.
s.dt.hour

0    9
1    9
2    9
3    9
dtype: int64

In [24]:
# Attach the US/Eastern zone to the naive timestamps: the wall-clock time is
# kept as-is and only the zone information is added.
stz = s.dt.tz_localize(tz='US/Eastern')
stz

0   2013-01-01 09:10:12-05:00
1   2013-01-02 09:10:12-05:00
2   2013-01-03 09:10:12-05:00
3   2013-01-04 09:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [25]:
# Interpret the naive timestamps as UTC first, then express the same
# instants in US/Eastern wall-clock time.
(
    s.dt.tz_localize(tz='UTC')
     .dt.tz_convert(tz="US/Eastern")
)

0   2013-01-01 04:10:12-05:00
1   2013-01-02 04:10:12-05:00
2   2013-01-03 04:10:12-05:00
3   2013-01-04 04:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [28]:
# A Series using the dedicated 'string' dtype; the np.nan entry is shown as
# <NA>. The .str accessor applies string methods element-wise and leaves
# the missing value missing.
s = pd.Series(
    ['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'],
    dtype='string',
)
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5    <NA>
6    caba
7     dog
8     cat
dtype: string

In [30]:
# Rebuild the one/two/three frame with fresh random values, then shuffle its
# rows and columns so the sorting examples have something to reorder.
df = pd.DataFrame({
    'one': pd.Series(np.random.randn(3), index=list('abc')),
    'two': pd.Series(np.random.randn(4), index=list('abcd')),
    'three': pd.Series(np.random.randn(3), index=list('bcd')),
})
unsorted_df = df.reindex(index=list('adcb'), columns=['three', 'two', 'one'])

In [31]:
# Display the shuffled frame: rows in order a, d, c, b and columns in order
# three, two, one.
unsorted_df

Unnamed: 0,three,two,one
a,,1.0407,1.161933
d,-1.172705,-0.880448,
c,0.598009,-0.24727,1.043867
b,-0.177583,0.471493,1.426534


In [32]:
# Sort rows by index label in ascending order (both arguments are the
# defaults, spelled out for clarity).
unsorted_df.sort_index(axis='index', ascending=True)

Unnamed: 0,three,two,one
a,,1.0407,1.161933
b,-0.177583,0.471493,1.426534
c,0.598009,-0.24727,1.043867
d,-1.172705,-0.880448,


In [33]:
# Sort rows by index label, largest label first.
unsorted_df.sort_index(axis='index', ascending=False)

Unnamed: 0,three,two,one
d,-1.172705,-0.880448,
c,0.598009,-0.24727,1.043867
b,-0.177583,0.471493,1.426534
a,,1.0407,1.161933


In [34]:
# Sort the columns by their labels; row order is left as it was.
unsorted_df.sort_index(axis='columns')

Unnamed: 0,one,three,two
a,1.161933,,1.0407
d,,-1.172705,-0.880448
c,1.043867,0.598009,-0.24727
b,1.426534,-0.177583,0.471493


In [35]:
# Pull out column 'three' as a Series and sort it by index label.
unsorted_df.loc[:, 'three'].sort_index()

a         NaN
b   -0.177583
c    0.598009
d   -1.172705
Name: three, dtype: float64

In [36]:
# Small integer frame for the sort_values examples; sort rows by the values
# in column 'two'.
df1 = pd.DataFrame(dict(
    one=[2, 1, 1, 1],
    two=[1, 3, 2, 4],
    three=[5, 4, 3, 2],
))
df1.sort_values('two')

Unnamed: 0,one,two,three
0,2,1,5
2,1,2,3
1,1,3,4
3,1,4,2


In [39]:
# Sort by 'one' first; ties in 'one' are ordered by 'two'.
df1.sort_values(['one', 'two'])

Unnamed: 0,one,two,three
2,1,2,3
1,1,3,4
3,1,4,2
0,2,1,5


In [40]:
# Mark the entry labelled 2 as missing so the string Series holds two
# missing values for the na_position examples that follow.
s.loc[2] = np.nan

In [41]:
# Sort the values; missing entries are placed at the end (the default,
# written out explicitly here).
s.sort_values(na_position='last')

0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
2    <NA>
5    <NA>
dtype: string

In [42]:
# Same sort, but with the missing entries placed at the top instead of the
# bottom.
s.sort_values(na_position='first')

2    <NA>
5    <NA>
0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
dtype: string

In [48]:
# Two-level row index ('first', 'second') with repeated keys, paired with a
# single column A counting down from 6 to 1.
idx = pd.MultiIndex.from_tuples(
    [('a', 1), ('a', 2), ('a', 2), ('b', 2), ('b', 1), ('b', 1)],
    names=['first', 'second'],
)
df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, index=idx)
df_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
a,1,6
a,2,5
a,2,4
b,2,3
b,1,2
b,1,1


In [49]:
# `by` mixes an index level name ('second') with a column name ('A'):
# rows are ordered by the 'second' level first, then by column A.
df_multi.sort_values(by=['second','A'])

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
b,1,1
b,1,2
a,1,6
b,2,3
a,2,4
a,2,5
