In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
dates = pd.date_range("20130101", periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.986401,1.169657,-1.687242,2.392745
2013-01-02,0.808653,-0.623405,0.848104,0.043162
2013-01-03,1.004945,1.660898,-1.541642,-0.482085
2013-01-04,-1.120391,-0.349629,-0.703114,-1.182272
2013-01-05,0.467696,0.804033,1.059557,-0.258913
2013-01-06,0.584083,0.313591,0.416921,1.545715


In [6]:
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [7]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [9]:
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [11]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.986401,1.169657,-1.687242,2.392745
2013-01-02,0.808653,-0.623405,0.848104,0.043162
2013-01-03,1.004945,1.660898,-1.541642,-0.482085
2013-01-04,-1.120391,-0.349629,-0.703114,-1.182272
2013-01-05,0.467696,0.804033,1.059557,-0.258913


In [12]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-1.120391,-0.349629,-0.703114,-1.182272
2013-01-05,0.467696,0.804033,1.059557,-0.258913
2013-01-06,0.584083,0.313591,0.416921,1.545715


In [13]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [14]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [15]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.455231,0.495857,-0.267903,0.343059
std,0.801071,0.883981,1.208548,1.349622
min,-1.120391,-0.623405,-1.687242,-1.182272
25%,0.496793,-0.183824,-1.33201,-0.426292
50%,0.696368,0.558812,-0.143097,-0.107875
75%,0.941964,1.078251,0.740308,1.170077
max,1.004945,1.660898,1.059557,2.392745


In [16]:
df.loc["20130102", ["A", "B"]]

A    0.808653
B   -0.623405
Name: 2013-01-02 00:00:00, dtype: float64

In [17]:
df.loc["20130102", "A"]

np.float64(0.8086530134391878)

In [18]:
tuples = list(
    zip(
        *[
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]
    )
)

In [19]:
tuple

tuple

In [20]:
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

In [21]:
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [22]:
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])

In [23]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.241657,1.047524
bar,two,0.549887,0.525015
baz,one,-1.452207,0.701823
baz,two,-1.729104,0.280656
foo,one,1.110775,-0.242792
foo,two,-1.285232,-1.211176
qux,one,-1.051101,-0.088188
qux,two,0.598638,-0.065422


In [24]:
df2 = df[:4]

In [25]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.241657,1.047524
bar,two,0.549887,0.525015
baz,one,-1.452207,0.701823
baz,two,-1.729104,0.280656


In [26]:
stacked = df2.stack()


In [27]:
stacked

first  second   
bar    one     A   -0.241657
               B    1.047524
       two     A    0.549887
               B    0.525015
baz    one     A   -1.452207
               B    0.701823
       two     A   -1.729104
               B    0.280656
dtype: float64

In [28]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.241657,0.549887
bar,B,1.047524,0.525015
baz,A,-1.452207,-1.729104
baz,B,0.701823,0.280656


In [29]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.241657,1.047524
bar,two,0.549887,0.525015
baz,one,-1.452207,0.701823
baz,two,-1.729104,0.280656


In [30]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.241657,-1.452207
one,B,1.047524,0.701823
two,A,0.549887,-1.729104
two,B,0.525015,0.280656


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)

In [4]:
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,1.712689,1.001185
1,one,B,foo,-0.412858,-0.212781
2,two,C,foo,-1.129232,-1.333904
3,three,A,bar,-1.037562,0.131382
4,one,B,bar,-0.029766,-0.942997
5,one,C,bar,0.139062,-2.176121
6,two,A,foo,-0.538948,-3.02307
7,three,B,foo,1.071507,-0.557921
8,one,C,foo,2.019069,-0.043509
9,one,A,bar,0.841421,1.534635


In [5]:
df.pivot_table(values=["D","E"], index=["A", "B"], columns="C")

Unnamed: 0_level_0,Unnamed: 1_level_0,D,D,E,E
Unnamed: 0_level_1,C,bar,foo,bar,foo
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
one,A,0.841421,1.712689,1.534635,1.001185
one,B,-0.029766,-0.412858,-0.942997,-0.212781
one,C,0.139062,2.019069,-2.176121,-0.043509
three,A,-1.037562,,0.131382,
three,B,,1.071507,,-0.557921
three,C,-0.359004,,-1.60299,
two,A,,-0.538948,,-3.02307
two,B,0.670676,,1.134894,
two,C,,-1.129232,,-1.333904


In [6]:
df.pivot_table(values="D", index=["A", "B"], columns=["C","E"])

Unnamed: 0_level_0,C,bar,bar,bar,bar,bar,bar,foo,foo,foo,foo,foo,foo
Unnamed: 0_level_1,E,-2.176121,-1.602990,-0.942997,0.131382,1.134894,1.534635,-3.023070,-1.333904,-0.557921,-0.212781,-0.043509,1.001185
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
one,A,,,,,,0.841421,,,,,,1.712689
one,B,,,-0.029766,,,,,,,-0.412858,,
one,C,0.139062,,,,,,,,,,2.019069,
three,A,,,,-1.037562,,,,,,,,
three,B,,,,,,,,,1.071507,,,
three,C,,-0.359004,,,,,,,,,,
two,A,,,,,,,-0.538948,,,,,
two,B,,,,,0.670676,,,,,,,
two,C,,,,,,,,-1.129232,,,,


In [7]:
rng = pd.date_range("1/1/2012", periods=100, freq="S")

  rng = pd.date_range("1/1/2012", periods=100, freq="S")


In [8]:
rng = pd.date_range("1/1/2012", periods=100, freq="s")

In [None]:
rng

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               '2012-01-01 00:00:10', '2012-01-01 00:00:11',
               '2012-01-01 00:00:12', '2012-01-01 00:00:13',
               '2012-01-01 00:00:14', '2012-01-01 00:00:15',
               '2012-01-01 00:00:16', '2012-01-01 00:00:17',
               '2012-01-01 00:00:18', '2012-01-01 00:00:19',
               '2012-01-01 00:00:20', '2012-01-01 00:00:21',
               '2012-01-01 00:00:22', '2012-01-01 00:00:23',
               '2012-01-01 00:00:24', '2012-01-01 00:00:25',
               '2012-01-01 00:00:26', '2012-01-01 00:00:27',
               '2012-01-01 00:00:28', '2012-01-01 00:00:29',
               '2012-01-01 00:00:30', '2012-01-01 00:00:31',
               '2012-01-

: 