In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
dates = pd.date_range("20130101", periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.215028,-0.865727,-0.066369,0.270531
2013-01-02,0.8463,-0.283493,-1.157011,1.442664
2013-01-03,1.929353,0.28462,-0.166289,0.197944
2013-01-04,-0.884702,0.686929,-0.203601,0.893277
2013-01-05,0.273755,-1.079756,-0.095892,1.471039
2013-01-06,0.75461,0.891727,0.048473,0.786606


In [4]:
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [5]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [6]:
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [7]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.215028,-0.865727,-0.066369,0.270531
2013-01-02,0.8463,-0.283493,-1.157011,1.442664
2013-01-03,1.929353,0.28462,-0.166289,0.197944
2013-01-04,-0.884702,0.686929,-0.203601,0.893277
2013-01-05,0.273755,-1.079756,-0.095892,1.471039


In [8]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.884702,0.686929,-0.203601,0.893277
2013-01-05,0.273755,-1.079756,-0.095892,1.471039
2013-01-06,0.75461,0.891727,0.048473,0.786606


In [9]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [10]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [11]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.522391,-0.06095,-0.273448,0.843677
std,0.924617,0.815045,0.441608,0.548371
min,-0.884702,-1.079756,-1.157011,0.197944
25%,0.22971,-0.720168,-0.194273,0.399549
50%,0.514182,0.000564,-0.131091,0.839942
75%,0.823377,0.586352,-0.07375,1.305317
max,1.929353,0.891727,0.048473,1.471039


In [12]:
df.loc["20130102", ["A", "B"]]

A    0.846300
B   -0.283493
Name: 2013-01-02 00:00:00, dtype: float64

In [13]:
df.loc["20130102", "A"]

np.float64(0.8463001551082648)

In [14]:
tuples = list(
    zip(
        *[
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]
    )
)

In [15]:
tuple

tuple

In [16]:
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

In [17]:
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [18]:
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])

In [19]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.41586,0.03818
bar,two,0.139016,0.327931
baz,one,0.755823,-0.269905
baz,two,-0.590937,-0.273359
foo,one,0.665703,1.531283
foo,two,-0.624473,0.450916
qux,one,-0.551922,-0.749883
qux,two,-0.480704,2.846081


In [20]:
df2 = df[:4]

In [21]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.41586,0.03818
bar,two,0.139016,0.327931
baz,one,0.755823,-0.269905
baz,two,-0.590937,-0.273359


In [22]:
stacked = df2.stack()


In [23]:
stacked

first  second   
bar    one     A    0.415860
               B    0.038180
       two     A    0.139016
               B    0.327931
baz    one     A    0.755823
               B   -0.269905
       two     A   -0.590937
               B   -0.273359
dtype: float64

In [24]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,0.41586,0.139016
bar,B,0.03818,0.327931
baz,A,0.755823,-0.590937
baz,B,-0.269905,-0.273359


In [25]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.41586,0.03818
bar,two,0.139016,0.327931
baz,one,0.755823,-0.269905
baz,two,-0.590937,-0.273359


In [26]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.41586,0.755823
one,B,0.03818,-0.269905
two,A,0.139016,-0.590937
two,B,0.327931,-0.273359


In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)

In [28]:
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.801011,-0.259687
1,one,B,foo,-0.354836,1.227994
2,two,C,foo,2.432343,-0.667911
3,three,A,bar,0.507252,-0.989693
4,one,B,bar,0.398091,-1.211797
5,one,C,bar,0.355963,0.5824
6,two,A,foo,-0.229651,0.547053
7,three,B,foo,-0.217825,-0.959808
8,one,C,foo,0.354201,0.052607
9,one,A,bar,-0.44839,-0.69207


In [29]:
df.pivot_table(values=["D","E"], index=["A", "B"], columns="C")

Unnamed: 0_level_0,Unnamed: 1_level_0,D,D,E,E
Unnamed: 0_level_1,C,bar,foo,bar,foo
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
one,A,-0.44839,-0.801011,-0.69207,-0.259687
one,B,0.398091,-0.354836,-1.211797,1.227994
one,C,0.355963,0.354201,0.5824,0.052607
three,A,0.507252,,-0.989693,
three,B,,-0.217825,,-0.959808
three,C,-0.116716,,-1.289794,
two,A,,-0.229651,,0.547053
two,B,1.006276,,0.070229,
two,C,,2.432343,,-0.667911


In [30]:
df.pivot_table(values="D", index=["A", "B"], columns=["C","E"])

Unnamed: 0_level_0,C,bar,bar,bar,bar,bar,bar,foo,foo,foo,foo,foo,foo
Unnamed: 0_level_1,E,-1.289794,-1.211797,-0.989693,-0.692070,0.070229,0.582400,-0.959808,-0.667911,-0.259687,0.052607,0.547053,1.227994
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
one,A,,,,-0.44839,,,,,-0.801011,,,
one,B,,0.398091,,,,,,,,,,-0.354836
one,C,,,,,,0.355963,,,,0.354201,,
three,A,,,0.507252,,,,,,,,,
three,B,,,,,,,-0.217825,,,,,
three,C,-0.116716,,,,,,,,,,,
two,A,,,,,,,,,,,-0.229651,
two,B,,,,,1.006276,,,,,,,
two,C,,,,,,,,2.432343,,,,


In [31]:
rng = pd.date_range("1/1/2012", periods=100, freq="S")

  rng = pd.date_range("1/1/2012", periods=100, freq="S")


In [32]:
rng = pd.date_range("1/1/2012", periods=100, freq="s")

In [33]:
rng

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               '2012-01-01 00:00:10', '2012-01-01 00:00:11',
               '2012-01-01 00:00:12', '2012-01-01 00:00:13',
               '2012-01-01 00:00:14', '2012-01-01 00:00:15',
               '2012-01-01 00:00:16', '2012-01-01 00:00:17',
               '2012-01-01 00:00:18', '2012-01-01 00:00:19',
               '2012-01-01 00:00:20', '2012-01-01 00:00:21',
               '2012-01-01 00:00:22', '2012-01-01 00:00:23',
               '2012-01-01 00:00:24', '2012-01-01 00:00:25',
               '2012-01-01 00:00:26', '2012-01-01 00:00:27',
               '2012-01-01 00:00:28', '2012-01-01 00:00:29',
               '2012-01-01 00:00:30', '2012-01-01 00:00:31',
               '2012-01-

In [34]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

In [35]:
ts

2012-01-01 00:00:00    155
2012-01-01 00:00:01    298
2012-01-01 00:00:02    398
2012-01-01 00:00:03     28
2012-01-01 00:00:04    104
                      ... 
2012-01-01 00:01:35    443
2012-01-01 00:01:36     69
2012-01-01 00:01:37    314
2012-01-01 00:01:38    412
2012-01-01 00:01:39    110
Freq: s, Length: 100, dtype: int32

you can use markdown here

In [36]:
ts.resample("5Min").sum()

2012-01-01    26137
Freq: 5min, dtype: int32

In [37]:
rng = pd.date_range("3/6/2012 00:00", periods=5, freq="D")

In [38]:
rng

DatetimeIndex(['2012-03-06', '2012-03-07', '2012-03-08', '2012-03-09',
               '2012-03-10'],
              dtype='datetime64[ns]', freq='D')