### 1、导入第三方库

In [1]:
import numpy as np
import pandas as pd

### 2、通过Series函数创建对象

In [2]:
# Build a Series from a plain Python list; pandas assigns a default integer index.
# The np.nan entry marks a missing value, so the result is stored as float64.
s = pd.Series(data=[1, 3, 4, np.nan, 6, 8])
s

0    1.0
1    3.0
2    4.0
3    NaN
4    6.0
5    8.0
dtype: float64

### 3、通过DataFrame创建对象（参数为NumPy数组）

In [3]:
# Create a sequence of timestamps: `start` is the first date and `periods`
# is the number of entries to generate (six consecutive days here).
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Use a seeded generator so that Restart & Run All always rebuilds the same table;
# the previous unseeded np.random.randn call produced different values on every run.
# NOTE: re-run the notebook after this change to refresh the recorded outputs.
rng = np.random.default_rng(seed=42)
# standard_normal((6, 4)) draws a 6x4 array from the standard normal distribution;
# the rows are labelled by `dates` and the columns by the letters A-D.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.132573,-0.701502,-1.200405,0.555923
2013-01-02,-1.760077,-0.864887,0.664937,-1.509499
2013-01-03,-0.103424,0.853946,-0.057892,-0.608758
2013-01-04,-0.098113,-0.539654,-0.074772,0.566189
2013-01-05,0.526374,-0.723935,-0.978592,-0.684789
2013-01-06,-0.05274,0.867664,-1.937692,-0.463876


### 4、通过DataFrame创建对象（参数为字典）

In [5]:
# Each key becomes a column. Scalar values (A, B, F) are repeated for every row,
# while the array-like values (C, D, E) supply the four rows and keep their own dtype.
data_dict = dict(
    A=1.0,
    B=pd.Timestamp("20210809"),
    C=pd.Series(1, index=list(range(4)), dtype="float32"),
    D=np.array([3] * 4, dtype="int32"),
    E=pd.Categorical(["test", "train", "test", "train"]),
    F="Foo",
)
df2 = pd.DataFrame(data_dict)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2021-08-09,1.0,3,test,Foo
1,1.0,2021-08-09,1.0,3,train,Foo
2,1.0,2021-08-09,1.0,3,test,Foo
3,1.0,2021-08-09,1.0,3,train,Foo


In [6]:
df2.dtypes  # per-column dtypes: each column keeps the dtype of the value it was built from

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

#### 注释：逐项查看上面字典中各个值的含义

In [7]:
pd.Timestamp("20210809")   # build a single timestamp; the compact YYYYMMDD string is parsed as 2021-08-09 00:00:00

Timestamp('2021-08-09 00:00:00')

In [8]:
pd.Series(1, index=list(range(4)), dtype="float32") # one-dimensional labelled array: the scalar 1 is repeated for each of the 4 index labels, stored as float32

0    1.0
1    1.0
2    1.0
3    1.0
dtype: float32

In [9]:
np.array([3] * 4, dtype="int32")  # NumPy array of four 3s with an explicit int32 dtype

array([3, 3, 3, 3])

In [10]:
pd.Categorical(["test", "train", "test", "train"]) # categorical data: the four labels are stored against 2 distinct categories

['test', 'train', 'test', 'train']
Categories (2, object): ['test', 'train']

### 5、DataFrame对象的属性与方法（输入 `df2.` 后按 Tab 键自动补全，以下为部分结果）

df2.A                  df2.bool
df2.abs                df2.boxplot
df2.add                df2.C
df2.add_prefix         df2.clip
df2.add_suffix         df2.columns
df2.align              df2.copy
df2.all                df2.count
df2.any                df2.combine
df2.append             df2.D
df2.apply              df2.describe
df2.applymap           df2.diff
df2.B                  df2.duplicated

### 6、查看DataFrame对象中的数据

In [14]:
df  # display the whole frame as the reference for the head/tail views that follow

Unnamed: 0,A,B,C,D
2013-01-01,0.132573,-0.701502,-1.200405,0.555923
2013-01-02,-1.760077,-0.864887,0.664937,-1.509499
2013-01-03,-0.103424,0.853946,-0.057892,-0.608758
2013-01-04,-0.098113,-0.539654,-0.074772,0.566189
2013-01-05,0.526374,-0.723935,-0.978592,-0.684789
2013-01-06,-0.05274,0.867664,-1.937692,-0.463876


In [15]:
df.head() # with no argument, shows the first 5 rows

Unnamed: 0,A,B,C,D
2013-01-01,0.132573,-0.701502,-1.200405,0.555923
2013-01-02,-1.760077,-0.864887,0.664937,-1.509499
2013-01-03,-0.103424,0.853946,-0.057892,-0.608758
2013-01-04,-0.098113,-0.539654,-0.074772,0.566189
2013-01-05,0.526374,-0.723935,-0.978592,-0.684789


In [16]:
df.head(3) # explicit row count: show only the first 3 rows

Unnamed: 0,A,B,C,D
2013-01-01,0.132573,-0.701502,-1.200405,0.555923
2013-01-02,-1.760077,-0.864887,0.664937,-1.509499
2013-01-03,-0.103424,0.853946,-0.057892,-0.608758


In [17]:
df.tail() # with no argument, shows the last 5 rows

Unnamed: 0,A,B,C,D
2013-01-02,-1.760077,-0.864887,0.664937,-1.509499
2013-01-03,-0.103424,0.853946,-0.057892,-0.608758
2013-01-04,-0.098113,-0.539654,-0.074772,0.566189
2013-01-05,0.526374,-0.723935,-0.978592,-0.684789
2013-01-06,-0.05274,0.867664,-1.937692,-0.463876


In [18]:
df.tail(3) # explicit row count: show only the last 3 rows

Unnamed: 0,A,B,C,D
2013-01-04,-0.098113,-0.539654,-0.074772,0.566189
2013-01-05,0.526374,-0.723935,-0.978592,-0.684789
2013-01-06,-0.05274,0.867664,-1.937692,-0.463876


In [19]:
df.index # row labels of the frame (a DatetimeIndex here)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [20]:
df.columns # column labels of the frame

Index(['A', 'B', 'C', 'D'], dtype='object')