In [1]:
import numpy as np
import pandas as pd

In [2]:
# Simulate daily price changes for 10 stocks over 5 days, drawn from a
# standard normal distribution (mean 0, standard deviation 1).
stock_change = np.random.normal(0, 1.0, (10, 5))

In [3]:
stock_change  # show the raw 10x5 array generated above

array([[-5.10086244e-01, -1.30445276e+00,  7.31938251e-01,
         1.27815756e+00,  5.76020745e-01],
       [-5.97502069e-01, -7.02154194e-01,  1.42922305e+00,
         1.64269977e-01,  4.02010906e-01],
       [-2.15858650e+00, -1.38129157e+00,  4.23083133e-01,
         4.69593404e-01,  7.98054625e-01],
       [ 4.65521203e-01,  1.37723882e+00,  4.60134860e-02,
         1.49147284e+00, -5.50973099e-01],
       [-6.48582905e-01, -1.11061535e+00,  8.75127630e-02,
        -1.18758800e+00, -1.21861456e+00],
       [-1.37291736e-01,  3.79712782e-01,  5.23579996e-03,
         1.36294265e+00, -3.51084399e-01],
       [-7.62029524e-01, -1.53686193e-02,  3.89791103e-01,
        -4.80809059e-01, -1.34350574e+00],
       [-1.05573136e-01, -1.16584547e+00,  7.89067146e-01,
         1.64566000e+00,  4.75926746e-01],
       [ 1.68850850e-01, -1.69036481e+00, -5.47058006e-01,
         2.54388313e-01, -6.28376997e-01],
       [-5.19738980e-01, -4.89809878e-01, -2.67100827e-02,
         3.47952806e-04

### DataFrame
        结构：既有行索引，又有列索引的二维数组 -> 类似二维表
        DataFrame对象既有行索引, 又有列索引
            - 行索引, 表明不同行, 横向索引, 叫index
            - 列索引, 表明不同列, 纵向索引, 叫columns

In [4]:
# Wrap the raw array in a DataFrame:
pd.DataFrame(stock_change)
# No index or columns were passed, so both default to 0, 1, 2, ...

Unnamed: 0,0,1,2,3,4
0,-0.510086,-1.304453,0.731938,1.278158,0.576021
1,-0.597502,-0.702154,1.429223,0.16427,0.402011
2,-2.158586,-1.381292,0.423083,0.469593,0.798055
3,0.465521,1.377239,0.046013,1.491473,-0.550973
4,-0.648583,-1.110615,0.087513,-1.187588,-1.218615
5,-0.137292,0.379713,0.005236,1.362943,-0.351084
6,-0.76203,-0.015369,0.389791,-0.480809,-1.343506
7,-0.105573,-1.165845,0.789067,1.64566,0.475927
8,0.168851,-1.690365,-0.547058,0.254388,-0.628377
9,-0.519739,-0.48981,-0.02671,0.000348,-0.810359


In [5]:
# Add row labels: one "股票<i>" label per stock.
# The label count is taken from the array's first dimension rather than a
# hard-coded 10, so the labels always match the number of rows passed to
# the DataFrame constructor (a length mismatch there raises ValueError).
stock_change_index = [f"股票{i}" for i in range(stock_change.shape[0])]
pd.DataFrame(stock_change, index=stock_change_index)

Unnamed: 0,0,1,2,3,4
股票0,-0.510086,-1.304453,0.731938,1.278158,0.576021
股票1,-0.597502,-0.702154,1.429223,0.16427,0.402011
股票2,-2.158586,-1.381292,0.423083,0.469593,0.798055
股票3,0.465521,1.377239,0.046013,1.491473,-0.550973
股票4,-0.648583,-1.110615,0.087513,-1.187588,-1.218615
股票5,-0.137292,0.379713,0.005236,1.362943,-0.351084
股票6,-0.76203,-0.015369,0.389791,-0.480809,-1.343506
股票7,-0.105573,-1.165845,0.789067,1.64566,0.475927
股票8,0.168851,-1.690365,-0.547058,0.254388,-0.628377
股票9,-0.519739,-0.48981,-0.02671,0.000348,-0.810359


In [6]:
# Build the column labels.
# pd.date_range produces a DatetimeIndex; freq="B" steps by business day,
# so five periods starting 2018-01-01 give Monday through Friday.
date = pd.date_range(periods=5, freq="B", start="20180101")
date

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05'],
              dtype='datetime64[ns]', freq='B')

In [7]:
# Attach labels on both axes: stock names as rows, dates as columns.
data = pd.DataFrame(
    data=stock_change,
    columns=date,
    index=stock_change_index,
)

In [8]:
data.head()  # preview the first rows, now labelled by stock and date

Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05
股票0,-0.510086,-1.304453,0.731938,1.278158,0.576021
股票1,-0.597502,-0.702154,1.429223,0.16427,0.402011
股票2,-2.158586,-1.381292,0.423083,0.469593,0.798055
股票3,0.465521,1.377239,0.046013,1.491473,-0.550973
股票4,-0.648583,-1.110615,0.087513,-1.187588,-1.218615


### DataFrame的属性:
        shape -> 形状
        index -> 行索引列表
        columns -> 列索引列表
        values -> 直接获取其中array的值 (刨除行索引和列索引的值)
        T -> 转置
### 方法:
        head() 返回前几行, 默认是前五行
        tail() 返回后几行, 默认是后五行

In [9]:
data  # display the whole frame

Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05
股票0,-0.510086,-1.304453,0.731938,1.278158,0.576021
股票1,-0.597502,-0.702154,1.429223,0.16427,0.402011
股票2,-2.158586,-1.381292,0.423083,0.469593,0.798055
股票3,0.465521,1.377239,0.046013,1.491473,-0.550973
股票4,-0.648583,-1.110615,0.087513,-1.187588,-1.218615
股票5,-0.137292,0.379713,0.005236,1.362943,-0.351084
股票6,-0.76203,-0.015369,0.389791,-0.480809,-1.343506
股票7,-0.105573,-1.165845,0.789067,1.64566,0.475927
股票8,0.168851,-1.690365,-0.547058,0.254388,-0.628377
股票9,-0.519739,-0.48981,-0.02671,0.000348,-0.810359


In [10]:
data.shape # (number of rows, number of columns) of the frame

(10, 5)

In [11]:
data.index # row labels, one per row

Index(['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9'], dtype='object')

In [12]:
data.columns # column labels, one per column

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05'],
              dtype='datetime64[ns]', freq='B')

In [13]:
data.values # the underlying values only, without the row/column labels
# the result is a NumPy ndarray

array([[-5.10086244e-01, -1.30445276e+00,  7.31938251e-01,
         1.27815756e+00,  5.76020745e-01],
       [-5.97502069e-01, -7.02154194e-01,  1.42922305e+00,
         1.64269977e-01,  4.02010906e-01],
       [-2.15858650e+00, -1.38129157e+00,  4.23083133e-01,
         4.69593404e-01,  7.98054625e-01],
       [ 4.65521203e-01,  1.37723882e+00,  4.60134860e-02,
         1.49147284e+00, -5.50973099e-01],
       [-6.48582905e-01, -1.11061535e+00,  8.75127630e-02,
        -1.18758800e+00, -1.21861456e+00],
       [-1.37291736e-01,  3.79712782e-01,  5.23579996e-03,
         1.36294265e+00, -3.51084399e-01],
       [-7.62029524e-01, -1.53686193e-02,  3.89791103e-01,
        -4.80809059e-01, -1.34350574e+00],
       [-1.05573136e-01, -1.16584547e+00,  7.89067146e-01,
         1.64566000e+00,  4.75926746e-01],
       [ 1.68850850e-01, -1.69036481e+00, -5.47058006e-01,
         2.54388313e-01, -6.28376997e-01],
       [-5.19738980e-01, -4.89809878e-01, -2.67100827e-02,
         3.47952806e-04

In [14]:
type(data.values)  # confirm the container type returned by .values

numpy.ndarray

In [15]:
data.T # transpose: rows become columns and columns become rows

Unnamed: 0,股票0,股票1,股票2,股票3,股票4,股票5,股票6,股票7,股票8,股票9
2018-01-01,-0.510086,-0.597502,-2.158586,0.465521,-0.648583,-0.137292,-0.76203,-0.105573,0.168851,-0.519739
2018-01-02,-1.304453,-0.702154,-1.381292,1.377239,-1.110615,0.379713,-0.015369,-1.165845,-1.690365,-0.48981
2018-01-03,0.731938,1.429223,0.423083,0.046013,0.087513,0.005236,0.389791,0.789067,-0.547058,-0.02671
2018-01-04,1.278158,0.16427,0.469593,1.491473,-1.187588,1.362943,-0.480809,1.64566,0.254388,0.000348
2018-01-05,0.576021,0.402011,0.798055,-0.550973,-1.218615,-0.351084,-1.343506,0.475927,-0.628377,-0.810359


In [16]:
# DataFrame methods:
data.head() # returns the first five rows by default

# head() is handy when we want to check the column labels
# without printing the entire frame.

Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05
股票0,-0.510086,-1.304453,0.731938,1.278158,0.576021
股票1,-0.597502,-0.702154,1.429223,0.16427,0.402011
股票2,-2.158586,-1.381292,0.423083,0.469593,0.798055
股票3,0.465521,1.377239,0.046013,1.491473,-0.550973
股票4,-0.648583,-1.110615,0.087513,-1.187588,-1.218615


In [17]:
data.tail(2) # tail() returns the last five rows by default; here we ask for the last 2



Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05
股票8,0.168851,-1.690365,-0.547058,0.254388,-0.628377
股票9,-0.519739,-0.48981,-0.02671,0.000348,-0.810359


### 修改行列索引值
    修改行列的索引值:
        索引必须整体修改, 不可以单独修改某个索引值
    重设索引:
        reset_index(drop=False)
        重设为新的下标索引 (默认的 0 ~ n-1 整数索引)
        drop: 默认为False, 不删除原来的索引 (原索引会变成一列数据); 如果为True, 则删除原来的索引值
    设置新的索引:
        设置某列值为新的索引:
        set_index(keys, drop=True)
            keys: 列索引名或列索引名称的列表
            drop: 当作为新的索引时, 删除原来的列.
    如果我们设置多个索引的时候:
    返回的就是MultiIndex了, 可以用它在二维表中表示三维(或更高维)的数据
    MultiIndex: 多级或分层索引对象
        

In [19]:
data.head()  # reminder of the current row/column labels before modifying them

Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05
股票0,-0.510086,-1.304453,0.731938,1.278158,0.576021
股票1,-0.597502,-0.702154,1.429223,0.16427,0.402011
股票2,-2.158586,-1.381292,0.423083,0.469593,0.798055
股票3,0.465521,1.377239,0.046013,1.491473,-0.550973
股票4,-0.648583,-1.110615,0.087513,-1.187588,-1.218615


In [22]:
# The frame clearly has row and column labels,
# but a single label cannot be modified on its own:
# data.index[2] =  "股票3" # wrong: an individual index entry cannot be assigned

In [23]:
# An index can only be replaced as a whole:

In [24]:
# New row labels of the form "股票_<i>", one for each entry in data's row index.
stock_index = [f"股票_{i}" for i in range(len(data.index))]

In [26]:
stock_index # a plain list of labels; it can now replace data's whole index in one go

['股票_0',
 '股票_1',
 '股票_2',
 '股票_3',
 '股票_4',
 '股票_5',
 '股票_6',
 '股票_7',
 '股票_8',
 '股票_9']

In [27]:
data.index = stock_index  # swap in the full list of row labels in a single assignment

In [31]:
data.index # the row labels have been replaced

Index(['股票_0', '股票_1', '股票_2', '股票_3', '股票_4', '股票_5', '股票_6', '股票_7', '股票_8',
       '股票_9'],
      dtype='object')

In [32]:
data.head()  # the rows now carry the new labels

Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05
股票_0,-0.510086,-1.304453,0.731938,1.278158,0.576021
股票_1,-0.597502,-0.702154,1.429223,0.16427,0.402011
股票_2,-2.158586,-1.381292,0.423083,0.469593,0.798055
股票_3,0.465521,1.377239,0.046013,1.491473,-0.550973
股票_4,-0.648583,-1.110615,0.087513,-1.187588,-1.218615
