# Pandas 使用
## 安装
```shell
pip3 install pandas
```

## 加载数据

In [1]:
import pandas as pd

# Read the CSV (path is relative to the notebook's working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data/data.csv')

## 打印数据描述
### `describe`
#### 定义
```python
DataFrame.describe(percentiles=None, include=None, exclude=None)
```

#### 参数
- percentiles
    - list-like of numbers, optional
    - The percentiles to include in the output. All should fall between 0 and 1. The default is [.25, .5, .75], which returns the 25th, 50th, and 75th percentiles.
- include
    - ‘all’, list-like of dtypes or None (default), optional
    - A white list of data types to include in the result. Ignored for Series. Here are the options:
        - `all` : All columns of the input will be included in the output.
        - `A list-like of dtypes` : Limits the results to the provided data types. To limit the result to numeric types submit numpy.number. To limit it instead to object columns submit the numpy.object data type. Strings can also be used in the style of select_dtypes (e.g. df.describe(`include=['O']`)). To select pandas categorical columns, use 'category'
        - `None (default)` : The result will include all numeric columns.
- exclude
    - list-like of dtypes or None (default), optional
    - A black list of data types to omit from the result. Ignored for Series. Here are the options:
        - `A list-like of dtypes` : Excludes the provided data types from the result. To exclude numeric types submit numpy.number. To exclude object columns submit the data type numpy.object. Strings can also be used in the style of select_dtypes (e.g. df.describe(`exclude=['O']`)). To exclude pandas categorical columns, use 'category'
        - `None (default)` : The result will exclude nothing

#### 官方文档
- [点击查看官方文档](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.describe.html#pandas.DataFrame.describe)

In [2]:
# Summary statistics with every argument left at its documented default
# (include=None restricts the result to numeric columns).
data.describe(percentiles=None, include=None, exclude=None)

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
count,1460.0,1460.0,1201.0,1460.0,1460.0,1460.0,1460.0,1460.0,1452.0,1460.0,...,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,730.5,56.89726,70.049958,10516.828082,6.099315,5.575342,1971.267808,1984.865753,103.685262,443.639726,...,94.244521,46.660274,21.95411,3.409589,15.060959,2.758904,43.489041,6.321918,2007.815753,180921.19589
std,421.610009,42.300571,24.284752,9981.264932,1.382997,1.112799,30.202904,20.645407,181.066207,456.098091,...,125.338794,66.256028,61.119149,29.317331,55.757415,40.177307,496.123024,2.703626,1.328095,79442.502883
min,1.0,20.0,21.0,1300.0,1.0,1.0,1872.0,1950.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0,34900.0
25%,365.75,20.0,59.0,7553.5,5.0,5.0,1954.0,1967.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,129975.0
50%,730.5,50.0,69.0,9478.5,6.0,5.0,1973.0,1994.0,0.0,383.5,...,0.0,25.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0,163000.0
75%,1095.25,70.0,80.0,11601.5,7.0,6.0,2000.0,2004.0,166.0,712.25,...,168.0,68.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0,214000.0
max,1460.0,190.0,313.0,215245.0,10.0,9.0,2010.0,2010.0,1600.0,5644.0,...,857.0,547.0,552.0,508.0,480.0,738.0,15500.0,12.0,2010.0,755000.0


### `axes`
打印行列信息。

In [3]:
# data.axes holds the row index followed by the column labels;
# print both, separated by one blank line.
print(*data.axes, sep='\n\n')

RangeIndex(start=0, stop=1460, step=1)

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'Garag

## 遍历记录

In [10]:
# DataFrame.items() walks the frame column by column, yielding
# (column label, column Series) pairs. It replaces iteritems(), which was
# deprecated in pandas 1.5 and removed in pandas 2.0.
# Only the first 10 columns are shown to keep the output short.
for column_name, column in list(data.items())[:10]:
    print(column_name, ' - ', type(column))

Id  -  <class 'pandas.core.series.Series'>
MSSubClass  -  <class 'pandas.core.series.Series'>
MSZoning  -  <class 'pandas.core.series.Series'>
LotFrontage  -  <class 'pandas.core.series.Series'>
LotArea  -  <class 'pandas.core.series.Series'>
Street  -  <class 'pandas.core.series.Series'>
Alley  -  <class 'pandas.core.series.Series'>
LotShape  -  <class 'pandas.core.series.Series'>
LandContour  -  <class 'pandas.core.series.Series'>
Utilities  -  <class 'pandas.core.series.Series'>


In [27]:
# Print the first 30 row labels on a single line, each followed by a space.
print(''.join(str(label) + ' ' for label in data.index[:30]), end='')

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 

In [26]:
# Print the full contents of the first three records, one record at a time.
for label in data.index[:3]:
    record = data.loc[label]  # look the record up by its row label
    print(record)

Id                     1
MSSubClass            60
MSZoning              RL
LotFrontage           65
LotArea             8450
Street              Pave
Alley                NaN
LotShape             Reg
LandContour          Lvl
Utilities         AllPub
LotConfig         Inside
LandSlope            Gtl
Neighborhood     CollgCr
Condition1          Norm
Condition2          Norm
BldgType            1Fam
HouseStyle        2Story
OverallQual            7
OverallCond            5
YearBuilt           2003
YearRemodAdd        2003
RoofStyle          Gable
RoofMatl         CompShg
Exterior1st      VinylSd
Exterior2nd      VinylSd
MasVnrType       BrkFace
MasVnrArea           196
ExterQual             Gd
ExterCond             TA
Foundation         PConc
                  ...   
BedroomAbvGr           3
KitchenAbvGr           1
KitchenQual           Gd
TotRmsAbvGrd           8
Functional           Typ
Fireplaces             0
FireplaceQu          NaN
GarageType        Attchd
GarageYrBlt         2003


In [24]:
# Print only the raw values of the first three records.
for label in data.index[:3]:
    record_values = data.loc[label].values
    print(record_values)

[1 60 'RL' 65.0 8450 'Pave' nan 'Reg' 'Lvl' 'AllPub' 'Inside' 'Gtl'
 'CollgCr' 'Norm' 'Norm' '1Fam' '2Story' 7 5 2003 2003 'Gable' 'CompShg'
 'VinylSd' 'VinylSd' 'BrkFace' 196.0 'Gd' 'TA' 'PConc' 'Gd' 'TA' 'No' 'GLQ'
 706 'Unf' 0 150 856 'GasA' 'Ex' 'Y' 'SBrkr' 856 854 0 1710 1 0 2 1 3 1
 'Gd' 8 'Typ' 0 nan 'Attchd' 2003.0 'RFn' 2 548 'TA' 'TA' 'Y' 0 61 0 0 0 0
 nan nan nan 0 2 2008 'WD' 'Normal' 208500]
[2 20 'RL' 80.0 9600 'Pave' nan 'Reg' 'Lvl' 'AllPub' 'FR2' 'Gtl' 'Veenker'
 'Feedr' 'Norm' '1Fam' '1Story' 6 8 1976 1976 'Gable' 'CompShg' 'MetalSd'
 'MetalSd' 'None' 0.0 'TA' 'TA' 'CBlock' 'Gd' 'TA' 'Gd' 'ALQ' 978 'Unf' 0
 284 1262 'GasA' 'Ex' 'Y' 'SBrkr' 1262 0 0 1262 0 1 2 0 3 1 'TA' 6 'Typ' 1
 'TA' 'Attchd' 1976.0 'RFn' 2 460 'TA' 'TA' 'Y' 298 0 0 0 0 0 nan nan nan 0
 5 2007 'WD' 'Normal' 181500]
[3 60 'RL' 68.0 11250 'Pave' nan 'IR1' 'Lvl' 'AllPub' 'Inside' 'Gtl'
 'CollgCr' 'Norm' 'Norm' '1Fam' '2Story' 7 5 2001 2002 'Gable' 'CompShg'
 'VinylSd' 'VinylSd' 'BrkFace' 162.0 'Gd' 'TA'

In [37]:
# For the first three records, print one field selected two different ways.
for label in data.index[:3]:
    # type(record) -> <class 'pandas.core.series.Series'>
    record = data.loc[label]
    print(
        record.iat[0],      # by position: the first column
        record.at['Id'],    # by label: the column name
    )

1 1
2 2
3 3
