---
title: Pandas基本语法
date: 2019-06-24
id: 3
categories: [基本语法]
---

## Series

In [2]:
# -*- coding: utf-8 -*- 
from pandas import Series

# --- Build a Series from a plain list; the index defaults to 0..n-1 ---
print("===用数组生成Series===")
obj = Series([4, 7, -5, 3])
print(obj)
print(obj.values)
print(obj.index)
print()

# --- Build a Series with explicit labels, then read/write by label ---
print("===指定Series的index===")
obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
print(obj2)
print(obj2.index)
print(obj2['a'])
obj2['d'] = 6
print(obj2[['c', 'a', 'd']])
# Boolean mask: keep only the elements greater than 0.
positive = obj2 > 0
print(obj2[positive])
# `in` tests membership against the index labels, not the values.
print('b' in obj2)
print('e' in obj2)
print()

# --- Build a Series from a dict: keys become the index ---
print("===使用字典生成Series===")
sdata = {'Ohio': 45000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = Series(sdata)
print(obj3)
print()

# --- Dict plus an explicit index: labels missing from the dict become NaN ---
print("===使用字典生成Series，并额外指定index，不匹配部分为NaN===")
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index=states)
print(obj4)
print()

# --- Arithmetic aligns on index labels; unmatched labels yield NaN ---
print("===Series相加，相同索引部分相加===")
total = obj3 + obj4
print(total)
print()

# --- Name the Series and its index ---
print("===指定Series及其索引的名字===")
obj4.name = 'population'
obj4.index.name = 'state'
print(obj4)
print()

# --- Replace the whole index in place ---
print("===替换index===")
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
print(obj)

===用数组生成Series===
0    4
1    7
2   -5
3    3
dtype: int64
[ 4  7 -5  3]
RangeIndex(start=0, stop=4, step=1)

===指定Series的index===
d    4
b    7
a   -5
c    3
dtype: int64
Index(['d', 'b', 'a', 'c'], dtype='object')
-5
c    3
a   -5
d    6
dtype: int64
d    6
b    7
c    3
dtype: int64
True
False

===使用字典生成Series===
Ohio      45000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

===使用字典生成Series，并额外指定index，不匹配部分为NaN===
California        NaN
Ohio          45000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

===Series相加，相同索引部分相加===
California         NaN
Ohio           90000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

===指定Series及其索引的名字===
state
California        NaN
Ohio          45000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

===替换index===
Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64


## 填充

In [5]:

# -*- coding: utf-8 -*- 
import numpy as np
from numpy import nan as NA
import pandas as pd
from pandas import Series, DataFrame, Index

# --- Fill every NaN with a constant ---
print("===填充0===")
df = DataFrame(np.random.randn(7, 3))
df.iloc[:4, 1] = NA
df.iloc[:2, 2] = NA
# fillna returns a new frame by default ...
print(df.fillna(0))
# ... while inplace=True modifies df itself.
df.fillna(0, inplace=True)
print(df)
print('')

# --- Fill with a different value per column (dict keyed by column label) ---
print("===不同行列填充不同的值===")
# Column 3 does not exist, so that key has nothing to fill.
# NOTE: df was already filled in place above, so no NaN is left here and
# the frame is printed unchanged.
print(df.fillna({1: 0.5, 3: -1}))
print('')

# --- Forward fill: propagate the last valid value downwards ---
print("===不同的填充方式===")
df = DataFrame(np.random.randn(6, 3))
df.iloc[2:, 1] = NA
df.iloc[4:, 2] = NA
print(df)
# fillna(method='ffill') is deprecated; ffill() is the supported equivalent.
print(df.ffill())
# limit caps how many consecutive NaNs are filled in each gap.
print(df.ffill(limit=2))
print('')

# --- Fill with a statistic computed from the data itself ---
print("===用统计数据填充===")
data = Series([1., NA, 3.5, NA, 7])
# mean() skips NaN, so this is the mean of the three valid values.
print(data.fillna(data.mean()))


===填充0===
          0         1         2
0  0.619043  0.000000  0.000000
1  1.262121  0.000000  0.000000
2 -0.536828  0.000000  1.354608
3  0.273867  0.000000  1.046143
4  0.648504  0.366829 -0.122517
5  0.773085 -0.547392 -0.289962
6  0.070365 -1.258029  0.144977
          0         1         2
0  0.619043  0.000000  0.000000
1  1.262121  0.000000  0.000000
2 -0.536828  0.000000  1.354608
3  0.273867  0.000000  1.046143
4  0.648504  0.366829 -0.122517
5  0.773085 -0.547392 -0.289962
6  0.070365 -1.258029  0.144977

===不同行列填充不同的值===
          0         1         2
0  0.619043  0.000000  0.000000
1  1.262121  0.000000  0.000000
2 -0.536828  0.000000  1.354608
3  0.273867  0.000000  1.046143
4  0.648504  0.366829 -0.122517
5  0.773085 -0.547392 -0.289962
6  0.070365 -1.258029  0.144977

===不同的填充方式===
          0         1         2
0  0.755876  1.438577  0.986349
1  0.457114 -0.435046 -0.365518
2  0.631733       NaN  1.537871
3  1.217393       NaN  0.636592
4 -1.095912       NaN       NaN

## DataFrame

In [9]:
# -*- coding: utf-8 -*- 
import numpy as np
from pandas import Series, DataFrame

# --- Build a DataFrame from a dict of equal-length lists: keys become columns ---
# This section (and the `data` dict) was missing from the cell although its
# output is recorded below; without it `data` is not defined here.
print("===用字典生成DataFrame，key为列的名字===")
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)
print(frame)
# `columns` selects and orders the columns.
print(DataFrame(data, columns=['year', 'state', 'pop']))
print('')

# --- Explicit index; a requested column missing from the data is all NaN ---
print("===指定索引，在列中指定不存在的列，默认数据用NaN===")
frame2 = DataFrame(data,
                   columns=['year', 'state', 'pop', 'debt'],
                   index=['one', 'two', 'three', 'four', 'five'])
print(frame2)
print(frame2['state'])      # column access by key
print(frame2.year)          # column access by attribute
print(frame2.loc['three'])  # row access by label
frame2['debt'] = 16.5       # a scalar is broadcast to the whole column
print(frame2)
frame2.debt = np.arange(5)  # assign element-wise from a numpy array
print(frame2)
print('')

# --- Assigning a Series aligns on the index; unmatched rows become NaN ---
print("===用Series指定要修改的索引及其对应的值，没有指定的默认数据用NaN===")
val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
print(frame2)
print('')

# --- Assigning to a new key creates a new column ---
print("===赋值给新列===")
frame2['eastern'] = (frame2.state == 'Ohio')  # True where state equals 'Ohio'
print(frame2)
print(frame2.columns)
print('')

# --- Nested dict: outer keys are columns, inner keys are the index ---
print("===DataFrame转置===")
pop = {'Nevada': {2001: 2.4, 2002: 2.9},
       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame3 = DataFrame(pop)
print(frame3)
print(frame3.T)
print('')

# --- Build a DataFrame from slices of existing columns ---
print("===指定索引顺序，以及使用切片初始化数据===")
print(DataFrame(pop))
pdata = {'Ohio': frame3['Ohio'][:-1], 'Nevada': frame3['Nevada'][:2]}
print(DataFrame(pdata))
print('')

# --- Name the index and the columns axis; .values gives a 2-D ndarray ---
print("===指定索引和列的名称===")
frame3.index.name = 'year'
frame3.columns.name = 'state'
print(frame3)
print(frame3.values)
print(frame2.values)

===用字典生成DataFrame，key为列的名字===
    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
   year   state  pop
0  2000    Ohio  1.5
1  2001    Ohio  1.7
2  2002    Ohio  3.6
3  2001  Nevada  2.4
4  2002  Nevada  2.9

===指定索引，在列中指定不存在的列，默认数据用NaN===
       year   state  pop debt
one    2000    Ohio  1.5  NaN
two    2001    Ohio  1.7  NaN
three  2002    Ohio  3.6  NaN
four   2001  Nevada  2.4  NaN
five   2002  Nevada  2.9  NaN
one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object
one      2000
two      2001
three    2002
four     2001
five     2002
Name: year, dtype: int64
year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object
       year   state  pop  debt
one    2000    Ohio  1.5  16.5
two    2001    Ohio  1.7  16.5
three  2002    Ohio  3.6  16.5
four   2001  Nevada  2.4  16.5
five   2002  Nevada  2.9  16.5
       year   state  pop  debt
one 

> 参考：

1. [廖雪峰Python数据分析](https://www.julyedu.com/course/getDetail/66/)