## pandas 数据结构

#### Series
`s = pd.Series(data, index=index)`
`data` 参数支持的类型：list、ndarray、字典、标量（常量）

In [2]:
import pandas as pd

# Build a Series from a plain Python list of floats, labelling each value
# with an explicit string index.
s = pd.Series(
    data=[1.3456, 2.444, 3.222, 5.3222],
    index=["a", "b", "c", "d"],
)
print(s)

a    1.3456
b    2.4440
c    3.2220
d    5.3222
dtype: float64


In [3]:
# A list that mixes a string with floats cannot share one numeric dtype,
# so the resulting Series falls back to dtype "object".
s = pd.Series(
    data=["a", 2.444, 3.222, 5.3222],
    index=["a", "b", "c", "d"],
)
print(s)

a         a
b     2.444
c     3.222
d    5.3222
dtype: object


In [4]:
# Choose the element type explicitly.
# The float values are cast with astype() instead of passing dtype="int8" to
# the constructor: recent pandas versions refuse the lossy float -> int
# conversion inside the constructor and raise
# "ValueError: Trying to coerce float values to integers".
s = pd.Series([1.3456, 2.444, 3.222, 5.3222], index=["a", "b", "c", "d"]).astype("int8")
# astype() drops the fractional part, so the stored values are 1, 2, 3, 5.
print(s)

a    1
b    2
c    3
d    5
dtype: int8


In [5]:
# ndarray is the array type provided by NumPy; it can be used as Series data.
import numpy as np

s = pd.Series(
    data=np.random.randn(5),
    index=list("abcde"),
)
print(s)

a   -0.478604
b   -0.139564
c    0.588565
d    1.394299
e    1.032450
dtype: float64


In [6]:
# No index is given here, so the Series gets the default integer labels 0..4.
s = pd.Series(data=np.random.randn(5))
print(s)

0   -0.509932
1    0.911006
2    0.849540
3   -1.287050
4   -0.725084
dtype: float64


In [9]:
# Create Series objects from a dict: the keys become the row labels.
s1 = pd.Series({"a": 0, "b": 1, "c": 1})
s2 = pd.Series({"a": 0, "b": 1, "c": 1}, index=["a", "b", "c"])
# Passing index= with a subset of the keys keeps only those rows.
s3 = pd.Series({"a": 0, "b": 1, "c": 1}, index=["b", "c"])
print(s1, s2, s3, sep="\n")

a    0
b    1
c    1
dtype: int64
a    0
b    1
c    1
dtype: int64
b    1
c    1
dtype: int64


In [10]:
# A scalar is repeated for every label in the index.
s = pd.Series(5.0, index=list("cba"))
print(s)

c    5.0
b    5.0
a    5.0
dtype: float64


#### 访问Series对象

In [12]:
# Rebuild a small labelled Series and show its underlying values
# (printed as an array, without the labels).
s = pd.Series(
    {"a": 0, "b": 1, "c": 1},
    index=["a", "b", "c"],
)
print(s.values)

[0 1 1]


In [13]:
# Show the index object of the Series, i.e. its row labels 'a', 'b', 'c'.
print(s.index)

Index(['a', 'b', 'c'], dtype='object')


In [16]:
# Read single elements by label, first with attribute-style access (s.a),
# then with bracket access (s["b"]); each prints the stored scalar.
print(s.a)
print(s["b"])

0
1


In [17]:
# Indexing with a list of labels selects several rows and yields a Series.
print(s[["b","c"]])

b    1
c    1
dtype: int64


In [18]:
# Slice by position: [:2] keeps the first two rows (labels 'a' and 'b').
print(s[:2])

a    0
b    1
dtype: int64


## DataFrame 对象

#### 生成方法 `df = pd.DataFrame(data, index, columns)`
`data` 支持的类型：列表组成的字典、嵌套列表、Series 组成的字典、字典组成的字典、二维 ndarray

In [19]:
# Dict of equal-length lists: each key becomes a column, and the rows get
# the default integer index.
df = pd.DataFrame(
    {
        "one": [1.0, 2.0, 3.0, 5.0],
        "two": [2.0, 3.0, 4.0, 5.0],
    }
)
print(df)

   one  two
0  1.0  2.0
1  2.0  3.0
2  3.0  4.0
3  5.0  5.0


In [22]:
# Nested list: each inner list is one row; index= and columns= supply the
# row and column labels.
df = pd.DataFrame(
    [
        [1.0, 2.0, 3.0, 5.0],
        [2.0, 3.0, 4.0, 5.0],
    ],
    index=["a", "b"],
    columns=["one", "two", "three", "four"],
)
print(df)

   one  two  three  four
a  1.0  2.0    3.0   5.0
b  2.0  3.0    4.0   5.0


In [26]:
# Build a zero-filled structured ndarray with NumPy: two records, each with
# a 4-byte int field "A", a 4-byte float field "B" and a 10-byte string
# field "C".
# "S10" is used instead of "a10": the "a" type code is a deprecated alias of
# "S" in NumPy 2.x, and "S10" is how NumPy itself reports the field dtype.
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])
print(data)
# Bare expression: the notebook also displays the array repr with its dtype.
data

[(0, 0., b'') (0, 0., b'')]


array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

# <font color="red">待确认：ndarray 的 slice（切片）访问方法？</font>

In [59]:
# Fill the structured ndarray in place, then build a DataFrame from it.
# Note: every record has to be a tuple; assigning nested lists such as
# [[1.,2.,"hello"],[3.,4.,"world"]] raises an error.
data[:] = [
    (1.0, 2.0, "hello"),
    (3.0, 4.0, "world"),
]
df = pd.DataFrame(data)
print(df)

   A    B         C
0  1  2.0  b'hello'
1  3  4.0  b'world'


In [61]:
# Give the two records custom row labels.
df = pd.DataFrame(data, index=["first", "second"])
print(df)
# columns= sets the left-to-right order in which the fields appear.
df = pd.DataFrame(
    data,
    index=["first", "second"],
    columns=["B", "A", "C"],
)
print(df)

        A    B         C
first   1  2.0  b'hello'
second  3  4.0  b'world'
          B  A         C
first   2.0  1  b'hello'
second  4.0  3  b'world'



#### <font color="red">以字典形式创建dataframe</font>
- Series 与 list 的用法很像，但 Series 可以额外指定 index。
- list 与字典的区别：字典的每个键对应 DataFrame 的一列（按列组织）；嵌套 list 的每个元素对应一行（按行组织）。
- 由多个 Series 生成 DataFrame 时，会对各 Series 的行 index 取并集并对齐，缺失位置填 NaN。

In [64]:
# Dict of Series: each key becomes a column and the rows are aligned on the
# Series labels; labels missing from one Series show up as NaN.
data = {
    "one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),
    "two": pd.Series([2.0, 3.0, 4.0], index=["b", "c", "d"]),
}
df = pd.DataFrame(data)
print(df)

# index= keeps only the listed row labels.
df = pd.DataFrame(data, index=["a", "b", "c"])
print(df)
df = pd.DataFrame(data, index=["a", "b", "c"])
print(df)

# columns= selects columns; a name that is not a key of the dict ("three")
# becomes an all-NaN column.
df = pd.DataFrame(
    data,
    index=["a", "b", "c"],
    columns=["two", "three"],
)
print(df)

   one  two
a  1.0  NaN
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0
   one  two
a  1.0  NaN
b  2.0  2.0
c  3.0  3.0
   one  two
a  1.0  NaN
b  2.0  2.0
c  3.0  3.0
   two three
a  NaN   NaN
b  2.0   NaN
c  3.0   NaN


In [68]:
# Build a DataFrame from a list of dicts.
# Each dict supplies one row, the same row-wise layout a nested list gives;
# keys missing from a dict are filled with NaN.
data = [
    {"a": 1, "b": 2},
    {"a": 2, "b": 3, "c": 5},
]
df = pd.DataFrame(data)
print(df)

# index= labels the rows and columns= restricts the output to "a" and "b".
df = pd.DataFrame(
    data,
    index=["first", "second"],
    columns=["a", "b"],
)
print(df)

   a  b    c
0  1  2  NaN
1  2  3  5.0
        a  b
first   1  2
second  2  3
