# pandas数据结构

## 1. series对象

> Series 是 DataFrame 的列对象,本身也具有索引;如果没有指定索引,那么默认索引是 0 ~ len()-1

In [59]:
import pandas as pd

### 1.1 创建series对象

#### 1.1.1 通过列表创建series对象

In [60]:
# Build a Series from a plain Python list; with no index given, pandas
# assigns the default integer labels 0..len(list1)-1.
list1 = [1, 2, 3, 4, 5]
pd.Series(data=list1)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [61]:
# Same data, but with explicit string labels 'a'..'e' as the index.
pd.Series(data=list1, index=list("abcde"))

a    1
b    2
c    3
d    4
e    5
dtype: int64

#### 1.1.2 通过字典创建series对象

In [62]:
# A dict maps directly onto a Series: the keys become the index labels
# and the values become the data.
dict1 = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
pd.Series(data=dict1)

a    1
b    2
c    3
d    4
e    5
dtype: int64

#### 1.1.3 通过元组创建series对象

In [63]:
# A tuple works like a list: the elements become the data and the index
# falls back to the default integer labels.
tuple1 = ("a", "b", "c", "d", "e")
pd.Series(data=tuple1)

0    a
1    b
2    c
3    d
4    e
dtype: object

#### 1.1.4 通过numpy创建series对象

In [64]:
import numpy as np
# Pass the array itself as the index. Wrapping it in a list
# (index=[np.arange(10)]) makes pandas treat it as a list of level arrays
# and build a one-level MultiIndex rather than a flat integer index.
ser = pd.Series(np.arange(10), index=np.arange(10))
ser

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

### 1.2 series对象属性

In [65]:
# Sample Series for the attribute examples: values 0..4 labelled 'A'..'E'.
list1 = list(range(5))
index1 = list("ABCDE")
s1 = pd.Series(list1, index=index1)
print(s1)

A    0
B    1
C    2
D    3
E    4
dtype: int64


#### 1.2.1 获取s对象的元素

In [66]:
# .values exposes the Series data as a NumPy array (the index labels are not included).
s1.values

array([0, 1, 2, 3, 4])

#### 1.2.2 修改s的值

In [67]:
# Overwrite the first element by position. A bare s1[0] on a string-labelled
# Series relies on the deprecated integer-as-position fallback (pandas emits
# a FutureWarning for it); .iloc states the positional intent explicitly.
s1.iloc[0] = 10
s1

  s1[0] = 10


A    10
B     1
C     2
D     3
E     4
dtype: int64

In [68]:
# Overwrite the element labelled 'B' through the label-based indexer.
s1.loc["B"] = 99
s1

A    10
B    99
C     2
D     3
E     4
dtype: int64

## 2. 创建DataFrame对象

```text
1. 通过读取文件返回df对象
2. 通过字典,元组,列表等进行创建df对象
```

### 2.1 通过字典+列表的方式进行创建

In [69]:
# Scenario 1: a dict of equal-length lists -- each key becomes a column
# name and each list supplies that column's values.
dict1 = {
    "name": ["张三", "李四", "王五"],
    "age": [18, 19, 20],
    "gender": ["男", "女", "女"],
}
df = pd.DataFrame(data=dict1)
df

Unnamed: 0,name,age,gender
0,张三,18,男
1,李四,19,女
2,王五,20,女


### 2.2 通过元组+列表的方式进行创建

In [70]:
# Each tuple is one row; without `columns`, the columns get the default
# integer labels 0, 1, 2.
list1 = [
    ("张三", 18, "男"),
    ("李四", 19, "女"),
    ("王五", 20, "女"),
]
df = pd.DataFrame(data=list1)
df

Unnamed: 0,0,1,2
0,张三,18,男
1,李四,19,女
2,王五,20,女


In [71]:
# Build from the row tuples in list1 and name the columns explicitly.
df1 = pd.DataFrame(data=list1, columns=["name", "age", "gender"])
df1

Unnamed: 0,name,age,gender
0,张三,18,男
1,李四,19,女
2,王五,20,女


### 2.3 通过ndarray对象进行创建

In [72]:
# 3x4 array holding 0..11; row and column labels are supplied explicitly.
arr1 = np.arange(12).reshape(3, 4)
pd.DataFrame(arr1, index=["A", "B", "C"], columns=["a", "b", "c", "d"])

Unnamed: 0,a,b,c,d
A,0,1,2,3
B,4,5,6,7
C,8,9,10,11


### 2.4 学生成绩案例 


In [73]:
# Random integer scores in [40, 100) for 10 students x 5 subjects; rows are
# labelled 同学1..同学10 and columns carry the subject names.
ndarray = np.random.randint(40, 100, size=(10, 5))
df = pd.DataFrame(
    data=ndarray,
    index=[f"同学{n}" for n in range(1, ndarray.shape[0] + 1)],
    columns=["语文", "数学", "英语", "物理", "化学"],
)
df

Unnamed: 0,语文,数学,英语,物理,化学
同学1,94,95,73,47,78
同学2,86,60,48,77,70
同学3,89,51,75,59,86
同学4,50,47,85,76,41
同学5,97,44,45,65,63
同学6,50,89,72,53,68
同学7,75,42,65,84,90
同学8,63,60,75,85,94
同学9,89,71,79,77,73
同学10,73,62,80,92,87


In [74]:
# Approach 2: build an unlabelled DataFrame first, then attach the labels
# with rename().
# Create the score array first so the cell is self-contained: the row-label
# list below reads its shape.
ndarray = np.random.randint(40, 100, (10, 5))
index_name = [f'同学{i+1}' for i in range(ndarray.shape[0])]
# NOTE(review): '化' looks like a truncation of '化学'; it is kept unchanged
# so the column labels still match the recorded output tables.
columns_name = ['语文', '数学', '英语', '物理', '化']
df = pd.DataFrame(ndarray)
# rename() operates on an existing DataFrame: map the default integer
# labels 0..n-1 to the prepared names and modify df in place.
df.rename(
    index=dict(enumerate(index_name)),
    columns=dict(enumerate(columns_name)),
    inplace=True,
)
df

Unnamed: 0,语文,数学,英语,物理,化
同学1,68,99,43,74,53
同学2,68,92,97,93,84
同学3,97,89,41,94,81
同学4,58,41,64,47,78
同学5,53,53,61,84,74
同学6,76,68,96,90,49
同学7,60,88,58,84,45
同学8,94,90,69,50,40
同学9,54,47,95,41,70
同学10,68,67,52,57,71


## 3. DataFrame对象属性

### 3.1 datetime日期类型

In [90]:
# dtype='datetime64[ns]' converts the ISO date strings to datetime values
# while the frame is being constructed.
dataFrame = pd.DataFrame(
    data={"date": ["2020-01-01", "2020-01-02", "2020-01-03"]},
    dtype="datetime64[ns]",
)
dataFrame.dtypes

date    datetime64[ns]
dtype: object

### 3.2 timedelta 时间差类型

In [94]:
# Subtracting one timestamp from another yields a Timedelta.
start_time, end_time = map(pd.to_datetime, ("2020-01-01", "2020-01-05"))
time = end_time - start_time
time

Timedelta('4 days 00:00:00')

## 4. dataframe对象方法

### 4.4 索引重置

In [75]:
# reset_index(drop=False): switch to the default 0..n-1 integer index and
# keep the original index as a regular column (named 'index').
df3 = df.reset_index(drop=False)
df3

Unnamed: 0,index,语文,数学,英语,物理,化
0,同学1,68,99,43,74,53
1,同学2,68,92,97,93,84
2,同学3,97,89,41,94,81
3,同学4,58,41,64,47,78
4,同学5,53,53,61,84,74
5,同学6,76,68,96,90,49
6,同学7,60,88,58,84,45
7,同学8,94,90,69,50,40
8,同学9,54,47,95,41,70
9,同学10,68,67,52,57,71


In [76]:
# reset_index(drop=True): switch to the default integer index and discard
# the original index instead of keeping it as a column.
df2 = df.reset_index(drop=True)
df2

Unnamed: 0,语文,数学,英语,物理,化
0,68,99,43,74,53
1,68,92,97,93,84
2,97,89,41,94,81
3,58,41,64,47,78
4,53,53,61,84,74
5,76,68,96,90,49
6,60,88,58,84,45
7,94,90,69,50,40
8,54,47,95,41,70
9,68,67,52,57,71


### 4.5 索引设置

In [82]:
# set_index returns a new DataFrame whose row index is the '语文' column;
# that column no longer appears among the data columns.
df4 = df.set_index("语文")
df4

Unnamed: 0_level_0,数学,英语,物理,化
语文,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
68,99,43,74,53
68,92,97,93,84
97,89,41,94,81
58,41,64,47,78
53,53,61,84,74
76,68,96,90,49
60,88,58,84,45
94,90,69,50,40
54,47,95,41,70
68,67,52,57,71
