In [1]:
import pandas as pd
import numpy as np

### 使用list构造Series

In [2]:
# Build a Series from a plain Python list; with no explicit index pandas
# labels the elements 0..n-1.
course = [
    '语文',
    '数学',
    '英语',
    '计算机',
]
data = pd.Series(data=course)
data

0     语文
1     数学
2     英语
3    计算机
dtype: object

### 用dict构造Series

In [3]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
grades = {
    '语文': 80,
    '数学': 90,
    '英语': 100,
    '计算机': 100,
}
data = pd.Series(data=grades)
data

语文      80
数学      90
英语     100
计算机    100
dtype: int64

### Series转为list

In [4]:
# Round-trip: list -> Series -> list. `tolist()` returns a plain Python list
# holding the Series values in order.
course = [
    '语文',
    '数学',
    '英语',
    '计算机',
]
data = pd.Series(data=course)
lst = data.tolist()
lst

['语文', '数学', '英语', '计算机']

### Series转为DataFrame

In [5]:
# Source Series for the two Series -> DataFrame conversions shown next:
# course names are the index labels, scores are the values.
grades = {
    '语文': 80,
    '数学': 90,
    '英语': 100,
    '计算机': 100,
}
data = pd.Series(data=grades)
data

语文      80
数学      90
英语     100
计算机    100
dtype: int64

- 第一种方法：直接用DataFrame转换

In [6]:
# Convert the Series into a one-column DataFrame named 'grades', keeping the
# Series index as the row index.
# `to_frame(name=...)` is used instead of `pd.DataFrame(data, columns=[...])`:
# the constructor selects by the Series' own name, so a Series named anything
# other than 'grades' would silently yield an all-NaN column.
df1 = data.to_frame(name='grades')
df1

Unnamed: 0,grades
语文,80
数学,90
英语,100
计算机,100


In [7]:
# Confirm that the conversion result is a DataFrame rather than a Series.
type(df1)

pandas.core.frame.DataFrame

- 第二种方法：用reset_index转换

In [8]:
# Turn the Series into a two-column DataFrame: the old index labels become
# the 'Course' column, the values become the 'Grades' column, and the rows
# get a fresh 0..n-1 index.
df2 = data.rename_axis('Course').reset_index(name='Grades')
df2

Unnamed: 0,Course,Grades
0,语文,80
1,数学,90
2,英语,100
3,计算机,100


In [9]:
# Confirm that reset_index() produced a DataFrame.
type(df2)

pandas.core.frame.DataFrame

### 借助numpy创建Series

In [10]:
# Build a float Series from NumPy ranges; both ranges hold nine elements, so
# each value pairs with exactly one index label.
ser = pd.Series(
    data=np.arange(10, 100, 10),  # values 10, 20, ..., 90
    index=np.arange(101, 110),    # labels 101, 102, ..., 109
    dtype='float',
)
ser

101    10.0
102    20.0
103    30.0
104    40.0
105    50.0
106    60.0
107    70.0
108    80.0
109    90.0
dtype: float64

### 转换Series的数据类型

In [11]:
# A Series of zero-padded numeric strings (object dtype) labelled 'a'..'d';
# it is the input for the dtype conversion that follows.
ser = pd.Series(
    ['001', '002', '003', '004'],
    index=['a', 'b', 'c', 'd'],
)
ser

a    001
b    002
c    003
d    004
dtype: object

In [12]:
# Cast the string values to integers ('001' -> 1). The result of astype()
# is assigned back to `ser`, replacing the string Series.
ser = ser.astype(int)
ser

a    1
b    2
c    3
d    4
dtype: int64

### 给Series添加元素

In [13]:
# Starting Series for the "add elements" example: four courses and scores.
grades = {
    '语文': 80,
    '数学': 90,
    '英语': 100,
    '计算机': 100,
}
data = pd.Series(data=grades)
data

语文      80
数学      90
英语     100
计算机    100
dtype: int64

In [14]:
# Add new elements by concatenating a second Series onto the existing one.
# `Series.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` is the supported replacement and yields the same result
# (existing entries first, then the new ones, index labels preserved).
extra_grades = pd.Series({'物理': 85, '化学': 99})
data = pd.concat([data, extra_grades])
data

语文      80
数学      90
英语     100
计算机    100
物理      85
化学      99
dtype: int64

### 使用字典创建DataFrame

In [15]:
# Build a DataFrame from a dict of equal-length lists: each key becomes a
# column and the rows get a default 0..n-1 index.
grades = {
    '语文': [80, 90, 100],
    '数学': [90, 88, 70],
    '英语': [100, 90, 100],
}
data = pd.DataFrame(data=grades)
data

Unnamed: 0,语文,数学,英语
0,80,90,100
1,90,88,90
2,100,70,100


### 设置DataFrame的索引列

In [16]:
# Build a small people table and use the 'Name' column as the row index.
data = {
    'Name': ['Alex', 'Tom', 'Rose'],
    'Gender': ['M', 'M', 'F'],
    'Age': [18, 20, 30],
}
# set_index() returns a new frame; chaining it avoids `inplace=True`, so `df`
# is produced in a single statement with no in-place mutation.
df = pd.DataFrame(data).set_index('Name')
df

Unnamed: 0_level_0,Gender,Age
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alex,M,18
Tom,M,20
Rose,F,30


### 生成一个月所有日期

In [17]:
# Every calendar day of October 2021; both endpoints are included.
data_range = pd.date_range('2021-10-01', '2021-10-31', freq='D')
data_range

DatetimeIndex(['2021-10-01', '2021-10-02', '2021-10-03', '2021-10-04',
               '2021-10-05', '2021-10-06', '2021-10-07', '2021-10-08',
               '2021-10-09', '2021-10-10', '2021-10-11', '2021-10-12',
               '2021-10-13', '2021-10-14', '2021-10-15', '2021-10-16',
               '2021-10-17', '2021-10-18', '2021-10-19', '2021-10-20',
               '2021-10-21', '2021-10-22', '2021-10-23', '2021-10-24',
               '2021-10-25', '2021-10-26', '2021-10-27', '2021-10-28',
               '2021-10-29', '2021-10-30', '2021-10-31'],
              dtype='datetime64[ns]', freq='D')

### 生成一年的所有周一日期

In [18]:
# Every Monday that falls within 2021: 'W-MON' is a weekly frequency
# anchored on Monday.
data_range = pd.date_range('2021-01-01', '2021-12-31', freq='W-MON')
data_range

DatetimeIndex(['2021-01-04', '2021-01-11', '2021-01-18', '2021-01-25',
               '2021-02-01', '2021-02-08', '2021-02-15', '2021-02-22',
               '2021-03-01', '2021-03-08', '2021-03-15', '2021-03-22',
               '2021-03-29', '2021-04-05', '2021-04-12', '2021-04-19',
               '2021-04-26', '2021-05-03', '2021-05-10', '2021-05-17',
               '2021-05-24', '2021-05-31', '2021-06-07', '2021-06-14',
               '2021-06-21', '2021-06-28', '2021-07-05', '2021-07-12',
               '2021-07-19', '2021-07-26', '2021-08-02', '2021-08-09',
               '2021-08-16', '2021-08-23', '2021-08-30', '2021-09-06',
               '2021-09-13', '2021-09-20', '2021-09-27', '2021-10-04',
               '2021-10-11', '2021-10-18', '2021-10-25', '2021-11-01',
               '2021-11-08', '2021-11-15', '2021-11-22', '2021-11-29',
               '2021-12-06', '2021-12-13', '2021-12-20', '2021-12-27'],
              dtype='datetime64[ns]', freq='W-MON')

### 生成一天的所有小时

In [19]:
# The 24 hourly timestamps of 2021-10-01, from 00:00 through 23:00.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated since pandas 2.2.
data_range = pd.date_range(start='2021-10-01', periods=24, freq='h')
data_range

DatetimeIndex(['2021-10-01 00:00:00', '2021-10-01 01:00:00',
               '2021-10-01 02:00:00', '2021-10-01 03:00:00',
               '2021-10-01 04:00:00', '2021-10-01 05:00:00',
               '2021-10-01 06:00:00', '2021-10-01 07:00:00',
               '2021-10-01 08:00:00', '2021-10-01 09:00:00',
               '2021-10-01 10:00:00', '2021-10-01 11:00:00',
               '2021-10-01 12:00:00', '2021-10-01 13:00:00',
               '2021-10-01 14:00:00', '2021-10-01 15:00:00',
               '2021-10-01 16:00:00', '2021-10-01 17:00:00',
               '2021-10-01 18:00:00', '2021-10-01 19:00:00',
               '2021-10-01 20:00:00', '2021-10-01 21:00:00',
               '2021-10-01 22:00:00', '2021-10-01 23:00:00'],
              dtype='datetime64[ns]', freq='H')

### 生成日期DataFrame

In [20]:
# 31 consecutive days starting 2021-10-01 (daily frequency is the default),
# stored in a 'day' column, plus each date's ordinal day within the year.
data_range = pd.date_range(start='2021-10-01', periods=31)
data = pd.DataFrame({'day': data_range}).assign(
    day_of_year=lambda frame: frame['day'].dt.dayofyear
)
data.head()

Unnamed: 0,day,day_of_year
0,2021-10-01,274
1,2021-10-02,275
2,2021-10-03,276
3,2021-10-04,277
4,2021-10-05,278


### 生成日期和随机分布DataFrame

In [21]:
# Build a date-indexed DataFrame of random samples from three distributions.
# A seeded Generator makes the sample reproducible under Restart & Run All.
# NOTE: outputs saved from an earlier, unseeded run will not match until the
# notebook is re-executed.
SEED = 42
N_DAYS = 1000  # number of consecutive days, i.e. rows in the frame
rng = np.random.default_rng(SEED)

date_range = pd.date_range(start='2021-01-01', periods=N_DAYS)
data = {
    'norm': rng.normal(loc=0, scale=1, size=N_DAYS),  # normal distribution (mean 0, std 1)
    'uniform': rng.uniform(low=0, high=1, size=N_DAYS),  # uniform distribution on [0, 1)
    'binomial': rng.binomial(n=1, p=0.2, size=N_DAYS),  # binomial distribution (1 trial, p=0.2)
}
df = pd.DataFrame(data, index=date_range)
df.head()

Unnamed: 0,norm,uniform,binomial
2021-01-01,0.854932,0.848574,1
2021-01-02,-0.281776,0.091878,1
2021-01-03,-0.448835,0.221809,0
2021-01-04,-0.31448,0.338779,0
2021-01-05,-0.89699,0.130161,0


### 查看DataFrame相关信息

- 显示前几行

In [22]:
# Preview the first 10 rows of df.
df.head(10)

Unnamed: 0,norm,uniform,binomial
2021-01-01,0.854932,0.848574,1
2021-01-02,-0.281776,0.091878,1
2021-01-03,-0.448835,0.221809,0
2021-01-04,-0.31448,0.338779,0
2021-01-05,-0.89699,0.130161,0
2021-01-06,-0.663579,0.255092,1
2021-01-07,1.465039,0.504756,0
2021-01-08,0.436749,0.872666,0
2021-01-09,-0.764699,0.895701,0
2021-01-10,1.140323,0.673973,0


- 显示末几行

In [23]:
# Preview the last 10 rows of df.
df.tail(10)

Unnamed: 0,norm,uniform,binomial
2023-09-18,1.148176,0.0922,1
2023-09-19,-0.543696,0.191729,0
2023-09-20,0.000603,0.260261,1
2023-09-21,-0.556431,0.847025,0
2023-09-22,-0.926265,0.336303,0
2023-09-23,0.356072,0.73645,1
2023-09-24,1.083811,0.288254,1
2023-09-25,0.178809,0.12849,0
2023-09-26,1.919913,0.684574,0
2023-09-27,-1.440704,0.986465,1


- 显示数据信息

In [24]:
# Print a structural summary of df: index range, columns, non-null counts,
# dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1000 entries, 2021-01-01 to 2023-09-27
Freq: D
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   norm      1000 non-null   float64
 1   uniform   1000 non-null   float64
 2   binomial  1000 non-null   int64  
dtypes: float64(2), int64(1)
memory usage: 31.2 KB


- 显示数学统计

In [25]:
# Summary statistics per numeric column: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,norm,uniform,binomial
count,1000.0,1000.0,1000.0
mean,0.021145,0.492878,0.193
std,1.018739,0.281471,0.39485
min,-3.774934,0.000968,0.0
25%,-0.644579,0.249878,0.0
50%,0.031267,0.503162,0.0
75%,0.745596,0.730276,0.0
max,3.127332,0.998236,1.0


### 统计数据列的值出现次数

In [26]:
# Count how many times each distinct value occurs in the 'binomial' column.
df['binomial'].value_counts()

0    807
1    193
Name: binomial, dtype: int64

### 筛选出某列最大值所在的行数据

In [27]:
grades = {
    '语文': [80, 90, 100],
    '数学': [90, 88, 70],
    '英语': [100, 90, 100],
}
data = pd.DataFrame(data=grades)
# idxmax() gives the index label of the column's maximum (the first one if
# there are ties); selecting that single label with .loc returns the row as
# a Series.
data.loc[data['语文'].idxmax(), :]

语文    100
数学     70
英语    100
Name: 2, dtype: int64

In [28]:
# Passing the label inside a list makes .loc return a one-row DataFrame
# instead of a Series.
data.loc[[data['语文'].idxmax()]]  # result is a DataFrame

Unnamed: 0,语文,数学,英语
2,100,70,100
