## Pandas加载数据 

In [1]:
import os
from urllib.parse import quote

import numpy as np
import pandas as pd

### csv数据

In [2]:
# Sample data: 10 rows x 5 labelled columns of random integers in [0, 50).
data = np.random.randint(low=0, high=50, size=(10, 5))
df = pd.DataFrame(
    data,
    columns=['Python', 'Golang', 'Java', 'NumPy', 'Pandas'],
)
df

Unnamed: 0,Python,Golang,Java,NumPy,Pandas
0,13,7,32,9,19
1,34,9,27,41,30
2,46,43,13,7,22
3,40,32,1,1,45
4,27,9,23,15,42
5,14,47,35,10,31
6,46,32,45,37,10
7,20,1,49,14,4
8,35,1,19,43,18
9,37,35,12,23,44


- df.to_csv: 保存到csv

In [3]:
# Write the frame to data.csv.
df.to_csv(
    'data.csv',
    sep=',',      # field separator; a comma is the default
    header=True,  # write the column labels
    index=True,   # write the row index as well
)

- pd.read_csv: 加载csv数据

In [4]:
# Load data.csv back: comma-separated, row 0 supplies the column labels,
# and the first column becomes the row index.
pd.read_csv(
    'data.csv',
    index_col=0,
    header=[0],
    sep=',',
)

Unnamed: 0,Python,Golang,Java,NumPy,Pandas
0,13,7,32,9,19
1,34,9,27,41,30
2,46,43,13,7,22
3,40,32,1,1,45
4,27,9,23,15,42
5,14,47,35,10,31
6,46,32,45,37,10
7,20,1,49,14,4
8,35,1,19,43,18
9,37,35,12,23,44


不把首行当作列索引: header=None（原来的列名行会作为第一行数据读入）

In [5]:
# header=None: no row is taken as column labels, so the label row written by
# to_csv is read as an ordinary data row.
pd.read_csv(
    'data.csv',
    index_col=0,
    header=None,
    sep=',',
)

Unnamed: 0_level_0,1,2,3,4,5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,Python,Golang,Java,NumPy,Pandas
0.0,13,7,32,9,19
1.0,34,9,27,41,30
2.0,46,43,13,7,22
3.0,40,32,1,1,45
4.0,27,9,23,15,42
5.0,14,47,35,10,31
6.0,46,32,45,37,10
7.0,20,1,49,14,4
8.0,35,1,19,43,18


- pd.read_table

In [6]:
# read_table defaults to a tab separator (sep='\t'); data.csv is
# comma-separated, so the separator is passed explicitly.
pd.read_table(
    'data.csv',
    index_col=0,
    sep=',',
)

Unnamed: 0,Python,Golang,Java,NumPy,Pandas
0,13,7,32,9,19
1,34,9,27,41,30
2,46,43,13,7,22
3,40,32,1,1,45
4,27,9,23,15,42
5,14,47,35,10,31
6,46,32,45,37,10
7,20,1,49,14,4
8,35,1,19,43,18
9,37,35,12,23,44


### excel数据

In [7]:
# Sample data for the Excel round trip: 10 rows x 5 labelled columns of
# random integers in [0, 50).
data = np.random.randint(low=0, high=50, size=(10, 5))
df = pd.DataFrame(
    data,
    columns=['Python', 'Matlib', 'Java', 'NumPy', 'Pandas'],
)
df

Unnamed: 0,Python,Matlib,Java,NumPy,Pandas
0,6,14,38,23,12
1,35,10,22,3,4
2,21,36,6,6,43
3,30,19,14,26,29
4,9,11,38,18,35
5,9,39,45,39,37
6,35,2,45,2,13
7,38,44,48,24,48
8,48,23,39,0,0
9,44,17,21,3,18


- df.to_excel: 保存到excel文件

In [8]:
# Write the frame to data.xlsx.
df.to_excel(
    'data.xlsx',
    sheet_name='Sheet1',  # worksheet name
    header=True,          # write the column labels
    index=False,          # do not write the row index
)

- pd.read_excel: 读取excel

In [9]:
# Read the worksheet by name. data.xlsx holds a single header row, so header=0
# is used: header=[0, 1] would also consume the first data row as a second
# column level. The result is bound to a name because only the last expression
# of a cell is rendered.
by_name = pd.read_excel('data.xlsx', sheet_name='Sheet1', header=0)

# sheet_name=0: read the first worksheet (selected by position)
# names: replace the original column labels
by_position = pd.read_excel('data.xlsx', sheet_name=0, header=0, names=list('ABCDE'))

# 'Sheet1' is the only worksheet written to the file, so both calls must load
# the same table; only the column labels differ.
assert by_name.shape == by_position.shape

by_position

Unnamed: 0,A,B,C,D,E
0,6,14,38,23,12
1,35,10,22,3,4
2,21,36,6,6,43
3,30,19,14,26,29
4,9,11,38,18,35
5,9,39,45,39,37
6,35,2,45,2,13
7,38,44,48,24,48
8,48,23,39,0,0
9,44,17,21,3,18


### MySQL数据

需要安装pymysql

- pip install pymysql -i https://pypi.tuna.tsinghua.edu.cn/simple


In [10]:
# Explicit %pip magic: does not depend on IPython's automagic being enabled and
# installs into the environment of the running kernel.
%pip install pymysql -i https://pypi.tuna.tsinghua.edu.cn/simple

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Note: you may need to restart the kernel to use updated packages.


需要安装sqlalchemy: 

- pip install sqlalchemy -i https://pypi.tuna.tsinghua.edu.cn/simple
- sqlalchemy是Python语言下的数据库引擎库


In [11]:
# Explicit %pip magic: does not depend on IPython's automagic being enabled and
# installs into the environment of the running kernel.
%pip install sqlalchemy -i https://pypi.tuna.tsinghua.edu.cn/simple

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Note: you may need to restart the kernel to use updated packages.


In [12]:
from sqlalchemy import create_engine

In [13]:
# Build sample data: 150 rows x 3 labelled columns of random integers in [0, 150).
data = np.random.randint(low=0, high=150, size=(150, 3))
df = pd.DataFrame(
    data,
    columns=['Python', 'Pandas', 'PyTorch'],
)
df.head()

Unnamed: 0,Python,Pandas,PyTorch
0,16,109,62
1,73,41,90
2,89,110,34
3,50,86,52
4,89,105,23


- 先连接MySQL

In [14]:
# Connection settings come from environment variables so real credentials do
# not have to be written into the notebook. The fallbacks reproduce the
# original demo values (root / root on localhost:3306, database "db").
mysql_user = os.environ.get('MYSQL_USER', 'root')
mysql_password = os.environ.get('MYSQL_PASSWORD', 'root')
mysql_host = os.environ.get('MYSQL_HOST', 'localhost')
mysql_port = os.environ.get('MYSQL_PORT', '3306')
mysql_db = os.environ.get('MYSQL_DB', 'db')

# URL layout: dialect+driver://user:password@host:port/database
#   mysql+pymysql : database type + driver
#   user:password : database user name and password
#   host:port     : database address and MySQL port
#   database      : database (schema) name
# quote(..., safe='') percent-encodes characters such as '@', ':' or '/' that
# would otherwise be parsed as URL delimiters.
conn = create_engine(
    'mysql+pymysql://{user}:{password}@{host}:{port}/{db}'.format(
        user=quote(mysql_user, safe=''),
        password=quote(mysql_password, safe=''),
        host=mysql_host,
        port=mysql_port,
        db=mysql_db,
    )
)


- df.to_sql保存到MySQL

In [15]:
# Store the frame in the MySQL table "score".
df.to_sql(
    con=conn,            # database connection object
    name='score',        # table name inside the database
    if_exists='append',  # add the rows when the table already exists
    index=False,         # do not store the row index
)

OperationalError: (pymysql.err.OperationalError) (1045, "Access denied for user 'root'@'localhost' (using password: YES)")
(Background on this error at: https://sqlalche.me/e/14/e3q8)

- pd.read_sql: 从MySQL中加载数据

In [None]:
# Run a query against MySQL and load the result into a DataFrame.
# Adding index_col='Python' would use that column as the row index.
pd.read_sql(
    con=conn,                   # database connection object
    sql='select * from score',  # SQL statement to run
)