In [2]:
import json

import numpy as np
import pandas as pd

# 导出


In [2]:
# Single-column sample frame; the next cells export it to CSV and Excel.
df = pd.DataFrame(
    data={"values": [5, 10, 15, 20, 25]},
)

## csv

In [3]:
# Write df to CSV. The row index is written as well (no index=False here), which
# is why the read-back further down shows an extra "Unnamed: 0" column.
df.to_csv("file_for_read/df.csv") 

## excel

In [4]:
# Write df to an Excel workbook. The row index is written as well, which is why
# the read-back further down shows an extra "Unnamed: 0" column.
df.to_excel("file_for_read/df.xlsx") 

## sql

In [5]:
# SQL export, left commented out: `connection_object` is not defined in the
# visible part of this notebook (presumably a live database connection is needed).
# df.to_sql("df", connection_object) 

## json

In [46]:
# Two-column sample frame for the JSON examples.
# Note: this rebinds `df`, replacing the single-column `values` frame defined earlier.
df = pd.DataFrame(
    data={
        "name": ["mashiro", "satori", "koishi"],
        "age": [17, 17, 16],
    }
)


In [47]:
# Serialise df to a JSON file; no orient is given, so pandas' default layout is used.
df.to_json("file_for_read/df.json")

### 移除索引


In [6]:
# Same export without the row index: with orient="split", index=False drops the
# "index" entry and keeps only "columns" and "data".
df.to_json("file_for_read/df_no_index.json", index=False, orient="split")

### orient

In [48]:
# orient="split" with index=False: the result has only "columns" and "data".
# json.loads turns the JSON text back into Python objects so the structure is
# easy to inspect. (`json` is imported in the notebook's import cell.)
json.loads(df.to_json(index=False, orient="split"))

{'columns': ['name', 'age'],
 'data': [['mashiro', 17], ['satori', 17], ['koishi', 16]]}

In [49]:
# orient="split": separate "columns", "index" and "data" entries.
json.loads(df.to_json(orient="split"))

{'columns': ['name', 'age'],
 'index': [0, 1, 2],
 'data': [['mashiro', 17], ['satori', 17], ['koishi', 16]]}

In [50]:
# orient="records": a list with one {column: value} dict per row; the index is not included.
json.loads(df.to_json(orient="records"))

[{'name': 'mashiro', 'age': 17},
 {'name': 'satori', 'age': 17},
 {'name': 'koishi', 'age': 16}]

In [51]:
# orient="index": {index label: {column: value}}; the index labels come back as string keys.
json.loads(df.to_json(orient="index"))

{'0': {'name': 'mashiro', 'age': 17},
 '1': {'name': 'satori', 'age': 17},
 '2': {'name': 'koishi', 'age': 16}}

In [52]:
# orient="columns": {column: {index label: value}}; the index labels come back as string keys.
json.loads(df.to_json(orient="columns"))

{'name': {'0': 'mashiro', '1': 'satori', '2': 'koishi'},
 'age': {'0': 17, '1': 17, '2': 16}}

In [53]:
# orient="values": a bare list of row value lists, with neither column names nor index.
json.loads(df.to_json(orient="values"))

[['mashiro', 17], ['satori', 17], ['koishi', 16]]

In [54]:
# orient="table": a "schema" entry (field names/types, primary key, pandas_version)
# plus "data" as a list of row records that include the index.
json.loads(df.to_json(orient="table"))

{'schema': {'fields': [{'name': 'index', 'type': 'integer'},
   {'name': 'name', 'type': 'string'},
   {'name': 'age', 'type': 'integer'}],
  'primaryKey': ['index'],
  'pandas_version': '1.4.0'},
 'data': [{'index': 0, 'name': 'mashiro', 'age': 17},
  {'index': 1, 'name': 'satori', 'age': 17},
  {'index': 2, 'name': 'koishi', 'age': 16}]}

# 导入


## csv


In [7]:
# pd.read_csv accepts a great many parameters; the most useful ones are:
#   index_col: column(s) to use as the row index (labels)
#   sep / delimiter: field separator, "," by default
#   header: position of the header row (column labels); the default, "infer",
#       takes the first row as the header
#   usecols: columns to load, given by position or by name
#   true_values / false_values: values to interpret as boolean True / False
#   skiprows: rows to skip, given as row numbers, an index, or a function
#   skipfooter: number of rows to skip at the end of the file
#   nrows: number of rows to read
#   na_values: values to interpret as missing
#
# df.csv was saved together with its row index, so that index is read back here
# as an ordinary "Unnamed: 0" column.
dataframe_from_csv = pd.read_csv(filepath_or_buffer="file_for_read/df.csv")
dataframe_from_csv

Unnamed: 0.1,Unnamed: 0,values
0,0,5
1,1,10
2,2,15
3,3,20
4,4,25


## excel


In [8]:
# pd.read_excel is very close to read_csv above. It adds a sheet_name parameter
# for choosing the worksheet and, unlike the CSV reader, has no sep / delimiter
# parameter.
#
# skiprows may also be a callable: for example, a lambda that keeps row 0 and a
# random ~10% of the remaining rows would load the header plus a sample of the
# data. This cell does NOT pass skiprows; it reads the whole sheet.
#
# df.xlsx was saved together with its row index, so that index is read back here
# as an ordinary "Unnamed: 0" column.
dataframe_from_excel = pd.read_excel(io="file_for_read/df.xlsx")
dataframe_from_excel

Unnamed: 0.1,Unnamed: 0,values
0,0,5
1,1,10
2,2,15
3,3,20
4,4,25


## sql


In [9]:
# Left commented out: `query` and `connection_object` are not defined in the
# visible part of this notebook.
# pd.read_sql(query, connection_object)  # read from a SQL table / database

## json


In [10]:
# Load the JSON file written by the export section back into a DataFrame.
# NOTE(review): the captured output below shows the single-column `values` frame,
# but in top-to-bottom order df.json is written after `df` has been rebound to the
# name/age frame. The cells appear to have been run out of order; re-run the
# notebook from a fresh kernel to confirm the output.
pd.read_json("file_for_read/df.json") 

Unnamed: 0,values
0,5
1,10
2,15
3,20
4,25


## html


In [11]:
# Left commented out: `url` is not defined in the visible part of this notebook.
# pd.read_html(url) # parse an HTML URL, string or file and extract its tables into a list of DataFrames

## clipboard


In [12]:
# NOTE(review): the result depends on whatever is on the system clipboard when
# this cell runs, so the captured output below is not reproducible.
pd.read_clipboard()  # read the clipboard contents and pass them to read_table()

Unnamed: 0,古明地觉


# 转换

## DataFrame -> Dict


In [43]:
# Sample frame used by the to_dict orient examples below.
df_for_dict = pd.DataFrame(
    data={
        "name": ["mashiro", "satori", "koishi"],
        "age": [17, 17, 16],
    }
)

### orient

In [44]:
# orient="dict": {column: {index label: value}}.
df_for_dict.to_dict(orient="dict")

{'name': {0: 'mashiro', 1: 'satori', 2: 'koishi'},
 'age': {0: 17, 1: 17, 2: 16}}

In [45]:
# orient="split": separate "index", "columns" and "data" entries.
df_for_dict.to_dict(orient="split")

{'index': [0, 1, 2],
 'columns': ['name', 'age'],
 'data': [['mashiro', 17], ['satori', 17], ['koishi', 16]]}

In [55]:
# orient="list": {column: [values]}; the index is dropped.
df_for_dict.to_dict(orient="list")

{'name': ['mashiro', 'satori', 'koishi'], 'age': [17, 17, 16]}

In [56]:
# orient="series": {column: Series}; each value is still a pandas Series with its dtype.
df_for_dict.to_dict(orient="series")

{'name': 0    mashiro
 1     satori
 2     koishi
 Name: name, dtype: object,
 'age': 0    17
 1    17
 2    16
 Name: age, dtype: int64}

In [57]:
# orient="records": a list with one {column: value} dict per row; the index is dropped.
df_for_dict.to_dict(orient="records")

[{'name': 'mashiro', 'age': 17},
 {'name': 'satori', 'age': 17},
 {'name': 'koishi', 'age': 16}]

In [58]:
# orient="index": {index label: {column: value}}.
df_for_dict.to_dict(orient="index")

{0: {'name': 'mashiro', 'age': 17},
 1: {'name': 'satori', 'age': 17},
 2: {'name': 'koishi', 'age': 16}}

## Dict -> DataFrame

In [59]:
# Dict of dicts: the outer keys (0, 1) identify records, the inner dicts hold the fields.
data = {
    0: {
        "age": 17,
        "name": "mashiro",
    },
    1: {
        "age": 17,
        "name": "satori",
    },
}

In [63]:
# orient="columns" is the default: each outer key becomes a column and the
# inner keys become the row labels.
pd.DataFrame.from_dict(data, orient="columns")

Unnamed: 0,0,1
age,17,17
name,mashiro,satori


In [62]:
# orient="index": each outer key becomes a row label and the inner keys become the columns.
pd.DataFrame.from_dict(data, orient="index")

Unnamed: 0,age,name
0,17,mashiro
1,17,satori


## list[dict] -> DataFrame

In [66]:
# A list of row dicts: every dict is one row and its keys become the column labels.
data = [
    {"age": 17, "name": "mashiro"},
    {"age": 17, "name": "satori"},
]
pd.DataFrame.from_records(data=data)

Unnamed: 0,age,name
0,17,mashiro
1,17,satori
