# Pandas 基础数据结构

- 本笔记介绍以下三个数据结构（Series 与 DataFrame 由 Pandas 提供，Dataset 由 xarray 提供）
    - 系列 (Series)
    - 数据帧 (DataFrame)
    - 数据集 (Dataset)

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import pandas as pd
import numpy as np
import xarray as xr

def show_component(cmp):
    """Return the formatted text of ``cmp`` with every line tab-indented.

    The result begins with a newline followed by a tab, so the rendered
    value starts on its own indented line when appended to a label.
    """
    rendered = '{}'.format(cmp)
    # Prefixing each line with "\n\t" puts every line, including the
    # first, on a fresh tab-indented row.
    return ''.join('\n\t' + line for line in rendered.split('\n'))

def show_df(df):
    """Display ``df`` as an HTML table wrapped in a left-indented ``<div>``.

    ``df`` must provide a ``to_html()`` method; the resulting markup is
    handed to IPython's ``display``.
    """
    from IPython.display import HTML, display

    table_markup = df.to_html()
    # The 55px left margin is applied through an inline style on the wrapper.
    wrapped = '<div style="margin-left:55px">{}</div>'.format(table_markup)
    display(HTML(wrapped))

## Series

-  1D 数据结构，表示一个数据列

In [None]:
# Sample values for the series; one entry is NaN.
data = [1, 3, 5, np.nan, 6, 8]

# Index the entries 1..len(data).
series = pd.Series(data=data, index=list(range(1, len(data) + 1)))

# Print the series, its raw values, its summary statistics and its dtype,
# one labelled section per line group.
print(
    '* Series is: {}'.format(show_component(series)),
    '\n  then values are: {}'.format(show_component(series.values)),
    '\n  and summary is: {}'.format(show_component(series.describe())),
    '\n  and dtypes of series is: {}'.format(show_component(series.dtypes)),
    sep='\n'
)

## DataFrame

- 2D 数据结构，表示一个表格

### DataFrame 基本

- 设置 DataFrame 的列名和索引

In [None]:
# Number of rows in the demo table.
rows = 7
# One column per letter A-D.
columns = ['A', 'B', 'C', 'D']
# Row labels: `rows` dates starting at 2019-01-01 (pandas' default daily step).
indecs = pd.date_range(start='20190101', periods=rows)

- 准备一个矩阵

In [None]:
# Build a (rows, len(columns)) matrix holding 1..rows*len(columns).
data = (np.arange(rows * len(columns)) + 1).reshape((rows, len(columns)))
print('* data is: {} (shape={})'.format(show_component(data), data.shape))

- 生成 DataFrame 对象

In [None]:
# Assemble the table from the matrix, the date index and the column labels
# (passed positionally as data, index, columns).
df = pd.DataFrame(data, indecs, columns)
print('* data frame "df" is:')
show_df(df)

- 查看 DataFrame 属性

In [None]:
# Report the frame's dtypes, index, column labels and raw values, with the
# sections separated by newlines.
print(
    '* dtypes of "df" is: {}'.format(show_component(df.dtypes)),
    '\n* index of "df" is: {}'.format(show_component(df.index)),
    '\n* columns of "df" is: {}'.format(show_component(df.columns)),
    '\n* values of "df" is: {} (shape={})'.format(show_component(df.values), df.values.shape),
    sep='\n'
)

- DataFrame 内容概览

In [None]:
# Summary statistics produced by DataFrame.describe().
print('* describe of "df" is: ')
show_df(df.describe())

# Leading rows of the frame.
print('\n* head of "df" is:')
show_df(df.head())

# Trailing rows of the frame.
print('\n* tail of "df" is:')
show_df(df.tail())

# The frame with rows and columns swapped.
print('\n* transpose of "df" is: ')
show_df(df.T)

## xArray

- Dataset 表示由多个 DataArray 组成的数据集

In [None]:
# First source table: the integers 1..10 arranged as 2 rows x 5 columns.
df1 = pd.DataFrame(data=np.reshape(np.arange(10) + 1, (2, 5)))
print('* when dataframe "df1" is:')
show_df(df1)

# Wrap df1 as a DataArray named 'one' with labelled dimensions n1 and n2.
da_one = xr.DataArray(df1,
                      dims=['n1', 'n2'],
                      coords={'n1': ['x', 'y'], 'n2': ['a', 'b', 'c', 'd', 'e']},
                      name='one')
print('  then convert to DataArray "da_one" is: {}'.format(da_one))

# Second source table: the integers 1..18 arranged as 3 rows x 6 columns.
df2 = pd.DataFrame(data=np.reshape(np.arange(18) + 1, (3, 6)))
print('\n  and dataframe "df2" is:')
show_df(df2)

# Wrap df2 as a DataArray named 'two'; it has one more label on each
# dimension than da_one.
da_two = xr.DataArray(df2,
                      dims=['n1', 'n2'],
                      coords={'n1': ['x', 'y', 'z'], 'n2': ['a', 'b', 'c', 'd', 'e', 'f']},
                      name='two')
# Show the array that was just built (da_two), not da_one again.
print('  then convert to DataArray "da_two" is: {}'.format(da_two))

# Combine both arrays as the variables 'one' and 'two' of a single Dataset.
# NOTE(review): the arrays carry different n1/n2 labels; presumably xarray
# aligns them and fills the gaps in 'one' with NaN - confirm.
ds = xr.Dataset(data_vars={'one': da_one, 'two': da_two})
print('\n  then dataset "ds" is: {}'.format(show_component(ds)))
print('  then ds is: ')
show_df(ds.to_dataframe())