# Estruturas de dados em pandas

pandas.Series()          - 1 Dimensão  - 1D

pandas.DataFrame()  - 2 Dimensões - 2D

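pandas.Panel()           - 3 Dimensões - 3D (obsoleto desde o pandas 0.20 e removido no 0.25; nas versões atuais use um DataFrame com MultiIndex ou o pacote xarray)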

numpy.ndarray()     - N Dimensões - ND

### Panel

In [1]:
import pandas as pd
import numpy as np
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): presumably here to hide pandas' deprecation notices for Panel —
# confirm; a blanket filter also hides unrelated warnings.
warnings.filterwarnings("ignore")

In [2]:
# Raw figures as a 2 x 3 x 3 block: two 3x3 tables stacked along the first axis.
# The tables are later labelled 'FB' and 'NFLX', and the three readings in each
# row 'open price', 'closing price' and 'volume', when the Panel is built.
fb_rows = [
    [63.03, 61.48, 75],
    [62.05, 62.75, 46],
    [62.74, 62.19, 53],
]
nflx_rows = [
    [411.90, 404.38, 2.9],
    [405.45, 405.91, 2.6],
    [403.15, 404.42, 2.4],
]
# Mixing ints and floats in the literal yields a single float array.
stockData = np.array([fb_rows, nflx_rows])
stockData

array([[[ 63.03,  61.48,  75.  ],
        [ 62.05,  62.75,  46.  ],
        [ 62.74,  62.19,  53.  ]],

       [[411.9 , 404.38,   2.9 ],
        [405.45, 405.91,   2.6 ],
        [403.15, 404.42,   2.4 ]]])

In [3]:
# Confirm that the nested list literal was turned into a NumPy ndarray.
type(stockData)

numpy.ndarray

In [4]:
# Build the 3-D "cube": one table per item ('FB', 'NFLX'), with dates on the
# major axis and the three measures on the minor axis.
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so
# this cell only runs on older pandas releases.
item_labels = ['FB', 'NFLX']
trading_days = pd.date_range('2/3/2016', periods=3)
measure_labels = ['open price', 'closing price', 'volume']

stockHistoricalPrinces = pd.Panel(
    stockData,
    items=item_labels,
    major_axis=trading_days,
    minor_axis=measure_labels,
)

stockHistoricalPrinces

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 3 (major_axis) x 3 (minor_axis)
Items axis: FB to NFLX
Major_axis axis: 2016-02-03 00:00:00 to 2016-02-05 00:00:00
Minor_axis axis: open price to volume

### Criando um Panel a partir de um dicionário de DataFrames

In [5]:
# US figures: rows are the years 1995/2005/2015; the columns are population
# (millions) and GDP (billions of dollars), as the column labels indicate.
# Going through np.array coerces every value to float, which is why the GDP
# column is displayed as 8900.0 rather than 8900.
us_years = [1995, 2005, 2015]
us_figures = np.array([
    [249.62, 8900],
    [282.16, 12680],
    [309.35, 14940],
])
USData = pd.DataFrame(us_figures,
                      index=us_years,
                      columns=['População(M)', 'PIB($B)'])
USData

Unnamed: 0,População(M),PIB($B)
1995,249.62,8900.0
2005,282.16,12680.0
2015,309.35,14940.0


In [6]:
# China figures, laid out exactly like USData: years 1995/2005/2015 as rows,
# population (millions) and GDP (billions of dollars) as columns.
# NOTE(review): the 2015 GDP here is 9923.47, while the ChinaUSDF frame built
# further down uses 6988.47 for the same cell — confirm which value is intended.
china_years = [1995, 2005, 2015]
china_figures = np.array([
    [1133.68, 390.28],
    [1266.83, 1198.48],
    [1339.72, 9923.47],
])
ChinaData = pd.DataFrame(china_figures,
                         index=china_years,
                         columns=['População(M)', 'PIB($B)'])
ChinaData

Unnamed: 0,População(M),PIB($B)
1995,1133.68,390.28
2005,1266.83,1198.48
2015,1339.72,9923.47


In [7]:
# Bundle the two country tables under their country names; the keys keep their
# insertion order (US first, then China).
US_ChinaData = dict(US=USData, China=ChinaData)
US_ChinaData

{'US':       População(M)  PIB($B)
 1995        249.62   8900.0
 2005        282.16  12680.0
 2015        309.35  14940.0, 'China':       População(M)  PIB($B)
 1995       1133.68   390.28
 2005       1266.83  1198.48
 2015       1339.72  9923.47}

In [8]:
# Stack the per-country DataFrames into one 3-D Panel: the dict keys become the
# items axis, while the frames' index and columns become the major and minor axes.
# NOTE(review): pd.Panel was removed in pandas 0.25; this needs an older pandas.
pd.Panel(US_ChinaData)

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 3 (major_axis) x 2 (minor_axis)
Items axis: US to China
Major_axis axis: 1995 to 2015
Minor_axis axis: População(M) to PIB($B)

In [9]:
# Check the concrete class of the object built from the dict of DataFrames.
type(pd.Panel(US_ChinaData))

pandas.core.panel.Panel

### Convertendo DataFrame para Panel

In [10]:
# Two-level row index (country, year) built from explicit levels and codes.
# Each code is a position into the matching level list, so the country codes
# [1, 1, 1, 0, 0, 0] select 'China' three times and then 'US' three times, while
# the year codes [0, 1, 2, 0, 1, 2] cycle through 1995, 2005 and 2015.
# The codes are passed positionally on purpose: the keyword for this argument
# was `labels` up to pandas 0.23 and is `codes` from 0.24 on (`labels` was
# removed in 1.0), so the positional form runs on either side of the rename.
index_levels = [['US', 'China'], [1995, 2005, 2015]]
index_codes = [[1, 1, 1, 0, 0, 0], [0, 1, 2, 0, 1, 2]]
mIdx = pd.MultiIndex(index_levels, index_codes)
mIdx

MultiIndex(levels=[['US', 'China'], [1995, 2005, 2015]],
           labels=[[1, 1, 1, 0, 0, 0], [0, 1, 2, 0, 1, 2]])

In [11]:
# The country/year figures in "long" form: one row per (country, year) pair of
# mIdx, China's three years first and then the US ones.
# NOTE(review): China's 2015 GDP is 6988.47 here but 9923.47 in ChinaData above —
# confirm which value is intended.
population_m = [1133.68, 1266.83, 1339.72, 249.62, 282.16, 309.35]
gdp_b = [390.28, 1198.48, 6988.47, 8900, 12680, 14940]
ChinaUSDF = pd.DataFrame(
    {'População(M)': population_m, 'PIB($B)': gdp_b},
    index=mIdx,
)
ChinaUSDF

Unnamed: 0,Unnamed: 1,População(M),PIB($B)
China,1995,1133.68,390.28
China,2005,1266.83,1198.48
China,2015,1339.72,6988.47
US,1995,249.62,8900.0
US,2005,282.16,12680.0
US,2015,309.35,14940.0


In [12]:
# Pivot the two-level row index into a Panel: the columns become the items, the
# outer index level (country) the major axis and the inner level (year) the
# minor axis.
# NOTE(review): DataFrame.to_panel was removed together with Panel in pandas 0.25.
ChinaUSDF.to_panel()

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 2 (major_axis) x 3 (minor_axis)
Items axis: População(M) to PIB($B)
Major_axis axis: China to US
Minor_axis axis: 1995 to 2015

In [13]:
# Install xarray from the shell; the DataFrame.to_xarray() call in the next cell
# needs it.
# NOTE(review): the version is not pinned, so a fresh run may pull a different
# xarray release than the one that produced the saved outputs.
!pip install xarray



You are using pip version 18.0, however version 19.0.1 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [14]:
# Convert the MultiIndex frame to an xarray Dataset: each index level becomes a
# dimension (named level_0 / level_1 because the levels are unnamed) and each
# column becomes a data variable over those two dimensions.
ChinaUSDF.to_xarray()

<xarray.Dataset>
Dimensions:       (level_0: 2, level_1: 3)
Coordinates:
  * level_0       (level_0) object 'US' 'China'
  * level_1       (level_1) int64 1995 2005 2015
Data variables:
    População(M)  (level_0, level_1) float64 249.6 282.2 ... 1.267e+03 1.34e+03
    PIB($B)       (level_0, level_1) float64 8.9e+03 1.268e+04 ... 6.988e+03