https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#dsintro  
https://numpy.org/devdocs/user/quickstart.html

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## numpy

In [11]:
# Mixed int/float literals are upcast to one common dtype (float64 here).
a = np.array([0, 1, 2, 7.43])

In [12]:
# Summarise the array's core attributes, one "label value" line each,
# followed by the array itself.
attribute_report = (
    ('a.ndim =', a.ndim),          # number of axes
    ('a.shape =', a.shape),        # length along each axis
    ('a.size =', a.size),          # total number of elements
    ('a.dtype =', a.dtype),        # element type
    ('a.itemsize =', a.itemsize),  # bytes per element
    ('a.data =', a.data),          # underlying buffer object
    ('a =', a),
)
for label, value in attribute_report:
    print(label, value)

a.ndim = 1
a.shape = (4,)
a.size = 4
a.dtype = float64
a.itemsize = 8
a.data = <memory at 0x00000223229C2400>
a = [0.   1.   2.   7.43]


In [13]:
# 1-D array of ten ones (floating point by default).
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [14]:
# 1-D array of twenty zeros (floating point by default).
np.zeros(20)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [15]:
# np.empty allocates without initialising: the values displayed are whatever
# happened to be in that memory, so never read them before assigning.
np.empty(5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [19]:
# Half-open range [1, 10) in steps of 0.5; the stop value 10 is excluded.
# For non-integer steps np.linspace gives a more predictable element count.
np.arange(1, 10, 0.5)

array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. ,
       7.5, 8. , 8.5, 9. , 9.5])

In [20]:
# Nine evenly spaced samples from 0 to 2, both endpoints included.
np.linspace(0, 2, 9)

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

* basic operations apply element-wise like R
* matrix multiplication: use the `@` operator, e.g., `a @ b`
> Not a Number (NaN), positive infinity and negative infinity evaluate to `True` when treated as booleans (e.g., by `np.any` and `np.all`) because they are not equal to zero.

### NumPy Functions and Methods Overview
https://numpy.org/devdocs/user/quickstart.html#functions-and-methods-overview

## pandas
> Here is a basic tenet to keep in mind: data alignment is intrinsic. The link between labels and data will not be broken unless done so explicitly by you.

### Series

* 1-dimensional
* labelled
* array
* like an R atomic vector
* index is like rownames or positional names in R
* uses `NaN` like R's `NA`
* `s.to_numpy()` converts to an `ndarray`
* they are `ndarray`-like in some respects and dictionary-like in others

In [24]:
# Load the sample CSV into a DataFrame; the relative path is resolved against
# the kernel's current working directory.
pluto = pd.read_csv('pluto20rows.csv')

In [87]:
# Row labels as a plain Python list.
pluto.index.to_list()

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [88]:
# Column labels of the loaded frame.
pluto.columns

Index(['borough', 'block', 'lot', 'bbl', 'numfloors', 'yearbuilt', 'bldgclass',
       'lotarea', 'bldgarea', 'xcoord', 'ycoord', 'count'],
      dtype='object')

In [89]:
# Helper column of ones, so that summing it per group yields a row count.
# NOTE(review): the saved `pluto.columns` output above already lists 'count' —
# either the CSV ships that column or cells were run out of order; confirm
# with Restart & Run All.
pluto['count'] = 1

In [91]:
# Group rows by (yearbuilt, borough); this returns a DataFrameGroupBy object,
# not an aggregated frame.
g = pluto.groupby(['yearbuilt', 'borough'])

In [92]:
# Inspect the GroupBy object. Only the last bare expression of a cell is
# displayed, so the type must be printed explicitly to appear in the output.
print(type(g))
print(g)

# Non-null count per column for each (yearbuilt, borough) group, computed once
# and reused for both the table and its index.
group_counts = g.count()
print(group_counts)
print(group_counts.index)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000223257F3340>
                   block  lot  bbl  numfloors  bldgclass  lotarea  bldgarea  \
yearbuilt borough                                                             
0         BK           9    9    9          9          9        9         9   
          BX           3    3    3          3          3        3         3   
          QN           3    3    3          3          3        3         3   
1899      MN           1    1    1          1          1        1         1   
1920      BK           1    1    1          1          1        1         1   
1928      MN           1    1    1          1          1        1         1   
1931      BK           1    1    1          1          1        1         1   
1965      BK           1    1    1          1          1        1         1   

                   xcoord  ycoord  count  
yearbuilt borough                         
0         BK            9       9      9  
    

In [103]:
# Rows per group: sum of the constant-1 'count' column. The result is a Series
# indexed by a (yearbuilt, borough) MultiIndex.
# NOTE(review): the name `gc` would shadow the stdlib `gc` module if that were
# ever imported in this notebook.
gc = g['count'].sum()

In [108]:
# Count rows per (yearbuilt, numfloors) pair. Selecting 'count' before
# aggregating avoids counting every other column only to discard the results;
# reset_index() turns the group keys back into ordinary columns.
pluto.groupby(['yearbuilt', 'numfloors'])['count'].count().reset_index()

Unnamed: 0,yearbuilt,numfloors,count
0,0,0,15
1,1899,3,1
2,1920,2,1
3,1928,5,1
4,1931,2,1
5,1965,3,1


In [109]:
# Plain-text view of the per-group counts.
print(gc)

yearbuilt  borough
0          BK         9
           BX         3
           QN         3
1899       MN         1
1920       BK         1
1928       MN         1
1931       BK         1
1965       BK         1
Name: count, dtype: int64


In [112]:
# Flatten the MultiIndex into a 1-D object array of (yearbuilt, borough) tuples.
gcn = gc.index.to_numpy()

In [113]:
# 1-D: one element per index entry.
gcn.shape

(8,)

In [114]:
# Object dtype, because each element is a Python tuple.
gcn.dtype

dtype('O')

In [115]:
# Each element is a (yearbuilt, borough) tuple.
type(gcn[0])

tuple

In [116]:
# The index of the two-key groupby result is a MultiIndex.
type(gc.index)

pandas.core.indexes.multi.MultiIndex

In [117]:
# Keep a handle on the MultiIndex for the inspection cells below.
gci = gc.index

In [118]:
# Same object as gc.index, hence a MultiIndex.
type(gci)

pandas.core.indexes.multi.MultiIndex

In [119]:
# Full repr: one (yearbuilt, borough) tuple per group, plus the level names.
gci

MultiIndex([(   0, 'BK'),
            (   0, 'BX'),
            (   0, 'QN'),
            (1899, 'MN'),
            (1920, 'BK'),
            (1928, 'MN'),
            (1931, 'BK'),
            (1965, 'BK')],
           names=['yearbuilt', 'borough'])

In [120]:
# Distinct values of each level (yearbuilt first, then borough).
gci.levels

FrozenList([[0, 1899, 1920, 1928, 1931, 1965], ['BK', 'BX', 'MN', 'QN']])

In [121]:
# levels is exposed as a pandas FrozenList rather than a plain list.
type(gci.levels)

pandas.core.indexes.frozen.FrozenList

In [122]:
# Per-level integer codes: for every row, the position of its value within the
# corresponding entry of gci.levels.
gci.codes

FrozenList([[0, 0, 0, 1, 2, 3, 4, 5], [0, 1, 3, 2, 0, 2, 0, 0]])

In [123]:
# Level names, which match the groupby keys.
gci.names

FrozenList(['yearbuilt', 'borough'])

In [124]:
# Number of levels in the index.
gci.nlevels

2

In [125]:
# Number of distinct values in each level, in level order.
gci.levshape

(6, 4)

In [126]:
# Turn the index levels into DataFrame columns (one column per level).
gcif = gci.to_frame()

In [127]:
# (rows, columns): one row per index entry, one column per index level.
gcif.shape

(8, 2)

In [128]:
# Row count only.
gcif.shape[0]

8

In [129]:
# Replace the MultiIndex row labels with a default 0..n-1 index.
# reset_index(drop=True) discards the old index rather than inserting it as
# columns (which would collide with the existing yearbuilt/borough columns)
# and returns a new frame instead of mutating the existing one in place.
gcif = gcif.reset_index(drop=True)

In [130]:
# Show the index levels as columns, now with a plain 0..n-1 row index.
gcif

Unnamed: 0,yearbuilt,borough
0,0,BK
1,0,BX
2,0,QN
3,1899,MN
4,1920,BK
5,1928,MN
6,1931,BK
7,1965,BK


In [131]:
# Sum every numeric column within each (yearbuilt, borough) group.
# Passing np.sum to aggregate() is deprecated in recent pandas; the built-in
# reduction with numeric_only=True is the supported spelling and keeps
# non-numeric columns such as bldgclass out of the result.
g.sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,block,lot,bbl,numfloors,lotarea,bldgarea,xcoord,ycoord,count
yearbuilt,borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,BK,29888,757,27298880757,0,11607,0,8932252,1625406,9
0,BX,10484,57,6104840057,0,4016,0,3046287,744350,3
0,QN,18313,78,12183130078,0,6992,0,3084298,621192,3
1899,MN,1949,50,1019490050,3,1110,3360,996992,234157,1
1920,BK,6436,44,3064360044,2,1651,2508,982473,158966,1
1928,MN,1472,33,1014720033,5,2554,9000,997599,220113,1
1931,BK,6746,110,3067460110,2,1155,1870,996091,164469,1
1965,BK,6372,69,3063720069,3,1837,2821,984030,159620,1
