# NumPy

In [1]:
# now apply it to ndarray object
import numpy as np

# Declare a structured dtype with a single int8 field named 'age'.
dt = np.dtype([('age', np.int8)])
# dtype=dt must be passed explicitly; without it the tuples are turned into a
# plain 2-D integer array and the 'age' field does not exist.
a = np.array([(10,), (20,), (30,)], dtype=dt)
a

In [4]:
# A field name can be used to access the contents of the 'age' column.
dt = np.dtype([('age', 'i1')])  # 'i1' is the string code for int8
a = np.array([(10,), (20,), (30,)], dtype=dt)
a['age']

array([10, 20, 30], dtype=int8)

In [6]:
# Structured dtype for a student record:
# a 20-byte string name, an int8 age and a float32 marks field.
student = np.dtype([
    ('name', 'S20'),
    ('age', np.int8),
    ('marks', np.float32),
])
student

dtype([('name', 'S20'), ('age', 'i1'), ('marks', '<f4')])

In [7]:
# Array dimensions.
# Start from a 1-D array of evenly spaced integers 0..23.
a = np.arange(0, 24)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [8]:
# Number of dimensions (axes) of `a`.
np.ndim(a)

1

In [9]:
# View the 24 elements as 2 blocks of 4 rows x 3 columns.
b = np.reshape(a, (2, 4, 3))
b

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]],

       [[12, 13, 14],
        [15, 16, 17],
        [18, 19, 20],
        [21, 22, 23]]])

In [10]:
# Same data again, now as 2 blocks of 3 rows x 4 columns.
c = np.reshape(b, (2, 3, 4))
c

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [11]:
# Size in bytes of a single element of `c`.
c.dtype.itemsize

4

In [13]:
# empty(): allocate a 3x2 integer array without initialising it,
# so the displayed values are whatever happened to be in memory.
x = np.empty(shape=(3, 2), dtype=int)
x

array([[4325456, 4456543],
       [7602273,      97],
       [5177416, 4522061]])

In [14]:
# zeros(): five zeros; the dtype defaults to float.
x = np.zeros(shape=5)
x

array([0., 0., 0., 0., 0.])

In [17]:
# Same shape, but with an explicit integer dtype.
x = np.zeros(5, dtype=int)
x

array([0, 0, 0, 0, 0])

In [18]:
# ones(): five ones; the dtype defaults to float.
x = np.ones(shape=5)
x

array([1., 1., 1., 1., 1.])

In [19]:
# Same shape, but with an explicit integer dtype.
x = np.ones(5, dtype=int)
x

array([1, 1, 1, 1, 1])

In [22]:
# frombuffer(): interpret a bytes buffer as an array of 1-byte strings.
s = b'Hello World'
a = np.frombuffer(s, dtype='S1')
a

array([b'H', b'e', b'l', b'l', b'o', b' ', b'W', b'o', b'r', b'l', b'd'],
      dtype='|S1')

In [29]:
# arange(start, stop, step)
# NOTE: the step (30) is larger than the 10..20 span, so only the start
# value is produced; use a smaller step to get more elements.
s = np.arange(start=10, stop=20, step=30)
s

array([10])

In [24]:
# linspace(): 5 evenly spaced samples from 10 to 20, endpoint included.
s = np.linspace(10, 20, num=5)
s

array([10. , 12.5, 15. , 17.5, 20. ])

In [26]:
# With endpoint=False the stop value (20) is excluded from the samples.
s = np.linspace(10, 20, num=5, endpoint=False)
s

array([10., 12., 14., 16., 18.])

In [27]:
# logspace(): 10 samples spaced evenly on a log scale, from 10**1 to 10**2.
a = np.logspace(start=1.0, stop=2.0, num=10)
a

array([ 10.        ,  12.91549665,  16.68100537,  21.5443469 ,
        27.82559402,  35.93813664,  46.41588834,  59.94842503,
        77.42636827, 100.        ])

In [28]:
# Same idea with base 2: the powers 2**1 through 2**10.
a = np.logspace(start=1, stop=10, num=10, base=2)
a

array([   2.,    4.,    8.,   16.,   32.,   64.,  128.,  256.,  512.,
       1024.])

In [31]:
# slice(): build a reusable slice object (start=2, stop=7, step=2)
# and use it to index the array.
a = np.arange(0, 10)
s = slice(2, 7, 2)
a[s]

array([2, 4, 6])

In [34]:
# The same selection written with start:stop:step syntax.
a = np.arange(0, 10)
s = a[2:7:2]
s

array([2, 4, 6])

In [36]:
# Slice items starting from index 2 through the end of the array.
a = np.arange(0, 10)
a[2:]

array([2, 3, 4, 5, 6, 7, 8, 9])

In [38]:
# A 3x3 array used for the row-slicing example that follows.
a = np.array([
    [1, 2, 3],
    [3, 4, 5],
    [4, 5, 6],
])
a

array([[1, 2, 3],
       [3, 4, 5],
       [4, 5, 6]])

In [39]:
# Slice along the first axis: everything from index 1 onward.
a[1:, ...]

array([[3, 4, 5],
       [4, 5, 6]])

In [40]:
# Integer array indexing: pick elements (0, 0), (1, 1) and (2, 0).
x = np.array([[1, 2], [3, 4], [5, 6]])
y = x[
    [0, 1, 2],  # row indices
    [0, 1, 0],  # column indices
]
y

array([1, 4, 5])

In [41]:
# Element-wise addition of two arrays of equal shape.
a = np.array([1, 2, 3, 4])
b = np.array([10, 20, 30, 40])
np.add(a, b)

array([11, 22, 33, 44])

# Pandas

In [42]:
import pandas as pd

In [44]:
# 2-dimensional: a DataFrame built from a dict of equal-length lists,
# with explicit row labels and the dict keys as column order.
data = {
    'AAA': [4, 5, 6, 7],
    'BBB': [10, 20, 30, 40],
    'CCC': [100, 50, -30, -50],
}
df = pd.DataFrame(data, index=list('abcd'), columns=list(data))
df

Unnamed: 0,AAA,BBB,CCC
a,4,10,100
b,5,20,50
c,6,30,-30
d,7,40,-50


In [47]:
# 3-dimensional: a dict holding two 3x3 DataFrames of standard-normal samples.
data = {
    key: pd.DataFrame(np.random.randn(3, 3))
    for key in ('AA', 'BB')
}
data

{'AA':           0         1         2
 0  2.014170  0.366893 -2.618769
 1  0.646555  1.117764  0.259343
 2  0.386645  0.150462 -0.840213,
 'BB':           0         1         2
 0  0.586630 -0.030543  1.921158
 1  0.315853 -2.134161  1.416777
 2 -0.648493 -1.026471 -1.402053}

In [48]:
# Five samples from the standard normal distribution (values change on every run).
print(np.random.standard_normal(5))

[-0.32600149 -0.88131791  0.36716672 -1.1141078  -0.23339745]


In [49]:
# Series(): one with the default integer index, one with explicit string labels.
s = pd.Series(np.random.randn(5))
s1 = pd.Series(np.random.randn(5), index=list('abcde'))
print(s, type(s))
print(s1, type(s1))

0    0.049264
1    0.860323
2   -0.924535
3    0.634452
4    0.602195
dtype: float64 <class 'pandas.core.series.Series'>
a    0.518055
b   -0.342736
c   -0.637458
d    0.879413
e   -0.865826
dtype: float64 <class 'pandas.core.series.Series'>


In [51]:
# A Series built from a dict: the keys become the index labels.
d = dict(a=1, b=2, c=3)
s2 = pd.Series(d)
s2

a    1
b    2
c    3
dtype: int64

In [53]:
# Inspect the types behind a Series: its values and its index.
a = pd.Series(['aaa', 'bbb', 'ccc', 'ddd'], index=list('abcd'))
print(type(a.values))
print(type(a.index))

<class 'numpy.ndarray'>
<class 'pandas.core.indexes.base.Index'>


In [55]:
# Index types and element access on Series: default index, explicit integer
# index, and string-label index.
sr = pd.Series(np.arange(4))
sr1 = pd.Series(np.arange(4), index=[0, 1, 2, 3])
sr2 = pd.Series(np.arange(4), index=['a', 'b', 'c', 'd'])

print(sr.index)
print(sr1.index)
print(sr2.index)

# With an integer index, [0] is a lookup of the label 0.
print(sr[0])
print(sr1[0])
# sr2 has string labels, so the first element is requested by position with
# .iloc; sr2[0] would rely on a deprecated positional fallback.
print(sr2.iloc[0])
print(sr2['a'])

RangeIndex(start=0, stop=4, step=1)
Int64Index([0, 1, 2, 3], dtype='int64')
Index(['a', 'b', 'c', 'd'], dtype='object')
0
0
0
0


In [57]:
# Slicing a Series: the positional slice 0:3 stops before position 3,
# while the label slice 'a':'c' includes the end label 'c'.
s = pd.Series([0, 1, 2, 3, 4])
s1 = pd.Series([0, 1, 2, 3, 4], index=list('abcde'))
print(s[0:3])
print(s1['a':'c'])

0    0
1    1
2    2
dtype: int64
a    0
b    1
c    2
dtype: int64


In [61]:
# DataFrame: a single-column frame built from a list of names.
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']
d = pd.DataFrame({'Names': names})
d

Unnamed: 0,Names
0,Bob
1,Jessica
2,Mary
3,John
4,Mel


In [59]:
# zip(): pair each name with its birth count, then load the pairs as rows.
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']
births = [968, 155, 77, 578, 973]
BabyDataSet = [(name, count) for name, count in zip(names, births)]

df = pd.DataFrame(BabyDataSet, columns=['Names', 'Births'])
df

Unnamed: 0,Names,Births
0,Bob,968
1,Jessica,155
2,Mary,77
3,John,578
4,Mel,973


In [71]:
# dict of lists -> DataFrame, with explicit row labels and the dict's
# key order as column order.
data = {
    'AAA': [4, 5, 6, 7],
    'BBB': [10, 20, 30, 40],
    'CCC': [100, 50, -30, -50],
}

df = pd.DataFrame(data, index=list('abcd'), columns=list(data))
df

Unnamed: 0,AAA,BBB,CCC
a,4,10,100
b,5,20,50
c,6,30,-30
d,7,40,-50


In [73]:
# dict of dicts -> DataFrame: outer keys become columns, inner keys become row labels.
data = {
    'AAA': {'a': 4, 'b': 5, 'c': 6, 'd': 7},
    'BBB': {'a': 10, 'b': 20, 'c': 30, 'd': 40},
    'CCC': {'a': 100, 'b': 50, 'c': -30, 'd': -50},
}
df = pd.DataFrame(data)
df

Unnamed: 0,AAA,BBB,CCC
a,4,10,100
b,5,20,50
c,6,30,-30
d,7,40,-50


In [74]:
# The `columns` argument selects and orders the dict's keys;
# with no `index` given, rows get the default 0..n-1 labels.
data = {
    'AAA': [4, 5, 6, 7],
    'BBB': [10, 20, 30, 40],
    'CCC': [100, 50, -30, -50],
}

df = pd.DataFrame(data, columns=['BBB', 'CCC', 'AAA'])
df

Unnamed: 0,BBB,CCC,AAA
0,10,100,4
1,20,50,5
2,30,-30,6
3,40,-50,7
