# Crash Course
Brief overviews and snippets for frameworks that are important to understand.

## NumPy

### Imports

In [1]:
import numpy as np

### Creating Arrays (Basics)

In [2]:
# Build a 1-D array from a Python list.
np.array([1,2,3])

array([1, 2, 3])

In [3]:
# A list of equal-length lists becomes a 2-D array (here 3x3).
np.array([[1,2,3],[4,5,6],[7,8,9]])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [4]:
# Start at 0, step by 2, stop before 11 (the stop value is exclusive).
np.arange(0,11,2)

array([ 0,  2,  4,  6,  8, 10])

In [5]:
# 1-D array of five zeros; the trailing dots in the output show they are floats.
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [6]:
# 5x5 array of ones; adding the scalar 4 is applied to every element.
np.ones((5,5)) + 4

array([[5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.]])

In [7]:
# Evenly spaced numbers from 0 to 10, both endpoints included.
# The third argument is the number of points to generate, not the step size.
np.linspace(0,10,3)

array([ 0.,  5., 10.])

In [8]:
# Identity matrix: ones on the main diagonal, zeros elsewhere
# (can also serve as a diagonal mask).
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### Creating Arrays (Continued)

In [9]:
# 4x4 array of uniform samples from [0, 1).
# NOTE: no seed is set in the cells above, so these values change on every run.
np.random.rand(4,4)

array([[2.97770630e-01, 5.92288967e-01, 2.22138848e-01, 7.25238298e-01],
       [5.78358643e-01, 2.50087906e-01, 7.15910950e-01, 3.87044500e-01],
       [4.34091343e-05, 4.81161874e-01, 3.33693837e-01, 7.81514506e-01],
       [7.72415584e-01, 6.92097319e-01, 9.37535459e-01, 4.20624693e-01]])

In [10]:
# Ten samples from the standard normal distribution (mean 0, variance 1).
np.random.randn(10)

array([-0.61356749, -0.31937312, -0.59461221,  2.33554656,  0.36877517,
        0.68447158, -0.9184899 , -1.47950088, -0.0924706 , -0.01301354])

In [11]:
# Ten random integers from 10 (inclusive) up to 100 (exclusive).
np.random.randint(10, 100, 10)

array([55, 63, 87, 37, 94, 35, 24, 39, 50, 37])

In [12]:
# Seeding the global generator makes the draws that follow repeatable.
np.random.seed(42)
np.random.rand(4)

array([0.37454012, 0.95071431, 0.73199394, 0.59865848])

In [13]:
# Re-seeding with the same value replays the same sequence,
# so this output matches the previous cell's output.
np.random.seed(42)
np.random.rand(4)

array([0.37454012, 0.95071431, 0.73199394, 0.59865848])

In [14]:
# Integers 0 through 24, used by the examples that follow.
arr = np.arange(25)

### Useful Functions

In [15]:
# Get the max value in the array and the index at which it occurs.
# Only a cell's last expression is displayed, so both results are returned
# together as a (max, argmax) tuple instead of discarding the first one.
arr.max(), arr.argmax()

(24, 24)

### Selection

In [16]:
# Rebind arr to the integers 0 through 10 for the selection examples.
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [17]:
# Single element by zero-based position.
arr[5]

5

In [18]:
# Slice from index 1 up to (not including) index 5.
# A basic slice is a view onto the original array's data, not a copy,
# so writing to the slice also changes arr.
arr[1:5]

array([1, 2, 3, 4])

In [19]:
# Broadcasting: the scalar is combined with every element of the array.
arr + 100 # Returns a new array; arr itself is unchanged

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

In [20]:
# Rebind arr to a 3x3 array for the 2-D indexing examples.
arr = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [21]:
# Chained indexing: arr[1] selects row 1, then [1] selects its second element.
arr[1][1]

5

In [22]:
# Same element via a single (row, column) index.
arr[1,1]

5

In [23]:
# All rows, columns from index 2 onward; slicing keeps the result 2-D (3x1 here).
arr[:,2:]

array([[3],
       [6],
       [9]])

### Conditional Selection

In [24]:
# Rebind arr to the integers 1 through 10.
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [25]:
# Element-wise comparison yields a boolean array of the same length.
arr > 4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [26]:
# Boolean-mask indexing: keep only the elements where the condition is True.
arr[arr > 4]

array([ 5,  6,  7,  8,  9, 10])

### Operations

In [27]:
# Reset arr to the integers 1 through 10 for the operations examples.
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [28]:
# Arithmetic between arrays of the same shape is element-wise.
arr + arr

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [29]:
# Universal functions such as np.sin act on each element (inputs are in radians).
np.sin(arr)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427,
       -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111])

In [30]:
# Total of all elements.
arr.sum()

55

In [31]:
# Arithmetic mean of all elements.
arr.mean()

5.5

In [32]:
# 3x3 array used for the axis examples below.
arr_2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr_2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [33]:
# axis=0 collapses the rows: sum down each column, giving one total per column.
arr_2d.sum(axis=0)

array([12, 15, 18])

In [34]:
# axis=1 collapses the columns: sum along each row, giving one total per row.
arr_2d.sum(axis=1)

array([ 6, 15, 24])

## Pandas

### Imports

In [35]:
import pandas as pd

### Series

In [36]:
# A Series is a 1-D sequence of values with a label attached to each entry.
series = pd.Series(data=[4,5,6], index=['One', 'Two', 'Three'])
series

One      4
Two      5
Three    6
dtype: int64

In [37]:
# Look up a value by its index label.
series['One']

4

In [38]:
# Arithmetic between Series is element-wise, aligned on index labels.
series + series

One       8
Two      10
Three    12
dtype: int64

### DataFrames

In [39]:
# 5x4 array of standard-normal samples.
# The global seed was last set (to 42) in an earlier cell, so on a
# top-to-bottom run these values are repeatable.
random_matrix = np.random.randn(5,4)
random_matrix

array([[-0.23415337, -0.23413696,  1.57921282,  0.76743473],
       [-0.46947439,  0.54256004, -0.46341769, -0.46572975],
       [ 0.24196227, -1.91328024, -1.72491783, -0.56228753],
       [-1.01283112,  0.31424733, -0.90802408, -1.4123037 ],
       [ 1.46564877, -0.2257763 ,  0.0675282 , -1.42474819]])

In [40]:
# Wrap the array in a DataFrame; with no labels supplied, rows and columns
# receive default integer labels.
df = pd.DataFrame(data=random_matrix)
df

Unnamed: 0,0,1,2,3
0,-0.234153,-0.234137,1.579213,0.767435
1,-0.469474,0.54256,-0.463418,-0.46573
2,0.241962,-1.91328,-1.724918,-0.562288
3,-1.012831,0.314247,-0.908024,-1.412304
4,1.465649,-0.225776,0.067528,-1.424748


In [41]:
# A list of column labels selects several columns and returns a DataFrame.
df[[0,1]]

Unnamed: 0,0,1
0,-0.234153,-0.234137
1,-0.469474,0.54256
2,0.241962,-1.91328
3,-1.012831,0.314247
4,1.465649,-0.225776


In [42]:
# Assigning to a new label adds a column to df in place;
# here it is the element-wise sum of columns 0 and 1.
df['new_column'] = df[0] + df[1]
df

Unnamed: 0,0,1,2,3,new_column
0,-0.234153,-0.234137,1.579213,0.767435,-0.46829
1,-0.469474,0.54256,-0.463418,-0.46573,0.073086
2,0.241962,-1.91328,-1.724918,-0.562288,-1.671318
3,-1.012831,0.314247,-0.908024,-1.412304,-0.698584
4,1.465649,-0.225776,0.067528,-1.424748,1.239872


In [43]:
# drop returns a new DataFrame without the column; df itself is not modified,
# which is why 'new_column' still appears in the cells that follow.
df.drop(columns='new_column')

Unnamed: 0,0,1,2,3
0,-0.234153,-0.234137,1.579213,0.767435
1,-0.469474,0.54256,-0.463418,-0.46573
2,0.241962,-1.91328,-1.724918,-0.562288
3,-1.012831,0.314247,-0.908024,-1.412304
4,1.465649,-0.225776,0.067528,-1.424748


In [44]:
# iloc selects by integer position; a list of positions returns those rows
# as a DataFrame.
df.iloc[[1,2]]

Unnamed: 0,0,1,2,3,new_column
1,-0.469474,0.54256,-0.463418,-0.46573,0.073086
2,0.241962,-1.91328,-1.724918,-0.562288,-1.671318


### Conditional Selection

In [45]:
# Element-wise comparison returns a DataFrame of booleans with the same shape.
df > 0

Unnamed: 0,0,1,2,3,new_column
0,False,False,True,True,False
1,False,True,False,False,True
2,True,False,False,False,False
3,False,True,False,False,False
4,True,False,True,False,True


In [46]:
# Masking with a boolean DataFrame keeps the shape and puts NaN wherever
# the mask is False.
df[df > 0]

Unnamed: 0,0,1,2,3,new_column
0,,,1.579213,0.767435,
1,,0.54256,,,0.073086
2,0.241962,,,,
3,,0.314247,,,
4,1.465649,,0.067528,,1.239872


In [47]:
# Masking with a boolean Series filters rows: keep the rows where column 1
# is positive.
df[df[1] > 0]

Unnamed: 0,0,1,2,3,new_column
1,-0.469474,0.54256,-0.463418,-0.46573,0.073086
3,-1.012831,0.314247,-0.908024,-1.412304,-0.698584


In [48]:
# To combine conditions use & (and) / | (or), not the keywords 'and' / 'or'.
# Each condition needs its own parentheses because & and | bind more tightly
# than the comparison operators.
# No row in this data satisfies both conditions, so the result is empty.
df[(df[1] > 0) & (df[0] > 0)]

Unnamed: 0,0,1,2,3,new_column


### Additional 

In [49]:
# Print a summary of the index, column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   0           5 non-null      float64
 1   1           5 non-null      float64
 2   2           5 non-null      float64
 3   3           5 non-null      float64
 4   new_column  5 non-null      float64
dtypes: float64(5)
memory usage: 328.0 bytes


In [50]:
# Count how many times each distinct value occurs in column 0.
df[0].value_counts()

-0.234153    1
-0.469474    1
 0.241962    1
-1.012831    1
 1.465649    1
Name: 0, dtype: int64

In [51]:
# Summary statistics per numeric column, transposed so that each original
# column becomes a row.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
0,5.0,-0.00177,0.936445,-1.012831,-0.469474,-0.234153,0.241962,1.465649
1,5.0,-0.303277,0.961727,-1.91328,-0.234137,-0.225776,0.314247,0.54256
2,5.0,-0.289924,1.23418,-1.724918,-0.908024,-0.463418,0.067528,1.579213
3,5.0,-0.619527,0.898258,-1.424748,-1.412304,-0.562288,-0.46573,0.767435
new_column,5.0,-0.305047,1.06981,-1.671318,-0.698584,-0.46829,0.073086,1.239872


### Operations

In [52]:
# Distinct values in column 0, returned as a NumPy array.
df[0].unique()

array([-0.23415337, -0.46947439,  0.24196227, -1.01283112,  1.46564877])

In [53]:
# Number of distinct values in column 0.
df[0].nunique()

5

In [54]:
# Frequency of each distinct value in column 0
# (the same call already shown in the "Additional" section).
df[0].value_counts()

-0.234153    1
-0.469474    1
 0.241962    1
-1.012831    1
 1.465649    1
Name: 0, dtype: int64

In [55]:
def times_x(number, times):
    """Return ``number`` multiplied by ``times``.

    Both arguments are combined with the ``*`` operator, so any pair of
    operands that supports ``*`` is accepted.
    """
    product = number * times
    return product

# apply calls times_x once per value of column 0; extra keyword arguments
# given to apply are forwarded to the function, so no lambda wrapper is needed.
df['times'] = df[0].apply(times_x, times=2)
df

Unnamed: 0,0,1,2,3,new_column,times
0,-0.234153,-0.234137,1.579213,0.767435,-0.46829,-0.468307
1,-0.469474,0.54256,-0.463418,-0.46573,0.073086,-0.938949
2,0.241962,-1.91328,-1.724918,-0.562288,-1.671318,0.483925
3,-1.012831,0.314247,-0.908024,-1.412304,-0.698584,-2.025662
4,1.465649,-0.225776,0.067528,-1.424748,1.239872,2.931298


In [56]:
# Column labels; mixing integer and string labels gives the Index an object dtype.
df.columns

Index([0, 1, 2, 3, 'new_column', 'times'], dtype='object')

In [57]:
# Row labels; here the default RangeIndex running from 0 to 4.
df.index

RangeIndex(start=0, stop=5, step=1)

In [58]:
# 'by' expects a column label, so despite its name column_index holds the
# integer label 0 rather than a position.
# Sort rows by that column, largest first; a new DataFrame is returned.
column_index = 0
df.sort_values(by=column_index, ascending=False)

Unnamed: 0,0,1,2,3,new_column,times
4,1.465649,-0.225776,0.067528,-1.424748,1.239872,2.931298
2,0.241962,-1.91328,-1.724918,-0.562288,-1.671318,0.483925
0,-0.234153,-0.234137,1.579213,0.767435,-0.46829,-0.468307
1,-0.469474,0.54256,-0.463418,-0.46573,0.073086,-0.938949
3,-1.012831,0.314247,-0.908024,-1.412304,-0.698584,-2.025662
