# Crash Course
Brief overviews and code snippets for frameworks that are important to understand.

## NumPy

### Imports

In [2]:
import numpy as np

### Creating Arrays (Basics)

In [13]:
np.array([1,2,3])

array([1, 2, 3])

In [12]:
np.array([[1,2,3],[4,5,6],[7,8,9]])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [9]:
# Values from 0 up to (but not including) 11, in steps of 2.
np.arange(0,11,2)

array([ 0,  2,  4,  6,  8, 10])

In [14]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [15]:
# The scalar 4 is added to every element of the 5x5 array of ones.
np.ones((5,5)) + 4

array([[5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.]])

In [17]:
# 3 evenly spaced values from 0 to 10; both endpoints are included.
np.linspace(0,10,3)

array([ 0.,  5., 10.])

In [18]:
# 5x5 identity matrix: ones on the main diagonal, zeros elsewhere.
# Its 0/1 pattern is also why it can serve as a diagonal mask.
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### Creating Arrays (Continued)

In [22]:
# 4x4 array of samples drawn uniformly from [0, 1).
np.random.rand(4,4)

array([[0.46998373, 0.93273372, 0.4757758 , 0.35290374],
       [0.95397388, 0.94784292, 0.42507932, 0.75661024],
       [0.2661666 , 0.14894174, 0.16837186, 0.63142416],
       [0.72196128, 0.51622524, 0.25171133, 0.21549936]])

In [24]:
# 10 samples from the standard normal distribution (mean 0, variance 1).
np.random.randn(10)

array([ 1.13250281,  1.32082897,  0.23781971, -0.97349032, -0.34602218,
       -0.2701096 , -1.2617592 , -0.73843111,  0.07277288, -1.17029426])

In [27]:
# 10 random integers in [10, 100): low is inclusive, high is exclusive.
np.random.randint(10, 100, 10)

array([54, 64, 62, 55, 75, 91, 73, 21, 60, 41])

In [34]:
# Seeding the global generator makes the random draws below repeatable.
np.random.seed(42)
np.random.rand(4)

array([0.37454012, 0.95071431, 0.73199394, 0.59865848])

In [35]:
# Re-seeding with the same value restarts the sequence, so these four
# numbers are identical to the ones drawn after the previous seed(42).
np.random.seed(42)
np.random.rand(4)

array([0.37454012, 0.95071431, 0.73199394, 0.59865848])

In [36]:
# Integers 0 through 24, used by the examples that follow.
arr = np.arange(25)

### Useful Functions

In [None]:
# A cell only displays the value of its last expression, so both results
# are returned together as a tuple:
#   (max value in the array, index of that max value)
arr.max(), arr.argmax()

### Selection

In [41]:
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [39]:
arr[5]

5

In [40]:
# Slice of the elements at indices 1 through 4 (the stop index 5 is excluded).
# Basic slicing returns a view onto the original array, so writing into
# the slice would also modify arr.
arr[1:5]

array([1, 2, 3, 4])

In [42]:
# Broadcasting: the scalar 100 is applied to every element of arr.
arr + 100 # Returns a new array; arr itself is not modified.

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

In [45]:
arr = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [47]:
# Chained indexing: take row 1 first, then element 1 of that row.
arr[1][1]

5

In [48]:
# Single-step (row, column) indexing; selects the same element as arr[1][1].
arr[1,1]

5

In [49]:
# Every row, columns from index 2 onward. The result stays 2-D
# (here a single column, shape (3, 1)).
arr[:,2:]

array([[3],
       [6],
       [9]])

### Conditional Selection

In [51]:
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [52]:
arr > 4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [53]:
# Boolean mask indexing: keep only the elements where the condition is True.
arr[arr > 4]

array([ 5,  6,  7,  8,  9, 10])

### Operations

In [54]:
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [55]:
arr + arr

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [56]:
np.sin(arr)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427,
       -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111])

In [57]:
arr.sum()

55

In [58]:
arr.mean()

5.5

In [61]:
arr_2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr_2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [62]:
# axis=0 collapses the rows: values are added down each column,
# giving one total per column.
arr_2d.sum(axis=0)

array([12, 15, 18])

In [63]:
# axis=1 collapses the columns: values are added along each row,
# giving one total per row.
arr_2d.sum(axis=1)

array([ 6, 15, 24])

## Pandas

### Imports

In [64]:
import pandas as pd

### Series

In [68]:
series = pd.Series(data=[4,5,6], index=['One', 'Two', 'Three'])
series

One      4
Two      5
Three    6
dtype: int64

In [69]:
series['One']

4

In [70]:
series + series

One       8
Two      10
Three    12
dtype: int64

### DataFrames

In [73]:
random_matrix = np.random.randn(5,4)
random_matrix

array([[-0.54438272,  0.11092259, -1.15099358,  0.37569802],
       [-0.60063869, -0.29169375, -0.60170661,  1.85227818],
       [-0.01349722, -1.05771093,  0.82254491, -1.22084365],
       [ 0.2088636 , -1.95967012, -1.32818605,  0.19686124],
       [ 0.73846658,  0.17136828, -0.11564828, -0.3011037 ]])

In [75]:
df = pd.DataFrame(data=random_matrix)
df

Unnamed: 0,0,1,2,3
0,-0.544383,0.110923,-1.150994,0.375698
1,-0.600639,-0.291694,-0.601707,1.852278
2,-0.013497,-1.057711,0.822545,-1.220844
3,0.208864,-1.95967,-1.328186,0.196861
4,0.738467,0.171368,-0.115648,-0.301104


In [76]:
# A list of column labels selects several columns and returns a DataFrame.
df[[0,1]]

Unnamed: 0,0,1
0,-0.544383,0.110923
1,-0.600639,-0.291694
2,-0.013497,-1.057711
3,0.208864,-1.95967
4,0.738467,0.171368


In [79]:
# Assigning to a new label adds a column to df in place; here it holds
# the element-wise sum of columns 0 and 1.
df['new_column'] = df[0] + df[1]
df

Unnamed: 0,0,1,2,3,new_column
0,-0.544383,0.110923,-1.150994,0.375698,-0.43346
1,-0.600639,-0.291694,-0.601707,1.852278,-0.892332
2,-0.013497,-1.057711,0.822545,-1.220844,-1.071208
3,0.208864,-1.95967,-1.328186,0.196861,-1.750807
4,0.738467,0.171368,-0.115648,-0.301104,0.909835


In [80]:
# axis=1 means "drop a column". drop returns a new DataFrame; the result
# is not assigned here, so df itself still contains 'new_column'.
df.drop('new_column', axis=1)

Unnamed: 0,0,1,2,3
0,-0.544383,0.110923,-1.150994,0.375698
1,-0.600639,-0.291694,-0.601707,1.852278
2,-0.013497,-1.057711,0.822545,-1.220844
3,0.208864,-1.95967,-1.328186,0.196861
4,0.738467,0.171368,-0.115648,-0.301104


In [84]:
# iloc selects by integer position: the rows at positions 1 and 2.
df.iloc[[1,2]]

Unnamed: 0,0,1,2,3,new_column
1,-0.600639,-0.291694,-0.601707,1.852278,-0.892332
2,-0.013497,-1.057711,0.822545,-1.220844,-1.071208


### Conditional Selection

In [85]:
df > 0

Unnamed: 0,0,1,2,3,new_column
0,False,True,False,True,False
1,False,False,False,True,False
2,False,False,True,False,False
3,True,False,False,True,False
4,True,True,False,False,True


In [86]:
# Indexing with a boolean DataFrame keeps the shape; entries where the
# condition is False become NaN.
df[df > 0]

Unnamed: 0,0,1,2,3,new_column
0,,0.110923,,0.375698,
1,,,,1.852278,
2,,,0.822545,,
3,0.208864,,,0.196861,
4,0.738467,0.171368,,,0.909835


In [87]:
# Indexing with a boolean Series filters rows: keep the rows where
# column 1 is positive.
df[df[1] > 0]

Unnamed: 0,0,1,2,3,new_column
0,-0.544383,0.110923,-1.150994,0.375698,-0.43346
4,0.738467,0.171368,-0.115648,-0.301104,0.909835


In [89]:
# Combine conditions with & (and) or | (or), not Python's 'and' / 'or'.
# Each condition needs its own parentheses because & and | bind more
# tightly than comparison operators.
df[(df[1] > 0) & (df[0] > 0)]

Unnamed: 0,0,1,2,3,new_column
4,0.738467,0.171368,-0.115648,-0.301104,0.909835


### Additional Inspection Methods

In [90]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
0             5 non-null float64
1             5 non-null float64
2             5 non-null float64
3             5 non-null float64
new_column    5 non-null float64
dtypes: float64(5)
memory usage: 280.0 bytes


In [92]:
df[0].value_counts()

-0.600639    1
 0.738467    1
-0.544383    1
-0.013497    1
 0.208864    1
Name: 0, dtype: int64

In [94]:
# Summary statistics per column, transposed so that each column of df
# becomes a row and each statistic becomes a column.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
0,5.0,-0.042238,0.556179,-0.600639,-0.544383,-0.013497,0.208864,0.738467
1,5.0,-0.605357,0.901877,-1.95967,-1.057711,-0.291694,0.110923,0.171368
2,5.0,-0.474798,0.868001,-1.328186,-1.150994,-0.601707,-0.115648,0.822545
3,5.0,0.180578,1.121262,-1.220844,-0.301104,0.196861,0.375698,1.852278
new_column,5.0,-0.647594,0.990942,-1.750807,-1.071208,-0.892332,-0.43346,0.909835


### Operations

In [96]:
df[0].unique()

array([-0.54438272, -0.60063869, -0.01349722,  0.2088636 ,  0.73846658])

In [97]:
df[0].nunique()

5

In [98]:
df[0].value_counts()

-0.600639    1
 0.738467    1
-0.544383    1
-0.013497    1
 0.208864    1
Name: 0, dtype: int64

In [99]:
def times_x(number, times):
    """Return ``number`` multiplied by ``times``."""
    product = number * times
    return product

# Call times_x on each value of column 0. Series.apply forwards extra
# keyword arguments to the function, so times=2 is passed on every call.
df['times'] = df[0].apply(times_x, times=2)
df

Unnamed: 0,0,1,2,3,new_column,times
0,-0.544383,0.110923,-1.150994,0.375698,-0.43346,-1.088765
1,-0.600639,-0.291694,-0.601707,1.852278,-0.892332,-1.201277
2,-0.013497,-1.057711,0.822545,-1.220844,-1.071208,-0.026994
3,0.208864,-1.95967,-1.328186,0.196861,-1.750807,0.417727
4,0.738467,0.171368,-0.115648,-0.301104,0.909835,1.476933


In [100]:
df.columns

Index([0, 1, 2, 3, 'new_column', 'times'], dtype='object')

In [101]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [105]:
# sort_values returns a new DataFrame ordered by the given column, largest
# value first; df itself is left unchanged.
column_index = 0
df.sort_values(by=column_index, ascending=False)

Unnamed: 0,0,1,2,3,new_column,times
4,0.738467,0.171368,-0.115648,-0.301104,0.909835,1.476933
3,0.208864,-1.95967,-1.328186,0.196861,-1.750807,0.417727
2,-0.013497,-1.057711,0.822545,-1.220844,-1.071208,-0.026994
0,-0.544383,0.110923,-1.150994,0.375698,-0.43346,-1.088765
1,-0.600639,-0.291694,-0.601707,1.852278,-0.892332,-1.201277
