# Method Chaining with NumPy and Intro to Pandas
If you want to type along with me, use [this notebook](https://humboldt.cloudbank.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2Fbethanyj0%2Fdata271_sp24&branch=main&urlpath=tree%2Fdata271_sp24%2Fdemos%2Fdata271_demo11_live.ipynb) instead. 
If you don't want to type and want to follow along just by executing the cells, stay in this notebook. 

In [1]:
# Whenever you want to use numpy import it with the following code
import numpy as np

### NumPy Methods

In [23]:
arr = np.random.randint(1,21,15)
arr

array([ 8,  7, 13, 13,  4,  5, 13, 20, 11,  5,  4, 13, 19,  9, 17])

In [146]:
# find the max
arr.max()

20

In [147]:
# find the min
arr.min()

4

In [148]:
# find the index of the maximum
arr.argmax()

7

In [149]:
# find the index of the maximum
arr.argmin()

4

In [150]:
# find the mean
arr.mean()

10.733333333333333

In [151]:
# find the standard deviation
arr.std()

5.131168374638362

In [152]:
# add everything up
arr.sum()

161

In [153]:
# get cumulative sum element by element
arr.cumsum()

array([  8,  15,  28,  41,  45,  50,  63,  83,  94,  99, 103, 116, 135,
       144, 161])

### NumPy Methods on 2d arrays

In [154]:
arr2d = np.random.randint(1,20,(4,5))
arr2d

array([[ 8, 11,  1, 16,  9],
       [17,  5, 13, 18, 17],
       [12,  6,  8, 12, 17],
       [12, 12,  6, 16,  7]])

In [155]:
# get max
arr2d.max()

18

In [156]:
# get max along axis of each column
arr2d.max(axis=0)

array([17, 12, 13, 18, 17])

In [157]:
# get max of each row
arr2d.max(axis=1)

array([16, 18, 17, 16])

In [158]:
# get the index of the maximum element
arr2d.argmax()

8

In [159]:
# get the index of the maximum element in each column
arr2d.argmax(axis=0)

array([1, 3, 1, 1, 1])

In [46]:
# get the index of the maximum element in each row
arr2d.argmax(axis=1)

array([2, 0, 2, 4])

### Method chaining

In [53]:
# start by reshaping
arr2d.reshape((2,10))

array([[ 6, 11, 13, 10, 13, 19,  6, 18, 11,  4],
       [ 4,  3, 16,  7, 11,  3, 14,  4,  3, 19]])

In [54]:
# chain: reshape and then take the max of each column
arr2d.reshape((2,10)).max(axis=0)

array([ 6, 11, 16, 10, 13, 19, 14, 18, 11, 19])

In [57]:
# chain: reshape and then take the max of each column then get the average of those
arr2d.reshape((2,10)).max(axis=0).mean()

13.7

In [160]:
# when your chains start getting long 
(arr2d.reshape((2,10))
 .max(axis=0)
 .mean())

14.1

In [161]:
# reminder that none of this changes the original array
arr2d

array([[ 8, 11,  1, 16,  9],
       [17,  5, 13, 18, 17],
       [12,  6,  8, 12, 17],
       [12, 12,  6, 16,  7]])

## Pandas

In [62]:
# whenever we want to use Pandas
import pandas as pd

In [177]:
evens_array = np.arange(1,20,2)
evens_array

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19])

In [178]:
evens_series = pd.Series(evens_array,name = "even numbers")
evens_series

0     1
1     3
2     5
3     7
4     9
5    11
6    13
7    15
8    17
9    19
Name: even numbers, dtype: int64

In [126]:
lst = ['data','science','math']

In [127]:
pd.Series(lst)

0       data
1    science
2       math
dtype: object

In [170]:
tup = (2,3,5)
tup_series = pd.Series(tup)

In [172]:
tup_series.index = ['two','three','five']
tup_series

two      2
three    3
five     5
dtype: int64

In [173]:
tup_series['two']

2

### Series attributes

In [174]:
evens_series.dtype

dtype('int64')

In [175]:
evens_series.shape

(10,)

In [179]:
evens_series.index

RangeIndex(start=0, stop=10, step=1)

In [180]:
evens_series.values

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19])

In [181]:
my_dict = {'fruit':['apple','banana','orange'],
          'color':['red','yellow','orange'],
          'yum score':[5,5,5],
          'in fridge':[True, False, True],
          'number':[3,4,0]}

In [182]:
fruit_df = pd.DataFrame(my_dict)
fruit_df

Unnamed: 0,fruit,color,yum score,in fridge,number
0,apple,red,5,True,3
1,banana,yellow,5,False,4
2,orange,orange,5,True,0


In [139]:
list_of_tups = [(i,i**2,i**3) for i in range(10)]
list_of_tups

[(0, 0, 0),
 (1, 1, 1),
 (2, 4, 8),
 (3, 9, 27),
 (4, 16, 64),
 (5, 25, 125),
 (6, 36, 216),
 (7, 49, 343),
 (8, 64, 512),
 (9, 81, 729)]

In [145]:
squares_and_cubes = pd.DataFrame(list_of_tups,columns = ['n','squared','cubed'])
squares_and_cubes

Unnamed: 0,n,squared,cubed
0,0,0,0
1,1,1,1
2,2,4,8
3,3,9,27
4,4,16,64
5,5,25,125
6,6,36,216
7,7,49,343
8,8,64,512
9,9,81,729


### Dictionary attributes

In [106]:
fruit_df.dtypes

fruit        object
color        object
yum score     int64
in fridge      bool
number        int64
dtype: object

In [110]:
fruit_df.shape

(3, 5)

In [107]:
fruit_df.index

RangeIndex(start=0, stop=3, step=1)

In [108]:
fruit_df.values

array([['apple', 'red', 5, True, 3],
       ['banana', 'yellow', 5, False, 4],
       ['orange', 'orange', 5, True, 0]], dtype=object)

In [109]:
fruit_df.columns

Index(['fruit', 'color', 'yum score', 'in fridge', 'number'], dtype='object')

In [111]:
fruit_df.empty

False

In [125]:
fruit_df.set_index('fruit')

Unnamed: 0_level_0,color,yum score,in fridge,number
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
apple,red,5,True,3
banana,yellow,5,False,4
orange,orange,5,True,0


In [135]:
fruit_df['color']

0       red
1    yellow
2    orange
Name: color, dtype: object