http://swcarpentry.github.io/python-novice-inflammation/02-numpy/index.html

In [1]:
 import os # library() in R, loads the package

In [2]:
help(os)

Help on module os:

NAME
    os - OS routines for NT or Posix depending on what system we're on.

DESCRIPTION
    This exports:
      - all functions from posix or nt, e.g. unlink, stat, etc.
      - os.path is either posixpath or ntpath
      - os.name is either 'posix' or 'nt'
      - os.curdir is a string representing the current directory (always '.')
      - os.pardir is a string representing the parent directory (always '..')
      - os.sep is the (or a most common) pathname separator ('/' or '\\')
      - os.extsep is the extension separator (always '.')
      - os.altsep is the alternate pathname separator (None or '/')
      - os.pathsep is the component separator used in $PATH etc
      - os.linesep is the line separator in text files ('\r' or '\n' or '\r\n')
      - os.defpath is the default search path for executables
      - os.devnull is the file path of the null device ('/dev/null', etc.)
    
    Programs that import and use 'os' stand a better chance of being
    porta

In [3]:
import numpy

In [4]:
help(numpy)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [5]:
import numpy as np # abbreviates the name of the library / package

In [6]:
help(np.loadtxt)

Help on function loadtxt in module numpy:

loadtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None)
    Load data from a text file.
    
    Each row in the text file must have the same number of values.
    
    Parameters
    ----------
    fname : file, str, or pathlib.Path
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str, optional
  

In [9]:
data = np.loadtxt(fname = "../data/inflammation-01.csv", delimiter = ",")

In [10]:
data.shape # dim(data) in R

(60, 40)

In [11]:
type(data) # data.frame() in R

numpy.ndarray

In [12]:
data[0, 0] # starts at 0

0.0

In [13]:
data[100, 100]

IndexError: index 100 is out of bounds for axis 0 with size 60

In [14]:
data[59, 39] # last obs, remember counting starts at 0

0.0

In [16]:
data[:, 10] # all of column 10

array([ 3., 10.,  5., 10.,  4.,  7.,  6.,  7.,  2.,  6.,  5.,  2.,  9.,
        5.,  3.,  7.,  3.,  4.,  4.,  3.,  2.,  7.,  2.,  9.,  7.,  3.,
       10.,  4.,  4.,  4.,  2., 10.,  6.,  9.,  4.,  8.,  3.,  5., 10.,
        2.,  6.,  2.,  9.,  5.,  9.,  5.,  7., 10.,  6., 10., 10.,  8.,
       10.,  4.,  9.,  2.,  5.,  6.,  9.,  9.])

In [19]:
data[0:59,].shape # all but the last row

(59, 40)

In [18]:
data[0:1, 10:20].shape # does not include final index

(1, 10)

In [21]:
data[10:10, 10:20].shape # not selecting any rows

(0, 10)

In [22]:
np.mean(data) # gives overall mean

6.14875

In [23]:
np.mean(data, axis = 0) # generates mean for each column

array([ 0.        ,  0.45      ,  1.11666667,  1.75      ,  2.43333333,
        3.15      ,  3.8       ,  3.88333333,  5.23333333,  5.51666667,
        5.95      ,  5.9       ,  8.35      ,  7.73333333,  8.36666667,
        9.5       ,  9.58333333, 10.63333333, 11.56666667, 12.35      ,
       13.25      , 11.96666667, 11.03333333, 10.16666667, 10.        ,
        8.66666667,  9.15      ,  7.25      ,  7.33333333,  6.58333333,
        6.06666667,  5.95      ,  5.11666667,  3.6       ,  3.3       ,
        3.56666667,  2.48333333,  1.5       ,  1.13333333,  0.56666667])

In [24]:
np.mean(data, axis = 0).shape #one dimentional array

(40,)

In [25]:
np.mean(data, axis = 1).shape # means over each row

(60,)

In [26]:
import time
print(time.ctime()) # sys.time() in R provides today's date and time

Mon Jan  6 09:53:16 2020


In [27]:
print(time.ctime) # need to call the funciton with () to execute it

<built-in function ctime>


In [28]:
maxval, minval, stdval = np.max(data), np.min(data), np.std(data)
print("maximum inflamation:", maxval)
print("minimum inflamation:", minval)
print("standard deviation:", stdval)

maximum inflamation: 20.0
minimum inflamation: 0.0
standard deviation: 4.613833197118566


In [29]:
# rows are patients and columns are observations over time
np.max(data, axis = 1) # max inflamation for each patient

array([18., 18., 19., 17., 17., 18., 17., 20., 17., 18., 18., 18., 17.,
       16., 17., 18., 19., 19., 17., 19., 19., 16., 17., 15., 17., 17.,
       18., 17., 20., 17., 16., 19., 15., 15., 19., 17., 16., 17., 19.,
       16., 18., 19., 16., 19., 18., 16., 19., 15., 16., 18., 14., 20.,
       17., 15., 17., 16., 17., 19., 18., 18.])

In [30]:
per_patient_max_inflammation = np.mean(data, axis = 1)
per_patient_max_inflammation.shape

(60,)

In [34]:
per_day_max_inflammation = np.max(data, axis = 0)
per_day_max_inflammation.shape

(40,)

In [35]:
# slicing a string
element = "oxygen"
print("first three characters", element[0:3]) # like substring() in R
print("last three characters", element[3:6])

first three characters oxy
last three characters gen


In [36]:
element[:4] # osyg 0, 1, 2, 3

'oxyg'

In [37]:
element[4:] # en 4, 5

'en'

In [38]:
len(element) # length of the variable

6

In [39]:
element[4:4]

''

In [40]:
element[-1] # gives you the last character

'n'

In [41]:
element[-2]

'e'

In [42]:
element[-6]

'o'

In [43]:
element[-7]

IndexError: string index out of range

In [44]:
element[1:-1] # element x, len + -1 = 5
# everything except the first and last characters

'xyge'

In [45]:
element[3:3] # last index - first index = length of result = 0

''

In [46]:
len(element[3:3])

0

In [47]:
data[3:3, 4:4]

array([], shape=(0, 0), dtype=float64)

In [50]:
data[3:3, :]

array([], shape=(0, 40), dtype=float64)

In [53]:
A = np.array([[1, 2, 3], [4, 5, 6] , [7, 8, 9]]) #list in a list
print("A = ")
print(A)

A = 
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [54]:
B = np.hstack([A, A]) # horizontal stacking
print("B = ")
print(B)

B = 
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]
 [7 8 9 7 8 9]]


In [55]:
C = np.vstack([A, A]) # vertical stacking
print("C = ")
print(C)

C = 
[[1 2 3]
 [4 5 6]
 [7 8 9]
 [1 2 3]
 [4 5 6]
 [7 8 9]]


In [56]:
A.T # transpose A

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [57]:
AT = A.T
A * AT

array([[ 1,  8, 21],
       [ 8, 25, 48],
       [21, 48, 81]])

In [58]:
data[0, :5].shape

(5,)

In [59]:
data[0:1, :5].shape

(1, 5)

In [63]:
patient_1_days_1_thru_5 = data[0:1, :5]
np.diff(patient_1_days_1_thru_5) # only 4 differences between the 5 days

array([[ 0.,  1.,  2., -2.]])

In [64]:
patient_1_days_1_thru_5

array([[0., 0., 1., 3., 1.]])

In [65]:
data.shape

(60, 40)

In [69]:
#What is the shape of np.diff(data, axis = 1)? # 40 - 1 = 39
np.diff(data, axis = 1).shape

(60, 39)

In [72]:
np.diff(data).shape

(59, 40)

In [77]:
# How would you find the largest change in inflamation for each patient
# 1. Calculate the difference between successive observations?
# 2. Find the max over those differences?
np.max(np.abs(np.diff(data, axis = 1)), axis = 1)

array([12., 14., 11., 13., 11., 13., 10., 12., 10., 10., 10., 12., 13.,
       10., 11., 10., 12., 13.,  9., 10., 13.,  9., 12.,  9., 12., 11.,
       10., 13.,  9., 13., 11., 11.,  8., 11., 12., 13.,  9., 10., 13.,
       11., 11., 13., 11., 13., 13., 10.,  9., 10., 10.,  9.,  9., 13.,
       10.,  9., 10., 11., 13., 10., 10., 12.])

In [1]:
# 3 dimentional array
test = np.array([
    [[1,2,3], [5,6,7]],
    [[8,9,0], [11, 22, 33]],
    [[44,55,66],[77,88,99]]
])
test
# increase the axis = number

NameError: name 'np' is not defined