**NumPy Tutorial**

**First**: Lists of lists for CSV data

In [215]:
import csv

# Read the semicolon-delimited CSV into a list of rows; every field is kept
# as a string and the header row is included as the first element.
# newline="" is what the csv module documentation asks for when a file object
# is handed to csv.reader, so newlines inside quoted fields are parsed correctly.
with open("winequality-red.csv", "r", newline="") as f:
    # delimiter=";" overrides the default ","; list() consumes the reader so
    # all the rows remain available after the file is closed.
    wines = list(csv.reader(f, delimiter=";"))

In [216]:
# Show the header row and the first data row; every field is still a string here.
print(wines[:2])

[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality'], ['7.4', '0.7', '0', '1.9', '0.076', '11', '34', '0.9978', '3.51', '0.56', '9.4', '5']]


In [217]:
# The last field of each row is converted to float; wines[1:] skips the header row.
qualities = [float(row[-1]) for row in wines[1:]]
# Mean of the converted values, computed in plain Python.
sum(qualities) / len(qualities)

5.6360225140712945

**NumPy 2-Dimensional Arrays**

In [218]:
import numpy as np

# Drop the header row and convert the remaining rows of strings to a float array.
# The builtin `float` replaces the `np.float` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (it raises AttributeError there).
# NOTE: this rebinds `wines` from a list of lists to an ndarray, so the cell is
# not idempotent -- running it a second time would slice off the first data row.
wines = np.array(wines[1:], dtype=float)

wines

array([[  7.4  ,   0.7  ,   0.   , ...,   0.56 ,   9.4  ,   5.   ],
       [  7.8  ,   0.88 ,   0.   , ...,   0.68 ,   9.8  ,   5.   ],
       [  7.8  ,   0.76 ,   0.04 , ...,   0.65 ,   9.8  ,   5.   ],
       ..., 
       [  6.3  ,   0.51 ,   0.13 , ...,   0.75 ,  11.   ,   6.   ],
       [  5.9  ,   0.645,   0.12 , ...,   0.71 ,  10.2  ,   5.   ],
       [  6.   ,   0.31 ,   0.47 , ...,   0.66 ,  11.   ,   6.   ]])

In [219]:
# (number of rows, number of columns) of the 2-D array.
wines.shape

(1599, 12)

**Alternative NumPy Array Creation Methods**

In [220]:
# Build a 3-row by 4-column array in which every element is zero.
empty_array = np.zeros(shape=(3, 4))
empty_array

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [221]:
# 3x4 array of random floats; no seed is set, so the values differ on every run.
np.random.rand(3,4)

array([[ 0.29692813,  0.87283324,  0.27999192,  0.85596533],
       [ 0.58858463,  0.67627902,  0.4716044 ,  0.18159008],
       [ 0.06754806,  0.40492891,  0.97673273,  0.52435774]])

**Use NumPy to read a CSV (or other delimited text) file directly into an array**

In [222]:
# Keep a handle on the array built via the csv module so it can be compared
# with the genfromtxt result. This is an alias, not a copy; it stays separate
# only because the next cell rebinds `wines` to a newly created array.
wines_orig = wines

In [223]:
# Let NumPy parse the file itself: fields are split on ";" and the single
# header line is skipped.
wines = np.genfromtxt(
    "winequality-red.csv",
    delimiter=";",
    skip_header=1,
)

In [224]:
# Array produced by csv.reader + np.array, shown for comparison.
wines_orig

array([[  7.4  ,   0.7  ,   0.   , ...,   0.56 ,   9.4  ,   5.   ],
       [  7.8  ,   0.88 ,   0.   , ...,   0.68 ,   9.8  ,   5.   ],
       [  7.8  ,   0.76 ,   0.04 , ...,   0.65 ,   9.8  ,   5.   ],
       ..., 
       [  6.3  ,   0.51 ,   0.13 , ...,   0.75 ,  11.   ,   6.   ],
       [  5.9  ,   0.645,   0.12 , ...,   0.71 ,  10.2  ,   5.   ],
       [  6.   ,   0.31 ,   0.47 , ...,   0.66 ,  11.   ,   6.   ]])

In [225]:
# Array produced by np.genfromtxt; the displayed values match wines_orig.
wines

array([[  7.4  ,   0.7  ,   0.   , ...,   0.56 ,   9.4  ,   5.   ],
       [  7.8  ,   0.88 ,   0.   , ...,   0.68 ,   9.8  ,   5.   ],
       [  7.8  ,   0.76 ,   0.04 , ...,   0.65 ,   9.8  ,   5.   ],
       ..., 
       [  6.3  ,   0.51 ,   0.13 , ...,   0.75 ,  11.   ,   6.   ],
       [  5.9  ,   0.645,   0.12 , ...,   0.71 ,  10.2  ,   5.   ],
       [  6.   ,   0.31 ,   0.47 , ...,   0.66 ,  11.   ,   6.   ]])

**Indexing NumPy Arrays**

In [226]:
# Indices are zero-based: row index 2 is the third row, column index 0 the first column.
wines[2, 0]

7.7999999999999998

In [227]:
# Row index 2, columns 0 through 2 (the slice end, 3, is excluded).
wines[2, :3]

array([ 7.8 ,  0.76,  0.04])

In [228]:
# Overwrite a single element in place: row index 1, column index 5.
wines[1, 5] = 10

In [229]:
# Assign one scalar to an entire column (column index 10) in place.
# NOTE: this replaces the loaded values in that column for every row, so every
# later cell that reads column 10 sees 50 instead of the data from the file.
wines[:, 10] = 50

In [230]:
# Select one whole row. Index 3 is the fourth row (zero-based), despite the
# variable's name.
third_wine = wines[3, :]

In [231]:
# The selected row is a 1-D array with 12 values.
third_wine

array([ 11.2  ,   0.28 ,   0.56 ,   1.9  ,   0.075,  17.   ,  60.   ,
         0.998,   3.16 ,   0.58 ,  50.   ,   6.   ])

In [232]:
# A 1-D array takes a single index; this reads its first element.
third_wine[0]

11.199999999999999

In [233]:
# 1-D array of three random floats (unseeded, so values change between runs).
np.random.rand(3)

array([ 0.0150125 ,  0.89912801,  0.50100068])

In [234]:
# Nested list with three levels: 2 outer groups, each holding 4 rows of 3 values.
earnings = [
    [
        [500, 505, 490],
        [810, 450, 678],
        [234, 897, 430],
        [560, 1023, 640],
    ],
    [
        [600, 605, 490],
        [345, 900, 1000],
        [780, 730, 710],
        [670, 540, 324],
    ],
]

In [235]:
# Still a plain nested Python list at this point.
earnings

[[[500, 505, 490], [810, 450, 678], [234, 897, 430], [560, 1023, 640]],
 [[600, 605, 490], [345, 900, 1000], [780, 730, 710], [670, 540, 324]]]

In [236]:
# Convert the nested list to an ndarray (rebinding the same name), then read
# the element at index 0 along each of the three axes.
earnings = np.array(earnings)
earnings[0, 0, 0]

500

In [237]:
# One length per axis; a three-level nested list gives a 3-D array.
earnings.shape

(2, 4, 3)

In [238]:
# Everything along the first axis, index 0 on the other two axes: a 1-D result.
earnings[:, 0, 0]

array([500, 600])

In [239]:
# Keep the first and last axes whole and fix the middle axis at 0: a 2-D result.
earnings[:, 0, :]

array([[500, 505, 490],
       [600, 605, 490]])

In [240]:
# Every element of an ndarray shares one data type; this shows which one.
wines.dtype

dtype('float64')

In [241]:
# astype returns a converted array and leaves `wines` itself as float64.
# Fractional parts are dropped rather than rounded (7.8 becomes 7 in the output).
int_wines = wines.astype(int)

In [242]:
# Integer version of the data; column 10 shows the 50 assigned earlier.
int_wines


array([[ 7,  0,  0, ...,  0, 50,  5],
       [ 7,  0,  0, ...,  0, 50,  5],
       [ 7,  0,  0, ...,  0, 50,  5],
       ..., 
       [ 6,  0,  0, ...,  0, 50,  6],
       [ 5,  0,  0, ...,  0, 50,  5],
       [ 6,  0,  0, ...,  0, 50,  6]])

In [243]:
# The dtype's name as a plain string.
int_wines.dtype.name

'int64'

In [244]:
# NumPy's own type objects are available as attributes of the module;
# evaluating one just displays the type.
np.int32

numpy.int32

In [245]:
# Convert using an explicit NumPy type instead of the builtin int.
# The result is only displayed, not stored.
wines.astype(np.int32)

array([[ 7,  0,  0, ...,  0, 50,  5],
       [ 7,  0,  0, ...,  0, 50,  5],
       [ 7,  0,  0, ...,  0, 50,  5],
       ..., 
       [ 6,  0,  0, ...,  0, 50,  6],
       [ 5,  0,  0, ...,  0, 50,  5],
       [ 6,  0,  0, ...,  0, 50,  6]], dtype=int32)

In [246]:
# Scalar arithmetic applies to every element; this yields a new array and
# leaves `wines` unchanged.
wines[:, 11] + 10

array([ 15.,  15.,  15., ...,  16.,  15.,  16.])

In [247]:

# In-place variant: column index 11 of `wines` itself is increased by 10.
# NOTE: not idempotent -- every re-run of this cell adds another 10, and all
# later cells that read column 11 see the shifted values.
wines[:, 11] += 10

In [248]:
# Column index 11 after the in-place addition.
wines[:, 11]

array([ 15.,  15.,  15., ...,  16.,  15.,  16.])

In [249]:
# Elementwise multiplication by a scalar; returns a new array.
wines[:, 11] * 2

array([ 30.,  30.,  30., ...,  32.,  30.,  32.])

In [250]:
# Two arrays of the same shape are added element by element.
wines[:, 11] + wines[:, 11]

array([ 30.,  30.,  30., ...,  32.,  30.,  32.])

In [251]:
# Broadcasting lines shapes up starting from the trailing dimension:
#   A: (50, 3)
#   B:     (3,)
# The trailing dimensions are equal (3 == 3), so A and B can be combined
# elementwise.

In [None]:
# Deliberate failure demo: `wines` has 12 columns and the other operand has
# length 2; the trailing dimensions differ and neither is 1, so the operands
# cannot be broadcast together. The error is caught and printed so that the
# notebook still runs top to bottom.
try:
    wines * np.array([1, 2])
except ValueError as err:
    print("ValueError:", err)

In [None]:
# Broadcasting that works: the length-2 array is added to each row of the
# 2x2 array because their trailing dimensions match.
array_one = np.array([[1, 2], [3, 4]])
array_two = np.array([4, 5])

array_one + array_two

In [None]:
# Twelve random values, one per column of `wines`; the addition below
# broadcasts them across every row. Unseeded, so results vary between runs.
rand_array = np.random.rand(12)
wines + rand_array

In [252]:
# Total of column index 11 (values already shifted by the earlier += 10).
wines[:, 11].sum()

25002.0

In [257]:
# axis=0 sums down the rows, giving one total per column.
wines.sum(axis=0)

array([ 13303.1    ,    843.985  ,    433.29   ,   4059.55   ,
          139.859  ,  25369.     ,  74302.     ,   1593.79794,
         5294.47   ,   1052.38   ,  79950.     ,  25002.     ])

In [258]:
# Confirms there is one total per column: a 1-D result of length 12.
wines.sum(axis=0).shape

(12,)

In [259]:
# axis=1 sums across the columns, giving one total per row.
wines.sum(axis=1)

array([ 125.1438 ,  158.2548 ,  149.899  , ...,  149.48174,  155.01547,
        141.49249])

In [264]:

# With no axis given, the mean is taken over every element of the array
# (all columns pooled together).
wines.mean()

12.056672500521159

In [262]:
# Standard deviation over every element of the array.
wines.std()


19.782235232935108

In [268]:
# Smallest value anywhere in the array.
wines.min()

0.0

In [269]:
# Largest value anywhere in the array.
wines.max()


289.0

In [277]:

# Comparing an array with a scalar yields a boolean array of the same length.
wines[:, 11] > 15

array([False, False, False, ...,  True, False,  True], dtype=bool)

In [278]:
# Elementwise equality test against a scalar.
wines[:, 11] == 10

array([False, False, False, ..., False, False, False], dtype=bool)

In [283]:
# Boolean mask over column index 11.
# NOTE(review): that column was increased by 10 in an earlier cell
# (`wines[:,11] += 10`), and the values displayed since then are 15 and 16,
# so `> 7` is True for every entry shown; on the shifted scale the
# equivalent of "quality > 7" would be `> 17`.
high_quality = wines[:, 11] > 7
high_quality

array([ True,  True,  True, ...,  True,  True,  True], dtype=bool)

In [284]:
# Use the boolean mask to pick rows, then keep the first three of the picked
# rows (all columns).
wines[high_quality, :][:3, :]

array([[  7.40000000e+00,   7.00000000e-01,   0.00000000e+00,
          1.90000000e+00,   7.60000000e-02,   1.10000000e+01,
          3.40000000e+01,   9.97800000e-01,   3.51000000e+00,
          5.60000000e-01,   5.00000000e+01,   1.50000000e+01],
       [  7.80000000e+00,   8.80000000e-01,   0.00000000e+00,
          2.60000000e+00,   9.80000000e-02,   1.00000000e+01,
          6.70000000e+01,   9.96800000e-01,   3.20000000e+00,
          6.80000000e-01,   5.00000000e+01,   1.50000000e+01],
       [  7.80000000e+00,   7.60000000e-01,   4.00000000e-02,
          2.30000000e+00,   9.20000000e-02,   1.50000000e+01,
          5.40000000e+01,   9.97000000e-01,   3.26000000e+00,
          6.50000000e-01,   5.00000000e+01,   1.50000000e+01]])

In [285]:
# Combine two conditions elementwise with `&`; each comparison needs its own
# parentheses.
# NOTE(review): column 10 was overwritten with 50 and column 11 shifted by +10
# in earlier cells, so the first condition holds for every row and the second
# is evaluated on the shifted values.
high_quality_and_alcohol = (wines[:, 10] > 10) & (wines[:, 11] > 7)
# Rows passing the mask, last two columns only (index 10 onward).
wines[high_quality_and_alcohol, 10:]

array([[ 50.,  15.],
       [ 50.,  15.],
       [ 50.,  15.],
       ..., 
       [ 50.,  16.],
       [ 50.,  15.],
       [ 50.,  16.]])

In [286]:
# Transposing swaps the two axes, so rows become columns.
wines.T.shape

(12, 1599)

In [288]:
# Flatten the 2-D array into one dimension, row after row.
wines.ravel()

array([  7.4 ,   0.7 ,   0.  , ...,   0.66,  50.  ,  16.  ])

In [289]:
# Small 2x4 example showing the order in which ravel() lays out the values:
# the first row in full, then the second.
array_one = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

array_one.ravel()

array([1, 2, 3, 4, 5, 6, 7, 8])

In [291]:
# Rearrange the 12 values of row index 1 into 2 rows of 6 columns.
wines[1, :].reshape((2, 6))

array([[  7.8   ,   0.88  ,   0.    ,   2.6   ,   0.098 ,  10.    ],
       [ 67.    ,   0.9968,   3.2   ,   0.68  ,  50.    ,  15.    ]])

In [294]:
# Load the white-wine file the same way as the red one: ";"-separated with a
# single header line to skip.
white_wines = np.genfromtxt(
    "winequality-white.csv",
    delimiter=";",
    skip_header=1,
)
white_wines.shape

(4898, 12)

In [295]:
# Stack the two arrays vertically (rows of `wines` first); both have 12 columns.
# NOTE(review): `wines` still carries the earlier in-place edits (column 10 set
# to 50, column 11 shifted by +10) while `white_wines` is as loaded from file,
# so the stacked rows are not on the same scale in those columns.
all_wines = np.vstack((wines, white_wines))
all_wines.shape

(6497, 12)

In [297]:
# Same row-wise stacking expressed with concatenate; axis=0 joins along rows.
# The result is only displayed, not stored.
np.concatenate((wines, white_wines), axis=0)

array([[  7.40000000e+00,   7.00000000e-01,   0.00000000e+00, ...,
          5.60000000e-01,   5.00000000e+01,   1.50000000e+01],
       [  7.80000000e+00,   8.80000000e-01,   0.00000000e+00, ...,
          6.80000000e-01,   5.00000000e+01,   1.50000000e+01],
       [  7.80000000e+00,   7.60000000e-01,   4.00000000e-02, ...,
          6.50000000e-01,   5.00000000e+01,   1.50000000e+01],
       ..., 
       [  6.50000000e+00,   2.40000000e-01,   1.90000000e-01, ...,
          4.60000000e-01,   9.40000000e+00,   6.00000000e+00],
       [  5.50000000e+00,   2.90000000e-01,   3.00000000e-01, ...,
          3.80000000e-01,   1.28000000e+01,   7.00000000e+00],
       [  6.00000000e+00,   2.10000000e-01,   3.80000000e-01, ...,
          3.20000000e-01,   1.18000000e+01,   6.00000000e+00]])