# Step 1:

In [1]:
#Import the csv library
#Open the csv file
#With the file open, create a new csv.reader object
#Pass in the keyword argument delimiter=";" 
#to make sure that the records are split up on the semicolon character instead of the default comma character
#Call the list type to get all the rows from the file.
#Assign the result to wines.

In [2]:
import csv

# Read the semicolon-delimited red-wine file into a list of rows (the header row comes first).
# newline='' lets the csv module do its own line-ending handling, as its documentation recommends.
with open('winequality-red.csv', 'r', newline='') as f:
    wines = list(csv.reader(f, delimiter=';'))

In [10]:
print(wines[:3])

[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality'], ['7.4', '0.7', '0', '1.9', '0.076', '11', '34', '0.9978', '3.51', '0.56', '9.4', '5'], ['7.8', '0.88', '0', '2.6', '0.098', '25', '67', '0.9968', '3.2', '0.68', '9.8', '5']]


In [None]:
#Extract the last element from each row after the header row.
#Convert each extracted element to a float.
#Assign all the extracted elements to the list qualities
#Divide the sum of all the elements in qualities by the total number of elements in qualities to get the mean

In [8]:
# Last field of every row after the header, converted from text to float.
qualities = [float(row[-1]) for row in wines[1:]]
# Mean quality: the total divided by the number of ratings.
sum(qualities) / len(qualities)

5.6360225140712945

# Numpy 2-Dimensional Arrays

In [11]:
#A 2-dimensional array is also known as a matrix
#By specifying a row number and a column number, we’re able to extract an element from a matrix.
#The number of dimensions is called the rank, and each dimension is called an axis

In [12]:
#Import the numpy package.
#Pass the list of lists wines into the array function, which converts it into a NumPy array.
#Exclude the header row with list slicing.
#Specify the keyword argument dtype to make sure each element is converted to a float

import csv
# newline='' lets the csv module do its own line-ending handling, as its documentation recommends.
with open("winequality-red.csv", 'r', newline='') as f:
    wines = list(csv.reader(f, delimiter=";"))
import numpy as np
# Use the builtin float: the np.float alias was deprecated in NumPy 1.20 and removed in 1.24,
# so dtype=np.float raises AttributeError on current NumPy releases.
wines = np.array(wines[1:], dtype=float)

In [13]:
wines

array([[ 7.4  ,  0.7  ,  0.   , ...,  0.56 ,  9.4  ,  5.   ],
       [ 7.8  ,  0.88 ,  0.   , ...,  0.68 ,  9.8  ,  5.   ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  0.65 ,  9.8  ,  5.   ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  0.75 , 11.   ,  6.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  0.71 , 10.2  ,  5.   ],
       [ 6.   ,  0.31 ,  0.47 , ...,  0.66 , 11.   ,  6.   ]])

In [14]:
#check the number of rows and columns in our data using the shape property of NumPy arrays:
wines.shape

(1599, 12)

# Alternative NumPy Array Creation Methods

In [15]:
# np.zeros builds an array in which every element is zero.
# Here the requested shape is 3 rows by 4 columns.
import numpy as np
empty_array = np.zeros(shape=(3, 4))
empty_array

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [16]:
# np.random.rand fills an array of the given dimensions (3 rows, 4 columns) with random numbers.
# No seed is set in this cell, so the values differ from run to run.
random_array = np.random.rand(3, 4)
random_array

array([[0.26363503, 0.33582407, 0.60323057, 0.33505363],
       [0.25544723, 0.33990166, 0.58065859, 0.08796327],
       [0.46835536, 0.86970743, 0.18667185, 0.26901198]])

# Using NumPy To Read In Files

In [18]:
# numpy.genfromtxt reads a CSV (or other delimited text file) straight into an array.
# skip_header=1 skips the first line of the file.
wines = np.genfromtxt(
    "winequality-red.csv",
    delimiter=";",
    skip_header=1,
)

In [19]:
wines

array([[ 7.4  ,  0.7  ,  0.   , ...,  0.56 ,  9.4  ,  5.   ],
       [ 7.8  ,  0.88 ,  0.   , ...,  0.68 ,  9.8  ,  5.   ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  0.65 ,  9.8  ,  5.   ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  0.75 , 11.   ,  6.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  0.71 , 10.2  ,  5.   ],
       [ 6.   ,  0.31 ,  0.47 , ...,  0.66 , 11.   ,  6.   ]])

In [20]:
# NumPy indexing is zero-based, like Python lists.
# Pick a single value: row 0, column 2.
wines[0, 2]


0.0

# Slicing NumPy Arrays


In [21]:
# First three elements (rows 0-2) of the column at index 3, i.e. the fourth column.
wines[:3, 3]

array([1.9, 2.6, 2.3])

In [22]:
# Every element of the column at index 3, i.e. the fourth column.
wines[:, 3]


array([1.9, 2.6, 2.3, ..., 2.3, 2. , 3.6])

In [24]:
# An entire row can be extracted the same way: all columns of the row at index 1.
wines[1, :]

array([ 7.8   ,  0.88  ,  0.    ,  2.6   ,  0.098 , 25.    , 67.    ,
        0.9968,  3.2   ,  0.68  ,  9.8   ,  5.    ])

In [26]:
# A bare colon on both axes selects all rows and all columns.
wines[:, :]

array([[ 7.4  ,  0.7  ,  0.   , ...,  0.56 ,  9.4  ,  5.   ],
       [ 7.8  ,  0.88 ,  0.   , ...,  0.68 ,  9.8  ,  5.   ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  0.65 ,  9.8  ,  5.   ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  0.75 , 11.   ,  6.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  0.71 , 10.2  ,  5.   ],
       [ 6.   ,  0.31 ,  0.47 , ...,  0.66 , 11.   ,  6.   ]])

# Assigning Values To NumPy Arrays

In [29]:
# Assign 10 to the element at row 0, column 0; the array is modified in place.
wines[0, 0] = 10


In [30]:
#check wether it's assigned or not
wines

array([[10.   ,  0.7  ,  0.   , ...,  0.56 ,  9.4  ,  5.   ],
       [ 7.8  ,  0.88 ,  0.   , ...,  0.68 ,  9.8  ,  5.   ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  0.65 ,  9.8  ,  5.   ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  0.75 , 11.   ,  6.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  0.71 , 10.2  ,  5.   ],
       [ 6.   ,  0.31 ,  0.47 , ...,  0.66 , 11.   ,  6.   ]])

In [36]:
# Overwrite an entire column: every row of the column at index 10 is set to 5.0.
wines[:, 10] = 5.0

In [37]:
wines

array([[10.   ,  0.7  ,  0.   , ...,  0.56 ,  5.   ,  5.   ],
       [ 7.8  ,  0.88 ,  0.   , ...,  0.68 ,  5.   ,  5.   ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  0.65 ,  5.   ,  5.   ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  0.75 ,  5.   ,  6.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  0.71 ,  5.   ,  5.   ],
       [ 6.   ,  0.31 ,  0.47 , ...,  0.66 ,  5.   ,  6.   ]])

#  1-Dimensional NumPy Arrays


In [38]:
#A 1-dimensional array only needs a single index to retrieve an element
#If we slice wines and retrieve a single row (below: index 3, i.e. the fourth row), we get a 1-dimensional array:

In [40]:
# Take one full row of wines; the result is a 1-dimensional array.
# NOTE: index 3 is the fourth row (indexing is zero-based), despite the name third_wine.
third_wine = wines[3, :]
third_wine

array([11.2  ,  0.28 ,  0.56 ,  1.9  ,  0.075, 17.   , 60.   ,  0.998,
        3.16 ,  0.58 ,  5.   ,  6.   ])

In [41]:
third_wine[1]
# A single index is enough to grab one item from the 1-dimensional third_wine array.

0.28

In [44]:
# numpy.random.rand is not limited to multidimensional arrays:
# given a single length it generates a random vector.
wines_red = np.random.rand(3)
wines_red

array([0.09536921, 0.87641621, 0.11140788])

In [45]:
#A shape of (3,) will be a 1-dimensional array with 3 elements.
wines_red.shape

(3,)

# N-Dimensional NumPy Arrays


In [49]:
# One year of quarterly salary figures as a 2-D list: four rows (one per quarter), three values each.
year_one = [
    [500, 505, 490],
    [810, 450, 678],
    [234, 897, 430],
    [560, 1023, 640],
]
# Two years of quarterly salary figures as a 3-D list: the outer level holds one 2-D list per year.
earnings = [
    [
        [500, 505, 490],
        [810, 450, 678],
        [234, 897, 430],
        [560, 1023, 640],
    ],
    [
        [600, 605, 490],
        [345, 900, 1000],
        [780, 730, 710],
        [670, 540, 324],
    ],
]

In [54]:
# Convert the nested list into a NumPy array, then read one element with three indices.
earnings = np.array(earnings)
earnings[0, 0, 1]

505

In [48]:
earnings.shape

(2, 4, 3)

In [50]:
year_one[0][0]

500

In [51]:
year_one[1]


[810, 450, 678]

In [56]:
# First-quarter earnings from both years:
# index 0 on the second axis picks the first row of year one and the first row of year two.
earnings[:, 0, :]

array([[500, 505, 490],
       [600, 605, 490]])

In [57]:
# NumPy stores values using its own data types,
# which are distinct from Python types like float and str.
# This is because the core of NumPy is written in C, which stores data differently than the Python data types do.
# The dtype attribute reports the data type of the array's elements.
wines.dtype

dtype('float64')

In [58]:
#Data types additionally end with a suffix that indicates how many bits of memory they take up. 
#int32 is a 32 bit integer data type, and float64 is a 64 bit float data type.

# Converting Data Types

In [59]:
# Convert every element to an integer; the recorded output shows the fractional parts dropped (7.8 -> 7).
wines.astype(int)


array([[10,  0,  0, ...,  0,  5,  5],
       [ 7,  0,  0, ...,  0,  5,  5],
       [ 7,  0,  0, ...,  0,  5,  5],
       ...,
       [ 6,  0,  0, ...,  0,  5,  6],
       [ 5,  0,  0, ...,  0,  5,  5],
       [ 6,  0,  0, ...,  0,  5,  6]])

In [60]:
# The array has been converted to an integer data type; dtype.name reports which one.
# NOTE: the recorded output for this cell is 'int32', not a 64-bit type — the width chosen for
# astype(int) is presumably platform-dependent; confirm on the target machine.
int_wines = wines.astype(int)
int_wines.dtype.name

'int32'

In [61]:
# A specific NumPy type can also be requested directly, here a 32-bit integer.
wines.astype(np.int32)

array([[10,  0,  0, ...,  0,  5,  5],
       [ 7,  0,  0, ...,  0,  5,  5],
       [ 7,  0,  0, ...,  0,  5,  5],
       ...,
       [ 6,  0,  0, ...,  0,  5,  6],
       [ 5,  0,  0, ...,  0,  5,  5],
       [ 6,  0,  0, ...,  0,  5,  6]])

# NumPy Array Operations


# # Single Array Math

In [65]:
# Add 10 points to each quality score (column index 11).
# The expression produces a new array; it is not assigned back to wines here.
wines[:, 11] + 10

array([25., 25., 25., ..., 26., 25., 26.])

In [63]:
# In-place version: += modifies the quality column of wines itself,
# so every re-run of this cell adds another 10.
wines[:, 11] += 10

In [64]:
wines

array([[10.   ,  0.7  ,  0.   , ...,  0.56 ,  5.   , 15.   ],
       [ 7.8  ,  0.88 ,  0.   , ...,  0.68 ,  5.   , 15.   ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  0.65 ,  5.   , 15.   ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  0.75 ,  5.   , 16.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  0.71 ,  5.   , 15.   ],
       [ 6.   ,  0.31 ,  0.47 , ...,  0.66 ,  5.   , 16.   ]])

In [66]:
# Multiply each quality score (column index 11) by 2.
wines[:, 11] * 2

array([30., 30., 30., ..., 32., 30., 32.])

# Multiple Array Math

In [69]:
# Operations between two arrays are applied element by element.
# Add the quality column to itself.
wines[:, 11] + wines[:, 11]

array([30., 30., 30., ..., 32., 30., 32.])

In [70]:
# Element-wise product of the column at index 10 and the quality column (index 11).
wines[:, 10] * wines[:, 11]

array([75., 75., 75., ..., 80., 75., 80.])

# NumPy Array Methods


In [71]:
# Sum of every value in the quality column (index 11).
wines[:, 11].sum()

25002.0

In [72]:
# Sum of the values in every column: axis=0 collapses the rows, leaving one total per column.
wines.sum(axis=0)


array([13305.7    ,   843.985  ,   433.29   ,  4059.55   ,   139.859  ,
       25384.     , 74302.     ,  1593.79794,  5294.47   ,  1052.38   ,
        7995.     , 25002.     ])

In [74]:
# axis=1 sums across each row instead, giving one total per wine.
wines.sum(axis=1)

1599

In [75]:
#check for corresponding number of columns:
wines.sum(axis=0).shape

(12,)

In [79]:
# Mean of each column (axis=0).
wines.mean(axis=0)

array([ 8.32126329,  0.52782051,  0.27097561,  2.5388055 ,  0.08746654,
       15.87492183, 46.46779237,  0.99674668,  3.3111132 ,  0.65814884,
        5.        , 15.63602251])

In [80]:
# Minimum value of each column (axis=0).
wines.min(axis=0)

array([4.6000e+00, 1.2000e-01, 0.0000e+00, 9.0000e-01, 1.2000e-02,
       1.0000e+00, 6.0000e+00, 9.9007e-01, 2.7400e+00, 3.3000e-01,
       5.0000e+00, 1.3000e+01])

In [81]:
# Maximum value of each column (axis=0).
wines.max(axis=0)

array([ 15.9    ,   1.58   ,   1.     ,  15.5    ,   0.611  ,  72.     ,
       289.     ,   1.00369,   4.01   ,   2.     ,   5.     ,  18.     ])

# NumPy Array Comparisons


In [82]:
# Which wines have a quality rating higher than 5?
# The comparison is applied to every element and yields a Boolean array.
wines[:, 11] > 5

array([ True,  True,  True, ...,  True,  True,  True])

In [89]:
# Element-wise equality test: True wherever the quality value equals 10.
wines[:, 11] == 10


array([False, False, False, ..., False, False, False])

In [90]:
#Subsetting

In [91]:
#Combining a Boolean array with a NumPy array lets us select only certain rows or columns of the NumPy array

In [92]:
# Boolean mask: True for every row whose quality (column index 11) is over 7.
high_quality = wines[:, 11] > 7
# Keep only the rows where the mask is True (all columns), then show the first three of them.
wines[high_quality, :][:3, :]


array([[1.000e+01, 7.000e-01, 0.000e+00, 1.900e+00, 7.600e-02, 1.100e+01,
        3.400e+01, 9.978e-01, 3.510e+00, 5.600e-01, 5.000e+00, 1.500e+01],
       [7.800e+00, 8.800e-01, 0.000e+00, 2.600e+00, 9.800e-02, 2.500e+01,
        6.700e+01, 9.968e-01, 3.200e+00, 6.800e-01, 5.000e+00, 1.500e+01],
       [7.800e+00, 7.600e-01, 4.000e-02, 2.300e+00, 9.200e-02, 1.500e+01,
        5.400e+01, 9.970e-01, 3.260e+00, 6.500e-01, 5.000e+00, 1.500e+01]])

# Reshaping NumPy Arrays

In [93]:
# Transposing flips the axes: the shape changes while all of the elements are preserved.
np.transpose(wines).shape

(12, 1599)

In [94]:
# numpy.ravel function to turn an array into a one-dimensional representation
wines.ravel()


array([10.  ,  0.7 ,  0.  , ...,  0.66,  5.  , 16.  ])

In [95]:
wines.ravel().shape

(19188,)

In [96]:
#Example 2 of ravel

In [97]:
# A small 2 x 4 example array, flattened into one dimension with ravel().
array_one = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
array_one.ravel()


array([1, 2, 3, 4, 5, 6, 7, 8])

In [98]:
#reshape an array to a certain shape we specify

In [106]:
# Reshape the row at index 5 into a 2-D array of 2 rows by 6 columns.
wines[5, :].reshape((2, 6))

array([[ 7.4   ,  0.66  ,  0.    ,  1.8   ,  0.075 , 13.    ],
       [40.    ,  0.9978,  3.51  ,  0.56  ,  5.    , 15.    ]])

# Combining NumPy Arrays

In [108]:
# Load the white-wine file the same way as the red one: semicolon-delimited, first line skipped.
white_wines = np.genfromtxt(
    "winequality-white.csv",
    delimiter=";",
    skip_header=1,
)
white_wines.shape

(4898, 12)

In [109]:
# Stack the red and white wine arrays vertically, so the rows of white_wines follow the rows of wines.
all_wines = np.vstack([wines, white_wines])
all_wines.shape

(6497, 12)

In [110]:
# np.concatenate along axis 0 also joins the two arrays row-wise.
np.concatenate((wines, white_wines), axis=0)


array([[10.  ,  0.7 ,  0.  , ...,  0.56,  5.  , 15.  ],
       [ 7.8 ,  0.88,  0.  , ...,  0.68,  5.  , 15.  ],
       [ 7.8 ,  0.76,  0.04, ...,  0.65,  5.  , 15.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]])

In [None]:
#https://webcourses.ucf.edu/courses/1249560/pages/python-lists-vs-numpy-arrays-what-is-the-difference