# Lesson 1 - Numpy

## Import Numpy Library

In [1]:
import numpy as np

## NumPy array creation methods

### Unidimensional array

#### Create a one-dimensional NumPy array from a list


In [2]:
# Build a one-dimensional array from a plain Python list.
# The source list is called `values` so the built-in `list` type is not shadowed
# for the rest of the session.
values = [1, 2, 3]
uni_numpy_array = np.array(values)
uni_numpy_array

array([1, 2, 3])

#### Get the shape (size along each dimension)

In [3]:
# Shape of the one-dimensional array: a one-element tuple holding its length.
uni_numpy_array.shape

(3,)

### Multidimensional array

#### Create a multidimensional NumPy array from a list


In [4]:
# Build a two-dimensional array from a list of equal-length rows.
# The source list is called `rows` so the built-in `list` type is not shadowed
# for the rest of the session.
rows = [[1, 2, 3], [4, 5, 6]]
multi_numpy_array = np.array(rows)
multi_numpy_array

array([[1, 2, 3],
       [4, 5, 6]])

#### Get the shape (number of rows and columns)

In [6]:
# Shape of the two-dimensional array as a (rows, columns) tuple.
multi_numpy_array.shape

(2, 3)

 #### Create an array with 3 rows and 4 columns, where every element is 0

In [10]:
# Allocate a 3-row by 4-column array whose elements are all floating-point zeros.
zeros_array = np.zeros(shape=(3, 4))
zeros_array

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

 #### Create an array with 3 rows and 4 columns, where every element is random

In [11]:
# Fill a 3-row by 4-column array with random floats; np.random.rand samples
# uniformly from [0, 1), so the values differ on every run.
random_array = np.random.rand(
    3,  # rows
    4,  # columns
)
random_array

array([[ 0.92627746,  0.64672722,  0.11207139,  0.37520659],
       [ 0.65368141,  0.04497911,  0.87179133,  0.35134906],
       [ 0.41315981,  0.7053516 ,  0.37921211,  0.55739951]])

## Extract info from data file

#### Read csv file 'winequality-red.csv'

In [12]:
# Load the semicolon-separated red-wine file from the data folder as floats,
# skipping the first line (the column names).
# Despite the `_df` suffix, the result is a NumPy array, not a pandas DataFrame.
wines_df = np.genfromtxt(
    "./data/winequality-red.csv",
    skip_header=1,
    delimiter=";",
    dtype=float,
)

#### Print df

In [13]:
# Show the loaded array. The call form of print works on both Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print(wines_df)

[[  7.4     0.7     0.    ...,   0.56    9.4     5.   ]
 [  7.8     0.88    0.    ...,   0.68    9.8     5.   ]
 [  7.8     0.76    0.04  ...,   0.65    9.8     5.   ]
 ..., 
 [  6.3     0.51    0.13  ...,   0.75   11.      6.   ]
 [  5.9     0.645   0.12  ...,   0.71   10.2     5.   ]
 [  6.      0.31    0.47  ...,   0.66   11.      6.   ]]


#### Get the number of rows and columns from df

In [14]:
# (rows, columns) of the loaded wine data.
wines_df.shape

(1599, 12)

## Extract data from numpy array

#### Select first row

In [16]:
# Slicing rows 0:1 (rather than indexing row 0) keeps the result
# two-dimensional: one row by all columns.
first_line = wines_df[0:1, :]
first_line

array([[  7.4   ,   0.7   ,   0.    ,   1.9   ,   0.076 ,  11.    ,
         34.    ,   0.9978,   3.51  ,   0.56  ,   9.4   ,   5.    ]])

#### Select the second element from the third row

In [24]:
# Take the third row (index 2), then slice out its second element (index 1).
# The 1:2 slice yields a one-element array rather than a bare scalar.
second_third = wines_df[2][1:2]
second_third

array([ 0.76])

#### Select the first three items from the fourth column

In [30]:
# Rows 0, 1 and 2 of the fourth column (index 3).
first_three_items = wines_df[0:3, 3]
first_three_items

array([ 1.9,  2.6,  2.3])

#### Select the entire fourth column

In [32]:
# Every row of the fourth column (index 3); `...` stands in for the row axis.
fourth_column = wines_df[..., 3]
fourth_column

array([ 1.9,  2.6,  2.3, ...,  2.3,  2. ,  3.6])

#### Select the value 1

In [33]:
# Nested list made of two groups of four 3-element rows; converting it
# produces a three-dimensional array.
numbers = [
    [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]],
    [[13, 14, 15], [16, 17, 18], [19, 20, 21], [22, 23, 24]],
]

numbers_array = np.array(numbers)
# One multi-axis index reaches the same element as chained [0][0][0] lookups.
one = numbers_array[0, 0, 0]
one

1

#### Get the shape

In [34]:
# Size of the three-dimensional array along each of its axes.
numbers_array.shape

(2, 4, 3)

#### Select an array which contains [1, 13]

In [35]:
# From every outer group take its first row, then the first value of that row.
element = numbers_array[:, 0][:, 0]
element

array([ 1, 13])

#### Select an array which contains [[1, 2, 3], [13, 14, 15]]

In [36]:
# First row of every outer group; the trailing axis is kept whole implicitly.
element = numbers_array[:, 0]
element

array([[ 1,  2,  3],
       [13, 14, 15]])

## Numpy data types

#### NumPy has several different data types, which mostly map to Python data types, like float, and str. You can find a full listing of NumPy data types here, but here are a few important ones:

1. float – numeric floating point data.
2. int – integer data.
3. string – character data.
4. object – Python objects.

####  Get wines data type 

In [37]:
# Element type of the wine array (it was loaded with dtype=float).
wines_df.dtype

dtype('float64')

####  Convert wines data type to int

In [38]:
# Produce an integer version of the data; fractional parts are dropped
# (7.4 becomes 7). The result is only displayed, not assigned, so wines_df
# itself keeps its float values.
wines_df.astype(dtype=int)

array([[ 7,  0,  0, ...,  0,  9,  5],
       [ 7,  0,  0, ...,  0,  9,  5],
       [ 7,  0,  0, ...,  0,  9,  5],
       ..., 
       [ 6,  0,  0, ...,  0, 11,  6],
       [ 5,  0,  0, ...,  0, 10,  5],
       [ 6,  0,  0, ...,  0, 11,  6]])

## NumPy array operations

#### Add 10 to every value of the 12th column (index 11)

In [39]:
# The scalar is broadcast: 10 is added to each value of column index 11,
# which is the 12th column.
wines_sum = 10 + wines_df[:, 11]
wines_sum

array([ 15.,  15.,  15., ...,  16.,  15.,  16.])

#### Add the 12th column (index 11) to itself

In [40]:
# Element-wise addition of column index 11 with itself (each value doubled).
twelveth_column_to_itself = (
    wines_df[..., 11]
    + wines_df[..., 11]
)
twelveth_column_to_itself

array([ 10.,  10.,  10., ...,  12.,  10.,  12.])

## NumPy array methods

#### Sum the whole 12th column (index 11)

In [None]:
# Collapse column index 11 into a single total.
twelveth_column_sum = np.sum(wines_df[:, 11])
twelveth_column_sum

####  Sum up the whole first row

In [None]:
# Total of every value in the first row (the [:1] slice holds only that row).
first_row_sum = np.sum(wines_df[:1])
first_row_sum

####  Sum up all the columns

In [None]:
# axis=0 sums down the rows, giving one total per column.
all_columns_sum = np.sum(wines_df, axis=0)
all_columns_sum

####  Sum up all the rows

In [None]:
# axis=1 sums across the columns, giving one total per row.
all_rows_sum = np.sum(wines_df, axis=1)
all_rows_sum

####  Mean of first row

In [None]:
# Average of every value in the first row (the [:1] slice holds only that row).
first_row_mean = np.mean(wines_df[:1])
first_row_mean

## NumPy array comparison

####  Return a bool array that is True where the value in the 12th column (index 11) is less than 5 and False otherwise

In [None]:
# Element-wise comparison on column index 11: True where the value is below 5.
# The exercise asks for values less than 5; the previous `> 5` test returned
# the opposite side of the threshold.
bool_array = wines_df[:, 11] < 5
bool_array

####  Return a bool array that is True where the value in the 12th column (index 11) is equal to 5 and False otherwise

In [None]:
# Element-wise equality test on column index 11: True where the value is 5.
bool_array = (5 == wines_df[:, 11])
bool_array

####  Select all the rows where the value in the 12th column (index 11) is greater than 6

In [None]:
# Boolean mask marking the rows whose value in column index 11 exceeds 6.
higher_than_six = wines_df[:, 11] > 6
# Apply the mask to the rows and pick column index 11 in one indexing step;
# only that column of the matching rows is displayed.
wines_df[higher_than_six, 11]

##  NumPy array reshape

####  Get the transpose of the wines matrix

In [None]:
# Swap rows and columns via the .T attribute, then inspect the new shape
# (the original shape with its two axes exchanged).
traspose = wines_df.T
traspose.shape

####  Get the flattened array of wines

In [None]:
# Collapse the 2-D data into a single dimension; -1 lets NumPy infer the
# length from the total number of elements.
flatten = wines_df.reshape(-1)
flatten.shape

####   Turn the second row of wines (12 values) into a 2-dimensional array with 3 rows and 4 columns

In [None]:
# Take the row at index 1 (the second row, 12 values) and lay it out as
# 3 rows by 4 columns.
wines_df[1].reshape(3, 4)

##  NumPy array combination

####  Read white_wine csv and combine it with red wine data using vstack function

In [None]:
# Load the white-wine data with the same settings used for the red-wine file,
# so that `white_wines` exists before it is stacked.
# NOTE(review): the white-wine path is assumed to mirror the red-wine one — confirm.
white_wines = np.genfromtxt("./data/winequality-white.csv", delimiter=";", dtype=float, skip_header=1)
# Stack the white-wine rows underneath the red-wine rows; both arrays must
# have the same number of columns.
all_wines = np.vstack((wines_df, white_wines))
all_wines.shape

####  Read white_wine csv and concatenate it with wine data using concatenate function


In [None]:
# Load the white-wine data with the same settings used for the red-wine file,
# so that `white_wines` exists before it is concatenated.
# NOTE(review): the white-wine path is assumed to mirror the red-wine one — confirm.
white_wines = np.genfromtxt("./data/winequality-white.csv", delimiter=";", dtype=float, skip_header=1)
# Join along axis 0 (rows). The red-wine array is named `wines_df` in this
# notebook; the previous reference to `wines` was an undefined name.
concatenation = np.concatenate((wines_df, white_wines), axis=0)
concatenation.shape