# NUMPY

## I. The Native Ways

### 1. Import data 

In [7]:
import csv
# Read the whole CSV into a list of rows (each row is a list of strings).
# The `with` block closes the file handle as soon as reading is finished, and
# newline="" lets the csv module handle line endings itself, as its docs advise.
with open("world_alcohol.csv", encoding="utf8", newline="") as csv_file:
    world_alcohol = list(csv.reader(csv_file))

# Show the header row and the first data row.
print(world_alcohol[0:2])


[['Year', 'WHO region', 'Country', 'Beverage Types', 'Display Value'], ['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0']]


### 2. Find average year 

Extract years:

In [96]:
# First field of every row, in file order.
years = [record[0] for record in world_alcohol]
print(years[:5])

# Drop the first entry -- the 'Year' header label when the rows come
# straight from the CSV file.
years = years[1:]
print(years[:5])

['1986', '1986', '1985', '1986', '1987']
['1986', '1985', '1986', '1987', '1987']


Convert years to float:

In [9]:
# Replace each year string with its float value, position by position,
# so the existing `years` list is updated in place.
for position in range(len(years)):
    years[position] = float(years[position])

print(years)

[1986.0, 1986.0, 1985.0, 1986.0, 1987.0, 1987.0, 1987.0, 1985.0, 1986.0, 1984.0, 1987.0, 1989.0, 1985.0, 1984.0, 1985.0, 1987.0, 1984.0, 1989.0, 1984.0, 1984.0, 1986.0, 1989.0, 1984.0, 1984.0, 1985.0, 1984.0, 1985.0, 1984.0, 1987.0, 1986.0, 1986.0, 1986.0, 1989.0, 1985.0, 1986.0, 1985.0, 1987.0, 1986.0, 1987.0, 1987.0, 1987.0, 1986.0, 1984.0, 1984.0, 1985.0, 1989.0, 1987.0, 1986.0, 1987.0, 1986.0, 1985.0, 1987.0, 1986.0, 1984.0, 1984.0, 1989.0, 1987.0, 1989.0, 1984.0, 1989.0, 1987.0, 1984.0, 1987.0, 1985.0, 1989.0, 1989.0, 1987.0, 1989.0, 1989.0, 1986.0, 1986.0, 1985.0, 1987.0, 1986.0, 1986.0, 1989.0, 1985.0, 1985.0, 1989.0, 1989.0, 1985.0, 1985.0, 1987.0, 1986.0, 1986.0, 1985.0, 1986.0, 1989.0, 1987.0, 1986.0, 1989.0, 1989.0, 1986.0, 1987.0, 1985.0, 1984.0, 1985.0, 1984.0, 1984.0, 1985.0, 1987.0, 1985.0, 1989.0, 1989.0, 1986.0, 1986.0, 1989.0, 1986.0, 1987.0, 1984.0, 1985.0, 1986.0, 1987.0, 1987.0, 1989.0, 1986.0, 1987.0, 1984.0, 1985.0, 1985.0, 1987.0, 1985.0, 1987.0, 1989.0, 1985.0,

Find average year:

(Sum all the years, then divide the total by the number of years.)

In [10]:
# Arithmetic mean: the total of all years over how many years there are.
year_total = sum(years)
year_count = len(years)
avg_year = year_total / year_count

print(avg_year)

1986.2081670248695


## II. The Numpy Way

### 1. Import data with numpy as an array
**numpy.genfromtxt()**

In [11]:
import numpy
# No dtype is given here, so genfromtxt applies its default float
# conversion to every field of the comma-separated file.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.csv",
    delimiter=",",
)

In [12]:
# Display the Python type of the object returned by genfromtxt.
type(world_alcohol)

numpy.ndarray

### 2. Create vectors and matrices with numpy
**numpy.array()**

In [13]:
# A 1-D array (vector) is built from a flat list.
vector = numpy.array([10, 20, 30])

# A 2-D array (matrix) is built from a list of equal-length rows.
matrix = numpy.array(
    [
        [5, 10, 15],
        [20, 25, 30],
        [35, 40, 45],
    ]
)

# One array per line, vector first.
print(vector, matrix, sep="\n")

[10 20 30]
[[ 5 10 15]
 [20 25 30]
 [35 40 45]]


### 3. Get dimensions of an array
**ndarray.shape**

In [14]:
# .shape is a tuple holding the length of each axis of the array.
vector_shape, matrix_shape = vector.shape, matrix.shape

# Show both shapes as the cell's result.
(vector_shape, matrix_shape)

((3,), (3, 3))

### 4. Display variable type in numpy
**ndarray.dtype**

In [15]:
# .dtype reports the single element type shared by every value in the array.
world_alcohol_dtype = world_alcohol.dtype
world_alcohol_dtype

dtype('float64')

### 5. Read data more accurately

- Numpy read the data incorrectly because we did not pass enough arguments to `genfromtxt`.
- Every value in a numpy array must be of the same type.
- Numpy guesses the type of the variables in a dataset while importing, and marks everything else (if present) as `nan` (not a number).

In [16]:
# Print the array produced by the default read to inspect its values.
print(world_alcohol)

[[             nan              nan              nan              nan
               nan]
 [  1.98600000e+03              nan              nan              nan
    0.00000000e+00]
 [  1.98600000e+03              nan              nan              nan
    5.00000000e-01]
 ..., 
 [  1.98600000e+03              nan              nan              nan
    2.54000000e+00]
 [  1.98700000e+03              nan              nan              nan
    0.00000000e+00]
 [  1.98600000e+03              nan              nan              nan
    5.15000000e+00]]


In [17]:
# dtype="U75" reads every field as a Unicode string of at most 75 characters;
# skip_header=1 leaves the header line out of the array.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.csv",
    delimiter=",",
    dtype="U75",
    skip_header=1,
)

print(world_alcohol)

[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ..., 
 ['1986' 'Europe' 'Switzerland' 'Spirits' '2.54']
 ['1987' 'Western Pacific' 'Papua New Guinea' 'Other' '0']
 ['1986' 'Africa' 'Swaziland' 'Other' '5.15']]


### 6. Indexing in numpy Matrices

Uruguay's alcohol consumption of other beverages in 1986:

In [18]:
# Row index 1 (2nd row), column index 4 (5th column): Uruguay's display value.
uruguay_other_1986 = world_alcohol[1, 4]

# Row index 2 (3rd row), column index 2 (3rd column): the country field.
third_country = world_alcohol[2, 2]

print(uruguay_other_1986, third_country)

0.5 Cte d'Ivoire


### 7. Slicing

Extract the countries column and the alcohol consumption column:

In [30]:
# A bare ":" selects every row; the second index picks a single column.
# Column index 2 holds the country names, column index 4 the display values.
countries = world_alcohol[:, 2]
alcohol_consumption = world_alcohol[:, 4]

# One column per line, countries first.
print(countries, alcohol_consumption, sep="\n")

['Viet Nam' 'Uruguay' "Cte d'Ivoire" ..., 'Switzerland' 'Papua New Guinea'
 'Swaziland']
['0' '0.5' '1.62' ..., '2.54' '0' '5.15']


First n columns or rows:

In [105]:
# Every row, columns 0 and 1 (the stop index 2 is excluded).
first_two_columns = world_alcohol[:, :2]

# Rows 0-9 of column 0 only.
first_ten_years = world_alcohol[:10, 0]

# Rows 0-9, every column.
first_ten_rows = world_alcohol[:10, :]

# A blank line separates the three results.
print(first_two_columns, first_ten_years, first_ten_rows, sep="\n\n")


[['1986' 'Western Pacific']
 ['1986' 'Americas']
 ['1985' 'Africa']
 ..., 
 ['1986' 'Europe']
 ['1987' 'Western Pacific']
 ['1986' 'Africa']]

['1986' '1986' '1985' '1986' '1987' '1987' '1987' '1985' '1986' '1984']

[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ['1986' 'Americas' 'Colombia' 'Beer' '4.27']
 ['1987' 'Americas' 'Saint Kitts and Nevis' 'Beer' '1.98']
 ['1987' 'Americas' 'Guatemala' 'Other' '0']
 ['1987' 'Africa' 'Mauritius' 'Wine' '0.13']
 ['1985' 'Africa' 'Angola' 'Spirits' '0.39']
 ['1986' 'Americas' 'Antigua and Barbuda' 'Spirits' '1.55']
 ['1984' 'Africa' 'Nigeria' 'Other' '6.1']]


Double slicing:

In [112]:
# Rows 0-19 of columns 1 and 2 (the stop index 3 is excluded), i.e. the
# region and country fields of the first twenty rows.
first_twenty_regions = world_alcohol[:20, 1:3]
first_twenty_regions

array([['Western Pacific', 'Viet Nam'],
       ['Americas', 'Uruguay'],
       ['Africa', "Cte d'Ivoire"],
       ['Americas', 'Colombia'],
       ['Americas', 'Saint Kitts and Nevis'],
       ['Americas', 'Guatemala'],
       ['Africa', 'Mauritius'],
       ['Africa', 'Angola'],
       ['Americas', 'Antigua and Barbuda'],
       ['Africa', 'Nigeria'],
       ['Africa', 'Botswana'],
       ['Americas', 'Guatemala'],
       ['Western Pacific', "Lao People's Democratic Republic"],
       ['Eastern Mediterranean', 'Afghanistan'],
       ['Western Pacific', 'Viet Nam'],
       ['Africa', 'Guinea-Bissau'],
       ['Americas', 'Costa Rica'],
       ['Africa', 'Seychelles'],
       ['Europe', 'Norway'],
       ['Africa', 'Kenya']], 
      dtype='<U75')