# Pandas and NumPy Fundamentals

### NumPy

- Import numpy and assign to the alias np.

In [1]:
import numpy as np

- Create a vector from the list [10, 20, 30].
    - Assign the result to the variable vector.

In [2]:
# One-dimensional NumPy array built from a plain Python list.
vector = np.array([10, 20, 30])

- Create a matrix from the list of lists [[5, 10, 15], [20, 25, 30], [35, 40, 45]].
    - Assign the result to the variable matrix.

In [3]:
# Two-dimensional array: each inner list becomes one row of the matrix.
matrix = np.array(
    [
        [5, 10, 15],
        [20, 25, 30],
        [35, 40, 45],
    ]
)

- Assign the shape of vector to vector_shape.

In [4]:
# .shape is a tuple with one entry per dimension; for the 1-D vector it is (3,).
vector_shape = vector.shape

- Assign the shape of matrix to matrix_shape.

In [5]:
# For the 2-D matrix the shape tuple has two entries: (3, 3).
matrix_shape = matrix.shape

- Display both vector_shape and matrix_shape using the print() function.

In [6]:
# Show both shape tuples, one per line.
print(vector_shape, matrix_shape, sep="\n")

(3,)
(3, 3)


#### The Dataset

Year -- the year the data in the row is for.<br>
WHO Region -- the region in which the country is located.<br>
Country -- the country the data is for.<br>
Beverage Types -- the type of beverage the data is for.<br>
Display Value -- the number of liters, on average, of the beverage type a citizen of the country drank in the given year.<br>

- Use the numpy.genfromtxt() function to read "world_alcohol.csv" into a NumPy array named world_alcohol.

In [7]:
# No dtype is passed here, so genfromtxt uses its default and the resulting
# array reports float64 (see the dtype printed a few cells below). Text
# columns cannot be kept as strings in a float array, which is why the file
# is read again later with a string dtype.
world_alcohol = np.genfromtxt("world_alcohol.csv", delimiter = ",")

- Use the type() and print() functions to display the type for world_alcohol.

In [8]:
# type() reports the container class (numpy.ndarray), not the element type.
print(type(world_alcohol))

<class 'numpy.ndarray'>


- Assign the data type of world_alcohol to the variable world_alcohol_dtype.

In [9]:
# .dtype is the element type of the array, as opposed to type(), which
# describes the array object itself.
world_alcohol_dtype = world_alcohol.dtype

- Display world_alcohol_dtype using the print() function.

In [10]:
# Prints float64 because the file was loaded without an explicit dtype.
print(world_alcohol_dtype)

float64


- When reading in world_alcohol.csv using numpy.genfromtxt():
    - Use the "U75" data type
    - Skip the first line in the dataset
    - Use the comma delimiter.
- Assign the result to world_alcohol.

In [11]:
# Read every field as a Unicode string of up to 75 characters ("U75") and
# skip the header line so that only data rows end up in the array.
world_alcohol = np.genfromtxt(
    "world_alcohol.csv",
    delimiter=",",
    dtype="U75",
    skip_header=1,
)

- Use the print() function to display world_alcohol.

In [12]:
# For a large array, print() shows only the first and last rows and elides
# the middle with "...".
print(world_alcohol)

[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ...
 ['1986' 'Europe' 'Switzerland' 'Spirits' '2.54']
 ['1987' 'Western Pacific' 'Papua New Guinea' 'Other' '0']
 ['1986' 'Africa' 'Swaziland' 'Other' '5.15']]


- Assign the amount of alcohol Uruguayans drank in other beverages per capita in 1986 to uruguay_other_1986. This is the second row and fifth column.

In [13]:
# Row index 1 (second row) and column index 4 (fifth column), selected in a
# single indexing operation rather than by chaining [1][4], which first
# builds an intermediate row and then indexes into it.
uruguay_other_1986 = world_alcohol[1, 4]

uruguay_other_1986

'0.5'

- Assign the country in the third row to third_country. Country is the third column.

In [14]:
# Row index 2 (third row) and column index 2 (third column, Country),
# selected in a single indexing operation rather than by chaining [2][2].
third_country = world_alcohol[2, 2]

third_country

"Cte d'Ivoire"

- Assign the whole third column from world_alcohol to the variable countries.

In [15]:
# ":" keeps every row; 2 picks the column at index 2 (the third column, Country).
countries = world_alcohol[:,2]
countries

array(['Viet Nam', 'Uruguay', "Cte d'Ivoire", ..., 'Switzerland',
       'Papua New Guinea', 'Swaziland'], dtype='<U75')

- Assign the whole fifth column from world_alcohol to the variable alcohol_consumption.

In [16]:
# Every row of the column at index 4 (the fifth column, Display Value).
# The values are still strings at this point (dtype U75).
alcohol_consumption = world_alcohol[:,4]
alcohol_consumption

array(['0', '0.5', '1.62', ..., '2.54', '0', '5.15'], dtype='<U75')

- Assign all the rows and the first 2 columns of world_alcohol to first_two_columns.

In [17]:
# ":2" is a half-open slice: it selects the columns at index 0 and 1 only.
first_two_columns = world_alcohol[:,:2]
first_two_columns

array([['1986', 'Western Pacific'],
       ['1986', 'Americas'],
       ['1985', 'Africa'],
       ...,
       ['1986', 'Europe'],
       ['1987', 'Western Pacific'],
       ['1986', 'Africa']], dtype='<U75')

- Assign the first 10 rows and the first column of world_alcohol to first_ten_years.

In [18]:
# Rows at index 0-9 of column 0 (Year); a single integer column index gives
# a one-dimensional result.
first_ten_years = world_alcohol[:10,0]
first_ten_years

array(['1986', '1986', '1985', '1986', '1987', '1987', '1987', '1985',
       '1986', '1984'], dtype='<U75')

- Assign the first 10 rows and all of the columns of world_alcohol to first_ten_rows.

In [19]:
# Slicing only the first axis keeps all columns of the selected rows.
first_ten_rows = world_alcohol[:10]
first_ten_rows

array([['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
       ['1986', 'Americas', 'Colombia', 'Beer', '4.27'],
       ['1987', 'Americas', 'Saint Kitts and Nevis', 'Beer', '1.98'],
       ['1987', 'Americas', 'Guatemala', 'Other', '0'],
       ['1987', 'Africa', 'Mauritius', 'Wine', '0.13'],
       ['1985', 'Africa', 'Angola', 'Spirits', '0.39'],
       ['1986', 'Americas', 'Antigua and Barbuda', 'Spirits', '1.55'],
       ['1984', 'Africa', 'Nigeria', 'Other', '6.1']], dtype='<U75')

- Assign the first 20 rows of the columns at index 1 and 2 of world_alcohol to first_twenty_regions.

In [20]:
# Rows 0-19 and columns 1-2: the stop value of each slice (20 and 3) is excluded.
first_twenty_regions = world_alcohol[:20,1:3]
first_twenty_regions

array([['Western Pacific', 'Viet Nam'],
       ['Americas', 'Uruguay'],
       ['Africa', "Cte d'Ivoire"],
       ['Americas', 'Colombia'],
       ['Americas', 'Saint Kitts and Nevis'],
       ['Americas', 'Guatemala'],
       ['Africa', 'Mauritius'],
       ['Africa', 'Angola'],
       ['Americas', 'Antigua and Barbuda'],
       ['Africa', 'Nigeria'],
       ['Africa', 'Botswana'],
       ['Americas', 'Guatemala'],
       ['Western Pacific', "Lao People's Democratic Republic"],
       ['Eastern Mediterranean', 'Afghanistan'],
       ['Western Pacific', 'Viet Nam'],
       ['Africa', 'Guinea-Bissau'],
       ['Americas', 'Costa Rica'],
       ['Africa', 'Seychelles'],
       ['Europe', 'Norway'],
       ['Africa', 'Kenya']], dtype='<U75')

- Extract the third column in world_alcohol, and compare it to the string Canada. Assign the result to countries_canada.

In [21]:
# Comparing a column with a single string is element-wise: the result is a
# boolean array with one True/False entry per row.
countries_canada = world_alcohol[:,2] == "Canada"
countries_canada

array([False, False, False, ..., False, False, False])

- Extract the first column in world_alcohol, and compare it to the string 1984. Assign the result to years_1984.

In [22]:
# The array holds strings (dtype U75), so the year is compared against the
# string "1984" rather than the integer 1984.
years_1984 = world_alcohol[:,0] == "1984"
years_1984

array([False, False, False, ..., False, False, False])

- Compare the third column of world_alcohol to the string Algeria.
- Assign the result to country_is_algeria.

In [23]:
# Boolean mask: True where the Country column equals "Algeria".
country_is_algeria = world_alcohol[:,2] == "Algeria"
country_is_algeria

array([False, False, False, ..., False, False, False])

- Select only the rows in world_alcohol where country_is_algeria is True.
- Assign the result to country_algeria.

In [24]:
# Indexing with a boolean mask keeps only the rows whose mask entry is True.
country_algeria = world_alcohol[country_is_algeria]
country_algeria

array([['1984', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1987', 'Africa', 'Algeria', 'Beer', '0.17'],
       ['1987', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1986', 'Africa', 'Algeria', 'Wine', '0.1'],
       ['1984', 'Africa', 'Algeria', 'Other', '0'],
       ['1989', 'Africa', 'Algeria', 'Beer', '0.16'],
       ['1989', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1989', 'Africa', 'Algeria', 'Wine', '0.23'],
       ['1986', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1984', 'Africa', 'Algeria', 'Wine', '0.12'],
       ['1985', 'Africa', 'Algeria', 'Beer', '0.19'],
       ['1985', 'Africa', 'Algeria', 'Other', '0'],
       ['1986', 'Africa', 'Algeria', 'Beer', '0.18'],
       ['1985', 'Africa', 'Algeria', 'Wine', '0.11'],
       ['1986', 'Africa', 'Algeria', 'Other', '0'],
       ['1989', 'Africa', 'Algeria', 'Other', '0'],
       ['1987', 'Africa', 'Algeria', 'Other', '0'],
       ['1984', 'Africa', 'Algeria', 'Beer', '0.2'],
       ['1985', 'Africa', 'A

- Build a single Boolean array from a comparison with multiple conditions:
    - Compare the first column of world_alcohol to the string 1986.
    - Compare the third column of world_alcohol to the string Algeria.
    - Enclose each condition in parentheses, and join the conditions with &.
    - Assign the result to is_algeria_and_1986.

In [25]:
# Element-wise AND of two masks. Each comparison is wrapped in its own
# parentheses before the masks are combined with "&".
is_algeria_and_1986 = (
    (world_alcohol[:, 0] == "1986")
    & (world_alcohol[:, 2] == "Algeria")
)
is_algeria_and_1986

array([False, False, False, ..., False, False, False])

- Use is_algeria_and_1986 to select rows from world_alcohol.
- Assign the rows that is_algeria_and_1986 selects to rows_with_algeria_and_1986.

In [26]:
# Use the combined mask as a row selector: only rows that satisfy both
# conditions are kept.
rows_with_algeria_and_1986 = world_alcohol[is_algeria_and_1986]
rows_with_algeria_and_1986

array([['1986', 'Africa', 'Algeria', 'Wine', '0.1'],
       ['1986', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1986', 'Africa', 'Algeria', 'Beer', '0.18'],
       ['1986', 'Africa', 'Algeria', 'Other', '0']], dtype='<U75')

- Replace all instances of the string 1986 in the first column of world_alcohol with the string 2014.

In [27]:
# Build the mask first, then assign through it. The assignment writes into
# world_alcohol itself, so every later cell sees "2014" instead of "1986"
# until the file is read again.
first_column_1986 = world_alcohol[:,0] == "1986"
world_alcohol[first_column_1986,0] = "2014"
world_alcohol

array([['2014', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['2014', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
       ...,
       ['2014', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['2014', 'Africa', 'Swaziland', 'Other', '5.15']], dtype='<U75')

- Replace all instances of the string Wine in the fourth column of world_alcohol with the string Grog.

In [28]:
# Same mask-then-assign pattern on the column at index 3 (Beverage Types).
# This also modifies world_alcohol in place.
fourth_column_wine = world_alcohol[:,3] == "Wine"
world_alcohol[fourth_column_wine,3] = "Grog"
world_alcohol

array([['2014', 'Western Pacific', 'Viet Nam', 'Grog', '0'],
       ['2014', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Grog', '1.62'],
       ...,
       ['2014', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['2014', 'Africa', 'Swaziland', 'Other', '5.15']], dtype='<U75')

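- Reload world_alcohol.csv first, because the two previous cells overwrote values in world_alcohol (1986 became 2014 and Wine became Grog).
- Compare all the items in the fifth column of world_alcohol with an empty string ''. Assign the result to is_value_empty.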

In [29]:
# Re-read the file to discard the in-place "2014" and "Grog" replacements
# made in the two preceding cells.
world_alcohol = np.genfromtxt(
    "world_alcohol.csv",
    delimiter=",",
    dtype="U75",
    skip_header=1,
)

In [30]:
# True wherever the entry in the column at index 4 (Display Value) is the
# empty string.
is_value_empty = world_alcohol[:, 4] == ""
is_value_empty

array([False, False, False, ..., False, False, False])

- Select all the values in the fifth column of world_alcohol where is_value_empty is True, and replace them with the string 0.

In [31]:
# Overwrite the empty entries in place with "0" so the column can later be
# converted with astype(float).
world_alcohol[is_value_empty, 4] = "0"
world_alcohol

array([['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
       ...,
       ['1986', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['1986', 'Africa', 'Swaziland', 'Other', '5.15']], dtype='<U75')

- Extract the fifth column from world_alcohol, and assign it to the variable alcohol_consumption.

In [32]:
# Extract the Display Value column again, now that empty strings have been
# replaced. This rebinds alcohol_consumption, first assigned in an earlier cell.
alcohol_consumption = world_alcohol[:,4]
alcohol_consumption

array(['0', '0.5', '1.62', ..., '2.54', '0', '5.15'], dtype='<U75')

- Use the astype() method to convert alcohol_consumption to the float data type.

In [33]:
# Convert the string values to floats. The name is rebound to the converted
# array, whose dtype is shown on the next line.
alcohol_consumption = alcohol_consumption.astype(float)
alcohol_consumption.dtype

dtype('float64')

- Use the sum() method to calculate the sum of the values in alcohol_consumption. Assign the result to total_alcohol.

In [34]:
# Sum over every row of the dataset (all countries, years and beverage types).
total_alcohol = alcohol_consumption.sum()
total_alcohol

3908.96

- Use the mean() method to calculate the average of the values in alcohol_consumption. Assign the result to average_alcohol.

In [35]:
# Arithmetic mean over every row of the dataset.
average_alcohol = alcohol_consumption.mean()
average_alcohol

1.2001719373656738

- Create a matrix called canada_1986 that only contains the rows in world_alcohol where the first column is the string 1986 and the third column is the string Canada.

In [36]:
# Keep only the rows whose Year is "1986" and whose Country is "Canada";
# the combined boolean mask is built directly inside the index brackets.
canada_1986 = world_alcohol[
    (world_alcohol[:, 0] == "1986") & (world_alcohol[:, 2] == "Canada")
]
canada_1986

array([['1986', 'Americas', 'Canada', 'Other', '0'],
       ['1986', 'Americas', 'Canada', 'Spirits', '3.11'],
       ['1986', 'Americas', 'Canada', 'Beer', '4.87'],
       ['1986', 'Americas', 'Canada', 'Wine', '1.33']], dtype='<U75')

- Extract the fifth column of canada_1986, replace any empty strings ('') with the string 0, and convert the column to the float data type. Assign the result to canada_alcohol.

In [37]:
# Work on a copy of the Display Value column: a plain slice shares memory
# with canada_1986, so the "" -> "0" assignment below would otherwise
# silently alter canada_1986 as well.
canada_alcohol = canada_1986[:, 4].copy()
empty_strings = canada_alcohol == ""
canada_alcohol[empty_strings] = "0"
# Strings -> floats so the values can be summed.
canada_alcohol = canada_alcohol.astype(float)
canada_alcohol

array([0.  , 3.11, 4.87, 1.33])

- Compute the sum of canada_alcohol. Assign the result to total_canadian_drinking.

In [39]:
# Sum of the four Canadian 1986 values (one per beverage type).
total_canadian_drinking = canada_alcohol.sum()
total_canadian_drinking

9.31

In [42]:
# Unique country names in order of first appearance. dict.fromkeys() keeps
# the first occurrence of each key and preserves insertion order, so it
# replaces the list-membership test that rescanned the whole list for every
# row of the dataset.
# Note: this rebinds `countries`, which an earlier cell assigned to the full
# (non-unique) Country column.
countries = list(dict.fromkeys(world_alcohol[:, 2]))

countries

['Viet Nam',
 'Uruguay',
 "Cte d'Ivoire",
 'Colombia',
 'Saint Kitts and Nevis',
 'Guatemala',
 'Mauritius',
 'Angola',
 'Antigua and Barbuda',
 'Nigeria',
 'Botswana',
 "Lao People's Democratic Republic",
 'Afghanistan',
 'Guinea-Bissau',
 'Costa Rica',
 'Seychelles',
 'Norway',
 'Kenya',
 'Myanmar',
 'Romania',
 'Turkey',
 'Comoros',
 'Tunisia',
 'United Kingdom of Great Britain and Northern Ireland',
 'Bahrain',
 'Italy',
 'Sierra Leone',
 'Micronesia (Federated States of)',
 'Mauritania',
 'Russian Federation',
 'Egypt',
 'Sweden',
 'Qatar',
 'Burkina Faso',
 'Austria',
 'Czech Republic',
 'Ukraine',
 'China',
 'Lithuania',
 'Zimbabwe',
 'Trinidad and Tobago',
 'Mexico',
 'Nicaragua',
 'Malta',
 'Switzerland',
 'Finland',
 'Saudi Arabia',
 'Kuwait',
 'El Salvador',
 'Suriname',
 'Croatia',
 'Somalia',
 'Syrian Arab Republic',
 'Iran (Islamic Republic of)',
 'Papua New Guinea',
 'Libya',
 'Bolivia (Plurinational State of)',
 'Iraq',
 'Namibia',
 'Uganda',
 'Togo',
 'Madagascar',
 'M

- Find the total consumption for each country in countries for the year 1989.

In [44]:
# Maps each country name to the sum of its Display Value entries for 1989.
totals = {}
# Despite its name, `year` holds every row whose Year column is "1989".
year = world_alcohol[world_alcohol[:,0] == "1989"]

for country in countries:
    # Rows of the 1989 subset that belong to this country.
    country_consumption = year[year[:,2] == country]
    alcohol_column = country_consumption[:,4]
    # Empty strings are replaced with "0" before the float conversion.
    alcohol_column[alcohol_column == ""] = "0"
    alcohol_column = alcohol_column.astype(float)
    totals[country] = alcohol_column.sum()
    
totals

{'Viet Nam': 0.16,
 'Uruguay': 7.4399999999999995,
 "Cte d'Ivoire": 2.2,
 'Colombia': 6.960000000000001,
 'Saint Kitts and Nevis': 4.65,
 'Guatemala': 2.47,
 'Mauritius': 3.54,
 'Angola': 2.28,
 'Antigua and Barbuda': 4.69,
 'Nigeria': 6.74,
 'Botswana': 4.63,
 "Lao People's Democratic Republic": 5.95,
 'Afghanistan': 0.0,
 'Guinea-Bissau': 2.67,
 'Costa Rica': 5.3999999999999995,
 'Seychelles': 3.3000000000000003,
 'Norway': 5.08,
 'Kenya': 2.82,
 'Myanmar': 0.16,
 'Romania': 8.41,
 'Turkey': 0.72,
 'Comoros': 0.10999999999999999,
 'Tunisia': 0.95,
 'United Kingdom of Great Britain and Northern Ireland': 9.99,
 'Bahrain': 4.89,
 'Italy': 10.899999999999999,
 'Sierra Leone': 4.380000000000001,
 'Micronesia (Federated States of)': 0.0,
 'Mauritania': 0.02,
 'Russian Federation': 5.35,
 'Egypt': 0.42000000000000004,
 'Sweden': 7.47,
 'Qatar': 1.4500000000000002,
 'Burkina Faso': 3.99,
 'Austria': 13.9,
 'Czech Republic': 13.009999999999998,
 'Ukraine': 5.32,
 'China': 3.33,
 'Lithuania':

- Find the country with the highest total alcohol consumption in 1989, i.e. the key with the largest value in totals.

In [47]:
# Running maximum over the totals dictionary. highest_key stays None unless
# some total is strictly greater than the starting value of 0.
highest_value = 0
highest_key = None

# The strict comparison means the first country encountered wins a tie.
for country in totals:
    if highest_value < totals[country]:
        highest_key, highest_value = country, totals[country]

highest_key

'Hungary'