# Introduction to Python
Run the hidden code cell below to import the data used in this course.

In [11]:
# Importing course packages; you can add more too!
import numpy as np
import math

# Import columns as numpy arrays
def _load_column(fname, column, **genfromtxt_kwargs):
    """Read a single column of a comma-separated file into a NumPy array.

    Parameters
    ----------
    fname : str
        Path of the CSV file to read.
    column : int
        Zero-based index of the column to extract.
    **genfromtxt_kwargs
        Extra keyword arguments passed through unchanged to
        ``np.genfromtxt`` (for example ``dtype=str`` or ``encoding="utf"``).

    Returns
    -------
    numpy.ndarray
        The values of the requested column, with the first line of the
        file (the header) skipped.
    """
    return np.genfromtxt(
        fname=fname,  # This is the filename
        delimiter=",",  # The file is comma-separated
        usecols=column,  # Read only the requested column
        skip_header=1,  # Skip the first line
        **genfromtxt_kwargs,
    )


BASEBALL_CSV = "baseball.csv"
SOCCER_CSV = "soccer.csv"

# Baseball columns: dtype=str marks the text column; the others use
# genfromtxt's default dtype.
baseball_names = _load_column(BASEBALL_CSV, 0, dtype=str)
baseball_heights = _load_column(BASEBALL_CSV, 3)
baseball_weights = _load_column(BASEBALL_CSV, 4)
baseball_ages = _load_column(BASEBALL_CSV, 5)

# Soccer columns: every read passes encoding="utf" (a Python codec alias
# for UTF-8), exactly as the original calls did.
soccer_names = _load_column(SOCCER_CSV, 1, dtype=str, encoding="utf")
soccer_ratings = _load_column(SOCCER_CSV, 2, encoding="utf")
soccer_positions = _load_column(SOCCER_CSV, 3, encoding="utf", dtype=str)
soccer_heights = _load_column(SOCCER_CSV, 4, encoding="utf")
soccer_shooting = _load_column(SOCCER_CSV, 8, encoding="utf")

## Explore Datasets
Use the arrays imported in the first cell to explore the data and practice your skills!
- Print out the weight of the first ten baseball players. 
- What is the median weight of all baseball players in the data? 
- Print out the names of all players with a height greater than 80 (heights are in inches). 
- Who is taller on average? Baseball players or soccer players? Keep in mind that baseball heights are stored in inches!
- The values in `soccer_shooting` are decimals. Convert them to whole numbers (e.g., 0.98 becomes 98).
- Do taller players get higher ratings? Calculate the correlation between `soccer_ratings` and `soccer_heights` to find out!
- What is the average rating for attacking players (`'A'`)?

### Print out the weight of the first ten baseball players.

In [14]:
# The slice [:10] selects the first ten entries (indices 0 through 9).
print(baseball_weights[:10])

[180. 215. 210. 210. 188. 176. 209. 200. 231. 180.]


### What is the median weight of all baseball players in the data?

In [25]:
# Copy the weights into their own array, then report the middle value.
np_baseballweight = np.array(baseball_weights)
median_weight = np.median(np_baseballweight)
print(f'median baseball weight: {median_weight}')

median baseball weight: 200.0


### Print out the names of all players with a height greater than 80 (heights are in inches).

In [27]:
# Put both sports on the same unit (inches) before comparing heights.
# NOTE(review): this assumes `soccer_heights` is stored in centimetres -
# confirm against soccer.csv.
CM_PER_INCH = 2.54  # exact by definition; replaces the rounded factor 0.394
MIN_HEIGHT_IN = 80  # height threshold from the exercise, in inches

np_soccer_in = np.array(soccer_heights) / CM_PER_INCH
np_baseball_in = np.array(baseball_heights)
np_soccer_name = np.array(soccer_names)
np_baseball_name = np.array(baseball_names)

# Boolean masks pick out the names whose matching height exceeds the threshold.
tall_soccer = np_soccer_name[np_soccer_in > MIN_HEIGHT_IN]
tall_baseball = np_baseball_name[np_baseball_in > MIN_HEIGHT_IN]

print('Soccer: ' + str(tall_soccer) + ', Baseball: ' + str(tall_baseball))

Soccer: ['Kristof Van Hout'], Baseball: ['Andrew_Sisco' 'Randy_Johnson' 'Mark_Hendrickson' 'Chris_Young'
 'Jon_Rauch']


### Who is taller on average? Baseball players or soccer players? Keep in mind that baseball heights are stored in inches.

In [24]:
# Mean height per sport, using the inch-based arrays built in the previous cell.
baseball_avg_height = np.average(np_baseball_in)
soccer_avg_height = np.average(np_soccer_in)

# Report each mean rounded to the nearest whole inch.
for sport, avg in (('Baseball', baseball_avg_height), ('Soccer', soccer_avg_height)):
    print(f'{sport} Average Height: {round(avg)}')

Baseball Average Height: 74
Soccer Average Height: 72


### The values in soccer_shooting are decimals. Convert them to whole numbers (e.g., 0.98 becomes 98).

In [35]:
# Scale the decimal scores by 100 and round the result. Multiplication alone
# can leave floating-point artefacts (0.29 * 100 evaluates to
# 28.999999999999996), so rounding is what makes the values truly whole.
# The array stays floating-point because missing entries are NaN, which an
# integer array cannot represent.
whole_soccer_shooting = np.round(soccer_shooting * 100)
print(soccer_shooting)
print(whole_soccer_shooting)

[ nan 0.65 0.54 ... 0.4  0.48 0.56]
[nan 65. 54. ... 40. 48. 56.]


### Do taller players get higher ratings? Calculate the correlation between soccer_ratings and soccer_heights to find out!

In [36]:
np_soccer_ratings = np.array(soccer_ratings)

# 2x2 correlation matrix; the off-diagonal entries are the correlation
# between height and rating.
height_rating_corr = np.corrcoef(np_soccer_in, np_soccer_ratings)
print(height_rating_corr)

[[ 1.         -0.00610858]
 [-0.00610858  1.        ]]


### What is the average rating for attacking players ('A')?

In [41]:
np_soccer_positions = np.array(soccer_positions)

# Boolean mask that is True wherever the position code is 'A'.
is_attacker = np_soccer_positions == 'A'
attacker_avg = np.average(np_soccer_ratings[is_attacker])

print(f'Average rating for attacking players (A): {round(attacker_avg, 2)}')

Average rating for attacking players (A): 67.26


# Python as a calculator
- Python can use addition, subtraction, multiplication and division.
- Some advanced operations are:
	- `**` Exponentiation
	- `%` Modulo - returns the remainder of a division (e.g., `7 % 3` is `1`).

# Variables and Types
Python can define variables with a specific, case-sensitive name.
- Call up values through variable name
- We can convert types using the functions below.
	- int()
	- float()
	- bool()
	- str()

In [13]:
# Floats: two measurements and a value derived from them.
height, weight = 1.79, 74.2
bmi = weight / (height ** 2)

# Strings: double and single quotes are interchangeable.
x = "body mass index"
y = 'this works too'

# Boolean
z = True

# Show the value stored in a variable.
print(bmi)

# Look up the type of each variable. Nothing is shown for these lines:
# a cell only displays its final bare expression or what is print()-ed.
type(bmi)
type(y)
type(z)

# The + operator joins strings end to end.
print('ab' + 'cd')

23.157829031553323
abcd


### Ways to calculate using variables

In [14]:
# Starting amount and the growth factor applied each period.
savings = 100
growth_multiplier = 1.1

# Compound the growth factor seven times, then scale the starting amount.
result = savings * (growth_multiplier ** 7)

# Show the final amount.
print(result)

194.87171000000012


# Python Lists
A list can be built using brackets: `[a,b,c]`
- Can contain any type, different types, and can also store lists within lists.

In [16]:
# A flat list of floats.
fam = [1.73, 1.68, 1.71, 1.89]
fam  # not displayed: only the last expression of a cell is echoed

# A list of lists: each inner list pairs a name with a number.
fam2 = [
    ["liz", 1.73],
    ["emma", 1.68],
    ["mom", 1.71],
    ["dad", 1.89],
]
fam2

[['liz', 1.73], ['emma', 1.68], ['mom', 1.71], ['dad', 1.89]]

## Subsetting Lists

In [19]:
fam = [1.73, 1.68, 1.71, 1.89]

# A slice stops just before its end index, so this keeps positions 0 and 1.
first_two = fam[0:2]
print(first_two)

[1.73, 1.68]


In [20]:
fam2 = [
    ["liz", 1.73],
    ["emma", 1.68],
    ["mom", 1.71],
    ["dad", 1.89],
]

# Select the fourth inner list first, then take its last element.
last_entry = fam2[3]
print(last_entry[-1])

1.89


## Manipulating Lists
- Change list elements
- Add list elements
- Remove list elements

- The `del` statement can delete items from a list (e.g., `del fam[2]`).

In [25]:
# Start from the original list.
fam = [1.73, 1.68, 1.71, 1.89]

# Change the second element from 1.68 to 1.70, then add a string by
# concatenating a one-element list.
fam[1] = 1.70
fam = fam + ['addition']

# Show the modified list.
print(fam)

[1.73, 1.7, 1.71, 1.89, 'addition']


## Can duplicate or replace a list by slicing or by using `list()`

In [26]:
fam = [1.73, 1.68, 1.71, 1.89]

# A full slice creates a new list object (list(fam) does the same), so the
# edits below leave `fam` untouched.
fam_copy = fam[:]
fam_copy[1] = 1.70
fam_copy = fam_copy + ['addition']

# The original is unchanged; only the copy shows the edits.
print(fam)
print(fam_copy)

[1.73, 1.68, 1.71, 1.89]
[1.73, 1.7, 1.71, 1.89, 'addition']


# Python Functions
- Piece of reusable code
- Solves a particular task
- Call function instead of writing code

In [3]:
fam = [1.73, 1.68, 1.71, 1.89]

# max() returns the largest element of the list.
tallest = max(fam)
# round() with a second argument keeps that many decimal places.
tallest_rounded = round(tallest, 1)

print(tallest)
print(tallest_rounded)

1.89
1.9


In [9]:
# The built-in `help()` function prints the documentation of another function.
help(sorted)

Help on built-in function sorted in module builtins:

sorted(iterable, /, *, key=None, reverse=False)
    Return a new list containing all items from the iterable in ascending order.
    
    A custom key function can be supplied to customize the sort order, and the
    reverse flag can be set to request the result in descending order.



In [8]:
# Two separate lists of numbers.
first = [11.25, 18.0, 20.0]
second = [10.75, 9.50]

# Concatenate them into one list, then sort a copy from largest to smallest.
full = first + second
full_sorted = sorted(full, reverse=True)

# Show the sorted result.
print(full_sorted)

[20.0, 18.0, 11.25, 10.75, 9.5]


# Methods 
Methods are functions that belong to objects
- str
	- capitalize()
	- replace()
- int
	- bit_length()
- float
	- is_integer()
	- conjugate()
- list
	- index()
	- count()
- Other methods are:
	- append() that adds an element to the list it is called on
	- remove() that removes the first element of a list that matches the input
	- reverse() that reverses the order of the elements in the list it is called on

In [13]:
fam = ['liz', 1.73, 'emma', 1.68, 'mom', 1.71, 'dad', 1.89]

# list.index(): position of the first element equal to 'mom'.
index_fam = fam.index('mom')

# list.count(): how many elements are equal to 1.73.
count_fam = fam.count(1.73)

# str.replace(): returns a new string with every 'z' swapped for 'sa'.
sister = fam[0]
sister_correct = sister.replace('z', 'sa')

# Print the three results, one per line.
for value in (index_fam, count_fam, sister_correct):
    print(value)

4
1
lisa


# Packages
- Directory of Python Scripts
- Each script = module
- Specify functions, methods, types
- Thousands of packages available
	- NumPy
	- Matplotlib
	- scikit-learn
- Install package
	- http://pip.readthedocs.org/en/stable/installing
	- download `get-pip.py`
	- Then go to Terminal and execute, `python3 get-pip.py`
	- `pip3 install numpy`
- Import package

In [19]:
# With a plain `import ... as np`, NumPy's functions are reached through the `np.` prefix.
import numpy as np
# This is the last expression in the cell, so the resulting array is displayed.
np.array([1, 2, 3])

array([1, 2, 3])

In [18]:
# Importing only the `array` function by name lets it be called without a package prefix.
from numpy import array
array([1, 2, 3])

array([1, 2, 3])

In [23]:
# The math package supplies the constant pi.
import math

# Radius of the circle.
r = 0.43

# Circumference (2 * pi * r) and area (pi * r squared).
C = 2 * math.pi * r
A = math.pi * (r ** 2)

# Report both values.
print(f"Circumference: {C}")
print(f"Area: {A}")

Circumference: 2.701769682087222
Area: 0.5808804816487527


In [24]:
# Only the radians() function is needed from the math package.
from math import radians

# Radius used for the arc-length calculation.
r = 192500

# Travel distance of the Moon over 12 degrees: radius times the angle in radians.
dist = r * radians(12)

# Show the distance.
print(dist)

40317.10572106901


# NumPy
- Numeric Python
- Alternative to Python List: NumPy Array
- Calculations over entire arrays
- Easy and Fast
- NumPy arrays: contain only one type

In [6]:
import numpy as np

# Plain Python lists of measurements.
height = [1.73, 1.68, 1.71, 1.89, 1.79]
weight = [65.4, 59.2, 63.6, 88.4, 68.7]

# Converting to arrays makes the arithmetic below element-wise.
np_height = np.array(height)
np_weight = np.array(weight)

# One BMI value per position: weight divided by height squared.
bmi = np_weight / np_height ** 2
print(f'BMI Calculations: {bmi}')

# A single element, a boolean mask, and the elements selected by that mask.
print(bmi[1])
is_above_23 = bmi > 23
print(is_above_23)
print(bmi[is_above_23])

BMI Calculations: [21.85171573 20.97505669 21.75028214 24.7473475  21.44127836]
20.97505668934241
[False False False  True False]
[24.7473475]


# 2D NumPy Arrays
A 2D NumPy array is built from a list of equal-length lists: each inner list becomes a row, and all values share one type.

In [10]:
import numpy as np

# 2D array example: each inner list becomes one row.
first_row = [1.73, 1.68, 1.71, 1.89, 1.79]
second_row = [65.4, 59.2, 63.6, 88.4, 68.7]
np_2d = np.array([first_row, second_row])

print(np_2d)
# (number of rows, number of columns)
print(np_2d.shape)
# Row 0, column 2.
print(np_2d[0, 2])
# Every row, columns 1 and 2.
print(np_2d[:, 1:3])

[[ 1.73  1.68  1.71  1.89  1.79]
 [65.4  59.2  63.6  88.4  68.7 ]]
(2, 5)
1.71
[[ 1.68  1.71]
 [59.2  63.6 ]]


# NumPy Basic Statistics
### Data Analysis 
- `np.mean(array_name[])` gives average of selected subset
- `np.median(array_name[])` gives median of selected subset
- `np.corrcoef(array_name1, array_name2)` gives the correlation coefficients, showing how the two arrays are correlated
- `np.std(array_name[])` gives standard deviation
- `np.random.normal(mean, stddev, samples)`
	- distribution mean
	- distribution standard deviation
	- number of samples
- `np.column_stack((array_name1, array_name2))` combines two arrays into one