# Importing modules

In [None]:
# In general, we will import whole modules, with an optional alias.
# The alias (`np`, `pd`) is the short name used to reach everything inside the module.
import numpy as np
import pandas as pd

# Then use functions by referencing the module: `module.function(...)`.
print(np.sign(0))

# Although we can import specific functions.
# (Left commented out: the next cell shows why this style can cause name clashes.)
#from numpy import sign

#print(sign(42)) # Useful when we use the same function many times (less typing).

### `import x` is preferred over `from x import y`

In [None]:
# What happens if someone defines a function with the same name?
def sign(number):
    """Return the placeholder zodiac entry stored at position `number`.

    The name `sign` is reused on purpose: a definition like this one would
    clash with a `sign` brought in through `from numpy import sign`.
    Negative positions count from the end of the list.
    """
    zodiac_signs = ['a', 'b', 'c']
    chosen = zodiac_signs[number]
    return chosen


print(sign(-2))

# NumPy arrays
Efficient, matrix-like structures. Each array contains a single data type.

In [None]:
# Simulation settings: number of patients, plus the mean and standard
# deviation used to generate each measurement.
n = 5
mean_weight = 70 # kg
sd_weight = 10
mean_height = 1.70 # m
sd_height = 0.15

# randn() draws from a standard normal (mean 0, sd 1); scaling by the sd and
# shifting by the mean gives values with the desired distribution.
weights = mean_weight + sd_weight * np.random.randn(n)
heights = mean_height + sd_height * np.random.randn(n)

# Body mass index = weight / height squared, computed for all patients at once.
bmis = weights / np.square(heights)

# Round only for display; the arrays keep their full precision.
print('Weights:', weights.round())
print('Heights:', heights.round(2))
print('BMIs:', bmis.round(1))

## 2D arrays

In [None]:
# Build a 2D array from a list of 1D arrays: each array becomes one row,
# so row 0 holds the weights, row 1 the heights and row 2 the BMIs.
data = np.array([weights, heights, bmis])
print(data)

In [None]:
# `shape` is an attribute: just the name for a variable stored inside an object,
# so it is read without parentheses. For a 2D array it is (rows, columns).
data.shape

In [None]:
# Selecting values.

# data[row, column], both counted from 0.
# A bare `:` means "everything along this axis"; `:3` means "up to, but not including, 3".
print(data[0, 3]) # Weight of the fourth patient.
print(data[1, 0]) # Height of the first patient.
print(data[2, 1]) # BMI of the second patient.
print(data[:, :3]) # All data from the first three patients.

print(data[0, :]) # First row.
print(data[:, 0]) # First column.

# Elementwise comparison: every entry is True because both notations pick the same row.
data[0, :] == data[0] # Rows can be selected with simplified notation.

In [None]:
# Slicing as views, not as copies.
# `sub` looks at the same memory as `data`, so a change made to `data`
# AFTER taking the slice still shows up when printing `sub`.
sub = data[:, :3]
data[0, 0] = 1
print(sub)

# But this doesn't happen with a single entry.
# Indexing one element returns the value itself, so `sub` keeps the old
# value even though `data` is modified afterwards.
sub = data[0, 0]
data[0, 0] = 2
print(sub)

In [None]:
# Looping over values, by row.
# np.nditer visits the elements of the selection one at a time,
# instead of handing back a whole row per iteration.
for value in np.nditer(data[:, :3]):
    print(value)

# Pandas DataFrames
Data is usually organized with **variables in columns** and **observations in rows**. A pandas **DataFrame** can store different data types.

In [None]:
# We can create a DataFrame from a dict: each key becomes a column name
# and each list holds that column's values, one per person.
people_dict = dict(
    name=['Alice', 'Bob', 'Carol'],
    fav_breakfast=['spam', 'eggs', 'ice cream'],
    age=[24, 35, 46],
    knows_python=[True, True, False],
)

people_df = pd.DataFrame(data=people_dict)
people_df

In [None]:
# From a nested list: every inner list is one row (one person), so the
# column names have to be supplied separately.
people_list = [
    ['Alice', 'spam', 24, True],
    ['Bob', 'eggs', 35, True],
    ['Carol', 'ice cream', 46, False],
]

people_df = pd.DataFrame(
    data=people_list,
    columns=['name', 'fav_breakfast', 'age', 'knows_python'],
)
people_df

In [None]:
# And from a NumPy array, with a single data type.
# people_list mixes strings, integers and booleans; to fit them in one array
# NumPy converts every value to a string (note the quotes in the printed array),
# so the resulting DataFrame holds text in every column.
people_np = np.array(people_list)
print(people_np)
pd.DataFrame(people_np, columns = ['name', 'fav_breakfast', 'age', 'knows_python'])

In [None]:
# And from a csv (comma-separated values) file.
# The path is handed to pandas exactly as written, so it must not carry the
# extra double quotes a shell would need around a file name with spaces.
social = pd.read_csv('January 3-10, 2018 - Core Trends Survey - CSV.csv')

### Interlude: Solving the _print_ vs _return_ confusion, or
## _To print or not to print?_

In [None]:
# print() writes the plain-text representation of the DataFrame.
print(social)
# A bare expression on the last line of a cell is shown by the notebook as
# the cell's output, without calling print().
social

## Slicing DataFrames
- `[ ]`
- `.loc`
- `.iloc`

In [None]:
# Two ways to select one column: dot notation and bracket notation.
print(social.pial11)
print(social['pial11'])
# type() shows what kind of object a single selected column is.
type(social['pial11'])

In [None]:
# Dot notation only works when the column name is a valid Python identifier.
# `pial11ao@` is not (because of the `@`), so this line is a syntax error
# and is left commented out.
#print(social.pial11ao@)
# Bracket notation accepts any column name, since the name is just a string.
print(social['pial11ao@'])

# For the same reason, the column name can come from a variable.
column = 'pial11ao@'
social[column]

In [None]:
# Passing a list of column names (note the double brackets) selects several
# columns at once; type() shows what kind of object comes back.
print(social[['pial11', 'pial11ao@']])
print(type(social[['pial11', 'pial11ao@']]))

# Uncomment to check what a list with a single column name returns.
#print(social[['pial11']])
#print(type(social[['pial11']]))

In [None]:
# A slice inside [ ] selects rows, not columns: here the first five rows
# (positions 0 to 4; the end of the slice is excluded).
social[0:5]

## Filtering DataFrames

In [None]:
# `social.age < 35` produces one True/False value per row; using it inside
# [ ] keeps only the rows where it is True.
social[social.age < 35]

In [None]:
# Put the ages next to the result of the comparison to see, row by row,
# the True/False values that the filter `social.age < 35` is based on.
pd.DataFrame({'age': social.age,
              'bool_age': social.age < 35})

# NumPy functions

In [None]:
# NumPy functions accept DataFrame columns directly.
# NOTE(review): confirm how missing or specially coded answers in these
# columns should be handled before interpreting the numbers.
print(np.mean(social.age))
print(np.median(social.age))
# corrcoef returns a correlation matrix; the off-diagonal entries are the
# correlation between the two columns.
print(np.corrcoef(social.age, social.books1)) # Age vs number of books read.