# Python

---

# List 
### Indexing

In [None]:
a_list = ["Python", "Data", "Science"]

a_list[0]

### List slicing

In [None]:
a_list[:2]

---

# Tuple
### Indexing

In [None]:
a_tuple = ("Python", "Data", "Science")

a_tuple[0]

### Tuple slicing

In [None]:
a_tuple[:2]

## Unpacking 

In [None]:
item_0, item_1, item_2 = ("Python", "Data", "Science")

item_0

In [None]:
item_0, (item_1, item_2) = "Python", ("Data", "Science")

item_1

In [None]:
fig, (plot1, plot2) = "Figure", ("Plot1", "Plot2")

plot1

In [None]:
fig, ((plot1, plot2),(plot3, plot4)) = "Figure", (("Plot1", "Plot2"), ("Plot3", "Plot4"))

plot2

---

## Dictionaries

#### Create a dictionary

In [None]:
d = {"color": "mediumblue", "linestyle": "dashed"}

#### Access a dictionary

In [None]:
d["color"]

---

## for Loops

In [None]:
# Each pass through the loop binds the next list element to `item`
for item in [
    "item 1",
    "item 2",
    "item 3",
]:
    print(item)

---

## zip

In [None]:
# zip combines the respective items from each list as a tuple

list(zip(["Data", "Machine", "Artificial"], ["Science", "Learning", "Intelligence"]))

### Unpacking zipped items

In [None]:
# Unpacking the zipped tuples as we loop through

# Each zipped pair is split into item1 (first list) and item2 (second list)
for item1, item2 in zip(
    ["Data", "Machine", "Artificial"],
    ["Science", "Learning", "Intelligence"],
):
    print(item1, item2)

#### What the machine sees:

In [None]:
                    # after being zipped into a list of tuples...
for item1, item2 in [("Data", "Science"), ("Machine", "Learning"), ("Artificial", "Intelligence")]:
    print(item1, item2)  # one pair per line, e.g. "Data Science"

### Example

In [None]:
# Unpacking the created list of tuples as we loop through each zipped item

# Pair every box name with the color at the same position
for box, color in zip(
    ["box1", "box2", "box3"],
    ["lightblue", "mediumblue", "darkblue"],
):
    print(box, color)

#### What the machine sees:

In [None]:
# The same pairs that zip() produces in the example above, written out by hand.
# The color names must match the zipped values exactly (no spaces) so that
# both loops print identical output.
for box, color in [
    ("box1", "lightblue"),
    ("box2", "mediumblue"),
    ("box3", "darkblue"),
]:
    print(box, color)

---

## Functions

#### Define the function

In [None]:
def my_plot():
    """Return a fixed placeholder string standing in for a visualization."""
    description = "my data viz..."
    return description

#### Call the function

In [None]:
# No parameters

my_plot()

### Using parameters

In [None]:
def my_plot(plot_type, color):
    """Describe a plot as "My <color> <plot_type>".

    Both arguments are required; the color is placed before the plot type
    in the returned string.
    """
    return "My {0} {1}".format(color, plot_type)

#### Call the function

In [None]:
# Providing two required values

my_plot("histogram", "skyblue")

### Using default values

In [None]:
def my_plot(plot_type, color="mediumblue"):
    """Describe a plot as "My <color> <plot_type>".

    `plot_type` is required. `color` is optional and falls back to
    "mediumblue" when the caller does not supply it.
    """
    return "My {color} {plot_type}".format(color=color, plot_type=plot_type)

In [None]:
# Using the default value for color

my_plot("histogram")

### Providing an optional color value

In [None]:
my_plot("histogram", "lightblue")

---

---

# Pandas

In [None]:
import pandas as pd

---

# Data 

## DataFrame object

In [None]:
df = pd.read_csv("heart-disease.csv")

### Preview dataset

In [None]:
# show first 5 rows

df.head()

### Access a column

In [None]:
# Dictionary notation

df['sex']

### Unique values

In [None]:
df["sex"].unique()

---

## Selection and Filtering
### Column selection

In [None]:
df[['age', 'sex', 'heart_disease']]    # providing a list selects multiple columns

### Row and Column selection with loc
Allows you to select a subset of the rows and columns using the label/name of the row/column

In [None]:
# loc selects rows and columns by label/name; label slices are inclusive, so :5 includes the row labelled 5

df.loc[:5, ["age", "sex"]]

---

## Boolean row selection 

In [None]:
df["sex"]=="female"

### Using boolean for row selection

In [None]:
# row selection (return the rows that are True),  col selection

df.loc[df["sex"]=="female", ["age", "sex"]]

### & (and)

In [None]:
                    # row selection,                        col selection

df.loc[(df["sex"]=="female") & (df["age"] > 65), ["sex", "age", "heart_disease"]]

## Binning
#### Convert a **continuous or interval** variable to a **categorical** variable.

In [None]:
df["age"].head(10)

In [None]:
                     # bounds: (29, 39], (39, 49], (49, 59],(59, 69],(69, 79]

# pd.cut builds right-closed bins by default, so each label covers
# (lower, upper]; an age outside (29, 79] gets NaN instead of a label.
# Note: this overwrites the numeric "age" column with a categorical one.
df["age"] = pd.cut(
    x=df["age"],
    bins=[29, 39, 49, 59, 69, 79],
    labels=["thirties", "forties", "fifties", "sixties", "seventies"],
)
df["age"].head(10)

---

## Useful methods

### mean()

In [None]:
df["max_hr"].mean()

### median()

In [None]:
df["max_hr"].median()

### count()
#### Returns the number of non-missing (non-NA) values in a selection

In [None]:
# Count the number of non-missing ages (rows whose "age" is not NaN)

df["age"].count()

In [None]:
# Count the number of rows where the "age" is "forties"
# i.e. count the rows for which the boolean condition is True

df.loc[df["age"] == "forties", "age"].count()

### value_counts()
#### Returns the count of each unique category in a column

In [None]:
# Count the quantity of each unique category for "age"

df["age"].value_counts()

---