In [None]:
# IGNORE THIS CELL WHICH CUSTOMIZES LAYOUT AND STYLING OF THE NOTEBOOK !
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings = lambda *a, **kw: None
from IPython.core.display import HTML

HTML(open("custom.html", "r").read())

# Chapter 0: Introduction 


<div class="alert alert-block alert-warning">
    <i class="fa fa-warning"></i>&nbsp;This script introduces <code>numpy</code>, <code>pandas</code>, <code>matplotlib</code> and <code>seaborn</code> as far as we use them in the following course.


Thus it is not a comprehensive introduction to these libraries !
    </div>

## pandas

`pandas` allows handling tabular data as so-called `DataFrame`s. Tabular data means that columns have types. Within a column, values are of the same type, but types can differ between columns.

### Some basics

In [None]:
# show content of csv file
# (the "with" block closes the file again after reading it)
with open("data/example.csv") as fh:
    print(fh.read())

In [None]:
# read file with pandas

import pandas as pd

# parse the csv file into a DataFrame and show it
df = pd.read_csv("data/example.csv")
print(df)

<div class="alert alert-block alert-info">
<i class="fa fa-warning"></i>&nbsp;<code>pandas</code> also 
supports reading and writing of other file formats, like <code>.xlsx</code>, <code>.hdf5</code> or <code>sqlite3</code> files.
</div>





In [None]:
# print a summary of the data frame: columns, non-null counts and types
df.info()

You can see that the columns `a`, `b` and `c` have the different types `int64`, `float64` and `object`. The latter can be read as "anything but a number".

In [None]:
# number of rows and columns, as a tuple (n_rows, n_columns)
print(df.shape)

`.shape` is a tuple holding the number of rows and the number of columns.

To show the first 5 rows of a data frame we can use `.head()`.

In [None]:
# without an argument .head() returns the first 5 rows
print(df.head())

And `.tail()` shows the last 5 rows:

In [None]:
# without an argument .tail() returns the last 5 rows
print(df.tail())

Both accept an integer to change the number of rows to show:

In [None]:
# only the first 3 rows
print(df.head(3))

Compute some statistics on the numerical columns:

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) per column
print(df.describe())

###  Accessing parts of a data frame

We can access separate columns using a column name:

In [None]:
# select the column named "a"
print(df["a"])

Single columns are `Series` in `pandas`:

In [None]:
# check the type of the object we get when selecting one column
print(type(df["a"]))

In [None]:
# arithmetic on columns works element-wise: every row gets a + 2 * b
scores = df["a"] + 2 * df["b"]
print(scores)

<div class="alert alert-block alert-warning">
<i class="fa fa-warning"></i>&nbsp;Don't forget that
    <ul>
        <li> Indexing in Python starts with <code>0</code>
        </li>
        <li> Upper limits are exclusive
            </li>
        <li> Negative indices start from the right end, <code>-1</code> is the last element, <code>-2</code> the one before, etc.</li>
        <li> <code>:</code> refers to all elements.</li>
    </ul>
</div>




`df.iloc[row_slice, col_slice]` offers index based access:

In [None]:
# all rows (":") of the first column (index 0)
print(df.iloc[:, 0])



To extract rows `1` to `2` (included), and all columns except the last one:

In [None]:
# rows 1 and 2 (upper limit 3 is exclusive), all columns but the last one
print(df.iloc[1:3, :-1])

To extract the values of the last column for the same rows `1` to `2`:

In [None]:
# rows 1 and 2 of the last column (index -1)
print(df.iloc[1:3, -1])

### Filtering a data frame

In [None]:
# all rows where the value of a is smaller than 10:
# (the comparison inside the brackets selects the rows to keep)
print(df[df["a"] < 10])

This works as follows:

In [None]:
# compare every value in column a with 3
flags = df["a"] > 3

# we see that flags is a vector with logical values depending on
# the given condition "a > 3":
print(flags)

In [None]:
# when we pass these logical values to "df[...]" only the rows
# whose flag is True remain:
print(df[flags])

Another example:

In [None]:
# keep only the rows where column c equals the string "one"
print(df[df["c"] == "one"])

### Extending a dataframe

Adding a new, computed column:

In [None]:
# values in new column d will be values from "a" squared:
df["d"] = df["a"] ** 2

# the new column shows up as the last column
print(df.head())

We can also overwrite a column, here we use `apply` to apply the same function on all values in the given column:

In [None]:
def increment(v):
    """Return the given value increased by one."""
    result = v + 1
    return result


# .apply calls increment for every value in column d; the result
# replaces the old column d.
# NOTE: running this cell again increments column d once more.
df["d"] = df["d"].apply(increment)

print(df.head())

## numpy

`numpy` offers data structures from linear algebra, e.g. vectors and matrices. 

In contrast to a `pd.DataFrame`, matrices contain numbers of the same type.

In [None]:
import numpy as np

# a vector, created from a Python list with three floats
x = np.array([3.0, 5.0, 8.0])
print(x)

In [None]:
# the shape of a vector is a tuple with a single entry, its length
print(x.shape)

In [None]:
# every inner list is one row of the 3 x 3 matrix
rows = [
    [1.0, 2.0, 3.0],
    [3.0, 4.0, 5.0],
    [3.0, 5.0, 3.0],
]
A = np.array(rows)
print(A)

In [None]:
# for a matrix the shape is (number of rows, number of columns)
print(A.shape)

Indexed access works as usual:

In [None]:
print(x[0])  # first element
print(x[-1])  # last element
print(x[1:])  # all elements starting at index 1

In [None]:
print(A[1, 0])  # element in row 1, column 0
print(A[:, 1])  # all rows of column 1

Numpy offers element-wise function application:

In [None]:
# caveat ! "*" multiplies element by element, this is
# not matrix-matrix multiplication
print(A * A)

In [None]:
# this is matrix-matrix multiplication, written with the "@" operator:
print(A @ A)

In [None]:
# subtract 3 from all elements:
print(A - 3)

In [None]:
# subtract 3 from all elements, then compute absolute
# values for every element (np.abs works element-wise):
print(np.abs(A - 3))

In [None]:
# 11 equally spaced values, starting at 0 and ending at 8 (both included)
x = np.linspace(0, 8, num=11)
print(x)
# number of entries in the vector
print(x.size)

In [None]:
# we can also filter values, here we keep the entries smaller than 2:
print(x[x < 2])

In computations like addition `True` is handled as `1` and `False` as `0`. 

In [None]:
# the comparison gives one logical value per entry of x,
# summing them up counts the True entries
below_two = x < 2
p = below_two.sum()
print(p)
print(p / len(x) * 100, "percent of entries in x are smaller than 2")

## About plotting

We use `matplotlib` and also `seaborn` in the script. `seaborn` is a layer on top of `matplotlib` offering some easy-to-use standard plots and also a more modern layout and styling.

In [None]:
import matplotlib.pyplot as plt

# four sample points: 1, 2, 3 and 4
x = np.linspace(1, 4, 4)
y0 = np.mod(x, 2)  # remainder of the division by 2: 1, 0, 1, 0
y1 = 2 * (1 - y0)  # 0 where y0 is 1 and 2 where y0 is 0
y2 = np.sqrt(x)  # square root of every entry

plt.plot(x, y0)  # default color is blue
# named color and a circle marker at every data point:
plt.plot(x, y1, color="chocolate", marker="o")

# no lines, marker size is 150:
plt.scatter(x, y2, color="steelblue", marker="*", s=150);

In [None]:
# "label" sets the name of the curve which is shown in the legend
plt.plot(x, y0, label="one")
plt.plot(x, y1, color="chocolate", marker="o", label="two")

# no lines, marker size is 150:
plt.scatter(x, y2, color="steelblue", marker="*", s=150, label="three")

# show the legend built from the labels
plt.legend()
plt.title("with legend");

After `plt.subplot(m, n, i)` the following plot commands draw into cell `i` of an `m` times `n` grid of plots. `m` is the number of rows, `n` is the number of columns, and `i` starts at `1` and is counted row-wise:

In [None]:
# multiple plots

plt.figure(figsize=(12, 7))  # width, height

# One entry per cell of the 2 x 3 grid, in row wise order. Every entry
# lists the arguments of the plt.plot calls which draw into this cell.
panels = [
    [(x, y0), (x, y1)],
    [(x, y1, "chocolate")],
    [(x, y2, "steelblue")],
    [(x, y1, ":")],
    [(x, y2, "*")],
    [(x, y0, "chocolate")],
]

for cell, curves in enumerate(panels, start=1):
    # select cell number "cell", all following commands draw into it
    plt.subplot(2, 3, cell)
    for curve in curves:
        plt.plot(*curve)
    plt.title(f"plt.subplot(2, 3, {cell})")

In [None]:
# 200 sample points between 0 and 2 * pi
x = np.linspace(0, 2 * np.pi, 200)
y = np.sin(x)
z = np.cos(x**2)  # cosine of the squared x values

# both curves are drawn into the same plot
plt.plot(x, y, "chocolate")
plt.plot(x, z, "steelblue");

# Exercise section

1. Repeat the examples above and play with them

# * Optional Exercises

2. Can you plot a circle by computing `x` and `y` vectors suitable for `plt.plot` ? Make sure that the circle looks like a circle and not like an ellipse.

3. Plot three circles with different radii and different colors, create labels and plot a legend. Make sure that the legend shows up in the top-right corner and does not overlap with the circles.

4. Plot the three circles in 3 different plots in one row using `plt.subplot`.
