# Tutorial 2

## Outline

* Simulated Annealing (for HW#2)
* Pandas
* Numba


## Simulated Annealing

In [None]:
import numpy as np
import matplotlib.pyplot as plt


def Camel(X):
    """Evaluate the three-hump camel function at the point ``X``.

    f(x, y) = 2*x**2 - 1.05*x**4 + x**6/6 + x*y + y**2

    Parameters
    ----------
    X: sequence of length 2
        The pair ``(x, y)``. Each entry may be a scalar or an array; the
        arithmetic below is applied element-wise, so a pair of meshgrid
        arrays is also accepted.

    Returns
    -------
    The function value, with the same shape as ``x`` and ``y``.
    """
    x, y = X
    # The terms are combined in the same left-to-right order as the formula.
    quadratic = 2 * x**2
    quartic = 1.05 * x**4
    sextic = x**6 / 6
    coupling = x * y
    return quadratic - quartic + sextic + coupling + y**2

def plot_surface(func, x_min=-2, x_max=2, y_min=-2, y_max=2):
    """Draw ``func`` as a 3D surface over a rectangular window.

    Parameters
    ----------
    func: Callable
        Function of a single argument ``(x, y)``; it is called once with a
        pair of 2-D meshgrid arrays and must return an array of heights.
    x_min, x_max: float
        Range of the x axis.
    y_min, y_max: float
        Range of the y axis.

    The surface is sampled on a 100 x 100 grid and drawn on a new figure.
    Nothing is returned.
    """
    n_points = 100
    xs = np.linspace(x_min, x_max, n_points)
    ys = np.linspace(y_min, y_max, n_points)
    grid_x, grid_y = np.meshgrid(xs, ys)
    heights = func((grid_x, grid_y))

    axes = plt.figure().add_subplot(projection='3d')
    axes.plot_surface(grid_x, grid_y, heights)

# Visualize Camel over the default window, x and y both in [-2, 2].
plot_surface(Camel)

<img src="SA.svg" />

+ Random Displacement:

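$$X_{i+1} = X_i + \Delta \cdot (2 \cdot \mathrm{URN} - 1)$$

where $\mathrm{URN}$ is a uniform random number in $[0, 1)$ and $\Delta$ is the magnitude of the random displacement.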

+ Metropolis Rule:

$$P(\mathrm{accept})=\min\left[1,\ \exp\left(-\frac{\Delta E}{T}\right)\right]$$

In [None]:
def SA(solution, func, schedule, delta, boundary, n_iter, report_interval=None, rng=None):
    """
    Simulated Annealing for minimization

    At every temperature in ``schedule``, ``n_iter`` trial moves are made.
    Each trial displaces every variable by a uniform random amount in
    ``[-delta, delta)``. Trials that leave the boundary are discarded; the
    others are accepted according to the Metropolis rule.

    Parameters
    ----------
    solution: np.ndarray
        Initial guess. It is copied; the caller's array is never modified.
    func: Callable
        Function to minimize. It is called with an array of variables and
        must return a scalar.
    schedule: np.ndarray
        An array of temperatures for simulated annealing
    delta: float
        Magnitude of random displacement
    boundary: tuple
        Boundary of the variables to minimize. (lowerbound, upperbound)
    n_iter: int
        Number of random displacement moves at each temperature
    report_interval: int, optional
        Print the best result so far at the first temperature step and then
        every ``report_interval`` steps. Nothing is printed when None.
    rng: np.random.Generator, optional
        Random number generator to draw from. Pass a seeded generator for
        reproducible runs; a fresh ``np.random.default_rng()`` is used when
        None.

    Returns
    -------
    res: dict
        ``"solution"``: the best point visited, ``"evaluation"``: the value
        of ``func`` at that point.
    """
    if rng is None:
        rng = np.random.default_rng()
    lower, upper = boundary

    # Work on a float copy so the caller's array is left untouched.
    current = np.array(solution, dtype=float)
    current_eval = func(current)
    best_solution = current.copy()
    lowest_eval = current_eval

    for idx, temp in enumerate(schedule):
        if report_interval is not None and ((idx + 1) % report_interval == 0 or idx == 0):
            msg = (
                f"{idx + 1}/{len(schedule)}, Temp: {temp:.2f}, "
                f"Best solution: {best_solution}, Value: {lowest_eval:.7f}"
            )
            print(msg)

        for _ in range(n_iter):
            # Random displacement: X_new = X + delta * (2 * URN - 1)
            trial = current + delta * (2 * rng.random(current.shape) - 1)
            if not (np.all(trial >= lower) and np.all(trial <= upper)):
                continue  # out-of-bounds trials are simply discarded

            trial_eval = func(trial)
            delta_e = trial_eval - current_eval
            # Metropolis rule: always accept downhill moves; accept uphill
            # moves with probability exp(-dE / T). A non-positive temperature
            # is treated as "never accept uphill" to avoid dividing by zero.
            accept = delta_e <= 0 or (
                temp > 0 and rng.random() < np.exp(-delta_e / temp)
            )
            if not accept:
                continue

            current, current_eval = trial, trial_eval
            if current_eval < lowest_eval:
                # Keep the best point ever visited, not just the final state.
                best_solution = current.copy()
                lowest_eval = current_eval

    return {"solution": best_solution, "evaluation": lowest_eval}

Try linear cooling

In [None]:
# Linear cooling schedule: 1000 temperatures evenly spaced from 3000 down to 50.
linear_cooling = np.linspace(3000, 50, 1000)

In [None]:
# Start from a point which is close to a local minimum
starting_point = np.array([-1.7, 0.7])
# Positional arguments after the schedule: delta=0.1, boundary=(-2, 2),
# n_iter=5 moves per temperature, report_interval=100 temperature steps.
SA(starting_point, Camel, linear_cooling, 0.1, (-2, 2), 5, 100)

In [None]:
# Local minimization methods fail to find the global minimum
from scipy.optimize import minimize

# BFGS is a gradient-based local optimizer, so its result depends on the
# starting point; it is run here from the same point as the SA example.
minimize(Camel, starting_point, method="BFGS")

## Pandas

+ [Documentation](https://pandas.pydata.org/docs/)

In [None]:
import pandas as pd

### Read CSV file

In [None]:
# Load the CSV into a DataFrame. The path is relative to the current working
# directory, so this cell must be run from the notebook's own folder.
df = pd.read_csv("../../Datasets/titanic.csv")
print(type(df))
# A bare expression on the last line is rendered as the cell output.
df

In [None]:
# head() returns the first 5 rows by default; pass n to change the count.
df.head()

In [None]:
# tail() returns the last 5 rows by default; pass n to change the count.
df.tail()

### Drop columns

In [None]:
# drop() returns a new DataFrame without the 'Cabin' column; `df` itself is
# left unchanged. axis=1 means the labels refer to columns.
df2 = df.drop(['Cabin'], axis=1)

### Drop NaN values

Drop the rows where at least one element is missing

In [None]:
# dropna() returns a new DataFrame; `df` is unchanged because the result is
# only displayed, not assigned back.
df.dropna()

Drop the columns where at least one element is missing.

In [None]:
# As above, the result is a new DataFrame and `df` is not modified.
df.dropna(axis=1) # or axis='columns'

Define in which columns to look for missing values.

In [None]:
# Only rows with a missing 'Age' are dropped; missing values in other columns
# are kept. The result is a new DataFrame.
df.dropna(subset=['Age'])

### Indexing

In [None]:
# .loc is label-based: row label 0, column label "Pclass".
df.loc[0, "Pclass"]

In [None]:
# .iloc is position-based: first row, third column (positions count from 0).
df.iloc[0, 2]

In [None]:
# Indexing with a list of column labels returns a DataFrame with just those
# columns, in the order listed.
df[['Pclass', 'Survived']]

In [None]:
# Boolean-mask indexing: keep only the rows where 'Survived' equals 1.
df[df['Survived'] == 1]

### Other useful methods

Use the `.values` attribute to get the data as a `numpy.ndarray`

In [None]:
# NOTE: the pandas documentation recommends DataFrame.to_numpy() over .values.
df.values

Use `describe()` method to get statistics

In [None]:
# For a DataFrame with mixed dtypes, describe() summarizes only the numeric
# columns by default; pass include='all' to cover every column.
df.describe()

Use `.index` or `.columns` to get the row index or the column labels

In [None]:
# Row labels of the DataFrame.
df.index

In [None]:
# Column labels of the DataFrame.
df.columns

Use `to_csv()` to export DataFrame

In [None]:
# The file is written to the current working directory. By default the index
# is written as the first column; pass index=False to omit it.
df.to_csv("test_export.csv")

Use `sort_values()` method to sort the DataFrame according to values in one column.

In [None]:
# Returns a new DataFrame sorted by 'Age', largest first; `df` is unchanged.
# Rows with a missing age are placed last by default (na_position='last').
df.sort_values(by=["Age"], ascending=False)

## Numba

Use `pip install numba` or `conda install numba -c conda-forge` to install numba package.

Numba is a package that helps users accelerate numerical Python code by compiling it just in time.

In [None]:
import numba

In [None]:
def test():
    """Return the sum of the integers 0..99999 using a plain Python loop.

    Used as the pure-Python timing baseline.
    """
    total = 0
    for value in range(100000):
        total = total + value
    return total

%timeit test()

In [None]:
...  # placeholder left in the tutorial; presumably a Numba JIT decorator (e.g. @numba.njit) goes here (to be confirmed)
def test():
    """Return the sum of the integers 0..99999.

    Same loop as the baseline cell, repeated so the two timings can be
    compared.
    """
    accumulator = 0
    for term in range(100000):
        accumulator += term
    return accumulator

%timeit test()