# Fitting Data

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Apply matplotlib's 'bmh' style sheet to every figure drawn in this notebook.
plt.style.use('bmh')

# Fitting data with a polynomial - `np.polyfit()`

### `np.polyfit()` takes three arguments:

* x-data array
* y-data array
* order-of-fit

In [None]:
# Load the first example data set; the cells below read its 'x' and 'y' columns.
my_data1 = pd.read_csv('./Data/fit_data1.csv')
my_data1.head(2)    # preview the first two rows

In [None]:
# Scatter plot of the raw (x, y) data from my_data1.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

# Markers only: magenta circles with no connecting line.
ax.plot(my_data1['x'], my_data1['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 10)

# tight_layout() is a one-shot adjustment computed from the artists present
# when it is called, so it must run AFTER the axis labels have been set.
fig.tight_layout()

---

## First-order fitting:  $f(x) = ax + b$

In [None]:
# Least-squares fit of a degree-1 polynomial (a straight line) to the data.
my_fit1 = np.polyfit(my_data1['x'], my_data1['y'], 1)

my_fit1       # The coefficients of the fit, highest power first: [a, b]

### You can use `np.poly1d(fit-parameters)` to plot and explore the fitted polynomial

In [None]:
# Wrap the fit coefficients in a callable polynomial object, f(x).
# NOTE: the name `fitted_polynomial` is re-assigned by later sections.
fitted_polynomial = np.poly1d(my_fit1)
fitted_polynomial

In [None]:
# Evaluate the fitted line at every data x and store the result as a new 'Fit' column.
my_data1['Fit'] = fitted_polynomial(my_data1['x'])

In [None]:
# Preview again: the frame now also carries the 'Fit' column added above.
my_data1.head(2)

In [None]:
# Data points with the first-order fit drawn over them.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

# Measured points: markers only.
ax.plot(my_data1['x'], my_data1['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 10,
        label = "Data")

# Fitted values: wide dashed line; the colour is RGBA, so alpha 0.4 keeps
# the data markers visible underneath it.
ax.plot(my_data1['x'], my_data1['Fit'],
        marker = "None",
        linewidth = 6,
        color = (0.3, 0.1, 0.9, 0.4),
        linestyle = '--',
        label = "Fit to Data")

ax.legend(loc=0)    # loc=0 is matplotlib's "best" placement

# tight_layout() only accounts for what is on the figure when it is called,
# so run it last, once the labels and legend exist.
fig.tight_layout()

In [None]:
# NOTE: `fitted_polynomial` is re-assigned further down the notebook; this cell
# expects the first-order fit, so run the notebook top to bottom.
fitted_polynomial(5)                 # value of f(x) at x = 5

In [None]:
# .roots returns an array with one entry per root of the polynomial.
fitted_polynomial.roots              # value of x at f(x) = 0

In [None]:
# Subtracting 40 shifts the polynomial down, so its roots solve f(x) = 40.
(fitted_polynomial - 40).roots       # value of x at f(x) = 40

---
## Second-order fitting:  $f(x) = ax^2 + bx + c$

In [None]:
# Load the second example data set; the cells below read its 'x' and 'y' columns.
my_data2 = pd.read_csv('./Data/fit_data2.csv')

In [None]:
# Scatter plot of the raw (x, y) data from my_data2.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

# Markers only: magenta circles with no connecting line.
ax.plot(my_data2['x'], my_data2['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 10)

# tight_layout() is a one-shot adjustment computed from the artists present
# when it is called, so it must run AFTER the axis labels have been set.
fig.tight_layout()

In [None]:
# Least-squares fit of a degree-2 polynomial (a parabola) to the data.
my_fit2 = np.polyfit(my_data2['x'], my_data2['y'], 2)

my_fit2       # coefficients, highest power first: [a, b, c]

In [None]:
# Rebinds `fitted_polynomial` to the second-order fit; from here on the name
# no longer refers to the straight line fitted to my_data1.
fitted_polynomial = np.poly1d(my_fit2)

In [None]:
# Evaluate the fitted parabola at every data x and store the result as a new 'Fit' column.
my_data2['Fit'] = fitted_polynomial(my_data2['x'])

In [None]:
# Data points with the second-order fit drawn over them.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

# Measured points: markers only.
ax.plot(my_data2['x'], my_data2['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 10,
        label = "Data")

# Fitted values: wide dashed line; the colour is RGBA, so alpha 0.4 keeps
# the data markers visible underneath it.
ax.plot(my_data2['x'], my_data2['Fit'],
        marker = "None",
        linewidth = 6,
        color = (0.3, 0.1, 0.9, 0.4),
        linestyle = '--',
        label = "Fit to Data")

ax.legend(loc=0)    # loc=0 is matplotlib's "best" placement

# tight_layout() only accounts for what is on the figure when it is called,
# so run it last, once the labels and legend exist.
fig.tight_layout()

In [None]:
# `fitted_polynomial` is the second-order fit at this point in the notebook.
fitted_polynomial(5)                 # value of f(x) at x = 5

In [None]:
# A second-order polynomial has two roots, so the array has two entries.
fitted_polynomial.roots              # value of x at f(x) = 0

In [None]:
# Subtracting 20 shifts the polynomial down, so its roots solve f(x) = 20.
(fitted_polynomial - 20).roots       # value of x at f(x) = 20

In [None]:
# Same trick for f(x) = 80; when no real x satisfies it, the roots come back complex.
(fitted_polynomial - 80).roots       # value of x at f(x) = 80, no real root

---

## Side Topic - Complex Numbers

* Python uses `j` for the imaginary part of a complex number
* `numpy` has *some* support for complex numbers

In [None]:
# Example "solutions": one fully complex, one purely imaginary, one complex
# with zero imaginary part, and one plain float.
my_solutions = np.array([3.2 + 2.7j, 3.3j, 2.6 + 0j, 6.9])

In [None]:
# Every entry is stored as complex, so the plain 6.9 is shown as 6.9+0j.
my_solutions

In [None]:
# Element-wise test: True where the imaginary part is zero -> [False, False, True, True]
np.isreal(my_solutions)

In [None]:
# Element-wise test: True where the imaginary part is non-zero -> [True, True, False, False]
np.iscomplex(my_solutions)

## Just the Real Solutions

In [None]:
# Boolean-mask indexing keeps only the real-valued entries (still complex dtype).
my_solutions[np.isreal(my_solutions)]

### A little neater

In [None]:
# .real drops the (zero) imaginary parts, leaving plain floats.
my_solutions[np.isreal(my_solutions)].real

In [None]:
# The first real-valued solution as a single float (2.6).
my_solutions[np.isreal(my_solutions)][0].real

### Just the Complex Solutions

In [None]:
# Keep only the entries with a non-zero imaginary part: 3.2+2.7j and 3.3j.
my_solutions[np.iscomplex(my_solutions)]

### Remember: A complex root cannot be the solution to a **real world** problem.

* Except for the infinite number of exceptions
* You know what I mean ....

### Also: The real part of a complex number is NOT a real number solution

* This is not a real solution:

In [None]:
# Gives 3.2, which is only the real PART of 3.2+2.7j - it is not a real solution.
my_solutions[np.iscomplex(my_solutions)][0].real

---

## Be careful! Very high-order fits are (almost always) garbage

In [None]:
# Degree-10 polynomial fitted to the same data used for the first-order example.
my_fit3 = np.polyfit(my_data1['x'], my_data1['y'], 10)

# Rebinds `fitted_polynomial` once more, now to the degree-10 fit.
fitted_polynomial = np.poly1d(my_fit3)

In [None]:
# Degree-10 fit drawn on a dense x grid, with the my_data1 points on top.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")
ax.set_title("This is awesome fit!!!!")

# Fixed y-range so the swings of the curve do not rescale the axis.
ax.set_ylim(-20,120)

# 200 evenly spaced x values on [0, 10] so the curve is also drawn BETWEEN
# the data points.
xx = np.linspace(0,10,200)

ax.plot(xx, fitted_polynomial(xx),
        linewidth = 6,
        color = (0.3, 0.1, 0.9, 0.4),
        linestyle = '-',
        label = "Fit to Data")

# Data drawn second so the markers sit on top of the curve.
ax.plot(my_data1['x'], my_data1['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 15,
        label = "Data")

ax.legend(loc=0)    # loc=0 is matplotlib's "best" placement

# tight_layout() only accounts for what is on the figure when it is called,
# so run it last, once the labels, title and legend exist.
fig.tight_layout()

---

## Side Topic - The unpack operator (**\***)

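 * The unpacking operator (`*`) takes an iterable (a list, tuple, array, ...) and passes each of its values, in order, as a separate argument.
 * The iterable itself is **not** changed - its values are just spread across the function's parameters.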

In [None]:
# Despite the name, this is a NumPy array holding four values.
my_list = np.array([3.1, 5.6, 11.5, 19.6])

In [None]:
def some_numbers(a, b, c, d):
    """Print one sentence listing the four values received, in argument order.

    All four positional arguments are required. Nothing is returned.
    """
    print(f"I have four numbers: {a}, {b}, {c}, and {d}")

### This will **not** work as the list (`my_list`) is sent as one blob:

In [None]:
# Passing `my_list` itself supplies only ONE positional argument (a), so Python
# raises a TypeError for the missing b, c and d. The error is the point of this
# cell, so catch it and show the message instead of halting "Run All".
try:
    some_numbers(my_list)
except TypeError as err:
    print(f"TypeError: {err}")

### This **will** work as the list (`*my_list`) is unpacked and sent as 4 pieces:

In [None]:
# The * spreads the four values of my_list across the parameters a, b, c, d.
some_numbers(*my_list)

---

# Fitting a specific function - `curve_fit`

In [None]:
from scipy.optimize import curve_fit

In [None]:
# Load the third example data set; the cells below read its 'x' and 'y' columns.
my_data3 = pd.read_csv('./Data/fit_data3.csv')

In [None]:
# Scatter plot of the raw (x, y) data from my_data3.
fig, ax = plt.subplots(1, 1, figsize=(10, 5))

ax.set_ylim(-100,100)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

# Markers only: magenta circles with no connecting line.
ax.plot(my_data3['x'], my_data3['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 10)

# tight_layout() is a one-shot adjustment computed from the artists present
# when it is called, so it must run AFTER the axis labels have been set.
fig.tight_layout()

$$ \Large 
f(x) = a \sin(bx) 
$$

### You need to provide f(x)

* The first argument needs to be the points to fit (`my_x`)
* The other arguments are the parameters to be fit (`my_a, my_b`)
* The function just returns: `f(x)`

In [None]:
def ringo(my_x, my_a, my_b):
    """Model function for the fit: f(x) = a * sin(b * x).

    my_x : point(s) at which to evaluate the model (scalar or array-like
           that supports element-wise arithmetic).
    my_a : multiplier applied to the sine (parameter a).
    my_b : multiplier applied to x inside the sine (parameter b).

    Returns my_a * sin(my_b * my_x), evaluated element-wise.
    """
    sine_argument = my_b * my_x
    return my_a * np.sin(sine_argument)

##### You need to provide an initial guess for the parameters `a` and `b`

In [None]:
# Starting values handed to the optimizer for a and b in f(x) = a*sin(b*x).
my_guess_a, my_guess_b = 75.0, 1.0 / 5.0

# Order matters: it must match the parameter order of ringo(my_x, my_a, my_b).
init_guesses = [my_guess_a, my_guess_b]

#### My Parameters:

* `Function to fit = ringo`
* `X points to fit = my_data3['x']`
* `Y points to fit = my_data3['y']`
* `Initial guess (p0) at values for a,b = init_guesses`

In [None]:
# Least-squares fit of ringo() to the data, starting from init_guesses.
# curve_fit returns two things: the best-fit parameters and their covariance
# matrix. NOTE: `error` therefore holds the covariance matrix, not the
# uncertainties themselves; np.sqrt(np.diag(error)) gives the 1-sigma errors.
fitpars, error = curve_fit(ringo,
                           my_data3['x'], my_data3['y'],
                           p0 = init_guesses)

print(fitpars)    # best-fit values, in the order [a, b]

In [None]:
# Data, the curve from the initial guess, and the curve from the fitted parameters.
fig, ax = plt.subplots(1, 1, figsize=(10, 5))

ax.set_ylim(-100,140)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

# Measured points: markers only.
ax.plot(my_data3['x'], my_data3['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 10,
        label = "Data")

# Model evaluated with the starting guess; * unpacks [a, b] into ringo's
# my_a and my_b parameters.
ax.plot(my_data3['x'], ringo(my_data3['x'], *init_guesses),
        color = (0.3, 0.9, 0.2, 0.8),
        marker = "None",
        linestyle = "--",
        linewidth = 6,
        label = "Initial Guess")

# Model evaluated with the parameters returned by curve_fit.
ax.plot(my_data3['x'], ringo(my_data3['x'], *fitpars),
        color = (0.3, 0.1, 0.9, 0.4),
        marker = "None",
        linestyle = "-",
        linewidth = 6,
        label = "Fit to Data")

ax.legend(loc=0)    # loc=0 is matplotlib's "best" placement

# tight_layout() only accounts for what is on the figure when it is called,
# so run it last, once the labels and legend exist.
fig.tight_layout()

### Bad initial guesses can lead to very bad fits

In [None]:
# A deliberately different starting point for a and b in f(x) = a*sin(b*x).
# This overwrites the guesses used for the previous fit.
my_guess_a, my_guess_b = 35.0, 1.0

# Order matters: it must match the parameter order of ringo(my_x, my_a, my_b).
init_guesses = [my_guess_a, my_guess_b]

In [None]:
# Same fit as before, but started from the new init_guesses; this overwrites
# `fitpars` and `error` from the earlier fit.
# NOTE: `error` is the covariance matrix returned by curve_fit, not the
# uncertainties themselves; np.sqrt(np.diag(error)) gives the 1-sigma errors.
fitpars, error = curve_fit(ringo,
                           my_data3['x'], my_data3['y'],
                           p0=init_guesses)

print(fitpars)    # best-fit values, in the order [a, b]

In [None]:
# Data, the curve from the (bad) initial guess, and the resulting fitted curve.
fig, ax = plt.subplots(1, 1, figsize=(10, 5))

ax.set_ylim(-100,140)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

# Measured points: markers only.
ax.plot(my_data3['x'], my_data3['y'],
        color = "m",
        marker = "o",
        linestyle = "None",
        markersize = 10,
        label = "Data")

# Model evaluated with the starting guess; * unpacks [a, b] into ringo's
# my_a and my_b parameters.
ax.plot(my_data3['x'], ringo(my_data3['x'], *init_guesses),
        color = (0.3, 0.9, 0.2, 0.8),
        marker = "None",
        linestyle = "--",
        linewidth = 6,
        label = "Initial Guess")

# Model evaluated with the parameters returned by curve_fit.
ax.plot(my_data3['x'], ringo(my_data3['x'], *fitpars),
        color = (0.3, 0.1, 0.9, 0.4),
        marker = "None",
        linestyle = "-",
        linewidth = 6,
        label = "Fit to Data")

ax.legend(loc=0)    # loc=0 is matplotlib's "best" placement

# tight_layout() only accounts for what is on the figure when it is called,
# so run it last, once the labels and legend exist.
fig.tight_layout()

---

### Side Topic - Scaling data: Normalization

* It is often useful to rescale your data.
* One of the most common types of rescaling is called: `min-max normalization`.
* This method rescales the data so that the `y-data` spans the range `[0, 1]`

$$ \Large
\text{Normalized}(y)={\frac  {y-{\text{min}}(y)}{{\text{max}}(y)-{\text{min}}(y)}}
$$

In [None]:
def normalize_data(data_array):
    """Min-max normalize data: (y - min(y)) / (max(y) - min(y)).

    Parameters
    ----------
    data_array : object with .min() and .max() methods that supports
        element-wise arithmetic (e.g. a NumPy array or pandas Series).

    Returns
    -------
    Same kind of object as the input. The smallest value maps to 0 and the
    largest to 1. If every value is identical the formula would be 0/0, so
    an all-zero result is returned instead of NaN.
    """
    lowest = data_array.min()               # computed once and reused below
    span = data_array.max() - lowest

    # Guard only the scalar case: a column-wise reduction (e.g. a DataFrame)
    # yields a non-scalar span and falls through to the plain formula.
    if np.ndim(span) == 0 and span == 0:
        # Multiplying by 0.0 gives float zeros while keeping shape and index.
        return (data_array - lowest) * 0.0

    return (data_array - lowest) / span

In [None]:
# Range of the original y data, shown as a (min, max) tuple.
my_data3['y'].min(), my_data3['y'].max()

In [None]:
# Rescaled copy of the y column; my_data3 itself is not modified.
y_normal = normalize_data(my_data3['y'])

In [None]:
# Range after normalization, shown as a (min, max) tuple.
y_normal.min(), y_normal.max()

In [None]:
# Side-by-side comparison: original y (left) and min-max normalized y (right).
fig, ax = plt.subplots(1, 2, figsize=(14, 4))

# Left panel: the data as loaded.
ax[0].set_xlabel("This is X")
ax[0].set_ylabel("This is Y")

ax[0].plot(my_data3['x'], my_data3['y'],
           color = "m",
           marker = "o",
           linestyle = "None",
           markersize = 10)

# Right panel: same x values, rescaled y.
ax[1].set_xlabel("This is X")
ax[1].set_ylabel("This is Normalized Y")

ax[1].plot(my_data3['x'], y_normal,
           color = "m",
           marker = "o",
           linestyle = "None",
           markersize = 10)

# tight_layout() only accounts for what is on the figure when it is called.
# Run it last so the right panel's y-label gets room instead of crowding
# the left panel.
fig.tight_layout()