# Fitting Data

In [0]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [0]:
# Apply matplotlib's 'ggplot' style sheet to the figures drawn from here on
plt.style.use('ggplot')

### Side Topic - Histogram Bins

* Plotting a histogram of two datasets with a different number of elements using the same bin number can lead to a misleading plot
* You can fix this by defining your bins

In [0]:
# Seed NumPy's global random generator so the same sample is drawn on every run
np.random.seed(42)

# 500 points from a normal dist, ave = 2.0, std = 1.0
some_data = np.random.normal(loc=2.0, scale=1.0, size=500)

# Index positions (as a one-element tuple) of the strictly positive samples
mask2 = np.nonzero(some_data > 0.0)

# Show how many points there are in total and how many survive the cut
some_data.size, some_data[mask2].size

In [0]:
# Filled histogram of the complete sample, asking for 20 bins
plt.hist(
    some_data,
    bins=20,
    facecolor='DodgerBlue',
)

# Outline histogram of only the elements selected by mask2, again asking
# for 20 bins (a bin count, not explicit bin edges)
plt.hist(
    some_data[mask2],
    bins=20,
    histtype='step',
    color='MidnightBlue',
    linewidth=4,
);

In [0]:
# Explicit bin edges, 0.25 wide, shared by both histograms.
# np.arange leaves out its stop value, so a stop of 6 would end the edges at
# 5.75 and any sample above that would be dropped from the histogram.
# Stopping one step past 6 makes the edges run from -2 to 6 inclusive.
my_bins = np.arange(-2, 6.25, 0.25)
my_bins

In [0]:
# Filled histogram of the complete sample on the shared bin edges
plt.hist(
    some_data,
    bins=my_bins,
    facecolor='DodgerBlue',
)

# Outline histogram of the elements selected by mask2 on the same bin edges,
# so the two histograms can be compared bin by bin
plt.hist(
    some_data[mask2],
    bins=my_bins,
    histtype='step',
    color='MidnightBlue',
    linewidth=4,
);

---
## Fitting Data

In [0]:
# Read the first example data set; its 'x' and 'y' columns are used below
my_data1 = pd.read_csv('./Data/fit_data1.csv')

# Peek at the first row only
my_data1.head(1)

In [0]:
# Scatter plot of the raw (x, y) points of the first data set
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.plot(my_data1['x'], my_data1['y'], color="m",
        marker="o", linestyle="None", markersize=10)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after the labels and the data
fig.tight_layout()

## `np.polyfit(x-data array, y-data array, order-of-fit)`

### First-order fitting:  $f(x) = ax + b$

In [0]:
# Fit a polynomial of degree 1 (a straight line) to the first data set
my_fit1 = np.polyfit(my_data1['x'], my_data1['y'], deg=1)

# The coefficients of the fit (a,b)
my_fit1

### You can use `np.poly1d(fit-parameters)` to plot and explore the fitted polynomial

In [0]:
fitted_polynomial = np.poly1d(my_fit1)

In [0]:
fitted_polynomial(my_data1['x'])

In [0]:
# First data set together with the fitted polynomial evaluated at the data's
# own x values
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.plot(my_data1['x'], fitted_polynomial(my_data1['x']),
        linewidth=5, color='y', linestyle='--',
        label="Fit to Data")

ax.plot(my_data1['x'], my_data1['y'], color="m",
        marker="o", linestyle="None", markersize=10,
        label="Data")

ax.legend(loc=0)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after labels, data and legend
fig.tight_layout()

In [0]:
fitted_polynomial(5)                 # value of f(x) at x = 5

In [0]:
fitted_polynomial.roots              # value of x at f(x) = 0

In [0]:
(fitted_polynomial - 40).roots       # value of x at f(x) = 40

### Second-order fitting:  $f(x) = ax^2 + bx + c$

In [0]:
my_data2 = pd.read_csv('./Data/fit_data2.csv')

In [0]:
# Scatter plot of the raw (x, y) points of the second data set
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.plot(my_data2['x'], my_data2['y'], color="m",
        marker="o", linestyle="None", markersize=10)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after the labels and the data
fig.tight_layout()

In [0]:
# Fit a polynomial of degree 2 to the second data set
my_fit2 = np.polyfit(my_data2['x'], my_data2['y'], deg=2)

# Display the fitted coefficients
my_fit2

In [0]:
fitted_polynomial = np.poly1d(my_fit2)

In [0]:
# Second data set together with the fitted polynomial evaluated at the data's
# own x values
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.plot(my_data2['x'], fitted_polynomial(my_data2['x']),
        linewidth=5, color='y', linestyle='--',
        label="Fit to Data")

ax.plot(my_data2['x'], my_data2['y'], color="m",
        marker="o", linestyle="None", markersize=10,
        label="Data")

ax.legend(loc=0)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after labels, data and legend
fig.tight_layout()

In [0]:
fitted_polynomial(5)                 # value of f(x) at x = 5

In [0]:
fitted_polynomial.roots              # value of x at f(x) = 0

In [0]:
(fitted_polynomial - 20).roots       # value of x at f(x) = 20

In [0]:
(fitted_polynomial - 80).roots       # value of x at f(x) = 80, no real root

### Side Topic - Complex Numbers

* Python uses `j` for the imaginary  part of a complex number
* `numpy` has *some* support for complex numbers
* `cmath` (complex math) is much better

In [0]:
my_complex = (fitted_polynomial - 80).roots

In [0]:
my_complex[0]

In [0]:
my_complex[0].real

In [0]:
my_complex[0].imag

### Remember - having a `real` part does not make a complex number a REAL number!

In [0]:
number_array = [1+1j, 1+0j, 4.5, 3, 2, 2j]

In [0]:
np.isreal(number_array)

In [0]:
np.iscomplex(number_array)

In [0]:
np.sqrt(-1)

In [0]:
import cmath as cx

In [0]:
cx.sqrt(-1)

### Be careful! Very high-order fits may be garbage

In [0]:
# Deliberately high-order (degree 10) polynomial fit to the first data set
my_fit3 = np.polyfit(my_data1['x'], my_data1['y'], deg=10)

# Wrap the coefficients so the polynomial can be evaluated like a function
# (this rebinds the name fitted_polynomial)
fitted_polynomial = np.poly1d(my_fit3)

In [0]:
# First data set together with the fitted polynomial drawn on a dense x grid,
# so the curve's behaviour between the data points is visible
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.set_ylim(-20, 120)

# 200 evenly spaced x values from 0 to 10 at which to evaluate the polynomial
xx = np.linspace(0, 10, 200)

ax.plot(xx, fitted_polynomial(xx),
        linewidth=5, color='y', linestyle='-',
        label="Fit to Data")

ax.plot(my_data1['x'], my_data1['y'], color="m",
        marker="o", linestyle="None", markersize=15,
        label="Data")

ax.legend(loc=0)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after labels, data and legend
fig.tight_layout()

### Side Topic - The unpack operator (**\***)

 * The unpacking operator takes a list, unpacks its values, and sends each one, in order, as a separate argument.
 * It works on any **iterable** (a list is already iterable), not only on lists

In [0]:
my_list = [3.1, 5.6, 11.5, 19.6]

In [0]:
def some_numbers(a, b, c, d):
    """Print one sentence that lists the four arguments in the order given.

    Nothing is returned; the sentence is written to standard output.
    """
    template = "I have four numbers: {0}, {1}, {2}, and {3}"
    print(template.format(a, b, c, d))

##### This will not work as the list (`my_list`) is sent as one blob:

In [0]:
# The whole list arrives as the single argument `a`, leaving b, c and d
# missing, so Python raises TypeError. Catch it and show the message so the
# failure is still demonstrated without stopping a top-to-bottom run.
try:
    some_numbers(my_list)
except TypeError as err:
    print("TypeError:", err)

##### This **will** work as the list (`*my_list`) is unpacked and sent as 4 pieces:

In [0]:
some_numbers(*my_list)

# Fitting a specific function - `curve_fit`

In [0]:
from scipy.optimize import curve_fit

In [0]:
my_data3 = pd.read_csv('./Data/fit_data3.csv')

In [0]:
# Scatter plot of the raw (x, y) points of the third data set
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_ylim(-100, 140)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.plot(my_data3['x'], my_data3['y'], color="m",
        marker="o", linestyle="None", markersize=10)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after the labels and the data
fig.tight_layout()

$$ \Large f(x) = a \sin(bx) $$

In [0]:
def ringo(x, a, b):
    """Model function f(x) = a * sin(b * x).

    x is the point (or array of points) to evaluate at; a multiplies the
    sine and b multiplies x inside it. Returns a * sin(b * x).
    """
    phase = b * x
    return a * np.sin(phase)

##### You need to provide an initial guess for the parameters `a` and `b`

In [0]:
# Starting values for a and b in a*sin(b*x)
my_guess_a, my_guess_b = 75.0, 1.0 / 5.0

# Packed in the order (a, b)
init_guesses = [my_guess_a, my_guess_b]

#### My Parameters:

* `Function to fit = ringo`
* `X points to fit = my_data3['x']`
* `Y points to fit = my_data3['y']`
* `Initial guess at values for a,b = init_guesses`

In [0]:
# Fit ringo to the third data set, starting the search from init_guesses.
# curve_fit returns the best-fit parameters first; the second return value
# (kept here under the name `error`) is the estimated covariance matrix of
# those parameters.
fitpars, error = curve_fit(
    ringo,
    my_data3['x'],
    my_data3['y'],
    p0=init_guesses,
)

print(fitpars)

In [0]:
# 1000 evenly spaced x values from 0 to 100 on which to draw the model curves
Z = np.linspace(0, 100, 1000)

# Third data set, the model at the initial guess, and the model at the
# fitted parameters, all on one set of axes
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_ylim(-100, 140)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.plot(my_data3['x'], my_data3['y'], color="m",
        marker="o", linestyle="None", markersize=10,
        label="Data")

# *init_guesses / *fitpars unpack the parameter sequences into a and b
ax.plot(Z, ringo(Z, *init_guesses),
        'g--', linewidth=3,
        label="Initial Guess")

ax.plot(Z, ringo(Z, *fitpars),
        'y-', linewidth=3,
        label="Fit to Data")

ax.legend(loc=0)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after labels, data and legend
fig.tight_layout()

### Bad initial guesses can lead to very bad fits

In [0]:
# A second, poorer pair of starting values for a and b in a*sin(b*x)
my_guess_a, my_guess_b = 35.0, 1.0

# Packed in the order (a, b); this replaces the earlier init_guesses
init_guesses = [my_guess_a, my_guess_b]

In [0]:
# Repeat the fit of ringo to the third data set, this time starting from the
# current init_guesses. The second return value (kept under the name
# `error`) is the estimated covariance matrix of the fitted parameters.
fitpars, error = curve_fit(
    ringo,
    my_data3['x'],
    my_data3['y'],
    p0=init_guesses,
)

print(fitpars)

In [0]:
# Third data set, the model at the current initial guess, and the model at
# the parameters returned by the latest fit
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6, 4)

ax.set_ylim(-100, 140)

ax.set_xlabel("This is X")
ax.set_ylabel("This is Y")

ax.plot(my_data3['x'], my_data3['y'], color="m",
        marker="o", linestyle="None", markersize=10,
        label="Data")

# *init_guesses / *fitpars unpack the parameter sequences into a and b
ax.plot(Z, ringo(Z, *init_guesses),
        'g--', linewidth=3,
        label="Initial Guess")

ax.plot(Z, ringo(Z, *fitpars),
        'y-', linewidth=3,
        label="Fit to Data")

ax.legend(loc=0)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after labels, data and legend
fig.tight_layout()

### Side Topic - Scaling data: Normalization

* It is often useful to rescale your data.
* One of the most common types of rescaling is called: `min-max normalization`.
* This method rescales the data so that the `y-data` spans the range `[0, 1]`

$$ \Large
\text{Normalized}(y)={\frac  {y-{\text{min}}(y)}{{\text{max}}(y)-{\text{min}}(y)}}
$$

In [0]:
def normalize_data(data_array):
    """Min-max normalize data: (y - min(y)) / (max(y) - min(y)).

    Parameters
    ----------
    data_array : array-like
        Any object with ``.min()`` / ``.max()`` that supports element-wise
        arithmetic. Plain sequences such as lists are converted with
        ``np.asarray`` first.

    Returns
    -------
    The rescaled data, the same kind of object as the input (an ndarray for
    plain sequences). The smallest value maps to 0 and the largest to 1.
    If every value is identical the range is zero; the result is then all
    zeros rather than the NaN a 0/0 division would give.
    """
    # Accept lists/tuples as well as arrays and Series
    if not hasattr(data_array, "min"):
        data_array = np.asarray(data_array)

    lowest = data_array.min()
    span = data_array.max() - lowest
    shifted = data_array - lowest

    # Constant data: `shifted` is already all zeros, so divide by 1 instead
    # of 0 (true division keeps the result floating point)
    return shifted / (span if span != 0 else 1)

In [0]:
my_data3['y'].min(), my_data3['y'].max()

In [0]:
y_normal = normalize_data(my_data3['y'])

In [0]:
y_normal.min(), y_normal.max()

In [0]:
# Side-by-side comparison: original y values (left) and normalized y values
# (right) against the same x values
fig, ax = plt.subplots(1, 2)
fig.set_size_inches(14, 4)

ax[0].set_xlabel("This is X")
ax[0].set_ylabel("This is Y")

ax[0].plot(my_data3['x'], my_data3['y'], color="m",
           marker="o", linestyle="None", markersize=10)

ax[1].set_xlabel("This is X")
ax[1].set_ylabel("This is Normalized Y")

ax[1].plot(my_data3['x'], y_normal, color="m",
           marker="o", linestyle="None", markersize=10)

# tight_layout() is a one-shot adjustment based on what is on the figure at
# the moment it is called, so it has to come after both panels have their
# labels and data
fig.tight_layout()