<a href="https://colab.research.google.com/github/iEpsilon-FPS/QU-Python/blob/master/6.1%20Analyzing_Data_Tutorial_Part_2_SciPy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Analyzing Data. Packages Covered
- SciPy

Copyright 2020 QuantUniversity LLC.

### References
* Note: Examples are derived and extended from:
* www.scipy.org
* www.scipy-lectures.org

In [None]:
# Install this if you don't have yfinance
!pip install yfinance

In [None]:
import yfinance as yf
import datetime

## SciPy

The SciPy package has multiple toolboxes meant to solve common problems in scientific computing.

# Interpolation

In [None]:
# Generate noisy measurements of one period of a sine wave
from scipy.interpolate import interp1d
import numpy as np

# Seed the random number generator so the noise (and therefore the plot)
# is the same on every run of this cell.
np.random.seed(0)

measured_time = np.linspace(0, 1, 10)
noise = (np.random.random(10)*2 - 1) * 1e-1  # uniform noise in [-0.1, 0.1)
measures = np.sin(2 * np.pi * measured_time) + noise

# Interpolate the measurements onto a denser grid of time points
interpolation_time = np.linspace(0, 1, 50)

# Without a `kind` argument interp1d builds a linear interpolant
linear_interp = interp1d(measured_time, measures)
linear_results = linear_interp(interpolation_time)

# Cubic interpolant through the same measurements
cubic_interp = interp1d(measured_time, measures, kind='cubic')
cubic_results = cubic_interp(interpolation_time)

# Draw the raw measurements together with both interpolated curves
from matplotlib import pyplot as plt

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(measured_time, measures, 'o', ms=6, label='measures')
ax.plot(interpolation_time, linear_results, label='linear interp')
ax.plot(interpolation_time, cubic_results, label='cubic interp')
ax.legend()
plt.show()


# Optimization and curve fitting

In [None]:
from scipy import optimize

# Fix the seed so the noisy data set is identical on every run
np.random.seed(0)

# 50 evenly spaced abscissae on [-5, 5] and a cosine with additive Gaussian noise
n_points = 50
x_data = np.linspace(-5, 5, num=n_points)
gaussian_noise = np.random.normal(size=n_points)
y_data = 2.9 * np.cos(1.5 * x_data) + gaussian_noise

# Scatter plot of the noisy samples
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(x_data, y_data)

def test_func(x, a, b):
    return a * np.cos(b * x)

# Fit test_func to the data, starting the search from a = 2, b = 2
initial_guess = [2, 2]
params, params_covariance = optimize.curve_fit(
    test_func, x_data, y_data, p0=initial_guess
)

print(params)


In [None]:
# Overlay the fitted model on the noisy samples
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(x_data, y_data, label='Data')

fitted_y = test_func(x_data, *params)
ax.plot(x_data, fitted_y, label='Fitted function')

ax.legend(loc='best')
plt.show()

# Statistics

In [None]:
# Draw 10000 values from a normal distribution with numpy's random number generator
samples = np.random.normal(size=10000)

# Density-normalised histogram over 30 equally spaced bin edges on [-5, 5]
bins = np.linspace(-5, 5, 30)
histogram, bins = np.histogram(samples, bins, density=True)
print(bins)

# Midpoint of every bin: the average of its left and right edge
bin_centers = (bins[:-1] + bins[1:]) / 2

In [None]:
# Display the bin midpoints computed in the previous cell
bin_centers

In [None]:
# Evaluate the normal PDF at every bin midpoint via scipy's distribution object
from scipy import stats
pdf = stats.norm.pdf(bin_centers)

# Plot the empirical histogram and the PDF on the same axes
from matplotlib import pyplot as plt
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(bin_centers, histogram, label="Histogram of samples")
ax.plot(bin_centers, pdf, label="PDF")
ax.legend()
plt.show()

In [None]:
# Mean and standard deviation of the samples, computed directly with numpy
mean = samples.mean()
std = samples.std()
print(mean, std, sep="\n")

In [None]:
# Fit a normal distribution to the samples and unpack the two fitted parameters
fitted_params = stats.norm.fit(samples)
loc, std = fitted_params
print(loc, std, sep="\n")

# Signal processing

In [None]:
from scipy import signal

# A linear ramp on [0, 5] with additive Gaussian noise
n_samples = 100
t = np.linspace(0, 5, n_samples)
y = np.random.normal(size=n_samples) + t

In [None]:
# Remove the linear trend from y ('linear' is detrend's default type, spelled out here)
y_detrended = signal.detrend(y, type='linear')

In [None]:
# Compare the original signal with its detrended version
from matplotlib import pyplot as plt

fig, ax = plt.subplots(figsize=(5, 4))
ax.plot(t, y, label="y")
ax.plot(t, y_detrended, label="y_detrended")
ax.legend(loc='best')
plt.show()