In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels as sm
%matplotlib inline

## Concatenating and Splitting Arrays
numpy.concatenate takes a sequence (tuple, list, etc.) of arrays and joins them
together in order along the input axis

In [None]:
# Two 2x3 integer arrays used by the joining examples below.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [None]:
# axis=0 joins along rows: the two (2, 3) inputs give a (4, 3) result.
np.concatenate([arr1, arr2], axis=0)

In [None]:
# axis=1 joins along columns: the result is (2, 6).
np.concatenate([arr1, arr2], axis=1)

In [None]:
# vstack stacks row-wise; for these 2-D inputs it matches concatenate with axis=0.
np.vstack((arr1,arr2))

In [None]:
# hstack stacks column-wise; for these 2-D inputs it matches concatenate with axis=1.
np.hstack((arr1,arr2))

In [None]:
# 5x2 array of standard-normal samples to split up.
arr = np.random.randn(5, 2)
arr

In [None]:
# Split points 1 and 3 along axis 0 yield rows [0:1], [1:3] and [3:5].
first, second, third = np.split(arr, [1, 3],axis=0)

In [None]:
first  # row 0 only, shape (1, 2)

In [None]:
second  # rows 1-2, shape (2, 2)

In [None]:
third  # rows 3-4, shape (2, 2)

## Universal Functions
A universal function, or ufunc, is a function that performs element-wise operations on data in ndarrays. You can think of ufuncs as fast vectorized wrappers around simple functions that take one or more scalar values and produce one or more scalar results. Many ufuncs are simple element-wise transformations, like sqrt or exp:


In [None]:
arr = np.arange(1,10)  # integers 1 through 9
arr

In [None]:
np.sqrt(arr)  # element-wise square root

In [None]:
arr_log = np.log(arr)  # element-wise natural logarithm
arr_log

In [None]:
# exp undoes log, but note that arr is rebound here to a float array
# (values match the original integers only up to floating-point rounding).
arr = np.exp(arr_log)
arr

In [None]:
# Two independent vectors of 8 standard-normal samples.
x = np.random.randn(8)
y = np.random.randn(8)

In [None]:
x

In [None]:
y

In [None]:
np.maximum(x, y)  # binary ufunc: element-wise maximum of x and y

### Common Unary Ufuncs on Arrays
![image.png](attachment:image.png)

### Common Binary Ufuncs on Arrays
![image.png](attachment:image.png)

![image.png](attachment:image.png)

### High speed conditional operations

Suppose we wanted to take a value from xarr whenever the corresponding value in cond is True, and otherwise take the value from yarr. 

A list comprehension doing this might look like:

In [None]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])  # True -> take from xarr, False -> take from yarr

In [None]:
# Pure-Python version: walks the three arrays element by element and builds a list.
result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
result

In [None]:
# Vectorized equivalent: np.where picks from xarr or yarr per element and returns an ndarray.
result = np.where(cond, xarr, yarr)
result

Suppose we had a matrix of randomly generated data and wanted to replace all positive values with 2 and all negative values with –2. This is very easy to do with np.where:


In [None]:
arr = np.random.randn(4, 4)
arr

In [None]:
arr > 0  # boolean mask, True where the value is positive

In [None]:
np.where(arr > 0, 2, -2)  # 2 where positive, -2 everywhere else

We can combine scalars and arrays when using np.where. For example, we can replace all positive values in arr with the constant 2:

In [None]:
# Scalar and array arguments can be mixed: non-positive entries keep their value from arr.
np.where(arr > 0, 2, arr) # set only positive values to 2

## Statistical Functions on Arrays

In [None]:
arr = np.random.randn(5, 4)
arr

In [None]:
arr.mean()  # mean over all 20 elements (method form)

In [None]:
np.mean(arr)  # same statistic via the top-level NumPy function

Functions like mean and sum take an optional axis argument that computes the statistic
over the given axis, resulting in an array with one fewer dimension:

In [None]:
# The shape remarks below assume arr is still the (5, 4) array created for this section.
arr.mean(axis=1)  # mean across the columns of each row -> 5 values

In [None]:
arr.sum(axis=0)  # sum down the rows of each column -> 4 values

In [None]:
arr.cumsum()  # no axis given: running total over the flattened array

In [None]:
arr.argmin()  # position of the minimum in the flattened array

In [None]:
arr.argmax()  # position of the maximum in the flattened array

In [None]:
arr.argmin(axis=0)  # row index of the minimum in each column

In [None]:
arr.argmax(axis=0)  # row index of the maximum in each column

### Statistics for Boolean Arrays

In [None]:
arr = np.random.randn(100)
# True counts as 1 and False as 0, so summing the mask counts the positives.
(arr > 0).sum() # Number of positive values

### Any and All methods
There are two additional methods, any and all, useful especially for boolean arrays.
any tests whether one or more values in an array is True, while all checks if every
value is True:

These methods also work with non-boolean arrays, where non-zero elements evaluate
to True.

In [None]:
(arr > 0).any()  # True if at least one value is positive


In [None]:
(arr > 0).all()  # True only if every value is positive

### Unique and Other Set Logic
NumPy has some basic set operations for one-dimensional ndarrays. A commonly used one is np.unique, which returns the sorted unique values in an array:

In [None]:
names = np.array(['Ali', 'Sohail', 'Sana', 'Umar', 'Sohail', 'Asim', 'Sania'])
np.unique(names)  # sorted unique names; the duplicate 'Sohail' appears once

In [None]:
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)  # sorted unique values: 1, 2, 3, 4

In [None]:
# Pure-Python counterpart of np.unique(names); yields a list rather than an ndarray.
sorted(set(names))

## Save and Load functions

In [None]:
arr = np.arange(10)
# np.save writes a single array in NumPy's binary .npy format.
np.save('some_array', arr)  # If the file path does not already end in .npy, the extension is appended.

In [None]:
np.load('some_array.npy')  # read the array back from disk

You can save multiple arrays in an uncompressed archive using np.savez, passing the
arrays as keyword arguments:

In [None]:
# Each keyword becomes the name under which that array is stored in the archive.
np.savez('array_archive.npz', a=arr, b=arr)

When loading an .npz file, you get back a dict-like object that loads the individual
arrays lazily:

In [None]:
# np.load on an .npz file returns an NpzFile that keeps the archive's file
# handle open until it is closed. Read the array inside a ``with`` block so
# the handle is released, then display the extracted array.
with np.load('array_archive.npz') as arch:
    arch_b = arch['b']
arch_b

In [None]:
# Same keyword-per-array layout as np.savez, but the archive is written compressed.
np.savez_compressed('arrays_compressed.npz', a=arr, b=arr)

## Linear Algebra on Arrays

In [None]:
x = np.array([[1., 2., 3.], [4., 5., 6.]])  # shape (2, 3)
y = np.array([[6., 23.], [-1, 7], [8, 9]])  # shape (3, 2)

In [None]:
x

In [None]:
y

In [None]:
x.dot(y)  # matrix product: (2, 3) x (3, 2) -> (2, 2)

In [None]:
np.dot(x, y)  # function form of the same product

In [None]:
np.dot(x, np.ones(3))  # 2-D times 1-D gives a 1-D result (here, the row sums of x)

In [None]:
# inv computes a matrix inverse and det a determinant.
from numpy.linalg import inv, det

In [None]:
X = np.random.randn(5, 5)
mat = X.T.dot(X)  # X^T X: a symmetric 5x5 matrix
inv(mat)

In [None]:
det(mat)

### Common Matrix functions

![image.png](attachment:image.png)

![image.png](attachment:image.png)

### Random Number Generation

In [None]:
from random import normalvariate
N = 1000000
# Time drawing N standard-normal samples one at a time in pure Python...
%timeit samples = [normalvariate(0, 1) for _ in range(N)]
# ...against a single vectorized NumPy call producing the same number of samples.
%timeit np.random.normal(size=N)

In [None]:
np.random.seed(1234)  # seeds NumPy's global random state

In [None]:
# A RandomState instance is a separate generator with its own seed,
# independent of the global state seeded above.
rng = np.random.RandomState(1234)
rng.randn(10)

### Commonly used functions available in the NumPy random library

![image.png](attachment:image.png)

## Broadcasting
Broadcasting describes how arithmetic works between arrays of different shapes.

Combining a scalar value with an array:

In [None]:
arr = np.arange(5)  # 0, 1, 2, 3, 4
arr

In the following, we say that the scalar value 4 has been broadcast to all of the other elements in
the multiplication operation.

In [None]:
arr * 4  # the scalar is applied to every element

Getting the mean of each column of an array

In [None]:
arr = np.random.randn(4, 3)

In [None]:
arr

In [None]:
m = arr.mean(0)  # mean over axis 0: one value per column

In [None]:
m

In [None]:
m.shape,arr.shape  # (3,) and (4, 3)

In [None]:
# m (3,) lines up with arr's trailing axis, so it is subtracted from every row.
demeaned = arr - m
demeaned

In [None]:
demeaned.mean(0)  # column means should now be zero up to floating-point error

## The Broadcasting Rule
Two arrays are compatible for broadcasting if for each trailing dimension (i.e., starting
from the end) the axis lengths match or if either of the lengths is 1. Broadcasting is
then performed over the missing or length 1 dimensions.

In [None]:
row_mean = arr.mean(1)  # mean over axis 1: one value per row

In [None]:
row_mean

In [None]:
row_mean.shape  # (4,) — assumes arr is still the (4, 3) array created earlier

In [None]:
# Demonstrates a broadcasting failure: with arr of shape (4, 3) and row_mean
# of shape (4,), the trailing dimensions (3 vs 4) neither match nor equal 1,
# so NumPy raises ValueError. The error is caught and printed so the failure
# is still shown without aborting a "Run All" of the notebook.
try:
    demeaned_rows = arr - row_mean
except ValueError as err:
    print("Broadcasting failed:", err)

In [None]:
# Reshaping row_mean into a column, shape (n, 1), lets it broadcast across arr's columns.
demeaned_rows = arr - row_mean.reshape(-1,1)

In [None]:
demeaned_rows

We often need to add a new axis with length 1 specifically for broadcasting purposes. Using reshape is one option, but inserting an axis requires constructing a tuple indicating the new shape. 
NumPy arrays offer a special syntax for inserting new axes by indexing. We use the special np.newaxis attribute along with “full” slices to insert the new axis:

In [None]:
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]  # insert a length-1 axis in the middle
arr_3d.shape  # (4, 1, 4)

In [None]:
arr_1d = np.random.randn(3)
arr_1d

In [None]:
arr_1d[:, np.newaxis]  # column vector, shape (3, 1)

In [None]:
arr_1d[np.newaxis, :]  # row vector, shape (1, 3)

In [None]:
arr = np.random.randn(3, 4, 5)
depth_means = arr.mean(2)  # average over the last axis
depth_means

In [None]:
depth_means.shape  # (3, 4)

In [None]:
# Restore the averaged axis as length 1 so (3, 4, 1) broadcasts against (3, 4, 5).
demeaned = arr - depth_means[:, :, np.newaxis]

### Setting values via Broadcasting

In [None]:
col = np.array([1.28, -0.42, 0.44, 1.6])
col

In [None]:
arr = np.zeros((4, 3))
# Assigning a (4, 1) column broadcasts it into all 3 columns of arr.
arr[:] = col[:, np.newaxis]
arr

In [None]:
# The (2, 1) nested list is broadcast across the columns of the first two rows.
arr[:2] = [[-1.37], [0.509]]
arr

### Random Walk Example: Pure Python

In [None]:
import random

# Simulate a simple +/-1 random walk, recording the position after every step.
steps = 1000
position = 0
walk = [position]  # the starting point counts as the first entry
for i in range(steps):
    # randint(0, 1) acts as a fair coin: 1 moves up, 0 moves down.
    step = (-1, 1)[random.randint(0, 1)]
    position += step
    walk.append(position)

In [None]:
plt.figure()  # create a new matplotlib figure

In [None]:
plt.plot(walk[:100])  # plot the first 100 entries of the walk

### Random Walk Example: Numpy

In [None]:
nsteps = 1000
# One fair coin flip (0 or 1) per step, drawn in a single vectorized call.
draws = np.random.randint(0, 2, size=nsteps)
# Map the flips onto moves: 1 -> +1, 0 -> -1.
steps = np.where(draws == 1, 1, -1)
# The running total of the moves is the walk's position after each step.
walk = np.cumsum(steps)

In [None]:
# Extremes reached along the walk.
print(walk.min())
print(walk.max())

In [None]:
# argmax on a boolean array gives the index of the first True, i.e. the first
# step at which the walk is 10 or more away from the origin.
# NOTE(review): argmax also returns 0 when no entry is True — confirm the walk
# actually got that far before trusting the result.
(np.abs(walk) >= 10).argmax()

In [None]:
plt.plot(walk[:100])  # plot the first 100 entries of the walk

### Simulating several Random Walks at Once

In [None]:
nwalks, nsteps = 5000, 1000
# One row of coin flips (0 or 1) per walk.
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
steps = np.where(draws == 1, 1, -1)
# Accumulate along axis 1 so that each row becomes an independent walk.
walks = np.cumsum(steps, axis=1)
walks

In [None]:
# Extremes reached across all walks.
print(walks.max())
print(walks.min())

In [None]:
hits30 = (np.abs(walks) >= 30)  # boolean array, same shape as walks: True where |position| >= 30

In [None]:
hits30

In [None]:
# Collapse each walk's row to a single flag: did it ever reach +/-30?
hits30_across_walks = hits30.any(axis=1)
print(hits30_across_walks)
hits30_across_walks.sum()  # Number that hit 30 or -30

In [None]:
hits30.shape,hits30_across_walks.shape  # per-step mask vs. one flag per walk

In [None]:
# argmax(1) gives the index of the first True in each row, i.e. the first step
# at which a walk is 30 or more away from the origin. For a row with no True
# at all, argmax returns 0, so walks that never crossed must be filtered out
# first; otherwise they drag the mean crossing time toward 0.
crossing_times = hits30[hits30.any(1)].argmax(1)
crossing_times.mean()