# Basic ndarray

- N-dimensional array
- Homogeneous collection of data
- Fast and efficient
- Supports mathematical functions

# Some important attributes of an ndarray object:

 - **ndarray.ndim**: 
The number of axes (dimensions) of the array; also referred to as rank.

 - **ndarray.shape**: 
The dimensions of the array. This is a tuple of integers indicating the size of the array in each dimension. For a matrix named matrix1 with n rows and m columns, matrix1.shape returns (n, m).

 - **ndarray.size**: 
The total number of elements of the array. This is equal to the product of the elements of shape.

 - **ndarray.dtype**: 
This is an object that describes the type of the elements in the array.

## Loading Numpy and Pandas libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Create Numpy Arrays

In [None]:
# creates a Numpy array from a list
list_1 = [1, 2, 3, 4]
np_arr = np.array(list_1)
np_arr

In [None]:
# see what type np_arr is:
type(np_arr)

In [None]:
# creates a NumPy array from a NumPy matrix
# (np.mat was removed in NumPy 2.0; np.asmatrix is the supported spelling
# and accepts the same MATLAB-style string)
mat_arr = np.array(np.asmatrix('1 2; 3 4; 5 6'))
mat_arr

In [None]:
# Exercise: can you fix this?
# Hint: np.array() expects ONE array-like object (for example a list) as
# its first argument. Here four separate positional arguments are passed,
# so the call below fails with an error.
a1 = np.array(1, 2, 3, 4)
a1

## Type Casting

In [None]:
list_2 = [1, 2.0, 3]
list_2

In [None]:
# creates an ndarray from a list; list_2 mixes ints and a float, so NumPy
# upcasts every element to one common type (float) to keep the array
# homogeneous
a2 = np.array(list_2) 
a2

In [None]:
# you can also specify the type in which the data is stored, via dtype:
# floating-point numbers
print(np.array([1, 2, 3], dtype = float))
# unicode strings (length chosen automatically)
print(np.array([1, 2, 3], dtype = 'U'))
# unicode strings of at most 2 characters
print(np.array([1, 2, 3], dtype = '<U2'))
# complex numbers
print(np.array([1, 2, 3], dtype = complex))
# integers: the fractional part of 3.2 is dropped
print(np.array([1, 2, 3.2], dtype = int))

## NumPy Important Functions

**numpy.zeros(shape)**: Creates an array full of zeros with given shape

In [None]:
np.zeros((3,4))

**numpy.ones(shape)**: Creates an array full of ones with given shape

In [None]:
np.ones((2,3,4), dtype=np.int16) # You can also specify the data type

**numpy.empty(shape)**: Creates an array without initializing its entries, so its initial content is arbitrary (whatever happened to be in memory)

In [None]:
np.empty((2,3))

**numpy.eye(integer)**: Creates an identity matrix with the shape of (integer, integer)

In [None]:
i = np.eye(6)
i

**numpy.diag(ndarray)**: Returns an array of the elements on the diagonal

In [None]:
arr = np.array([[1,2],[3,4]])
print('Array is ')
print(arr)

# numpy.diag(ndarray) returns an array of the elements on the diagonal
print('Diagonals of the array above: ', np.diag(arr))

**numpy.arange(startNum, endNum, gap)**: Creates an array starting from startNum and stopping before endNum (endNum itself is excluded), with each element equally spaced by an interval of gap

In [None]:
# create an integer list from 0 to 9
list(range(10))

In [None]:
# creates an integer array from 10 up to (but not including) 30 in steps of 5
print(np.arange(10, 30, 5))
# creates an integer array from 0 up to (but not including) 100 in steps of 20
print(np.arange(0, 100, 20))

**numpy.linspace(startNum, endNum, num)**: Creates an array of num evenly spaced values starting at startNum and ending at endNum (both endpoints included)

In [None]:
np.linspace(0, 100, 10)

In [None]:
# see how this works:
# 9 evenly spaced samples from 0 to 2 (both endpoints included)
print(np.linspace(0,2,9))
# sample one full period of the sine function at 100 points
x = np.linspace(0, 2*np.pi, 100)
f = np.sin(x)
# plot against x so the horizontal axis shows the angle in radians
# instead of the sample index
plt.plot(x, f)
plt.show()

**np.tril(ndarray, k)**: Returns a copy of an array with elements above the k-th diagonal zeroed, essentially a lower triangle

In [None]:
# 3x3 matrix holding the values 1..9 row by row
arr = np.arange(1, 10).reshape(3, 3)
print(arr)
print('----')
# k = -1 keeps only the entries strictly below the main diagonal
strictly_lower = np.tril(arr, k=-1)
print(strictly_lower)

**np.triu(ndarray, k)**: Returns a copy of an array with elements below the k-th diagonal zeroed, essentially an upper triangle

In [None]:
np.triu([[1,2,3],[4,5,6],[7,8,9]], 1)

## Printing Arrays

In [None]:
# 1-D array with the values 0..8; print the array that was just created
c = np.arange(9)
print(c)

In [None]:
np.arange(12).reshape(2,2,3)

In [None]:
c2 = np.arange(12).reshape(4,3) # 2D array
print(c2)

In [None]:
c3 = np.arange(24).reshape(2,3,4) # 3D array
print(c3)

In [None]:
print(np.arange(10000))

## Random Number Generation

In [None]:
# Random number generation in 2 by 3 matrix (from uniform distribution)
print(np.random.rand(2,3))

In [None]:
# Random number generation in 2 by 3 matrix(from standard normal distribution)
# where the mean = 0 and S.D. = 1
print(np.random.randn(2,3))

In [None]:
plt.hist(np.random.rand(100))
plt.show()

In [None]:
np.random.randint(1,100,10) # randint(low, high, size): 10 random integers with low included and high excluded

## Matrix Operations

In [None]:
# two small 2x2 operands used by the arithmetic examples that follow
A = np.array([[1, 1], [0, 1]])
B = np.array([[2, 0], [3, 4]])
# show each operand under its label
for label, operand in (('A = ', A), ('B = ', B)):
    print(label)
    print(operand)

In [None]:
print('A + B = ')
print(A + B)
print('A - B = ')
print(A - B)

In [None]:
# element-wise multiplication: each entry of A is multiplied by the entry
# of B at the same position (this is NOT the matrix product)
print('A * B = ')
print(A * B)

In [None]:
# matrix product of A and B, written with the @ operator
A @ B

## Indexing, Slicing, and Iterating

In [None]:
a = np.linspace(1, 20, 10)
print(a)

In [None]:
# returns the element at index = 2
a[2]

In [None]:
# gets the elements from index = 2 to index = 4
print(a[2:5])

In [None]:
# gets elements up to index = 2
print(a[:3])

In [None]:
# gets only the 4th element from the end: the slice stops before
# index -3, so the 3rd element from the end is not included
print(a[-4:-3])

In [None]:
# gets the entire array
print(a[:])

### Multi-dimensional Arrays

In [None]:
A = np.arange(12).reshape(3,4)
print(A)

In [None]:
# gets the dimensions of the array
A.shape

In [None]:
# slices the array from row = 1 to row = 2 and col = 0 to col = 2
A[1:3, 0:3]

In [None]:
# slices the array from col = 0 to col = 2
A[:,0:3]

In [None]:
A[1:3, 1]

In [None]:
A[:, 1]

In [None]:
# slices the array by taking just the last row
A[-1]

### Iteration 

In [None]:
# creates a matrix with values from 0 to 47 in a 3-dimensional 3*4*4 matrix
B = np.arange(48).reshape(3, 4, 4)
print(B)

In [None]:
# loops through every single value in matrix B:
# the outer loop walks the first axis (the 2-D sub-arrays), the middle
# loop walks the rows of each sub-array, and the inner loop visits the
# individual values of each row
for axis1 in B:
    for axis2 in axis1:
        for vals in axis2:
            print(vals)

In [None]:
# loops through the first axis of B
# each axis0 is one 2-D sub-array of B, and iterating over it yields its
# rows, so vals is a whole row (a 1-D array) rather than a single number
for axis0 in B:
    for vals in axis0:
        print(vals)
        print('  ')

## Manipulate the Shape

In [None]:
# creates a random 3 by 4 array with values from the interval [0.0, 1.0)
# scales it by 10
# applies the floor function to it, giving whole numbers from 0.0 to 9.0
a = np.floor(10*np.random.random((3,4)))
print(a)
print(a.shape)

In [None]:
# reshape() function returns a new ndarray in new dimensions
a.reshape(6,2)

In [None]:
# T returns the transpose of the array upon which it is called
a.T

In [None]:
# resize() function modifies the ndarray upon which it is called (in place)
# and returns None, unlike reshape(), which returns a new array object
a.resize((2,6))
print(a)

In [None]:
# the shape can also be passed to resize() as separate arguments
a.resize(2,6)
# if a dimension is given as -1, NumPy automatically calculates the size it
# ought to be from the total number of elements and the other dimensions
a.reshape(2,-1)

# Data Frame

### Create DataFrame

In [None]:
# creates a 6 by 4 NumPy array of random values
num_array = np.random.rand(6,4)

# creates the row labels (the index) using pd.Index
rows = pd.Index(['R1', 'R2', 'R3', 'R4', 'R5', 'R6'])

# creates a list of column names
columns = ['A','B','C','D']

# creates a DataFrame, similar to an Excel table, given the data, the
# indices (row labels) and the columns
df1 = pd.DataFrame(num_array, index = rows, columns = columns)

# see what df1 looks like:
df1

In [None]:
# creates a DataFrame from a dictionary of lists
# each key becomes a column name and its list becomes that column's values
data = {'Animal': ['cat', 'cat', 'snake', 'dog', 'cat'],
        'Age': [2.5, 3, 0.5, 1, 4],
        'Visits': [1, 2, 4, 5, 3],
        'Healthy': ['Yes','No','Yes','Yes','No']}


# creates a list of row labels (one per entry in the lists above)
labels = ['A', 'B', 'C', 'D', 'E']

df2 = pd.DataFrame(data, index = labels)
df2

In [None]:
df2.dtypes

In [None]:
df3 = df2.head(3)
df3

In [None]:
df4 = df2.tail(3)
df4

In [None]:
df2.values

In [None]:
df2.describe()

In [None]:
df2.sort_values(by = 'Age')

In [None]:
df2.sort_values(by = 'Age')[1:3]

In [None]:
df2

In [None]:
df2.loc[:,'Animal':'Visits']

In [None]:
df2.iloc[:,0:3]

In [None]:
df2.loc['B':'D','Animal':'Visits']

In [None]:
incompleteData = {'Animal': ['cat', 'cat', 'snake', 'dog', 'cat', 'rabbit','dog'],
        'Age': [2.5, 3, 0.5, 1, 4, np.nan,5],
        'Visits': [1, 2, 4, 5, 3, 3, 2],
        'Healthy': ['Yes','No','Yes','Yes','No', 'No', 'Yes']}
newLabels = ['A', 'B', 'C', 'D', 'E','F','G']
df5 = pd.DataFrame(incompleteData, index = newLabels)
df5

### Simple Way to Deal with Missing Values

In [None]:
# Option 1: keep every row and replace the missing ages with the mean age.
# fillna() returns a new object instead of changing df6 in place, so the
# result has to be stored; only the 'Age' column is filled so that other
# columns can never receive an age value by accident.
df6 = df5.copy()
df6['Age'] = df6['Age'].fillna(df6['Age'].mean())
print(df6)

# Option 2: drop every row that contains at least one missing value.
# dropna() also returns a new DataFrame, which leaves df5 untouched.
df7 = df5.dropna(how = 'any')
df7

# Try These:

In [None]:
# reads train.csv (expected in the current working directory) and makes a
# DataFrame from it
# NOTE(review): pd.read_csv already returns a DataFrame, so the extra
# pd.DataFrame(...) wrapper looks redundant -- confirm before removing
trainData = pd.read_csv("train.csv")
data_frame = pd.DataFrame(trainData)

In [None]:
# see what the data looks like:
data_frame

#### Let's see what the age distribution of the survivors is:

First, drop the passengers who did not survive.

In [None]:
# look up the column names (not displayed: only the last expression of a
# notebook cell is shown)
data_frame.columns
# keep only the rows whose 'Survived' value is not 0, i.e. the survivors
ndf = data_frame.loc[data_frame['Survived'] != 0]
ndf

Fill in missing ages with the average age of the data.

In [None]:
# fillna() returns a new object, so the filled column must be stored back;
# assign() builds a new DataFrame, which avoids writing into a slice of
# data_frame
ndf = ndf.assign(Age = ndf['Age'].fillna(ndf['Age'].mean()))
ndf[['Age']]

Group the ages into groups and count the number of individuals in each age group:

In [None]:
# bin edges 0, 5, 10, ..., 100 (5-year age groups)
ranges = list(range(0, 101, 5))
# assign every survivor to an age group; the survivors live in ndf, while
# data_frame still contains the passengers who did not survive
bins = pd.cut(ndf['Age'], ranges)
# count the survivors per age group; observed=False keeps the empty groups
# in the result
ndf.groupby(bins, observed=False)['Age'].agg(['count'])

Now, plot a histogram to see how the ages distribute:

In [None]:
# histogram of the survivors' ages (ndf) using the same 5-year bins;
# missing ages are dropped so they cannot affect the plot
plt.hist(ndf['Age'].dropna(), bins = ranges)
# render the figure (a bare `plt` would only echo the module object)
plt.show()

## What else can we do with the train.csv?