# Matrices and Data-Frames

In [1]:
import numpy as np
import pandas as pd

### Numpy Arrays and Matrix Operations
Documentation: https://numpy.org/doc/stable/user/index.html

##### For basic 2D arrays:
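An element's location in a 2D array is written as the index pair (i, j), counted from 0:
- where i = index along the 1st dimension (the row)
- where j = index along the 2nd dimension (the column)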

In [2]:
# Index a 2-D NumPy array as matrix[row, column]; indices start at 0.
matrix = np.array(
    [
        [1, 9, -13],
        [2, 5, -6],
    ]
)
# Row 1 (the second row), every column.
matrix[1, :]

array([ 2,  5, -6])

##### Most numpy use cases will involve n-dimensional arrays
- For multidimensional arrays, one index can be given per dimension
- for example, an n-dimensional array is indexed as:
- `matrix[i, j, k, ..., n]`

In [3]:
# ** acts element by element: every entry is squared (this is not a matrix product).
matrix ** 2

array([[  1,  81, 169],
       [  4,  25,  36]])

In [4]:
# Swap rows and columns: the 2x3 array becomes 3x2.
matrix.transpose()

array([[  1,   2],
       [  9,   5],
       [-13,  -6]])

### Multidimensional Arrays

See 3Blue1Brown on YouTube for linear algebra basics ("Essence of Linear Algebra" series)

In [5]:
# 3x3x3 array of uniform random floats in [0, 1).
# No seed is set in the cells shown, so the values differ from run to run.
matrix_3d = np.random.random_sample(size=(3, 3, 3))
matrix_3d

array([[[0.67696948, 0.54664491, 0.8673507 ],
        [0.18166091, 0.99234231, 0.13322642],
        [0.01630432, 0.11404079, 0.35910618]],

       [[0.74440443, 0.60073306, 0.70321906],
        [0.75998581, 0.09087903, 0.23991692],
        [0.5235603 , 0.84000673, 0.86166263]],

       [[0.86819052, 0.96910806, 0.36750284],
        [0.44716809, 0.74965494, 0.56108961],
        [0.54781508, 0.60819169, 0.45475448]]])

In [6]:
# Multiplying by a scalar scales every element of the array.
# The result is bound back to the same name, so re-running this cell
# multiplies by 10 again.
matrix_3d = 10 * matrix_3d
matrix_3d

array([[[6.7696948 , 5.46644907, 8.67350696],
        [1.81660906, 9.92342309, 1.33226422],
        [0.16304322, 1.14040792, 3.5910618 ]],

       [[7.44404431, 6.00733056, 7.03219057],
        [7.59985814, 0.90879031, 2.39916918],
        [5.23560299, 8.40006729, 8.61662629]],

       [[8.68190518, 9.6910806 , 3.67502838],
        [4.47168085, 7.49654937, 5.61089611],
        [5.47815085, 6.08191693, 4.54754481]]])

In [7]:
# With no arguments, transpose reverses the order of all axes:
# the element at [i, j, k] ends up at [k, j, i].
matrix_3d.transpose()

array([[[6.7696948 , 7.44404431, 8.68190518],
        [1.81660906, 7.59985814, 4.47168085],
        [0.16304322, 5.23560299, 5.47815085]],

       [[5.46644907, 6.00733056, 9.6910806 ],
        [9.92342309, 0.90879031, 7.49654937],
        [1.14040792, 8.40006729, 6.08191693]],

       [[8.67350696, 7.03219057, 3.67502838],
        [1.33226422, 2.39916918, 5.61089611],
        [3.5910618 , 8.61662629, 4.54754481]]])

### Pandas

This is an incredibly useful package for initial exploration of data and works well with various statistics and visualization packages such as matplotlib, seaborn, scipy, sklearn, statsmodels, and many more.

Be sure to read the documentation to find even more useful functions and utilities:
https://pandas.pydata.org/docs/

##### Numpy is oriented row-first, Pandas is oriented column-first
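- NumPy arrays are laid out in memory row by row by default (C order), while a pandas DataFrame is organized column by column
- this leads to speed advantages at different tasks for np-arrays versus pd-dataframes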

In [8]:
# 10x10 table of uniform random floats in [0, 1); with no labels given,
# rows and columns both get the default integer labels 0-9.
random_values = np.random.random_sample(size=(10, 10))
df = pd.DataFrame(data=random_values)

In [9]:
# A bare expression on the last line of a cell is rendered as the cell's output.
# The frame is only 10x10, so it is shown in full here.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.27113,0.548723,0.910259,0.298098,0.707197,0.185192,0.607955,0.491264,0.311462,0.26823
1,0.797669,0.526395,0.316534,0.339371,0.77919,0.674704,0.953748,0.030475,0.960333,0.569228
2,0.088028,0.831357,0.881244,0.149287,0.679348,0.797055,0.474042,0.928781,0.184906,0.294549
3,0.971662,0.224895,0.033094,0.056585,0.334995,0.012898,0.016748,0.742371,0.485073,0.235698
4,0.581061,0.945276,0.548197,0.944633,0.145542,0.393657,0.588403,0.667803,0.716061,0.39073
5,0.67568,0.024408,0.467024,0.028381,0.425624,0.7098,0.657009,0.42548,0.295653,0.68847
6,0.164547,0.614266,0.488344,0.65201,0.216234,0.565954,0.810064,0.817699,0.691308,0.156191
7,0.975777,0.859887,0.811564,0.366067,0.37391,0.995592,0.511935,0.004966,0.722226,0.592017
8,0.772718,0.872637,0.868912,0.157671,0.803218,0.635838,0.060725,0.385809,0.786952,0.79382
9,0.752304,0.591812,0.065456,0.037715,0.602587,0.42258,0.501678,0.012701,0.204509,0.160153


In [10]:
# Print a structural summary: index range, each column's non-null count and dtype,
# and the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       10 non-null     float64
 1   1       10 non-null     float64
 2   2       10 non-null     float64
 3   3       10 non-null     float64
 4   4       10 non-null     float64
 5   5       10 non-null     float64
 6   6       10 non-null     float64
 7   7       10 non-null     float64
 8   8       10 non-null     float64
 9   9       10 non-null     float64
dtypes: float64(10)
memory usage: 932.0 bytes


In [11]:
# Per-column summary statistics: count, mean, std, min, the 25%/50%/75% quantiles, and max.
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.605058,0.603966,0.539063,0.302982,0.506785,0.539327,0.518231,0.450735,0.535848,0.414909
std,0.322691,0.297169,0.329242,0.296289,0.238017,0.292385,0.292525,0.344651,0.274507,0.229523
min,0.088028,0.024408,0.033094,0.028381,0.145542,0.012898,0.016748,0.004966,0.184906,0.156191
25%,0.348613,0.531977,0.354156,0.079761,0.344724,0.400887,0.480951,0.119309,0.299605,0.243831
50%,0.713992,0.603039,0.51827,0.227885,0.514106,0.600896,0.550169,0.458372,0.58819,0.342639
75%,0.791431,0.852754,0.854575,0.359393,0.700235,0.701026,0.644746,0.723729,0.720685,0.586319
max,0.975777,0.945276,0.910259,0.944633,0.803218,0.995592,0.953748,0.928781,0.960333,0.79382
