# Tabular Numeric Data

## Unit 21

In [1]:
import numpy as np
# Build a 1-D integer array holding 1..10. copy=True (NumPy's default) asks
# for freshly allocated storage instead of reusing the input's buffer.
one_to_ten = range(1, 11)
numbers = np.array(one_to_ten, copy=True)
numbers

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [2]:
# 2 rows x 4 columns of double-precision ones.
ones = np.ones((2, 4), dtype=float)
ones

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [3]:
# 2 rows x 4 columns of double-precision zeros.
zeros = np.zeros((2, 4), dtype=float)
zeros

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [4]:
# np.empty only allocates the 2x4 float64 buffer; it does not initialise it,
# so whatever values are displayed are arbitrary and must not be relied on.
empty = np.empty((2, 4), dtype=float)
empty

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [5]:
# Length of each axis, as a tuple: (rows, columns).
np.shape(ones)

(2, 4)

In [6]:
# Number of axes (dimensions) of the array.
np.ndim(numbers)

1

In [7]:
# Element type shared by every item of the array.
zeros.dtype

dtype('float64')

In [8]:
# 3x3 matrix whose ones sit on the first superdiagonal (k=1) rather than on
# the main diagonal.
eye = np.eye(N=3, k=1)
eye

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

In [9]:
# Evenly spaced floats from 2 (inclusive) to 5 (exclusive) in steps of 0.25.
np_numbers = np.arange(2.0, 5.0, 0.25)
np_numbers

array([2.  , 2.25, 2.5 , 2.75, 3.  , 3.25, 3.5 , 3.75, 4.  , 4.25, 4.5 ,
       4.75])

In [30]:
# Truncate the floats toward zero by casting to the platform integer type.
# The builtin `int` is used because the `np.int` alias was deprecated in
# NumPy 1.20 and removed in 1.24; both name the same dtype.
np_inumbers = np_numbers.astype(int)
np_inumbers

array([2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4])

## Unit 22

In [11]:
# Eight ticker symbols stored as a 1-D array of fixed-width Unicode strings.
tickers = ["MMM", "ABT", "ABBV", "ACN", "ACE", "ATVI", "ADBE", "ADT"]
sap = np.array(tickers)
sap

array(['MMM', 'ABT', 'ABBV', 'ACN', 'ACE', 'ATVI', 'ADBE', 'ADT'],
      dtype='<U4')

In [12]:
# Same eight items viewed as 2 rows x 4 columns.
sap2d = np.reshape(sap, (2, 4))
sap2d

array([['MMM', 'ABT', 'ABBV', 'ACN'],
       ['ACE', 'ATVI', 'ADBE', 'ADT']], dtype='<U4')

In [13]:
# Same eight items viewed as a 2 x 2 x 2 cube.
sap3d = np.reshape(sap, (2, 2, 2))
sap3d

array([[['MMM', 'ABT'],
        ['ABBV', 'ACN']],

       [['ACE', 'ATVI'],
        ['ADBE', 'ADT']]], dtype='<U4')

In [14]:
# Transpose: rows become columns (2x4 -> 4x2).
sap2d.transpose()

array([['MMM', 'ACE'],
       ['ABT', 'ATVI'],
       ['ABBV', 'ADBE'],
       ['ACN', 'ADT']], dtype='<U4')

In [15]:
# Exchange the last two axes, leaving the first axis in place.
np.swapaxes(sap3d, 1, 2)

array([[['MMM', 'ABBV'],
        ['ABT', 'ACN']],

       [['ACE', 'ADBE'],
        ['ATVI', 'ADT']]], dtype='<U4')

In [31]:
# Reorder the axes explicitly: keep axis 0, swap axes 1 and 2.
np.transpose(sap3d, axes=(0, 2, 1))

array([[['MMM', 'ABBV'],
        ['ABT', 'ACN']],

       [['ACE', 'ADBE'],
        ['ATVI', 'ADT']]], dtype='<U4')

## Unit 23

In [17]:
# Measurements that should be non-negative; the small negative entries are
# the ones to flag.
dirty = np.array([9, 4, 1, -0.01, -0.02, -0.001])
# Element-wise comparison yields a boolean mask that is True at negatives.
whos_dirty = np.less(dirty, 0)
whos_dirty

array([False, False, False,  True,  True,  True])

In [18]:
# Boolean-mask assignment: overwrite, in place, every element flagged by the
# mask with zero.
dirty[whos_dirty] = 0.0
dirty

array([9., 4., 1., 0., 0., 0.])

In [19]:
# Grid from -1 up to (but excluding) 1.1 in steps of 0.2.
linear = np.arange(-1, 1.1, 0.2)
# Element-wise AND of the two bounds: True where -0.5 <= value <= 0.5.
np.logical_and(linear >= -0.5, linear <= 0.5)

array([False, False, False,  True,  True,  True,  True,  True, False,
       False, False])

In [20]:
# Fancy indexing: pick items by a list of positions (-1 is the last item).
picks = [1, 2, -1]
sap[picks]

array(['ABT', 'ABBV', 'ADT'], dtype='<U4')

In [21]:
# Indexing the column axis with a one-element list keeps the result 2-D
# (a 2x1 column).
second_column = [1]
sap2d[:, second_column]

array([['ABT'],
       ['ATVI']], dtype='<U4')

In [32]:
# Indexing the last axis with a plain integer drops that axis, giving a
# 1-D array.
sap2d[..., 1]

array(['ABT', 'ATVI'], dtype='<U4')

## Unit 24

In [23]:
# Two equal-length integer vectors: 0..3 and 1..4.
a = np.arange(0, 4)
b = np.arange(1, 5)
# Element-wise sum.
np.add(a, b)

array([1, 3, 5, 7])

In [24]:
# The scalar is broadcast to every element of the array.
np.multiply(a, 5)

array([ 0,  5, 10, 15])

In [25]:
# The length-4 vector of 0.01s is broadcast across every row of the 4x4
# identity matrix, so each entry is shifted by 0.01.
offset = np.full((4,), 0.01)
noise = np.eye(4) + offset
noise

array([[1.01, 0.01, 0.01, 0.01],
       [0.01, 1.01, 0.01, 0.01],
       [0.01, 0.01, 1.01, 0.01],
       [0.01, 0.01, 0.01, 1.01]])

In [33]:
# Identity matrix plus uniform random noise drawn from [0, 0.01).
# A locally seeded generator makes the cell reproducible on every
# Restart & Run All; change NOISE_SEED to obtain a different draw.
NOISE_SEED = 42
rng = np.random.default_rng(NOISE_SEED)
noise = np.eye(4) + 0.01 * rng.random((4, 4))
np.round(noise, 2)

array([[1.01, 0.01, 0.01, 0.  ],
       [0.01, 1.01, 0.01, 0.  ],
       [0.01, 0.  , 1.  , 0.01],
       [0.01, 0.  , 0.  , 1.01]])

## Unit 28

In [27]:
# Split the sequence into one array element per nucleotide letter.
dna = "AGTCCGCGAATACAGGCTCGGT"
dna_as_array = np.array([base for base in dna])
dna_as_array

array(['A', 'G', 'T', 'C', 'C', 'G', 'C', 'G', 'A', 'A', 'T', 'A', 'C',
       'A', 'G', 'G', 'C', 'T', 'C', 'G', 'G', 'T'], dtype='<U1')

In [28]:
# Distinct letters present in the sequence; np.unique returns them sorted.
np.unique(dna_as_array)

array(['A', 'C', 'G', 'T'], dtype='<U1')

In [29]:
# Membership test: for each candidate ticker, is it present in `sap`?
# np.isin replaces np.in1d, which is deprecated since NumPy 2.0; for 1-D
# input the two return the same boolean array.
np.isin(["MSFT", "MMM", "AAPL"], sap)

array([False,  True, False])

## HEI Locator
I'm using a regular dictionary to map each institution id to its name, because I can't
store the names in the NumPy array without hurting performance.

I'm calculating each institution's distance from the mean location with the Pythagorean
theorem, treating longitude and latitude as planar coordinates. That should be close
enough to the true distance in degrees.

I'm then using a Python list comprehension to generate the list of distances, sorted
in ascending order.

In [38]:
import numpy as np
import csv
import math
# Institution id -> institution name. The names are kept in a plain dict
# because the numeric array below only holds three numeric columns.
names = {}

# Numeric pass: CSV columns 0, 2 and 3 (used below as id, longitude and
# latitude, per the variable names). The first row is skipped (presumably a
# header). ndmin=2 keeps the array two-dimensional even when the file holds a
# single data row, so the column indexing below cannot fail on it.
with open("data.csv", "rb") as infile:
    data = np.loadtxt(infile, delimiter=",", skiprows=1, usecols=(0, 2, 3), ndmin=2)

# Text pass: the csv module handles quoted names. newline="" is what the csv
# documentation requires so that embedded newlines are parsed correctly.
with open("data.csv", "r", newline="") as infile:
    reader = csv.reader(infile, delimiter=",", quotechar='"')
    # Skip the same first row that loadtxt skipped; otherwise int() would fail
    # on a textual header, and that row never appears in `data` anyway.
    next(reader, None)
    for row in reader:
        if not row:
            continue  # tolerate blank lines
        names[int(row[0])] = row[1]

# Mean location of all institutions (the id column's mean is unused).
datamean = np.mean(data, axis=0)
longitude = datamean[1]
latitude = datamean[2]

# Offsets from the mean location; subtracting 0 leaves the id column intact.
datadiff = data - [0, longitude, latitude]

# Planar (Pythagorean) distance in degrees, computed for all rows at once.
distances = np.sqrt(datadiff[:, 1] ** 2 + datadiff[:, 2] ** 2)

# A stable sort keeps tied distances in file order, as sorted() would.
nearest_first = np.argsort(distances, kind="stable")
datadist = [[datadiff[i, 0], float(distances[i])] for i in nearest_first]

# Report the ten institutions closest to the mean location.
for institution_id, distance in datadist[:10]:
    print("Distance: {} degrees    Name: {}".format(distance, names[int(institution_id)]))

Distance: 0.4694457881670631 degrees    Name: Mineral Area College
Distance: 0.5857086412509711 degrees    Name: Poplar Bluff Technical Career Center
Distance: 0.603898425098322 degrees    Name: Three Rivers College
Distance: 0.8547075592093094 degrees    Name: Metro Business College-Cape Girardeau
Distance: 0.8678201413441387 degrees    Name: Cape Girardeau Career and Technology Center
Distance: 0.8841611226273491 degrees    Name: Southeast Missouri Hospital College of Nursing and Health Sciences
Distance: 0.8907578762367585 degrees    Name: Jefferson College
Distance: 0.8998180288674102 degrees    Name: Southeast Missouri State University
Distance: 0.9945469299881141 degrees    Name: Sikeston Career and Technology Center
Distance: 0.9967072755689832 degrees    Name: Creative Touch Cosmetology School
