In [78]:
import numpy as np
# Path of the earthquake CSV file that is read later with np.loadtxt.
datfile = 'earthquakes1970-2014.csv'

# 1

In [79]:
# Each of the 10 houses gets 1-3 bedrooms (randint's upper bound 4 is exclusive).
num_bedrooms = np.random.randint(1, 4, 10)

# Total rooms = bedrooms + living room + bathroom + kitchen.
num_rooms = list(num_bedrooms + 3)

# Floor area in sqm, computed for all houses at once with array arithmetic.
house_sizes = (
    11 * (num_bedrooms - 1)          # normal bedrooms total size
    + 16                             # master bedroom size
    + 30                             # living room size
    + 14                             # bathroom size
    + 15                             # kitchen size
    + np.random.randint(0, 25, 10)   # add random extra sqm
)

# Price: 3600 per sqm plus a random extra price.
house_prices = 3600 * house_sizes + np.random.randint(0, 50000, 10)

In [80]:
# Stack the three per-house sequences as the rows of one integer array:
# row 0 = sizes, row 1 = room counts, row 2 = prices.
house_data = np.array(
    (house_sizes, num_rooms, house_prices),
    dtype=int,
)
house_data

array([[   111,    119,     77,    108,    120,     76,    104,    109,
            95,    115],
       [     6,      6,      4,      6,      6,      4,      6,      5,
             4,      6],
       [436387, 437625, 305764, 412090, 478832, 301881, 406896, 419205,
        357852, 447758]])

# 2

In [81]:
# Transpose a copy so each row describes one house; house_data stays untouched.
house_info = house_data.copy().transpose()
house_info

array([[   111,      6, 436387],
       [   119,      6, 437625],
       [    77,      4, 305764],
       [   108,      6, 412090],
       [   120,      6, 478832],
       [    76,      4, 301881],
       [   104,      6, 406896],
       [   109,      5, 419205],
       [    95,      4, 357852],
       [   115,      6, 447758]])

# 3

In [82]:
# (number of rows, number of columns) of the per-house table.
np.shape(house_info)

(10, 3)

>(10, 3)

This shape means that there is data for 10 houses (i.e. rows), with each row containing 3 features (i.e. columns) describing that house: the area of the house in square meters, the number of rooms in the house, and the price of the house in USD.

# 4

In [83]:
# Read every field as text so the header row and the data rows load into one array.
eqfiledata = np.loadtxt(datfile, dtype=str, delimiter=',')
eqheader = eqfiledata[0]   # first row: column names
eqdata = eqfiledata[1:]    # all remaining rows: the records
(eqheader, eqdata)

(array(['DateTime', 'Latitude', 'Longitude', 'Depth', 'Magnitude',
        'MagType', 'NbStations', 'Gap', 'Distance', 'RMS', 'Source',
        'EventID'], dtype='<U22'),
 array([['1970/01/04 17:00:40.20', '24.138999999999900',
         '102.503000000000000', ..., '0.000000000000000', 'NEI',
         '1970010440'],
        ['1970/01/06 05:35:51.80', '-9.628000000000000',
         '151.458000000000000', ..., '0.000000000000000', 'NEI',
         '1970010640'],
        ['1970/01/08 17:12:39.10', '-34.741000000000000',
         '178.568000000000000', ..., '0.000000000000000', 'NEI',
         '1970010840'],
        ...,
        ['2013/12/17 23:38:08.10', '20.763500000000000',
         '146.759899999999000', ..., '0.740000000000000', 'us',
         '2013121720'],
        ['2014/01/01 16:03:28.89', '-13.877900000000000',
         '167.243400000000000', ..., '0.670000000000000', 'us',
         '2014010120'],
        ['2014/01/13 04:01:04.69', '19.001400000000000',
         '-66.847800000000000

# 5

In [84]:
# First 20 records, transposed so that eqdata20[k] is column k of the file.
# Take a copy: eqdata[:20].T is only a view, so editing it in place would also
# rewrite eqdata / eqfiledata, and re-running this cell would then fail with a
# KeyError because the MagType column would already hold the encoded values.
eqdata20 = eqdata[:20].T.copy()

# denote unrecorded event IDs as '-1' (EventID is the last column)
eqdata20[-1, eqdata20[-1] == ''] = '-1'

# Encode the magnitude type (column 5) as a numeric label so it can be cast to
# float below. An unlisted magnitude type raises KeyError rather than being
# silently mis-encoded.
magtype_codes = {'Ms': '1', 'Mb': '2', 'ML': '3'}
eqdata20[5] = [magtype_codes[magType] for magType in eqdata20[5]]

# Columns kept: Longitude, Magnitude, MagType, NbStations, EventID.
colstoslice = [2, 4, 5, 6, 11]
eqdatasliced = np.array([eqdata20[col] for col in colstoslice], dtype=float).T
eqheadsliced = np.array([eqheader[col] for col in colstoslice])

eqheadsliced, eqdatasliced

(array(['Longitude', 'Magnitude', 'MagType', 'NbStations', 'EventID'],
       dtype='<U10'),
 array([[ 1.02503000e+02,  7.50000000e+00,  1.00000000e+00,
          9.00000000e+01,  1.97001044e+09],
        [ 1.51458000e+02,  6.20000000e+00,  1.00000000e+00,
          8.50000000e+01,  1.97001064e+09],
        [ 1.78568000e+02,  6.10000000e+00,  2.00000000e+00,
          5.90000000e+01,  1.97001084e+09],
        [ 1.26737000e+02,  6.10000000e+00,  2.00000000e+00,
          9.10000000e+01,  1.97001104e+09],
        [-1.52660000e+02,  6.00000000e+00,  3.00000000e+00,
          0.00000000e+00, -1.00000000e+00],
        [-1.77349000e+02,  6.50000000e+00,  2.00000000e+00,
          1.75000000e+02,  1.97001204e+09],
        [ 1.42966000e+02,  6.40000000e+00,  1.00000000e+00,
          1.99000000e+02,  1.97001204e+09],
        [-1.04298000e+02,  6.60000000e+00,  1.00000000e+00,
          1.40000000e+02,  1.97001214e+09],
        [ 1.66370000e+02,  6.40000000e+00,  1.00000000e+00,
          9.100

# 6

In [85]:
# Magnitude is the second column (index 1) of the sliced table.
magnitudes = eqdatasliced[:, 1]
# 1-based positions of the rows whose magnitude is at least 4.5.
selectedrows = [
    position
    for position, mag in enumerate(magnitudes, start=1)
    if float(mag) >= 4.5
]
selectedrows

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

# 7

In [86]:
# Overwrite every entry of the first row with 1.0, in place.
eqdatasliced[0, :] = 1.0
eqdatasliced

array([[ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,
         1.00000000e+00,  1.00000000e+00],
       [ 1.51458000e+02,  6.20000000e+00,  1.00000000e+00,
         8.50000000e+01,  1.97001064e+09],
       [ 1.78568000e+02,  6.10000000e+00,  2.00000000e+00,
         5.90000000e+01,  1.97001084e+09],
       [ 1.26737000e+02,  6.10000000e+00,  2.00000000e+00,
         9.10000000e+01,  1.97001104e+09],
       [-1.52660000e+02,  6.00000000e+00,  3.00000000e+00,
         0.00000000e+00, -1.00000000e+00],
       [-1.77349000e+02,  6.50000000e+00,  2.00000000e+00,
         1.75000000e+02,  1.97001204e+09],
       [ 1.42966000e+02,  6.40000000e+00,  1.00000000e+00,
         1.99000000e+02,  1.97001204e+09],
       [-1.04298000e+02,  6.60000000e+00,  1.00000000e+00,
         1.40000000e+02,  1.97001214e+09],
       [ 1.66370000e+02,  6.40000000e+00,  1.00000000e+00,
         9.10000000e+01,  1.97001264e+09],
       [-9.94840000e+01,  6.50000000e+00,  1.00000000e+00,
         1.00000000e+02

# 8

In [87]:
# Write the sliced numeric table to a text file using savetxt's default format.
np.savetxt(fname='eqdata.txt', X=eqdatasliced)

# 9

In [88]:
# Read the saved table back from disk.
loaded_data = np.loadtxt(fname='eqdata.txt')

# 10

In [89]:
# One output row per column of loaded_data: [mean, standard deviation].
np.array([
    (np.mean(column), np.std(column))
    for column in loaded_data.transpose()
])

array([[2.21156100e+01, 1.38403635e+02],
       [5.99000000e+00, 1.16528966e+00],
       [1.65000000e+00, 7.92148976e-01],
       [7.56000000e+01, 5.58134392e+01],
       [1.47751447e+09, 8.53043377e+08]])

# 11

In [90]:
# Subtract one offset per column from every row, in place; the 5-element
# list is broadcast across all rows of loaded_data.
loaded_data -= [1, 25, 25, 10, 4]

loaded_data

array([[ 0.00000000e+00, -2.40000000e+01, -2.40000000e+01,
        -9.00000000e+00, -3.00000000e+00],
       [ 1.50458000e+02, -1.88000000e+01, -2.40000000e+01,
         7.50000000e+01,  1.97001064e+09],
       [ 1.77568000e+02, -1.89000000e+01, -2.30000000e+01,
         4.90000000e+01,  1.97001084e+09],
       [ 1.25737000e+02, -1.89000000e+01, -2.30000000e+01,
         8.10000000e+01,  1.97001104e+09],
       [-1.53660000e+02, -1.90000000e+01, -2.20000000e+01,
        -1.00000000e+01, -5.00000000e+00],
       [-1.78349000e+02, -1.85000000e+01, -2.30000000e+01,
         1.65000000e+02,  1.97001204e+09],
       [ 1.41966000e+02, -1.86000000e+01, -2.40000000e+01,
         1.89000000e+02,  1.97001204e+09],
       [-1.05298000e+02, -1.84000000e+01, -2.40000000e+01,
         1.30000000e+02,  1.97001214e+09],
       [ 1.65370000e+02, -1.86000000e+01, -2.40000000e+01,
         8.10000000e+01,  1.97001264e+09],
       [-1.00484000e+02, -1.85000000e+01, -2.40000000e+01,
         9.00000000e+01

# 12

In [91]:
# Double every entry; the product is a new array rebound to the same name.
loaded_data = 2 * loaded_data
loaded_data

array([[ 0.00000000e+00, -4.80000000e+01, -4.80000000e+01,
        -1.80000000e+01, -6.00000000e+00],
       [ 3.00916000e+02, -3.76000000e+01, -4.80000000e+01,
         1.50000000e+02,  3.94002127e+09],
       [ 3.55136000e+02, -3.78000000e+01, -4.60000000e+01,
         9.80000000e+01,  3.94002167e+09],
       [ 2.51474000e+02, -3.78000000e+01, -4.60000000e+01,
         1.62000000e+02,  3.94002207e+09],
       [-3.07320000e+02, -3.80000000e+01, -4.40000000e+01,
        -2.00000000e+01, -1.00000000e+01],
       [-3.56698000e+02, -3.70000000e+01, -4.60000000e+01,
         3.30000000e+02,  3.94002407e+09],
       [ 2.83932000e+02, -3.72000000e+01, -4.80000000e+01,
         3.78000000e+02,  3.94002407e+09],
       [-2.10596000e+02, -3.68000000e+01, -4.80000000e+01,
         2.60000000e+02,  3.94002427e+09],
       [ 3.30740000e+02, -3.72000000e+01, -4.80000000e+01,
         1.62000000e+02,  3.94002527e+09],
       [-2.00968000e+02, -3.70000000e+01, -4.80000000e+01,
         1.80000000e+02