# Jupyter Notebooks Intro

Jupyter Notebook provides an easy, interactive environment for running Python code.

Let's try a simple equation

In [2]:
# Bind two integers, add them, and leave the sum as the cell's displayed result.
a, b = 5, 6
c = a + b
c

11

In [3]:
# Convert a distance in kilometres to miles using the rough factor of 1.6 km per mile.
KM_PER_MILE = 1.6
km = 100
miles = km / KM_PER_MILE
miles

62.5

Now set it up as a simple function

In [4]:
def milesfromkm(km, km_per_mile=1.6):
    """Convert a distance in kilometres to miles.

    Args:
        km: Distance in kilometres.
        km_per_mile: Number of kilometres in one mile. Defaults to the rough
            approximation 1.6 so existing calls keep returning the same
            values; pass 1.609344 for the exact international mile.

    Returns:
        The distance in miles, computed as ``km / km_per_mile``.
    """
    return km / km_per_mile

milesfromkm(100)

62.5

A list is enclosed in square brackets `[ ]`.

In [5]:
# A list literal: comma-separated items between square brackets.
l1 = [1, 2, 3, 4, 5, 6, 7]
l1

[1, 2, 3, 4, 5, 6, 7]

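Access list elements by index. Lists are zero-indexed: the first element is at index 0. Negative indices count from the end, and slices exclude the stop index.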

In [6]:
# Indexing and slicing examples, printed one result per line.
print(
    l1[1],     # second element (indices start at 0)
    l1[2],     # third element
    l1[-1],    # last element (negative indices count from the end)
    l1[2:4],   # slice: indices 2 and 3 (the stop index is excluded)
    l1[4:],    # from index 4 through the end
    l1[:3],    # the first three elements
    sep="\n",
)

2
3
7
[3, 4]
[5, 6, 7]
[1, 2, 3]


In [7]:
# len() reports how many items the list contains.
print(len(l1))

7


A simple loop that prints each element of the list

In [8]:
# Walk the list directly and print each element on its own line.
for item in l1:
    print(item)

1
2
3
4
5
6
7


A multi-dimensional list is entered as a list of lists.

In [9]:
# A 4x3 table written as a list whose items are themselves lists (one per row).
l2 = [
    [1, 2, 3],
    [2, 3, 4],
    [3, 4, 5],
    [4, 5, 6],
]

In [10]:
# Echo the nested list to confirm its contents.
l2

[[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]

And indexed similarly

In [11]:
# A single index selects a whole inner list: index 2 is the third row.
l2[2]

[3, 4, 5]

In [12]:
# Chain a second subscript to slice inside the selected row: its first two items.
l2[2][:2]

[3, 4]

Import libraries to extend the language and help with some basic vector and table manipulation

In [13]:
import numpy as np
import pandas as pd
from datetime import datetime

Convert the list to a NumPy array, then do some basic array manipulation.

In [14]:
# Build a NumPy array from the list.
# Note: this rebinds `a`, which held the integer 5 in an earlier cell.
a = np.array(l1)
a

array([1, 2, 3, 4, 5, 6, 7])

In [15]:
# Adding a scalar applies it to every element of the array.
a+1

array([2, 3, 4, 5, 6, 7, 8])

In [16]:
# Division is element-wise as well, and produces an array of floats.
a/2

array([0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5])

In [17]:
# Add up all the elements of the array.
a.sum()

28

In [18]:
# Keep the halved array as `b`.
# Note: this rebinds `b`, which held the integer 6 in an earlier cell.
b = a/2

In [19]:
# Adding two arrays of the same length adds them element by element.
a+b

array([ 1.5,  3. ,  4.5,  6. ,  7.5,  9. , 10.5])

NumPy is optimised to process array data *very* quickly.

Generate two large random arrays

In [20]:
# Seed NumPy's global random generator so that re-running the notebook from a
# fresh kernel regenerates exactly the same two arrays (and therefore the same
# results in every cell that uses them).
np.random.seed(0)

# Two arrays of two million samples each, drawn from the standard normal distribution.
big1 = np.random.randn(2000000)
big2 = np.random.randn(2000000)

In [21]:
# Confirm the array holds the requested two million values.
len(big1)

2000000

In [22]:
# Peek at the array; NumPy abbreviates long arrays with "..." when displaying them.
big1

array([ 0.34372319, -1.13321849,  0.79811629, ..., -0.9066665 ,
        0.12669543, -0.41521569])

Now let's use a simple loop to get the dot product of these arrays

In [23]:
# Dot product the slow way: a pure-Python loop multiplies the arrays pair by
# pair and keeps a running sum. Wall-clock timestamps measure how long it takes.
startedat = datetime.now()
total = 0
for left, right in zip(big1, big2):
    total += left * right
print(total)
elapsed = datetime.now() - startedat
print(elapsed)

-1705.3161554593648
0:00:01.235643


The same calculation can be done in one line with NumPy, and is much quicker.

In [24]:
# The same dot product handed to NumPy in a single call, timed the same way
# so the two durations can be compared.
startedat = datetime.now()
total = big1.dot(big2)
print(total)
elapsed = datetime.now() - startedat
print(elapsed)

-1705.3161554593967
0:00:00.003968


Now combine the two arrays into a pandas DataFrame, one column per array.

In [34]:
# Build a two-column table in which each array becomes one named column,
# then display it (pandas shows only the first and last rows of a long frame).
df = pd.DataFrame({'List1': big1, 'List2': big2})
df

Unnamed: 0,List1,List2
0,0.343723,-0.874714
1,-1.133218,-0.341259
2,0.798116,0.469656
3,1.468519,1.232541
4,-1.540071,0.248459
...,...,...
1999995,-0.932696,0.470136
1999996,0.045753,0.844823
1999997,-0.906667,-1.956883
1999998,0.126695,0.018887


Query the dataframe

In [40]:
# Boolean-mask selection: keep only the rows whose List1 value exceeds 3.5.
df.loc[df['List1'] > 3.5]

Unnamed: 0,List1,List2
266,3.580790,-1.479955
10220,3.647644,0.340492
18381,3.531267,-0.153776
23577,3.502322,0.040546
25723,3.832471,0.527205
...,...,...
1981642,4.257557,-0.110050
1981848,3.803146,-0.164788
1982134,3.606763,1.126351
1984566,3.623961,0.200475


Generate matrices and manipulate them.

In [96]:
# A 2x3 matrix and a 3x2 matrix, each built from a nested list (one inner list per row).
mat1 = np.array([
    [1, 2, 3],
    [2, 3, 4],
])
mat2 = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
])

In [97]:
# Print each matrix's (rows, columns) shape on its own line.
for matrix in (mat1, mat2):
    print(matrix.shape)

(2, 3)
(3, 2)


In [98]:
# Display the 2x3 matrix.
mat1

array([[1, 2, 3],
       [2, 3, 4]])

In [99]:
# Display the 3x2 matrix.
mat2

array([[1, 2],
       [2, 3],
       [3, 4]])

In [100]:
# Matrix product of the (2, 3) and (3, 2) matrices, giving a 2x2 result.
np.dot(mat1, mat2)

array([[14, 20],
       [20, 29]])

In [101]:
# The @ operator computes the same matrix product as np.dot(mat1, mat2).
mat1 @ mat2

array([[14, 20],
       [20, 29]])