# 1. Getting Started

## Native Python for science

In [1]:
# Importing function
from collections import Counter

In [3]:
# Count how often each value occurs in a plain Python list
marriage_ages = [22, 22, 25, 25, 30, 24, 26, 24, 35] # create a list
value_counts = Counter(marriage_ages) # tally the occurrences of each age
# most_common() with no argument returns every (value, count) pair, highest count first
print(value_counts.most_common())

[(22, 2), (25, 2), (24, 2), (30, 1), (26, 1), (35, 1)]


**Functions in Python**

In [5]:
def add_two_numbers(x, y):  # header: function name and its two parameters
    """Return the result of ``x + y``."""
    return x + y  # the value handed back to the caller


# Call the function with two arguments and show what it returns
print(add_two_numbers(100, 5))

105


In [None]:
# Anonymous (lambda) functions
# Note: the variable y (the function) is unrelated to the lambda's own parameter y
y = lambda x, y: x + y  # an anonymous function that takes x and y as input and returns x + y
print(y(100,5))  # call the function

# 2. Data Structures

## 2.1 Lists in Python

In [1]:
# This creates the list
depths = [1, 5, 3, 6, 4, 7, 10, 12]

# This outputs the first 5 elements. No number before the : implies 0
first_5_depths = depths[:5]

print("---0---")
print(first_5_depths)

# You can easily sum
print("---1---")
print(sum(depths))

# And take the max
print("---2---")
print(max(depths))

# Indexing with a negative number counts from the end, so this returns the last element
print("---3---")
print(depths[-1])

# This returns the last two elements: the slice starts at the second-to-last element
# Nothing after the : implies the end of the list
print("---4---")
print(depths[-2:])

# This returns the third, fourth, and fifth elements (indices 2, 3, and 4)
# Remember counting starts at zero, and the stop index (5) is not included!
print("---5---")
print(depths[2:5])

# These commands check if a value is contained in the list
print("---6---")
print(22 in depths)
print(1 in depths)

# This is how you add another value to the end of your list
depths.append(44)
print("---7---")
print(depths)

# You can extend a list with another list
depths.extend([100, 200])
print("---8---")
print(depths)

# You can also modify a value
# This replaces the value at index 4 (the fifth element) with 100
depths[4] = 100
print("---9---")
print(depths)

# insert adds a new value instead of replacing one:
# 1000 is placed at index 5 and the later elements shift one position to the right
depths.insert(5, 1000)
print("---10---")
print(depths)

---0---
[1, 5, 3, 6, 4]
---1---
48
---2---
12
---3---
12
---4---
[10, 12]
---5---
[3, 6, 4]
---6---
False
True
---7---
[1, 5, 3, 6, 4, 7, 10, 12, 44]
---8---
[1, 5, 3, 6, 4, 7, 10, 12, 44, 100, 200]
---9---
[1, 5, 3, 6, 100, 7, 10, 12, 44, 100, 200]
---10---
[1, 5, 3, 6, 100, 1000, 7, 10, 12, 44, 100, 200]


## 2.2 Dictionaries in Python

In [2]:
# Initialize the dictionary.
# Each entry is written as key: value
my_dict = {"age": 22, "birth_year": 1999, "name": "jack", "siblings": ["jill", "jen"]}

# Get the value for the key age
print("---0---")
print(my_dict['age'])

# Check if age is a key
print("---1---")
print('age' in my_dict)

# Check if company is a key
print("---2---")
print('company' in my_dict)

# Get the value for the key age
print("---3---")
print(my_dict.get('age'))

# Get the value for the key company
# If the key doesn't exist, return the default value 1 instead
print("---4---")
print(my_dict.get('company', 1))

# Return all the keys
print("---5---")
print(my_dict.keys())

# Return all the values
print("---6---")
print(my_dict.values())

# Return all the (key, value) pairs
print("---7---")
print(my_dict.items())

---0---
22
---1---
True
---2---
False
---3---
22
---4---
1
---5---
dict_keys(['age', 'birth_year', 'name', 'siblings'])
---6---
dict_values([22, 1999, 'jack', ['jill', 'jen']])
---7---
dict_items([('age', 22), ('birth_year', 1999), ('name', 'jack'), ('siblings', ['jill', 'jen'])])


In [3]:
from collections import defaultdict  # import defaultdict class
my_default_dict = defaultdict(int)   # missing keys get the default value int(), i.e. 0
my_default_dict['age'] = 22          # adding a key-value pair
# "company" was never set, so looking it up yields the default 0 (and stores it under that key)
print(my_default_dict['company'])    # printing the value of the key "company"

0


## 2.3 Sets in Python

In [4]:
# Build a set by adding values one at a time
my_set = set()
my_set.add(1)
my_set.add(2)
my_set.add(1)
# Note that the set only contains a single 1 value
print("---0---")
print(my_set)

# A second set to compare against
my_set2 = set()
my_set2.add(1)
my_set2.add(2)
my_set2.add(3)
my_set2.add(4)
print("---1---")
print(my_set2)

# Prints the overlap (values present in both sets)
# NOTE(review): this marker repeats "---1---", so the printed labels run 0, 1, 1, 2, 3
print("---1---")
print(my_set.intersection(my_set2))

# Prints the combination (values present in either set)
print("---2---")
print(my_set.union(my_set2))

# Prints the difference (those in my_set but not my_set2)
print("---3---")
print(my_set.difference(my_set2))

---0---
{1, 2}
---1---
{1, 2, 3, 4}
---1---
{1, 2}
---2---
{1, 2, 3, 4}
---3---
set()


# 3. Control Flow and Built-in Functions

**The for construct**

In [5]:
names = ['tyler', 'karen', 'jill']   # list containing names

# enumerate yields (index, value) pairs, so the loop gets the position and the name together
for i, name in enumerate(names):     # iterating over names
    print("Index: {0}".format(i))    # print the index number
    print("Value: {0}".format(name)) # print the value at that index

Index: 0
Value: tyler
Index: 1
Value: karen
Index: 2
Value: jill


**The sort function**

In [6]:
my_list = [2, 10, 1, -5, 22]
my_list.sort()  # sorts the list in place, in ascending order

print(my_list)

[-5, 1, 2, 10, 22]


In [7]:
my_list = [2, 10, 1, -5, 22]

# Sort by absolute value, largest first
# sorted() returns a new list and leaves my_list itself unchanged
my_list_sorted_abs = sorted(my_list, key=abs, reverse=True)

print(my_list_sorted_abs)

[22, 10, -5, 2, 1]


**The zip function**

In [8]:
list_1 = [1, 2, 3]  # create your first list
list_2 = ['x', 'y', 'z']  # create your second list

# zip pairs up the elements position by position; list() collects the pairs for printing
print(list(zip(list_1, list_2)))  # combine and print

[(1, 'x'), (2, 'y'), (3, 'z')]


In [9]:
pairs = [('x', 1), ('y', 2), ('z', 3)]  # a list of tuples
# zip(*pairs) "unzips" the pairs; note that the results are tuples, not lists
letters, numbers = zip(*pairs)  # split into two tuples

print(letters)  # print the first values of the tuples
print(numbers)  # print the second values of the tuples

('x', 'y', 'z')
(1, 2, 3)


# 4. NumPy, an External Library

## Handling arrays with NumPy

In [12]:
# Single dimensional arrays
import numpy as np

# This creates our array
np_array = np.array([5, 10, 15, 20, 25, 30])

# Gets the unique values
print("--0--")
print(np.unique(np_array))

# Calculates the standard deviation (population standard deviation, NumPy's default)
print("--1--")
print(np.std(np_array))

# Calculates the maximum
print("--2--")
print(np_array.max())

# Squares each value in the array
print("--3--")
print(np_array ** 2)

# Adds the arrays together element wise
print("--4--")
print(np_array + np_array)

# The sum of the squares of the elements
print("--5--")
print(np.sum(np_array ** 2))

# Gives you the shape; a 1-D array has a single dimension, so this is (6,)
print("--6--")
print(np_array.shape)

--0--
[ 5 10 15 20 25 30]
--1--
8.539125638299666
--2--
30
--3--
[ 25 100 225 400 625 900]
--4--
[10 20 30 40 50 60]
--5--
2275
--6--
(6,)


In [13]:
# Two dimensional arrays
import numpy as np

# Create a 2d array with 2 rows and 3 columns
print("--0--")
np_2d_array = np.array([[1,2,3], 
                        [4,5,6]])
print(np_2d_array)

# Calculate the transpose, which is when you swap the columns and rows.
print("--1--")
np_2d_array_T = np_2d_array.T
print(np_2d_array_T)

# Print the shape of the array as (number of rows, number of columns)
print("--3--")
print(np_2d_array.shape)

# Access elements in the 2d array by index.
# First index is the row number
# Second index is the column number
# Index numbers start from 0
print("--4--")
print(np_2d_array[1,1])  # second row, second column
print(np_2d_array[0,2])  # first row, third column

--0--
[[1 2 3]
 [4 5 6]]
--1--
[[1 4]
 [2 5]
 [3 6]]
--3--
(2, 3)
--4--
5
3


## Important functionalities

In [14]:
# Calculating dot product
import numpy as np
np_array = np.array([5, 10, 15, 20, 25, 30])
# The dot product of a vector with itself equals the sum of its squared elements
dot_product = np.dot(np_array, np_array)
print(dot_product)

2275


In [16]:
# Generating random values
import numpy as np

# Generate a single random number in range [0,1)
print("--0--")
print(np.random.rand())

# Generate a matrix of random numbers in range [0,1) with shape (3,2)
print("--1--")
print(np.random.rand(3,2))

--0--
0.5329375801454256
--1--
[[0.56334125 0.44569458]
 [0.50322051 0.85942807]
 [0.30556731 0.99125287]]


In [17]:
import numpy as np

# Low=5, High=15, Size=2. Generate 2 random integers from 5 (inclusive) up to 15 (exclusive)
print("--0--")
print(np.random.randint(5, 15, 2))

# Low=5, High=15, Size=(3,2). Generate a matrix of shape (3,2) of random integers
# from 5 (inclusive) up to 15 (exclusive)
print("--1--")
print(np.random.randint(5, 15, (3,2)))

--0--
[10 11]
--1--
[[ 8 11]
 [14  9]
 [11  8]]


In [18]:
# Sampling the data
import numpy as np
array = np.array([1,2,3,4,5])

# Sample 10 data points with replacement (the same value may be drawn more than once).
print("--0--")
print(np.random.choice(array, 10, replace=True))

# Sample 3 data points without replacement (each value is drawn at most once).
print("--1--")
print(np.random.choice(array, 3, replace=False))

--0--
[5 4 5 1 4 2 1 3 4 1]
--1--
[1 3 4]


## Randomly shuffling values

In [19]:
import numpy as np

x = [1,2,3,4,5]  # Create a list of 5 elements
# shuffle reorders x in place, so the call's return value is not used
np.random.shuffle(x)  # Randomly shuffle the order of the elements in the list

print(x)

[3, 5, 2, 4, 1]


## SciPy, an External Library: the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient)

In [21]:
from scipy import stats
import numpy as np

array_1 = np.array([1,2,3,4,5,6])  # Create a numpy array from a list
# Copy the data so array_2 really is a second array with the same values;
# a plain `array_2 = array_1` would only give the same array a second name
array_2 = array_1.copy()

# pearsonr returns the correlation coefficient and the p-value;
# the correlation is 1 (up to floating-point rounding) since the values are the same
print(stats.pearsonr(array_1, array_2))

(0.9999999999999999, 1.8488927466117464e-32)


## Generating samples from [distributions](https://docs.scipy.org/doc/scipy/reference/stats.html)

In [28]:
# Normal distribution
from scipy import stats

# Generate 10 values randomly sampled from a normal distribution
# with mean 0 (loc) and standard deviation 10 (scale)
x = stats.norm.rvs(loc=0, scale=10, size=10)

print(x)

[ 14.65648769  -2.257763     0.67528205 -14.24748186  -5.44382725
   1.1092259  -11.50993577   3.75698018  -6.0063869   -2.9169375 ]


In [31]:
# Probability density function
from scipy import stats

# pdf returns the probability *density* at x, not a probability:
# for a continuous distribution the probability of any single exact value is zero
p1 = stats.norm.pdf(x=-100, loc=0, scale=10)  # Density at -100 (10 standard deviations below the mean)
p2 = stats.norm.pdf(x=0, loc=0, scale=10)     # Density at 0 (the mean of the distribution)

print(p1)
print(p2)

7.69459862670642e-24
0.03989422804014327


In [30]:
# Cumulative distribution function
from scipy import stats

# Get probability of sampling a value less than or equal to 0
# (0 is the mean of this distribution, so half of the probability mass lies at or below it)
p1 = stats.norm.cdf(x=0, loc=0, scale=10)

print(p1)

0.5


In [29]:
# Calculating descriptive statistics
from scipy import stats

# Calculate descriptive statistics for 500 data points sampled from a normal distribution
# with mean 0 and standard deviation of 1.
# The result reports the number of observations, min/max, mean, variance, skewness and kurtosis.
print(stats.describe(stats.norm.rvs(loc=0, scale=1, size=500))) 

DescribeResult(nobs=500, minmax=(-3.2412673400690726, 3.852731490654721), mean=0.0026059282177155795, variance=0.9619747022730771, skewness=0.1862516424103553, kurtosis=0.2598514464267856)


# 5. Exercise: Using NumPy and SciPy

## NumPy: Create a function which takes a NumPy 1-D array as input and returns the following (in the same order as listed):

MAX, STD, SUM, DOT PRODUCT

In [38]:
import numpy as np 

def perform_calculations(array):
    """Summarise a 1-D array.

    Returns a 4-tuple, in this order: the maximum, the standard deviation,
    the sum, and the dot product of the array with itself.
    """
    largest = np.max(array)
    spread = np.std(array)
    total = np.sum(array)
    self_dot = np.dot(array, array)
    return largest, spread, total, self_dot


# Try the function on 5 random values drawn from [0, 1)
print(perform_calculations(np.random.rand(5)))

(0.7952673118598902, 0.251953356731858, 1.4645887493306218, 0.7464065107754213)


## Scipy: Create a function that takes in two numpy 1-D arrays and returns the correlation and p-value as a tuple.

In [37]:
from scipy import stats  
import numpy as np       

def correlation(array1, array2):
    """Return what ``stats.pearsonr`` gives for two 1-D arrays.

    The result unpacks as ``(correlation coefficient, p-value)``.
    """
    result = stats.pearsonr(array1, array2)
    return result


# Try the function on two independent samples of 5 random values
print(correlation(np.random.rand(5), np.random.rand(5)))

(-0.4450601318408434, 0.4526387365590591)
