# numpy and pandas for data science

Both numpy and pandas are core packages for doing data science in Python. And we will use them frequently in our algorithms because they process much faster than python lists. 

Key difference: Pandas provides powerful objects like DataFrames and Series which are very useful for working with and analyzing data, whereas numpy library provides objects for multi-dimensional arrays. 


## 1) Creating a 2D/3D array with numpy

Here we use demonstrate some simple operations in numpy through the following applications
* creating an array
* creating a simple MDP and randomly initiate the Q values
* calculating the softmax policy and the epsilon greedy policy

In [9]:
import numpy as np

def show(obj, title): 
    """ replacement for print() """
    if len(str(obj)) <= 20:
        print('\n',title+':',obj)
    else:
        print('\n',title+': \n',obj)

show(np.ones(10), '1D array with all ones')
show(np.zeros((10, 4)), '2D array with all zeros')


 1D array with all ones: 
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

 2D array with all zeros: 
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [10]:
## imagine we have 3 tiles in the maze and four actions

n_states = 3 
states = list(range(n_states))

actions = ['up','down','left','right']
n_actions = len(actions)

## initialize actions and states
pi = np.zeros((n_actions, )).astype(int)
Q_sa = np.zeros((n_states, n_actions)).astype(int)

## randomly assign value functions
for i in range(n_states):
    Q_sa[i, np.random.choice(n_actions)] = 1
    
show(pi, 'randomly initialized pi')
show(Q_sa, 'randomly initialized Q(s,a)')


 randomly initialized pi: [0 0 0 0]

 randomly initialized Q(s,a): 
 [[0 0 1 0]
 [0 1 0 0]
 [0 0 1 0]]


In [12]:
""" 
    softmax policy ensures all outputs are all between zero and one 
    by normalizing the candidates based on their exponential values.
    
    A higher temperature (tau) produces a softer probability distribution 
    over the choices, resulting in more exploration.
    
        softmax(x) = np.exp(x/tau) / sum(np.exp(x/tau))
"""
from scipy.special import softmax

tau = 2
pi_exploration = softmax(Q_sa/tau, axis = 1) 
show(pi_exploration, 'softmax policy with high temperature '+str(tau))

tau = 0.0001
pi_exploitation = softmax(Q_sa/tau, axis = 1) 
show(pi_exploitation, 'softmax policy with low temperature '+str(tau))


 softmax policy with high temperature 2: 
 [[0.21511292 0.21511292 0.35466124 0.21511292]
 [0.21511292 0.35466124 0.21511292 0.21511292]
 [0.21511292 0.21511292 0.35466124 0.21511292]]

 softmax policy with low temperature 0.0001: 
 [[0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]]


In [65]:
"""
    greedy policy: 
        choose the action that maximize the value function
"""

current_state = 0
a = actions[np.argmax(Q_sa[current_state, :])]
show(a,'action chosen using greedy policy')

"""
    epsilon-greedy policy: 
        with probability epsilon, choose actions randomly 
        otherwise, choose according to policy
"""

pi = pi_exploitation
epsilon = 0.3
if np.random.rand() <= epsilon:
    a = actions[np.random.choice(n_actions)]
else:
    prob = pi[current_state, :]
    index = np.random.choice(n_actions, p=prob)
    # index = np.argmax(prob)  ## use non-softmax policy
    a = actions[index]
    
show(a,'action chosen using '+str(epsilon)+'-greedy policy')


 action chosen using greedy policy: down

 action chosen using 0.3-greedy policy: down


In [9]:
import numpy as np
import random

def choose_action(listy):
    chosen = random.choice(np.argwhere(listy == np.amax(listy)).flatten().tolist())
#     print('\n        choose action', chosen, 'from', listy)
    return chosen

a = np.arange(3)
a[2] = 1
one = 0
for i in range(100):
    if choose_action(a) == 1:
        one += 1
print(one, 100-one)

42 58


In [6]:
## mean of only nonzero values
import numpy as np

matrix = np.eye(3)
print(matrix)
np.true_divide(matrix.sum(),(matrix!=0).sum())

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


1.0

In [8]:
np.ones(4) * 3

array([3., 3., 3., 3.])

In [14]:
## normalize
import numpy as np
from sklearn.preprocessing import normalize

x = np.random.rand(1000)*10
norm1 = x / np.linalg.norm(x)
norm2 = normalize(x[:,np.newaxis], axis=0).ravel()
print(np.all(norm1 == norm2))
# True

ModuleNotFoundError: No module named 'sklearn'

## 2) Creating a 2D array with pandas

In gridworld, we often use pandas for printing the table of expected reward or table of policies on the Turtle interface. 

In [28]:
import pandas as pd

width = 4
height = 2

df = pd.DataFrame("x", index=range(height), columns=range(width))
print(df)
show(df.shape,'shape of the dataframe')

   0  1  2  3
0  x  x  x  x
1  x  x  x  x

 shape of the dataframe: (2, 4)


In [32]:
## note that there are two ways of modifying a cell value
df = pd.DataFrame("x", index=range(height), columns=range(width))
df.loc[(0, 1)] = "/"
print(df)

print('\n', df.loc[(0, 1)], df[0][1],'\n')

df[0][1] = "-"
print(df)

   0  1  2  3
0  x  /  x  x
1  x  x  x  x

 / x 

   0  1  2  3
0  x  /  x  x
1  -  x  x  x


In [3]:
import pandas as pd
df2 = pd.DataFrame(columns=['player', 'Q(s,a)', 'Q(s)', 'Pi(s)'])
df2
df2.loc[1] = ['he2illfllp', 0.28, 0.2224, [0, 3000, 4]]
df2

Unnamed: 0,player,"Q(s,a)",Q(s),Pi(s)
0,hellp,0.2,0.4,"[0, 3, 4]"
1,he2illfllp,0.28,0.2224,"[0, 3000, 4]"


## 3) Creating a 2D array with python list

In [27]:
combination = [ (i,j) 
    for i in range(-1,1) 
    for j in range(-2,2)]
combination

[(-1, -2), (-1, -1), (-1, 0), (-1, 1), (0, -2), (0, -1), (0, 0), (0, 1)]

In [1]:
## map two lists

tiles = [12,13,14,15]
states = [0,1,2,3]

state = 2
tile = states[state]
print(tile)

tile = 15
state = tiles.index(tile)
print(state)

2
3


In [32]:
## create 1D Gaussian filter

from math import pi, sqrt, exp

def gauss(n=20):
    """ half of gaussian filter as the window """
    sigma = n
    r = range(-int(n), int(n) + 1)
    g = [1 / (sigma * sqrt(2 * pi)) * exp(-float(x) ** 2 / (2 * sigma ** 2)) for x in r][1:n + 1]
    return np.asarray([m/g[-1] for m in g])

g = gauss(40)
print(g)

[0.62169075 0.63683161 0.65193364 0.66697681 0.68194075 0.69680478
 0.71154793 0.72614904 0.74058675 0.7548396  0.76888605 0.78270454
 0.79627355 0.80957165 0.82257756 0.83527021 0.84762878 0.85963276
 0.87126204 0.8824969  0.89331814 0.90370708 0.91364563 0.92311635
 0.93210249 0.94058806 0.94855785 0.95599748 0.96289347 0.96923323
 0.97500518 0.98019867 0.98480414 0.98881304 0.99221794 0.99501248
 0.99719145 0.99875078 0.99968755 1.        ]


In [7]:
## list subtraction, compare the speed of two methods
from time import time
import random

def list_minus(li1, li2):
    return [i for i in li1 if i not in li2]

def rand_list(length):
    randomlist = []
    for i in range(length):
        randomlist.append(random.randint(1,length))
    return randomlist

for length in [1000, 10000]:
    li1 = rand_list(length)
    li2 = rand_list(length)

    timepoint = time()
    li3 = list_minus(li1, li2)
    print('method 1', time()-timepoint)
    
    timepoint = time()
    li4 = list(set(li1) - set(li2))
    print('method 2', time()-timepoint, len(set(li3))==len(li4))
    
#     li3.sort()
#     print(li3)
#     li4.sort()
#     print(li4)


method 1 0.010236024856567383
method 2 0.00017213821411132812 True
method 1 0.7945082187652588
method 2 0.0010712146759033203 True
