# In this session, we learn two ways of adding an index to a list: dictionaries and series 


## <font color="red"> DICTIONARY </font>

In [4]:
# A dictionary is a changeable collection of key-value pairs, indexed by its keys.
# Since Python 3.7 it also remembers the order in which the keys were inserted.
# In Python, dictionaries are written with curly brackets, and they have keys and values.
myDict = {
    "brand": "Ford",
    "model": "Mustang",
    "year": 1964,
}
print(myDict)

{'brand': 'Ford', 'model': 'Mustang', 'year': 1964}


In [6]:
# A dictionary is always declared as key-value pairs, written key: value.
# The keys do not have to share a type -- here two are integers and one is a string.
test = {
    1: 1,
    2: 3,
    "a": 4,
}
# Would the above work?

In [7]:
# To retrieve the value, just use the key inside square brackets.
# The key here is a string, so it is written in quotes.
myDict["brand"]
# What is wrong? How to do it properly?
# NOTE(review): as written this lookup runs without error; the question presumably refers to leaving the quotes off the key -- confirm.

'Ford'

In [8]:
# Single quotes around the key work exactly like double quotes.
myDict['brand']

'Ford'

In [9]:
# A value can be any kind of object, so one dictionary can hold different object types --
# for example a list, a string and a number:
test = {
    'list': [1, 2, 3],
    'string': 'test',
    'number': 5,
}
test

{'list': [1, 2, 3], 'string': 'test', 'number': 5}

In [10]:
# What would this code generate?
# test['list'] returns the list stored under the key 'list';
# the trailing [1] then picks the element at position 1 of that list (positions start at 0).
test['list'][1]

2

## <font color="red"> SERIES </font>

In [11]:
# A data structure very similar to a dictionary is the Series -- it is provided by the pandas library
import numpy as np
import pandas as pd

In [12]:
# pd.Series() can take in any list-like object and turn it into a Series.
# That means it can work with a list, an array, or a dictionary.
# Note: myDict is rebound here, replacing the dictionary of that name defined earlier in the notebook.
myList = ['a', 'b', 'c']
myArray = np.array([1, 2, 3])
myDict = {number: number for number in (1, 2, 3)}
print(myList, myArray, myDict)

['a', 'b', 'c'] [1 2 3] {1: 1, 2: 2, 3: 3}


In [13]:
# We use pd.Series() to create a series.
# Use the Jupyter tooltip hotkey (e.g. Shift+Tab with the cursor on the name) to check out its parameters.
# Evaluating the bare name, without parentheses, only displays the class itself:
pd.Series

pandas.core.series.Series

In [14]:
# At the bare minimum, we just need a list to create a series.
# The list is passed through the `data` parameter; no keys are supplied here.
pd.Series(data=myList)

0    a
1    b
2    c
dtype: object

In [15]:
# Notice in the previous output that a key (0, 1, 2) was added automatically for each value.
# We can also skip the use of "data=" and pass the values as the first positional argument:
pd.Series(myArray)

0    1
1    2
2    3
dtype: int64

In [17]:
# A dictionary gives the series both parts at once:
# its keys become the index and its values become the data.
pd.Series(myDict)

1    1
2    2
3    3
dtype: int64

In [18]:
# We can also define our own keys (indexes) instead of the automatic 0, 1, 2:
# the values come from myArray and the keys from myList.
a = pd.Series(myArray, index=myList)
print(a)

a    1
b    2
c    3
dtype: int64


In [19]:
# And we can change the keys later by assigning a new list to .index.
# This modifies `a` in place: the old keys 'a', 'b', 'c' are replaced by 5, 6, 7,
# so every later cell has to use the new keys.
a.index = [5,6,7]
a

5    1
6    2
7    3
dtype: int64

In [28]:
# To retrieve a value, we just need the key -- same as a dictionary.
# The key has to exist, though: the index of `a` was replaced with [5, 6, 7] above,
# so the old key 'a' is gone and looking it up raises a KeyError.
# The error is caught and printed so that the notebook still runs from top to bottom.
try:
    a['a']
except KeyError as missing_key:
    print("KeyError:", missing_key)

KeyError: 'a'

In [29]:
# loc vs. iloc
# There are two ways to locate a certain element in your series.
# iloc looks an element up by its POSITION, counting from 0, regardless of what the keys are.
# Position 2 is therefore the THIRD element of the series; it is printed right after the series itself.
print(a, a.iloc[2], sep='\n')

5    1
6    2
7    3
dtype: int64
3


In [30]:
# .loc is the counterpart of .iloc: it looks an element up by the VALUE of its index (its key).
# Note that .loc is used with square brackets; it is not called like a function.
# Plain square brackets on the series find the same element here, so both lines print the value stored under key 6.
print(a[6])
print(a.loc[6])

2
2


In [31]:
# Let's create another series: the values come from a plain list
# and the keys from myList.
b = pd.Series(data=[3, 5, 6], index=myList)
print(b)

a    3
b    5
c    6
dtype: int64


In [32]:
# Series can be added together. The addition matches values by their keys, not by their position.
# Here `a` has the keys 5, 6, 7 and `b` has the keys 'a', 'b', 'c', so no key appears in both
# and every entry of the result is NaN.
a + b

5   NaN
6   NaN
7   NaN
a   NaN
b   NaN
c   NaN
dtype: float64

In [None]:
# What if the two Series share only some of their keys? Why don't you try it out yourself!

In [33]:
# A neat feature for Series is the date_range() function, which creates the dates for a time series.
# Starting on 2020-01-01, it generates 12 consecutive periods.
dates = pd.date_range(start='20200101', periods=12)
print(dates)

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12'],
              dtype='datetime64[ns]', freq='D')


In [34]:
# As you can see from the above, the default frequency for the period is day (D). If you want month or year, you could do so.
# Notice that the date will be set to the last day of the month when you do month.
# The month-end frequency is passed as an offset object instead of the string alias 'M':
# newer pandas versions deprecate that alias (it was renamed 'ME'), while MonthEnd() works on old and new versions alike.
dates = pd.date_range('20200101', periods=12, freq=pd.offsets.MonthEnd())
print(dates)

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='M')
