# Python - Lists and dictionaries

In [42]:
list = ["Save","The","Pandas"]
dict = {"w1":"Save","w2":"The","w3":"Pandas"}

In [64]:
list[2]

'PANDAS'

In [70]:
dict["w3"]

'Pandas'

In [69]:
dict.keys()

['w3', 'w2', 'w1']

In [75]:
dict.values()

['Pandas', 'The', 'Save']

In [78]:
dict.values()[2]

'Save'

In [62]:
for element in list:
    print element

SAVE
THE
PANDAS


In [80]:
# Be careful: list not ordered
for element in dict:
    print element

w3
w2
w1


In [79]:
for element in dict:
    print dict[element]

Pandas
The
Save


### Function: enumerate

In [36]:
for item in enumerate(list):
    print item

(0, 'Save')
(1, 'The')
(2, 'Pandas')


In [23]:
for item in enumerate(list):
    print item[0]

0
1
2


In [24]:
for item in enumerate(list):
    print item[1]

Save
The
Pandas


In [34]:
for item in enumerate(dict):
    print item[0]

0
1
2


In [35]:
for item in enumerate(dict):
    print item[1]

w3
w2
w1


### Trick: modifying all elements of a list

In [44]:
for i,item in enumerate(list):
    list[i]=item.upper()
list

['SAVE', 'THE', 'PANDAS']

### Creating lists

In [56]:
list1 = range(10)
list1

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [58]:
list1 = range(1,10)
list1

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [61]:
list1 = range(0,10,3)
list1

[0, 3, 6, 9]

In [51]:
list2 = [item for item in range(10)]
list2

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [52]:
list3 = [item for item in range(10) if item % 3 ==0]
list3

[0, 3, 6, 9]

In [55]:
list4 = [item**2 for item in range(1,10,2)]
list4

[1, 9, 25, 49, 81]

# Numpy - Arrays

In [81]:
import numpy as np

In [88]:
array1 = np.array([0,1,2,3,4])
array1

array([0, 1, 2, 3, 4])

In [90]:
array2 = np.arange(5)
array2

array([0, 1, 2, 3, 4])

In [92]:
array3 = np.arange(1,5)
array3

array([1, 2, 3, 4])

In [95]:
array4 = np.arange(0,10,3)
array4

array([0, 3, 6, 9])

In [98]:
# Arrays are faster than lists
%time list0 = range(100000)
%time array0 = np.arange(100000)

CPU times: user 416 µs, sys: 766 µs, total: 1.18 ms
Wall time: 3.22 ms
CPU times: user 0 ns, sys: 1.07 ms, total: 1.07 ms
Wall time: 370 µs


# Pandas - Series

In [99]:
import pandas as pd
from pandas import Series

In [101]:
serie1 = Series([1,2,3])
print serie1

In [108]:
list2 = ["Save","The","Pandas"]
serie2 = Series(list2)
print serie2

0      Save
1       The
2    Pandas
dtype: object


In [179]:
# Output is an array
serie2.values

array([ 6, 10, 14, 18, 22, 26])

In [116]:
serie2.values[2]

'Pandas'

In [134]:
serie2.index

Int64Index([0, 1, 2], dtype='int64')

In [111]:
serie2.index.values

array([0, 1, 2])

In [135]:
serie2.index[2]

2

In [126]:
serie3 = Series(range(5))
serie3

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [125]:
serie4 = Series(range(5),index=[1,2,3,4,5])
serie4

1    0
2    1
3    2
4    3
5    4
dtype: int64

In [131]:
serie3 + serie4

0   NaN
1     1
2     3
3     5
4     7
5   NaN
dtype: float64

In [132]:
# Be careful: indexes with duplicates
serie5 = Series(range(5),index=[1,2,2,4,5])
# Output is a array
serie5[2]

2    1
2    2
dtype: int64

In [139]:
serie6 = Series(range(1,4),index=["num1","num2","num3"])
serie6

num1    1
num2    2
num3    3
dtype: int64

# Pandas - Dataframes

In [136]:
from pandas import DataFrame

In [162]:
serie1 = Series(range(3,14,2))
serie2 = serie1*2

In [237]:
df0 = DataFrame([serie1,serie2])
df0

Unnamed: 0,0,1,2,3,4,5
0,3,5,7,9,11,13
1,6,10,14,18,22,26


In [164]:
df1 = DataFrame({"col1":serie1,"col2":serie2})
df1

Unnamed: 0,col1,col2
0,3,6
1,5,10
2,7,14
3,9,18
4,11,22
5,13,26


In [169]:
df1.index

Int64Index([0, 1, 2, 3, 4, 5], dtype='int64')

In [170]:
df1.columns

Index([u'col1', u'col2'], dtype='object')

In [166]:
df1.values

array([[ 3,  6],
       [ 5, 10],
       [ 7, 14],
       [ 9, 18],
       [11, 22],
       [13, 26]])

In [178]:
# Output is a Serie
df1["col1"]

0     3
1     5
2     7
3     9
4    11
5    13
Name: col1, dtype: int64

In [200]:
df1['col3'] = 1
df1['col4'] = range(6)
df1['col5'] = np.arange(5,-1,-1)
df1

Unnamed: 0,col1,col2,col3,col4,col5
0,3,6,1,0,5
1,5,10,1,1,4
2,7,14,1,2,3
3,9,18,1,3,2
4,11,22,1,4,1
5,13,26,1,5,0


### Null values

In [205]:
df1['col5'][3] = np.nan
df1

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,col1,col2,col3,col4,col5
0,3,6,1,0,5.0
1,5,10,1,1,4.0
2,7,14,1,2,3.0
3,9,18,1,3,
4,11,22,1,4,1.0
5,13,26,1,5,0.0


In [234]:
# To delete lines with nulls. It does not change dataframe
df1.dropna()

Unnamed: 0,col1,col2,col3,col4,col5
0,3,6,1,0,5
1,5,10,1,1,4
2,7,14,1,2,3
4,11,22,1,4,1
5,13,26,1,5,0


In [235]:
df1

Unnamed: 0,col1,col2,col3,col4,col5
0,3,6,1,0,5.0
1,5,10,1,1,4.0
2,7,14,1,2,3.0
3,9,18,1,3,
4,11,22,1,4,1.0
5,13,26,1,5,0.0
