# Pandas Data types

In [1]:
import pandas as pd

In [2]:
ser = pd.Series(range(10))

In [3]:
ser

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [4]:
ser2 = pd.Series([1.1, 2, 3, 4])

In [5]:
ser2

0    1.1
1    2.0
2    3.0
3    4.0
dtype: float64

In [6]:
ser3 = pd.Series(['a', 'b', 'c'])

In [7]:
ser3

0    a
1    b
2    c
dtype: object

In [8]:
ser4 = pd.Series([{}, [], (2, 3)])

In [9]:
ser4

0        {}
1        []
2    (2, 3)
dtype: object

In [10]:
ser5 = pd.Series(['2017-01-01', '2018-01-01'])

In [11]:
ser5

0    2017-01-01
1    2018-01-01
dtype: object

In [12]:
pd.to_datetime(ser5)

0   2017-01-01
1   2018-01-01
dtype: datetime64[ns]

In [13]:
ser6 = pd.Series(['Type A', 'Type B'], dtype='category')

In [14]:
ser6

0    Type A
1    Type B
dtype: category
Categories (2, object): [Type A, Type B]

## Temps again!

In [15]:
temp = [30, 45, 60, 90]
s = pd.Series(temp)

In [16]:
s

0    30
1    45
2    60
3    90
dtype: int64

In [17]:
for number in s:
    print(number)

30
45
60
90


In [18]:
for num in temp:
    print(num)

30
45
60
90


In [19]:
45 in temp

True

In [20]:
45 in s # False: `in` on a Series tests the index labels, not the stored values

False

In [21]:
1 in s # True: 1 is one of the index labels (0..3), even though no stored value equals 1

True

In [22]:
4 in s 

False

In [23]:
45 in s.values

True

In [24]:
45 in set(s)

True

In [25]:
set(s)

{30, 45, 60, 90}

In [26]:
dict(s)

{0: 30, 1: 45, 2: 60, 3: 90}

In [27]:
0 in dict(s)

True

A pandas Series behaves like a dictionary: the index labels act as the keys, so `in` checks the labels rather than the values.

In [28]:
# Series.iteritems() was removed in pandas 2.0; Series.items() yields the
# same (index label, value) pairs and mirrors dict.items().
for i, val in s.items(): # i is the index label, val is the stored value
    print(i, val)

0 30
1 45
2 60
3 90


In [29]:
# A plain dict offers the same (key, value) iteration through .items().

In [30]:
for i,val in dict(s).items():
    print(i, val)

0 30
1 45
2 60
3 90


## Broadcasting

In [31]:
s

0    30
1    45
2    60
3    90
dtype: int64

In [32]:
s + 2

0    32
1    47
2    62
3    92
dtype: int64

In [33]:
s + s

0     60
1     90
2    120
3    180
dtype: int64

In [34]:
s == 45

0    False
1     True
2    False
3    False
dtype: bool

In [36]:
temp + 2 # raises TypeError: a plain list does not broadcast; + only concatenates a list with another list

TypeError: can only concatenate list (not "int") to list

In [37]:
temp*2

[30, 45, 60, 90, 30, 45, 60, 90]

In [38]:
s2 = pd.Series([10, 20, 30], index=[2, 3, 4])

In [39]:
s + s2

0      NaN
1      NaN
2     70.0
3    110.0
4      NaN
dtype: float64

In [40]:
s * s2

0       NaN
1       NaN
2     600.0
3    1800.0
4       NaN
dtype: float64

In [41]:
def add_2(val):
    """Return ``val`` increased by two."""
    increment = 2
    result = val + increment
    return result

In [42]:
s.apply(add_2)

0    32
1    47
2    62
3    92
dtype: int64

In [43]:
s.apply(float)

0    30.0
1    45.0
2    60.0
3    90.0
dtype: float64

In [44]:
s.astype(float)

0    30.0
1    45.0
2    60.0
3    90.0
dtype: float64

## CRUD

In [45]:
s[0]

30

In [47]:
s[-1] # KeyError: with an integer index, [] looks up the label -1 instead of the last position

KeyError: -1

In [48]:
s.loc[0] # label of index! (Because of integer index)

30

In [49]:
s.iloc[-1]

90

In [50]:
temp2 = pd.Series(temp, ['M', 'T', 'W', 'Th'])

In [51]:
temp2

M     30
T     45
W     60
Th    90
dtype: int64

In [52]:
temp2['M']

30

In [53]:
temp2[0] # label and position both work with plain [] here (because the index is non-integer)
# NOTE(review): newer pandas releases may warn about this positional fallback - confirm; .iloc[0] is the explicit form

30

In [54]:
temp2.loc['M']

30

In [56]:
temp2.loc[0] # .loc accepts labels only; 0 is not a label of this string index, so this raises

TypeError: cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [0] of <class 'int'>

In [57]:
temp2.iloc[0] # works on position!

30

In [59]:
temp3 = pd.Series(temp, index=['M', 'T', 0, 1])

In [60]:
temp3[0] # 0 is matched as an index label here (the third entry), not as position 0

60

In [61]:
temp3['M']

30

## Update

In [62]:
temp2['M'] = 21

In [63]:
temp2

M     21
T     45
W     60
Th    90
dtype: int64

In [64]:
temp2.iloc[-1] = 100

In [65]:
temp2

M      21
T      45
W      60
Th    100
dtype: int64

In [68]:
# Series.append() was removed in pandas 2.0; pd.concat is the replacement.
# Like list.extend it adds every element of the other Series, but it returns
# a new Series and leaves temp2 itself unchanged (not in place).
pd.concat([temp2, pd.Series([110], index=['F'])])

M      21
T      45
W      60
Th    100
F     110
dtype: int64

In [69]:
temp2

M      21
T      45
W      60
Th    100
dtype: int64

In [72]:
# Series.set_value() was removed in pandas 1.0; .at assigns a single label in place.
temp2.at['M'] = 4578
temp2 # display the updated Series (set_value used to return it)

M     4578
T       45
W       60
Th     100
dtype: int64

In [73]:
temp2

M     4578
T       45
W       60
Th     100
dtype: int64

In [74]:
# Series.set_value() was removed in pandas 1.0. Assigning through .loc to a
# label that does not exist yet appends it (setting with enlargement).
temp2.loc['Sa'] = 213
temp2 # display the enlarged Series (set_value used to return it)

M     4578
T       45
W       60
Th     100
Sa     213
dtype: int64

## Delete

In [75]:
del temp2['M']

In [76]:
temp2

T      45
W      60
Th    100
Sa    213
dtype: int64

## Summary Statistics

In [77]:
temp2.mean()

104.5

In [78]:
temp2.median()

80.0

In [80]:
temp2.mode()

0     45
1     60
2    100
3    213
dtype: int64

In [81]:
temp2.describe()

count      4.000000
mean     104.500000
std       75.967098
min       45.000000
25%       56.250000
50%       80.000000
75%      128.250000
max      213.000000
dtype: float64

In [83]:
temp2.value_counts() # more useful for categorical data

45     1
60     1
100    1
213    1
dtype: int64