# Intro to Pandas

## Series

In [2]:
# basic imports
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

### Create our first series

A Series is a one-dimensional array of data together with an associated array of data labels, called its index.

In [84]:
# Let's create a Series from a plain list; with no index given,
# pandas supplies a default integer index starting at 0
obj = Series([2, 4, 6, 8])

# Show
obj

0    2
1    4
2    6
3    8
dtype: int64

In [7]:
# Check the type of the object we just created
type(obj)

pandas.core.series.Series

In [85]:
# Let's show the values (the data held by the Series, as a NumPy array)
obj.values

array([2, 4, 6, 8], dtype=int64)

In [86]:
# Let's show the index (the labels attached to the values)
obj.index

RangeIndex(start=0, stop=4, step=1)

### Now let's create a Series with a custom index

In [14]:
# Python developers per country: the values are the counts,
# the index holds the matching country labels (same order)
py_devs = Series(
    [870, 430, 300, 210, 400],
    index=['USSR', 'Germany', 'China', 'Japan', 'USA'],
)

# Show
py_devs

USSR       870
Germany    430
China      300
Japan      210
USA        400
dtype: int64

In [15]:
# Show the index: it now holds the country labels we passed in
py_devs.index

Index(['USSR', 'Germany', 'China', 'Japan', 'USA'], dtype='object')

In [16]:
# Now we can use index labels to select Series values
py_devs['USA']

400

#### We can also filter a Series with array operations

In [17]:
# Check which countries have more than 400 professional Python developers:
# the comparison builds a boolean mask, and indexing with it keeps the True rows

py_devs[py_devs > 400]

USSR       870
Germany    430
dtype: int64

In [19]:
# On its own, the comparison gives you a boolean Series (one True/False per label)
py_devs > 400

USSR        True
Germany     True
China      False
Japan      False
USA        False
dtype: bool

#### We can treat a Series as an ordered dictionary

In [21]:
# Check if USSR is in the Series (like a dict, membership is tested against the labels)
'USSR' in py_devs

True

#### We can convert a Series into a Python dictionary

In [23]:
# We use the "to_dict()" method: index labels become keys, values stay values
devs_dict = py_devs.to_dict()

# Show
devs_dict

{'USSR': 870, 'Germany': 430, 'China': 300, 'Japan': 210, 'USA': 400}

#### We can convert the dictionary back into a Series

In [27]:
# Back to a Series: pass the dictionary to the "Series" constructor
# (keys become the index labels)
devs_series = Series(devs_dict)

# Show
devs_series

USSR       870
Germany    430
China      300
Japan      210
USA        400
dtype: int64

#### When passing a dictionary, the index takes the dict keys in order; an explicit index selects and reorders them, and labels missing from the dict become NaN

In [55]:
# Labels for a new index: the same countries in a different order,
# plus 'Argentina', which has no entry in the dictionary
countries = [
    'China',
    'Germany',
    'Japan',
    'USA',
    'USSR',
    'Argentina',
]

In [56]:
# Let's redefine a Series: same dictionary, but with an explicit index.
# Values are looked up by label; a label absent from the dictionary gets NaN.
obj2 = Series(devs_dict, index=countries)

In [57]:
# Show: Argentina has no data, so its value is NaN and the dtype becomes float64
obj2

China        300.0
Germany      430.0
Japan        210.0
USA          400.0
USSR         870.0
Argentina      NaN
dtype: float64

#### We can use isnull and notnull to find missing data

We have two options

In [58]:
# First option: the top-level pandas function

pd.isnull(obj2)

China        False
Germany      False
Japan        False
USA          False
USSR         False
Argentina     True
dtype: bool

In [59]:
# Second option: the method on the Series itself (same result)
obj2.isnull() 

China        False
Germany      False
Japan        False
USA          False
USSR         False
Argentina     True
dtype: bool

#### Same for the opposite, notnull

We have two options

In [60]:
# First option: the top-level pandas function

pd.notnull(obj2)

China         True
Germany       True
Japan         True
USA           True
USSR          True
Argentina    False
dtype: bool

In [61]:
# Second option: the method on the Series itself (same result)

obj2.notnull()

China         True
Germany       True
Japan         True
USA           True
USSR          True
Argentina    False
dtype: bool

In [63]:
# Let's see the py_devs Series again

py_devs

USSR       870
Germany    430
China      300
Japan      210
USA        400
dtype: int64

In [64]:
# Let's check our Series with Argentina again

obj2

China        300.0
Germany      430.0
Japan        210.0
USA          400.0
USSR         870.0
Argentina      NaN
dtype: float64

#### More operations on Series

In [79]:
# Now we can add the two Series; pandas automatically aligns the data by index label.
# Argentina has no value to add, so its result is NaN.

py_devs + obj2 

Argentina       NaN
China         600.0
Germany       860.0
Japan         420.0
USA           800.0
USSR         1740.0
dtype: float64

In [80]:
# We can give a Series a name
obj2.name = "Python Developers by Countries"

In [81]:
# Show: the name appears in the last line of the output
obj2

China        300.0
Germany      430.0
Japan        210.0
USA          400.0
USSR         870.0
Argentina      NaN
Name: Python Developers by Countries, dtype: float64

In [82]:
# We can also name the index
obj2.index.name = 'COUNTRIES'

In [83]:
# Show: the index name is printed above the labels
obj2

COUNTRIES
China        300.0
Germany      430.0
Japan        210.0
USA          400.0
USSR         870.0
Argentina      NaN
Name: Python Developers by Countries, dtype: float64

### We will see the Series many times during this session, but now...

## Let's do some exercises!