In [80]:
import pandas as pd
import numpy as np

### Series Object

A series object in pandas is simply a one-dimensional array of indexed data.It means that each element has an index, allowing one to access them using values and the index attributes.

    Syntax: 
        pd.Series(data, index = index)

index argument is optional

In [81]:
s = pd.Series([1,3,4,6,7,12]) # creating a pandas series using a list
s

0     1
1     3
2     4
3     6
4     7
5    12
dtype: int64

In [82]:
# as you can see the result shows a sequence of indices and values.

s.values # getting the values only

array([ 1,  3,  4,  6,  7, 12], dtype=int64)

In [83]:
s.index # accessing the indices

RangeIndex(start=0, stop=6, step=1)

In [84]:
# we can also get the values as follows:
s[0]

1

In [85]:
s[3]

6

In [86]:
s[:5]

0    1
1    3
2    4
3    6
4    7
dtype: int64

### The difference between Pandas Series Object and One-Dimensional array

The only difference between a series object and a Numpy one-dimensional array is the indices.An important feature of the series object is that the indices can take any type of value and can be set up in any way.

In [87]:
s = pd.Series([1,4,5,7,8,11], index = [2,3,8,4,6,7]) # the indices are different in this case
s

2     1
3     4
8     5
4     7
6     8
7    11
dtype: int64

In [88]:
s = pd.Series([2,3,4,5,6,7], index = ['b', 'c', 's', 'd', 'n', 'e']) # the indices are strings
s

b    2
c    3
s    4
d    5
n    6
e    7
dtype: int64

In [89]:
s['b']

2

In [90]:
# Creating Pandas Object using a dictionary 

name_age_dict = {'Alex': 22, 'John':17, 'Samantha': 25}

s = pd.Series(name_age_dict)
s

Alex        22
John        17
Samantha    25
dtype: int64

In [91]:
s['Alex'] # Accessing values

22

In [92]:
s['Alex':'Samantha'] # Slicing

Alex        22
John        17
Samantha    25
dtype: int64

### Pandas DataFrame Object

A dateframe object is similar to a Numpy two-dimensional array with row indices and column names.

In [93]:
age = pd.Series([20, 23, 35, 54])
name = pd.Series(['Smith', 'Robin', 'Alain', 'Pierre'])

df = pd.DataFrame({'Age':age,'Name':name})
df

Unnamed: 0,Age,Name
0,20,Smith
1,23,Robin
2,35,Alain
3,54,Pierre


In [94]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [95]:
population_dict = {'California': 38332521,
'Texas': 26448193,
'New York': 19651127,
'Florida': 19552860,
'Illinois': 12882135}

population = population_dict

In [96]:
# exemple from the book python for data science handbook

area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
'Florida': 170312, 'Illinois': 149995}

area = area_dict


In [97]:
states = pd.DataFrame({'population': population,
'area': area})

states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [98]:
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [99]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [100]:
states['area'] # Calling a column name will return a Series of column data 

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

### Creating DataFrame

In [101]:
p = pd.Series(population) # A Series Object
p

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [102]:
type(p)

pandas.core.series.Series

In [103]:
pd.DataFrame(p) # Creating a DataFrame from a Series Object

Unnamed: 0,0
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [104]:
pd.DataFrame(p, columns=['population']) # adding a column name

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [105]:
a = pd.Series(area)
type(a)

pandas.core.series.Series

In [106]:
pd.DataFrame(a, columns=['area']) # creating a dataframe from a series object

Unnamed: 0,area
California,423967
Texas,695662
New York,141297
Florida,170312
Illinois,149995


In [107]:
# Creating a dataframe from a dictionary of series object

pd.DataFrame({'area': a, 'population':p})

Unnamed: 0,area,population
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [108]:
# Creating a DataFrame from a 2-D Numpy array

pd.DataFrame(np.random.rand(2,2), columns=['name','age'], index = ['@','!'])

Unnamed: 0,name,age
@,0.855261,0.349573
!,0.777303,0.797035


### Pandas Index Object

Index object is similar to an immutable array, meaning one cannot modify its values. 


In [113]:
index = pd.Index([1,2,2,3,4,6])

In [114]:
index[0]

1

In [117]:
a = index[::-1]
a

Int64Index([6, 4, 3, 2, 2, 1], dtype='int64')

In [120]:
index[0] = 1 # immutable cannot change the value at index 0

TypeError: Index does not support mutable operations

In [121]:
index_1 = pd.Index([2,2,3,4,5])
index_2 = pd.Index([2,5,6,7,4])

In [124]:
index_1 & index_2 # intersection

  index_1 & index_2


Int64Index([2, 4, 5], dtype='int64')

In [125]:
index_1 | index_2 # union

  index_1 | index_2


Int64Index([2, 2, 3, 4, 5, 6, 7], dtype='int64')

In [126]:
index_1 ^ index_2 # difference

  index_1 ^ index_2


Int64Index([3, 6, 7], dtype='int64')

### Data Selection in Series 

In [130]:
s_obj = pd.Series([1,2,3,4,4], index = ['a', 'b', 'c', 'd', 'e'])

In [131]:
s_obj

a    1
b    2
c    3
d    4
e    4
dtype: int64

In [132]:
s_obj['b'] # Selecting the value of index b

2

In [134]:
'a' in s_obj # is 'a' an index of s_obj?

True

In [135]:
2 in s_obj # is 2 an index of s_obj?

False

In [136]:
s_obj.keys() # selecting the indices of s_obj

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [140]:
# changing the content of a Series object

s_obj['d','e'] = 9
s_obj

a    1
b    2
c    3
d    9
e    9
dtype: int64

### Slicing Pandas Series


In [144]:
s_obj['a':'c'] # Selecting values from index a to c

a    1
b    2
c    3
dtype: int64

In [142]:
s_obj[:3] # alternative way 

a    1
b    2
c    3
dtype: int64

In [146]:
s_obj[['a','d']] # Selecting the values at index a and d only

a    1
d    9
dtype: int64

In [151]:
s_obj[(s_obj >0) & (s_obj>5)] # masking

d    9
e    9
dtype: int64

In [152]:
data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])
data

1    a
3    b
5    c
dtype: object

In [158]:
data[3] # explicit index when indexing, your way of indexing

'b'

In [154]:
data[5]

'c'

In [156]:
data[1:3] # implicit index when indexing uses the python-style index, python way of slicing 

3    b
5    c
dtype: object

### Loc and iloc