In [1]:
'''
This notebook introduces three core pandas objects:
- Series
- DataFrame
- Index
'''


In [2]:
import numpy as np
import pandas as pd

In [3]:
# A pandas Series is a one-dimensional array of values, each paired with an index label.
data = pd.Series(data=[0.25, 0.50, 0.75, 1.00])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [5]:
# Look up the value stored under index label 2.
data[2]

0.75

In [6]:
# The index attribute holds the labels attached to the values (here a default RangeIndex).
data.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
# The values attribute exposes the underlying data as a NumPy array.
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [14]:
# Negative slice bounds count from the end, so this keeps everything except the last two elements.
data[:-2]

0    0.25
1    0.50
dtype: float64

In [15]:
# The index does not have to be integers; it can also consist of string labels.

In [17]:
# The same four values, labelled 'a'..'d' instead of 0..3.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1],
    index=list('abcd'),
)

In [18]:
# A full slice returns every element of the Series.
data[:]

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [19]:
# Elements are retrieved by their string label.
data['b']

0.5

In [26]:
# A Series can be viewed as a specialised dictionary mapping index labels to values.
# Unlike a plain dict, its values live in a typed array, which generally makes
# bulk numeric operations more efficient. It can be built directly from a dict:

population_dict = dict([
    ('California', 5),
    ('Texas', 4),
    ('New York', 3),
    ('Florida', 2),
    ('Illinois', 1),
])

# The dict keys become the index labels, in insertion order.
population = pd.Series(population_dict)
population

California    5
Texas         4
New York      3
Florida       2
Illinois      1
dtype: int64

In [25]:
# Label-based slicing includes the end label, so 'Illinois' is part of the result.
population['Texas':'Illinois']

Texas       4
New York    3
Florida     2
Illinois    1
dtype: int64

In [30]:
# When the data is a single scalar, it is repeated to fill every label in the index.
pd.Series(3,
         index=[100, 200, 30])

100    3
200    3
30     3
dtype: int64

In [33]:
# Build a Series from a dict, then sort the rows by index label.
pd.Series({2:'a', 1:'b', 3:'c'}).sort_index()

1    b
2    a
3    c
dtype: object

In [34]:
# With an explicit index, only the dict keys listed in it are kept, in the order given.
pd.Series({1: 'a', 2: 'b', 3: 'c'},
         index=[2,1])

2    b
1    a
dtype: object

In [60]:
# Two plain dicts keyed by the same labels A-E.
# NOTE: `population` is rebound to a dict here; an earlier cell bound it to a Series.
Area = dict(A=50, B=40, C=30, D=20, E=10)
population = dict(A=550, B=440, C=330, D=220, E=110)

# Convert each dict to a Series; the dict keys become the index labels.
area, area_pop = pd.Series(Area), pd.Series(population)
area

A    50
B    40
C    30
D    20
E    10
dtype: int64

In [61]:
# Display the population Series built from the `population` dict.
area_pop

A    550
B    440
C    330
D    220
E    110
dtype: int64

In [62]:
# Combine the two Series into a single DataFrame: each Series becomes one column,
# and the rows are labelled by the index the two Series share.
states = pd.DataFrame(dict(population=area_pop, area=area))

states

Unnamed: 0,population,area
A,550,50
B,440,40
C,330,30
D,220,20
E,110,10


In [63]:
# Row labels of the DataFrame.
states.index

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [64]:
# Column labels of the DataFrame; these are stored in an Index object as well.
states.columns

Index(['population', 'area'], dtype='object')

In [65]:
# Selecting a column by name returns that column as a Series.
states['population']

A    550
B    440
C    330
D    220
E    110
Name: population, dtype: int64

In [68]:
# Like a Series, a DataFrame can be constructed in several ways.
# Here: from a single (unnamed) Series, naming its one column via `columns=`.
populationA = {
    'A' : 550,
    'B' : 440,
    'C' : 330,
    'D' : 220,
    'E' : 110
}
# Build the Series from the dict defined in this cell so the cell is self-contained
# and does not rely on a variable created elsewhere in the notebook.
pd.DataFrame(pd.Series(populationA), columns=['population'])

Unnamed: 0,population
A,550
B,440
C,330
D,220
E,110


In [79]:
# Build a list of records (one dict per row) with a comprehension.
data = [dict(a=i, b=2 * i) for i in range(100, 103)]

data

[{'a': 100, 'b': 200}, {'a': 101, 'b': 202}, {'a': 102, 'b': 204}]

In [80]:
# A list of dicts becomes one row per dict, with the dict keys as column names.
pd.DataFrame(data)

Unnamed: 0,a,b
0,100,200
1,101,202
2,102,204


In [81]:
# Two records that do not share the same set of keys.
data_nn = (
    dict(a=100, b=200),
    dict(c=300, b=450),
)

data_nn

({'a': 100, 'b': 200}, {'c': 300, 'b': 450})

In [82]:
# Keys missing from a record are filled with NaN (shown as empty cells in the output).
pd.DataFrame(data_nn)

Unnamed: 0,a,b,c
0,100.0,200,
1,,450,300.0


In [91]:
# A DataFrame can be built from a 2-D NumPy array with explicit column and index labels.
# A seeded generator keeps the sample values identical across re-runs of the notebook.

pd.DataFrame(
    np.random.default_rng(seed=0).random((3, 2)),
    columns=['Alpha', 'Beta'],
    index=['A', 'B', 'C'],
)

Unnamed: 0,Alpha,Beta
A,0.310512,0.892933
B,0.681179,0.003986
C,0.603483,0.327214


In [92]:
# A NumPy structured array of three zeroed records with two named fields:
# 'A' (8-byte integer) and 'B' (8-byte float).
A = np.zeros(3, dtype={'names': ['A', 'B'], 'formats': ['i8', 'f8']})
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [93]:
# A structured array converts directly: each named field becomes a column.
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


In [94]:
# The pandas Index object: an array-like sequence of labels.

ind = pd.Index(data=[2, 3, 5, 7, 11])
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

In [95]:
# An Index supports positional element access, like an array.
ind[2]

5

In [97]:
# Slicing with a step of 2 returns a new Index holding every second element.
ind[::2]

Int64Index([2, 5, 11], dtype='int64')

In [98]:
# Pandas indexes are immutable: item assignment raises a TypeError.
# The error is caught and printed so that the demonstration does not abort
# a full "Restart & Run All" of the notebook.

try:
    ind[0] = -9
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Index does not support mutable operations

In [99]:
# Index immutability makes it safe to share one index between several Series and DataFrames.

In [103]:
# Index objects support set operations such as unions, intersections and differences.

indA = pd.Index(data=[1, 3, 5, 7, 9])
indB = pd.Index(data=[2, 3, 4, 5, 7, 11])

In [104]:
# Use the explicit method: on an Index, `&` is an element-wise logical operation
# in pandas 2.x rather than a set intersection.
indA.intersection(indB) # intersection

Int64Index([3, 5, 7], dtype='int64')

In [105]:
# Use the explicit method: on an Index, `|` is an element-wise logical operation
# in pandas 2.x rather than a set union.
indA.union(indB) # Union

Int64Index([1, 2, 3, 4, 5, 7, 9, 11], dtype='int64')