In [2]:
import numpy as np
import pandas as pd

# Report the library versions in use so the outputs below can be reproduced.
print('NumPy version: ', np.__version__)
print('Pandas version: ', pd.__version__)

NumPy version:  1.26.4
Pandas version:  2.2.1


## Introducing Pandas objects

Pandas has three fundamental data structures: `DataFrame`, `Series` and `Index`.

In [4]:
# Pandas Series: a one-dimensional array of values paired with index labels.

# No index is given here, so pandas supplies default integer labels.
data1 = pd.Series(data=[1, 3, 6, 7])

# Here the labels are supplied explicitly. They do not all have to be the
# same type: five strings followed by the integer 6.
a = np.arange(6)
data2 = pd.Series(data=a, index=[*'abcde', 6])

In [8]:
# Show both Series, one after the other, in plain-text form.
print(data1, data2, sep='\n')

0    1
1    3
2    6
3    7
dtype: int64
a    0
b    1
c    2
d    3
e    4
6    5
dtype: int32


In [10]:
# The index of data2 mixes strings with the integer 6, so 6 is a label here,
# not a position. .loc makes the label-based lookup explicit.
print(data2.loc[6], data2.loc['d'])

5 3


In [11]:
# Series as a specialised dictionary (typed dictionary):
# when built from a dict, the keys become the index labels and the values
# become the data.
population_dict = dict([
    ('California', 38332521),
    ('Texas', 26448193),
    ('New York', 19651127),
    ('Florida', 19552860),
    ('Illinois', 12882135),
])
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [12]:
# Look up a single value by its index label, much like a dictionary key.
population.loc['California']

38332521

In [13]:
# Unlike a plain dictionary, a Series can be sliced by label.
# Both endpoints of a label slice are included in the result.
population.loc['California':'Illinois']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [14]:
# Pandas DataFrame as a generalized NumPy array.
# Start with a second Series, keyed by the same five state names as the
# population Series.
area_dict = dict([
    ('California', 423967),
    ('Texas', 695662),
    ('New York', 141297),
    ('Florida', 170312),
    ('Illinois', 149995),
])
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [15]:
# Combine the two Series into a two-column DataFrame: each dictionary key
# becomes a column name, and the rows are aligned on the state labels.
states = pd.DataFrame(data=dict(population=population, area=area))
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [17]:
# A DataFrame carries two Index objects: one for its rows, one for its columns.
print(states.index, states.columns, sep='\n')

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')
Index(['population', 'area'], dtype='object')


In [18]:
# DataFrame as a specialized dictionary:
# each column name maps to a Series holding that column's data, so a column
# can be selected by its label.
states.loc[:, 'area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [19]:
# Index as immutable array:
# an Index can be created directly from a list of integers.
ind = pd.Index(data=[1, 7, 5, 6])
ind

Index([1, 7, 5, 6], dtype='int64')

In [20]:
# An Index supports array-style indexing: a single position returns a scalar,
# while a slice (here every second element starting at position 1) returns
# another Index.
print(ind[1], ind[1::2], sep='\n')

7
Index([7, 6], dtype='int64')


In [21]:
# An Index exposes the same descriptive attributes as a NumPy array.
print(
    ind.shape,
    ind.size,
    ind.ndim,
    ind.dtype,
)

(4,) 4 1 int64


In [22]:
# Index is immutable: item assignment raises a TypeError.
# The error is the point of this cell, so it is caught and printed instead of
# being left uncaught, which would stop a "Restart Kernel -> Run All" here.
try:
    ind[1] = 13
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Index does not support mutable operations

In [25]:
# Index as ordered set:
# two Index objects that share some values (3, 5, 7) and differ in others.
indA = pd.Index(data=[1, 3, 5, 7, 9])
indB = pd.Index(data=[2, 3, 5, 7, 11])

In [31]:
# Intersection: the values present in both indA and indB.
indA.intersection(other=indB)

Index([3, 5, 7], dtype='int64')

In [32]:
# Union: every value that appears in indA or indB, without duplicates.
indA.union(other=indB)

Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [34]:
# Symmetric difference: the values that appear in exactly one of indA and indB.
indA.symmetric_difference(other=indB)

Index([1, 2, 9, 11], dtype='int64')