# Introducing Pandas Objects

In [1]:
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
data = pd.Series([0.25,0.5, 0.75, 1.0])

In [3]:
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [5]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
data[1]

0.5

In [7]:
data[1:3]

1    0.50
2    0.75
dtype: float64

In [8]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])

In [9]:
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
data['b']

0.5

In [11]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[2,5,3,7])

In [12]:
data[5]

0.5

In [13]:
data[2]

0.25

In [14]:
population_dict = {'California': 38332521, 'Texas': 26448193, 'New York': 19651127, 'Florida': 19552860,
                   'Illinois': 12882135  }

In [15]:
population = pd.Series(population_dict)

In [16]:
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [17]:
population['California']

38332521

In [18]:
population['California':'Florida']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
dtype: int64

### Constructing Series Objects

In [19]:
pd.Series(data)

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [20]:
pd.Series([2,4,6])

0    2
1    4
2    6
dtype: int64

In [21]:
pd.Series(5, index=[100, 200, 300])

100    5
200    5
300    5
dtype: int64

In [22]:
pd.Series({2:'a',1:'b',3:'c'})

2    a
1    b
3    c
dtype: object

In [23]:
pd.Series({2:'a',1:'b', 3: 'c'}, index=[3,2])


3    c
2    a
dtype: object

### Pandas DataFrame Object

In [26]:

area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [35]:
states = pd.DataFrame({'population':population, 'area': area})

In [36]:
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [37]:
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [38]:
states.columns

Index(['population', 'area'], dtype='object')

In [39]:
states['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [42]:
pd.DataFrame(population, columns=['population'])

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [45]:
data = [{'a': i, 'b' : 2* i}
        for i in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [46]:
pd.DataFrame([{'a':1,'b':2},{'b':3,'c':4}])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


### Form a dictionary of Series Object

In [47]:
pd.DataFrame({'population': population, 'area': area})

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [49]:
pd.DataFrame(np.random.rand(3,2), columns=['foo','bar'], index=['a','b','c'])

Unnamed: 0,foo,bar
a,0.796238,0.256108
b,0.344825,0.096864
c,0.383174,0.561655


# From a numpy structured array

In [51]:
A = np.zeros(3, dtype=[('A','i8'),('B', 'f8')])
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [52]:
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


In [53]:
ind = pd.Index([2,3,5,7,11])

In [54]:
ind

Index([2, 3, 5, 7, 11], dtype='int64')

In [55]:
ind[::2]

Index([2, 5, 11], dtype='int64')

In [56]:
print(ind.size, ind.shape, ind.ndim, ind.dtype)

5 (5,) 1 int64


In [57]:
int[1] = 0

TypeError: 'type' object does not support item assignment

### Index as Ordered Set

In [58]:
indA = pd.Index([1,3,5,7,9])
indB = pd.Index([2,3,5,7,11])

In [59]:
indA & indB #intersection

Index([0, 3, 5, 7, 9], dtype='int64')

In [61]:
indA | indB #union

Index([3, 3, 5, 7, 11], dtype='int64')

In [62]:
indA ^ indB #symmetric difference

Index([3, 0, 0, 0, 2], dtype='int64')