# Installing and Using Pandas

In [2]:
import pandas as pd
import numpy as np
# Show which pandas and NumPy versions are installed in this kernel.
print('Pandas version:', pd.__version__)
print('NumPy version:', np.__version__)

Pandas version: 2.2.2
NumPy version: 1.26.4


# Introducing Pandas Objects

## Pandas Series Objects

In [13]:
# Mixing floats, a string and an int forces the Series to the generic
# ``object`` dtype.
data = pd.Series(data=[2.5, 3.14, 'Tacos', 7])
print(data)

0      2.5
1     3.14
2    Tacos
3        7
dtype: object


### Series as NumPy array

In [37]:
# Label the five values '1.' through '5.' instead of relying on the default
# integer index.
data = pd.Series(
    data=[0, 2.5, 5, 7.5, 10],
    index=[f'{position}.' for position in range(1, 6)],
)

print('The whole series:\n', data)
# .loc looks the value up by its index label.
print('\nThe specified series:', data.loc['3.'])

The whole series:
 1.     0.0
2.     2.5
3.     5.0
4.     7.5
5.    10.0
dtype: float64

The specified series: 5.0


### Series as specialized dictionary

In [18]:
# Career home-run totals keyed by player name, listed from most to fewest.
homeruns_dict = {
    'Barry Bonds': 762,
    'Hank Aaron': 755,
    'Babe Ruth': 714,
    'Albert Pujols': 703,
    'Alex Rodriguez': 696,
    'Willie Mays': 660,
    'Ken Griffey Jr.': 630,
    'Jim Thome': 612,
    'Sammy Sosa': 609,
    'Frank Robinson': 586,
}
# The dict keys become the Series index.
homeruns = pd.Series(data=homeruns_dict)
# A label-based slice includes both endpoints, so this selects three rows.
print('Top three homerun hitters:\n', homeruns.loc['Barry Bonds':'Babe Ruth'])

Top three homerun hitters:
 Barry Bonds    762
Hank Aaron     755
Babe Ruth      714
dtype: int64


### Constructing series objects

In [9]:
# An explicit index selects and orders only the requested keys from the dict,
# so 'x' is left out of the result.
P = pd.Series(data={'x': 100, 'y': 200, 'z': 300}, index=['z', 'y'])
print('The desired index is:\n', P)

The desired index is:
 z    300
y    200
dtype: int64


## The Pandas DataFrame Object

### DataFrame as NumPy array

In [30]:
# Career hit totals for the same ten players that appear in ``homeruns_dict``.
hits_dict = {
    'Barry Bonds': 2935,
    'Hank Aaron': 3771,
    'Babe Ruth': 2873,
    'Albert Pujols': 3384,
    'Alex Rodriguez': 3115,
    'Willie Mays': 3283,
    'Ken Griffey Jr.': 2781,
    'Jim Thome': 2328,
    'Sammy Sosa': 2408,
    'Frank Robinson': 2943,
}
hits = pd.Series(data=hits_dict)

# Each Series becomes one column; rows are matched up by player name.
players = pd.DataFrame(data={'Homeruns': homeruns, 'Hits': hits})
print('The best baseball players are:')
# Bare last expression so the notebook renders the table.
players

The best baseball players are:


Unnamed: 0,Homeruns,Hits
Barry Bonds,762,2935
Hank Aaron,755,3771
Babe Ruth,714,2873
Albert Pujols,703,3384
Alex Rodriguez,696,3115
Willie Mays,660,3283
Ken Griffey Jr.,630,2781
Jim Thome,612,2328
Sammy Sosa,609,2408
Frank Robinson,586,2943


### Constructing DataFrame objects

In [48]:
# Each dict is one row and its keys are the column names; ``index`` supplies
# the row labels.
pd.DataFrame(
    data=[{'a': 3, 'b': 2, 'c': 1}, {'a': 4, 'b': 5, 'c': 6}],
    index=['Row 1', 'Row 2'],
    columns=['a', 'b', 'c'],
)

Unnamed: 0,a,b,c
Row 1,3,2,1
Row 2,4,5,6


## The Pandas Index Object

### Index as immutable array

In [55]:
# Mixed numeric and string entries give the Index an ``object`` dtype.
i = pd.Index(data=[6, 3.0, 9.4, 'tacos', 7])
# Slicing an Index yields another Index.
print('The index from the third value on:\n', i[2:])

The index from the third value on:
 Index([9.4, 'tacos', 7], dtype='object')


In [57]:
# An Index is immutable, so item assignment raises TypeError. Catch the error
# and print it so this demonstration does not abort a "Run All" of the notebook.
try:
    i[2] = 99
except TypeError as err:
    print(f'{type(err).__name__}: {err}')

TypeError: Index does not support mutable operations

### Index as ordered set

In [67]:
# Two integer indexes that share the labels 3, 4 and 5.
I1 = pd.Index([7, 5, 4, 3, 6])
I2 = pd.Index([1, 5, 4, 3, 2])

# Index objects support set-style operations.
print('Union of the indexes:', I1.union(I2))
print('Intersection of the indexes:', I1.intersection(I2))
# symmetric_difference keeps the labels found in exactly one of the two indexes.
print('Differences of the indexes:', I1.symmetric_difference(I2))

Union of the indexes: Index([1, 2, 3, 4, 5, 6, 7], dtype='int64')
Intersection of the indexes: Index([5, 4, 3], dtype='int64')
Diffferences of the indexes: Index([1, 2, 6, 7], dtype='int64')


# Data Indexing and Selection

## Data Selection in Series

### Series as dictionary

In [102]:
# Rebuild the players table from the two Series, one column per statistic.
players = pd.DataFrame(data={'Homeruns': homeruns, 'Hits': hits})
print('The best baseball players are:')
# Dictionary-style access: the column name is the key, the column Series the value.
print('Here are the hits for all the players:\n', players['Hits'])


The best baseball players are:
Here are the hits for all the players:
 Barry Bonds        2935
Hank Aaron         3771
Babe Ruth          2873
Albert Pujols      3384
Alex Rodriguez     3115
Willie Mays        3283
Ken Griffey Jr.    2781
Jim Thome          2328
Sammy Sosa         2408
Frank Robinson     2943
Name: Hits, dtype: int64


### Series as 1D array

In [111]:
# Keep the thresholds in one place so the printed description cannot drift
# away from the filter that is actually applied (the old message said 3000
# hits while the mask used 2800).
min_homeruns = 700
min_hits = 2800

print(f'Players that have more than {min_hits} hits and more than {min_homeruns} homeruns:')
# Build the boolean mask from the table's own columns so this cell depends
# only on `players`; the bare last expression renders the filtered table.
players[(players['Homeruns'] > min_homeruns) & (players['Hits'] > min_hits)]

Players that have 3000 hits and 700 homeruns:


Unnamed: 0,Homeruns,Hits
Barry Bonds,762,2935
Hank Aaron,755,3771
Babe Ruth,714,2873
Albert Pujols,703,3384


### Indexers: loc and iloc (ix was removed in pandas 1.0)

In [140]:
# The explicit integer index starts at 1, so labels and positions differ.
data = pd.Series(['Hank Aaron    ', 'Albert Pujols', 'Barry Bonds  '],
                 index=[1, 2, 3])
print('The top three players in baseball:\n', data)

# iloc is position-based and excludes the stop position, so 0:2 selects the
# first two rows (labels 1 and 2). The previous `data.iloc[0]` returned a
# single string, which did not match the two-row result recorded for this cell.
first_two = data.iloc[0:2]
print('Here\n', first_two)

The top three players in baseball:
 1    Hank Aaron    
2     Albert Pujols
3     Barry Bonds  
dtype: object
Here
 1    Hank Aaron    
2     Albert Pujols
dtype: object
