# Pandas

In [None]:
!python -m pip install pandas

In [2]:
import pandas as pd

In [4]:
# Build a Series from a plain list; no index is given, so the elements are
# labelled with the default integer positions.
obj = pd.Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [10]:
# Label 4 is not in the index (labels are 0-3), so .get() falls back to its
# default (None) instead of raising, and the cell displays nothing.
obj.get(4, default=None)

## Values and Index

In [14]:
# A Series has two parts: .values (the data array) and .index (the labels).
sales = pd.Series(data=[100, 200, 100, 400])
print(sales.values, sales.index, sep="\n")

[100 200 100 400]
RangeIndex(start=0, stop=4, step=1)


In [17]:
# Supplying `index` replaces the default integer labels with custom ones.
sales = pd.Series(
    data=[100, 200, 100, 400],
    index=['Jan', 'Feb', 'Mar', 'Apr'],
)
print(sales, sales.values, sales.index, sep="\n")

Jan    100
Feb    200
Mar    100
Apr    400
dtype: int64
[100 200 100 400]
Index(['Jan', 'Feb', 'Mar', 'Apr'], dtype='object')


In [44]:
# `name` labels the Series as a whole; it is shown in the footer of the printout.
sales = pd.Series(
    data=[100, 200, 100, 400],
    index=['Jan', 'Feb', 'Mar', 'Apr'],
    name="4 Month Sales",
)
print(sales)

Jan    100
Feb    200
Mar    100
Apr    400
Name: 4 Month Sales, dtype: int64


### Accessing Data

In [46]:
# Positional access: use .iloc / .iat. Plain `sales[0]` (and `sales.get(0)`) on a
# string-labelled Series relies on an integer-position fallback that newer
# pandas releases deprecate, so the explicit positional accessors are used here.
print(sales.iloc[0])
print(sales.iat[0])
# Label access: [] raises on a missing label, while .get() returns a default.
print(sales['Jan'])
print(sales.get('Jan'))
# print(sales.iloc[4])  # would raise IndexError: position 4 is out of bounds
# print(sales['May'])   # would raise KeyError: 'May' is not a label
print(sales.get('May'))   # Returns None instead of raising an error

100
100
100
100
None


### Create a Series which contains how many sandwiches are sold each day in a week

In [56]:
# Sandwiches sold per weekday: one value per day label.
sales = pd.Series(
    data=[10, 20, 30, 40, 20, 50, 55],
    index=['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun'],
    name='Sales/Day',
)
sales

Mon     10
Tue     20
Wed     30
Thur    40
Fri     20
Sat     50
Sun     55
Name: Sales/Day, dtype: int64

## Selecting multiple elements

In [58]:
# Pass a list to pick several elements at once.
# Positions go through .iloc: `sales[[3, 5]]` on a string-labelled Series depends
# on the integer-position fallback that newer pandas releases deprecate.
print(sales.iloc[[3, 5]])        # by position
print(sales[['Tue', 'Thur']])    # by label

Thur    40
Sat     50
Name: Sales/Day, dtype: int64
Tue     20
Thur    40
Name: Sales/Day, dtype: int64


### Conditional Selection

In [59]:
# `sales.gt(20)` is a boolean Series; indexing with it keeps only the True rows.
print(sales[sales.gt(20)])

Wed     30
Thur    40
Sat     50
Sun     55
Name: Sales/Day, dtype: int64


## Arithmetic Operations

In [61]:
# Arithmetic on a Series returns a NEW Series; it does not modify the original.
# (A Series is mutable, but operators such as * do not work in place.)
# The product on the next line is never assigned, so it is discarded and
# `sales` still shows its original values.
sales * 2
sales

Mon     10
Tue     20
Wed     30
Thur    40
Fri     20
Sat     50
Sun     55
Name: Sales/Day, dtype: int64

In [62]:
# Rebind `sales` to the doubled result to keep it.
# Note: re-running this cell doubles the values again.
sales = sales.mul(2)
sales

Mon      20
Tue      40
Wed      60
Thur     80
Fri      40
Sat     100
Sun     110
Name: Sales/Day, dtype: int64

In [63]:
# Notice data has changed into float from int
sales = sales / 2
sales

Mon     10.0
Tue     20.0
Wed     30.0
Thur    40.0
Fri     20.0
Sat     50.0
Sun     55.0
Name: Sales/Day, dtype: float64

## Check if a Label Exists in the Index

In [69]:
# Membership on a Series tests the index labels, not the stored values.
'Mon' in sales.index

True

### Using numpy with pandas

In [70]:
import numpy as np

# Both the values and the labels can be supplied as NumPy arrays.
data = np.array([2, 3, 4, 5, 6])
indices = np.array(['Mon', 'Tues', 'Wednes', 'Thurs', 'Fri'])
data_series = pd.Series(data, index = indices)
# End the cell on the Series so it is actually displayed; without this line the
# cell produces no output at all.
# NOTE(review): the integer width shown (int32 vs int64) presumably follows
# NumPy's platform default for the array above - confirm on your machine.
data_series

Mon       2
Tues      3
Wednes    4
Thurs     5
Fri       6
dtype: int32

### Using Dictionary with pandas

In [72]:
# A dict maps straight onto a Series: keys become the index, values the data.
data = {
    'Sindh': 35000,
    'Punjab': 4500,
    'KPK': 3000,
    'Balochistan': 2000,
}
tax = pd.Series(data)
print(tax, tax.index, sep="\n")

Sindh          35000
Punjab          4500
KPK             3000
Balochistan     2000
dtype: int64
Index(['Sindh', 'Punjab', 'KPK', 'Balochistan'], dtype='object')


In [3]:
# Change indices
data = { 'Sindh': 35000, 'Punjab': 4500, 'KPK': 3000, 'Balochistan': 2000}
tax = pd.Series(data, index = ['Punjab', 'Sindh', 'KPK', 'Balochistan'])
print(tax)
print(tax.index)

Punjab          4500
Sindh          35000
KPK             3000
Balochistan     2000
dtype: int64
Index(['Punjab', 'Sindh', 'KPK', 'Balochistan'], dtype='object')


In [8]:
# Label the Series itself and its index; both names show up in the printout.
# NOTE(review): the recorded output below contains a 'GB' row with NaN, which
# matches the five-label `tax` built in the following cell rather than the
# four-label one defined above - this cell was evidently executed out of order.
# On a fresh top-to-bottom run the printed Series will have four rows.
tax.name = 'State Tax Paying Capacity'
tax.index.name = 'State Name'
print(tax)
print(tax.index)

State Name
Punjab          4500.0
Sindh          35000.0
KPK             3000.0
Balochistan     2000.0
GB                 NaN
Name: State Tax Paying Capacity, dtype: float64
Index(['Punjab', 'Sindh', 'KPK', 'Balochistan', 'GB'], dtype='object', name='State Name')


In [7]:
# 'GB' is requested in the index but has no entry in the dict, so its value is
# NaN and the whole Series becomes float64.
data = {
    'Sindh': 35000,
    'Punjab': 4500,
    'KPK': 3000,
    'Balochistan': 2000,
}
tax = pd.Series(data=data, index=['Punjab', 'Sindh', 'KPK', 'Balochistan', 'GB'])
print(tax)
# Missing-value mask, first via the top-level function, then via the method
# (isna is the same check as isnull under another name).
print(pd.isna(tax))
print(tax.isna())

Punjab          4500.0
Sindh          35000.0
KPK             3000.0
Balochistan     2000.0
GB                 NaN
dtype: float64
Punjab         False
Sindh          False
KPK            False
Balochistan    False
GB              True
dtype: bool
Punjab         False
Sindh          False
KPK            False
Balochistan    False
GB              True
dtype: bool


In [9]:
# Start with the full day names as labels...
sales = pd.Series(
    data=[20, 30, 40, 50, 60, 70, 0],
    index=['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun'],
)
print(sales)
# ...then replace the whole index in place by assigning a same-length list.
# Note that 'T' and 'S' each occur twice, so the new labels are not unique.
sales.index = list('MTWTFSS')
sales

Mon     20
Tue     30
Wed     40
Thur    50
Fri     60
Sat     70
Sun      0
dtype: int64


M    20
T    30
W    40
T    50
F    60
S    70
S     0
dtype: int64