# Pandas

In [1]:
import pandas as pd
import numpy as np

## Using List

In [2]:
# Sample values reused by the list- and array-based Series examples below.
my_list = list(range(100, 600, 100))

In [3]:
# Build a Series from the list, labelling each value with a letter instead of
# the default 0..n-1 integer index.
x = pd.Series(data=my_list, index=list('ABCDE'))
x

A    100
B    200
C    300
D    400
E    500
dtype: int64

In [4]:
# Confirm that the constructor returned a pandas Series object.
type(x)

pandas.core.series.Series

## Using Numpy Array

In [5]:
# Convert the same list into a NumPy array to show a Series can be built from one.
arr = np.array(my_list)

In [6]:
# Display the array to check its contents.
arr

array([100, 200, 300, 400, 500])

In [7]:
# str.split() with no arguments splits on whitespace, giving a list of labels.
'A B C D E'.split()

['A', 'B', 'C', 'D', 'E']

In [8]:
# Same construction as with the list, but the data is the NumPy array and the
# index labels come from splitting a string.
pd.Series(data=arr, index='A B C D E'.split())

A    100
B    200
C    300
D    400
E    500
dtype: int32

## Using Dictionaries

In [9]:
# Label -> value mapping used to build a Series directly from a dictionary.
dict1 = dict(A=100, B=200, C=300, D=400)

In [10]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
z1 = pd.Series(dict1)
z1

A    100
B    200
C    300
D    400
dtype: int64

In [11]:
# A second Series carrying the labels A-D, so it can be combined with z1
# label by label.
z2 = pd.Series(data=[10, 20, 30, 40], index=list('ABCD'))
z2

A    10
B    20
C    30
D    40
dtype: int64

In [12]:
# Adding two Series pairs values up by index label, not by position.
z1 + z2

A    110
B    220
C    330
D    440
dtype: int64

## Slicing in series z2

In [13]:
# Integer slice inside []: taken by position, end-exclusive (positions 0 and 1).
z2[0:2]

A    10
B    20
dtype: int64

In [14]:
# Label slice inside []: both endpoints are included ('A' through 'C').
z2['A':'C']

A    10
B    20
C    30
dtype: int64

In [15]:
# Label start with a step: from 'A' to the end, taking every second element.
z2['A'::2]

A    10
C    30
dtype: int64

## .loc & .iloc Indexers

In [16]:
# .loc selects by label; the slice includes the end label 'C'.
z2.loc['A':'C']

A    10
B    20
C    30
dtype: int64

In [17]:
# .iloc selects by integer position; the stop position 3 is excluded.
z2.iloc[0:3]

A    10
B    20
C    30
dtype: int64

In [18]:
# A Series whose index labels are themselves integers (100-400), used to
# contrast label-based and position-based access.
z = pd.Series(data=list('abcd'), index=[100, 200, 300, 400])
z

100    a
200    b
300    c
400    d
dtype: object

In [19]:
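# z[0] is left disabled: z's index holds the integer labels 100-400, so []
# looks for a label 0 and raises KeyError. Use z.iloc[0] for the first element.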

In [20]:
# Positional slice with no bounds: .iloc[::] returns every element, in order.
z.iloc[::]

100    a
200    b
300    c
400    d
dtype: object

## Operations


In [21]:
# Re-display z1 as the starting point for the comparisons below.
z1


A    100
B    200
C    300
D    400
dtype: int64

In [22]:
# Element-wise comparison against a scalar yields a boolean Series.
z1>100

A    False
B     True
C     True
D     True
dtype: bool

In [23]:
# No value is below 100, so every entry is False.
z1<100


A    False
B    False
C    False
D    False
dtype: bool

In [24]:
# Only the entry at 'A' (100) is below 200.
z1<200

A     True
B    False
C    False
D    False
dtype: bool

## Broadcasting

In [25]:
# The scalar 300 is added to every element of the Series.
z1 + 300

A    400
B    500
C    600
D    700
dtype: int64

## Ordering

In [26]:
# Show z2 in its original order before sorting.
z2


A    10
B    20
C    30
D    40
dtype: int64

In [27]:
# sort_values() returns a new Series sorted in descending order.
z2.sort_values(ascending = False)

D    40
C    30
B    20
A    10
dtype: int64

In [28]:
# z2 itself is unchanged: the sort above was not in place.
z2

A    10
B    20
C    30
D    40
dtype: int64

In [29]:
# inplace=True sorts z2 itself; the call produces no output.
z2.sort_values(ascending = False, inplace = True)

In [30]:
# z2 is now stored in descending order.
z2

D    40
C    30
B    20
A    10
dtype: int64

## Aggregation on Series

In [31]:
# z2 as left by the in-place sort; the aggregations below operate on it.
z2

D    40
C    30
B    20
A    10
dtype: int64

In [32]:
# Smallest value in the Series.
z2.min()

10

In [33]:
# Largest value in the Series.
z2.max()

40

In [34]:
# Index label of the largest value (not its position).
z2.idxmax()

'D'

# DataFrame

In [35]:
# Seed the generator so the random matrix, and every DataFrame output derived
# from it below, is identical on each Restart & Run All.
rng = np.random.default_rng(42)
# 5 rows x 4 columns of samples from the standard normal distribution.
rand_mat = rng.standard_normal((5, 4))

In [36]:
# Inspect the raw 5x4 array before wrapping it in a DataFrame.
rand_mat

array([[ 0.15548919, -2.46958116,  1.68288705,  0.21772189],
       [ 0.56259735,  0.22556301, -1.67783405, -0.14435631],
       [ 0.08344981, -1.0661107 ,  1.09695692, -1.36686173],
       [-1.00593439, -0.78675549,  1.45948793,  0.12700984],
       [ 0.06090078, -0.49943053, -1.13782814,  0.10358344]])

In [37]:
# Wrap the random matrix in a DataFrame: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(rand_mat, index=list('ABCDE'), columns=list('WXYZ'))
df

Unnamed: 0,W,X,Y,Z
A,0.155489,-2.469581,1.682887,0.217722
B,0.562597,0.225563,-1.677834,-0.144356
C,0.08345,-1.066111,1.096957,-1.366862
D,-1.005934,-0.786755,1.459488,0.12701
E,0.060901,-0.499431,-1.137828,0.103583


In [38]:
# Confirm that the object is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [39]:
# Selecting one column with [] returns it as a Series named after the column.
df['W']

A    0.155489
B    0.562597
C    0.083450
D   -1.005934
E    0.060901
Name: W, dtype: float64

## Selecting and Indexing

In [40]:
# .loc with a row label returns that row as a Series indexed by column name.
df.loc['A']

W    0.155489
X   -2.469581
Y    1.682887
Z    0.217722
Name: A, dtype: float64

In [41]:
# Passing a list of column names returns a DataFrame with just those columns.
df[['W','Z']]

Unnamed: 0,W,Z
A,0.155489,0.217722
B,0.562597,-0.144356
C,0.08345,-1.366862
D,-1.005934,0.12701
E,0.060901,0.103583


### Add a New Column

In [42]:
# New column computed row by row as the sum of columns W and Y.
df['New']= df['W'] + df['Y']

In [43]:
# Display df to confirm the 'New' column was added.
df

Unnamed: 0,W,X,Y,Z,New
A,0.155489,-2.469581,1.682887,0.217722,1.838376
B,0.562597,0.225563,-1.677834,-0.144356,-1.115237
C,0.08345,-1.066111,1.096957,-1.366862,1.180407
D,-1.005934,-0.786755,1.459488,0.12701,0.453554
E,0.060901,-0.499431,-1.137828,0.103583,-1.076927


In [44]:
# drop() returns a new DataFrame without the 'New' column; df itself still has it.
df.drop(columns='New')

Unnamed: 0,W,X,Y,Z
A,0.155489,-2.469581,1.682887,0.217722
B,0.562597,0.225563,-1.677834,-0.144356
C,0.08345,-1.066111,1.096957,-1.366862
D,-1.005934,-0.786755,1.459488,0.12701
E,0.060901,-0.499431,-1.137828,0.103583


In [45]:
# With inplace=True the 'New' column is removed from df itself and the call
# produces no output.
df.drop(columns='New', inplace=True)

In [46]:
# 'New' is gone from df after the in-place drop.
df

Unnamed: 0,W,X,Y,Z
A,0.155489,-2.469581,1.682887,0.217722
B,0.562597,0.225563,-1.677834,-0.144356
C,0.08345,-1.066111,1.096957,-1.366862
D,-1.005934,-0.786755,1.459488,0.12701
E,0.060901,-0.499431,-1.137828,0.103583


## Add a New Row

In [47]:
# Assigning through .loc to a label that does not exist yet appends a new row;
# here row 'F' is the element-wise sum of rows 'A' and 'C'.
df.loc['F'] = df.loc['A'] + df.loc['C']

In [48]:
# Display df to confirm row 'F' was appended.
df

Unnamed: 0,W,X,Y,Z
A,0.155489,-2.469581,1.682887,0.217722
B,0.562597,0.225563,-1.677834,-0.144356
C,0.08345,-1.066111,1.096957,-1.366862
D,-1.005934,-0.786755,1.459488,0.12701
E,0.060901,-0.499431,-1.137828,0.103583
F,0.238939,-3.535692,2.779844,-1.14914


In [49]:
# Remove row 'F' from df itself; index= targets row labels rather than columns.
df.drop(index='F', inplace=True)

In [50]:
# Row 'F' is gone from df after the in-place drop.
df

Unnamed: 0,W,X,Y,Z
A,0.155489,-2.469581,1.682887,0.217722
B,0.562597,0.225563,-1.677834,-0.144356
C,0.08345,-1.066111,1.096957,-1.366862
D,-1.005934,-0.786755,1.459488,0.12701
E,0.060901,-0.499431,-1.137828,0.103583


In [51]:
# Look up the single value at row 'C', column 'X' in one .loc call instead of
# chaining two [] lookups (which first extracts the whole column as a Series).
df.loc['C', 'X']

-1.0661107013965545

In [52]:
# .at is the label-based scalar accessor: one lookup for row 'C', column 'X',
# rather than extracting the whole row and then indexing into it.
df.at['C', 'X']

-1.0661107013965545

## Data Input & Output

In [53]:
# Load the match records and keep them in a variable so later cells can reuse
# the data without re-reading the file.
# NOTE(review): 'matches.csv' is a relative path, so it is resolved against the
# kernel's working directory.
matches = pd.read_csv('matches.csv')
matches

Unnamed: 0,id,city,date,player_of_match,venue,neutral_venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,eliminator,method,umpire1,umpire2
0,335982,Bangalore,2008-04-18,BB McCullum,M Chinnaswamy Stadium,0,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,N,,Asad Rauf,RE Koertzen
1,335983,Chandigarh,2008-04-19,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",0,Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,N,,MR Benson,SL Shastri
2,335984,Delhi,2008-04-19,MF Maharoof,Feroz Shah Kotla,0,Delhi Daredevils,Rajasthan Royals,Rajasthan Royals,bat,Delhi Daredevils,wickets,9.0,N,,Aleem Dar,GA Pratapkumar
3,335985,Mumbai,2008-04-20,MV Boucher,Wankhede Stadium,0,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,Royal Challengers Bangalore,wickets,5.0,N,,SJ Davis,DJ Harper
4,335986,Kolkata,2008-04-20,DJ Hussey,Eden Gardens,0,Kolkata Knight Riders,Deccan Chargers,Deccan Chargers,bat,Kolkata Knight Riders,wickets,5.0,N,,BF Bowden,K Hariharan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
811,1216547,Dubai,2020-09-28,AB de Villiers,Dubai International Cricket Stadium,0,Royal Challengers Bangalore,Mumbai Indians,Mumbai Indians,field,Royal Challengers Bangalore,tie,,Y,,Nitin Menon,PR Reiffel
812,1237177,Dubai,2020-11-05,JJ Bumrah,Dubai International Cricket Stadium,0,Mumbai Indians,Delhi Capitals,Delhi Capitals,field,Mumbai Indians,runs,57.0,N,,CB Gaffaney,Nitin Menon
813,1237178,Abu Dhabi,2020-11-06,KS Williamson,Sheikh Zayed Stadium,0,Royal Challengers Bangalore,Sunrisers Hyderabad,Sunrisers Hyderabad,field,Sunrisers Hyderabad,wickets,6.0,N,,PR Reiffel,S Ravi
814,1237180,Abu Dhabi,2020-11-08,MP Stoinis,Sheikh Zayed Stadium,0,Delhi Capitals,Sunrisers Hyderabad,Delhi Capitals,bat,Delhi Capitals,runs,17.0,N,,PR Reiffel,S Ravi
