# Analysis on IPL DataSet

In [1]:
# Private GPT helper module
from copilot import *

In [41]:
# Loading Libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as exp

from skimage import io

In [15]:
# Session logger: appends each prompt/response exchange to a file named after today's date
import time
import datetime as dt

def bot(tokens):
    """Run ``write(tokens)`` and append the exchange to today's session log.

    The log file is named after the current date (``dd-mm-YYYY``, no
    extension), lives in the working directory, and is opened in append
    mode, so repeated calls on the same day accumulate in one file.

    Parameters
    ----------
    tokens
        Forwarded unchanged to ``write``. ``write`` is not defined in this
        cell (NOTE(review): presumably supplied by ``from copilot import *``
        -- confirm); it must return a ``(prompt, response)`` pair of strings.

    Returns
    -------
    None
        The function is used for its side effect of writing to the log.
    """
    # Timestamp is taken before the call so the entry records when the
    # prompt was sent, not when the response came back.
    now = time.ctime()
    value = dt.date.today()

    # The context manager closes the handle even if `write` or the string
    # formatting raises; the previous open()/close() pair leaked it.
    with open("{:%d-%m-%Y}".format(value), 'a') as session:
        prompt, response = write(tokens)

        # A single string is written, so `write` is used instead of
        # `writelines` (which would iterate it character by character).
        session.write(f'\n{now}\nMe:\n{prompt.capitalize()}\n\nGPT:\n{response.strip()}\n----------------\n')

# Data Dictionary


| Column             | Description                                                                                              |
|:--------------------:|:----------------------------------------------------------------------------------------------------------|
| id                 | Identifier of the match; every delivery from the same match shares this value.                           |
| inning             | Indicates the inning number of the cricket match.                                                        |
| over               | Refers to the over number in the current inning.                                                         |
| ball               | Represents the ball number within the over.                                                              |
| batsman            | The name of the batsman who faced the ball during a particular delivery.                                 |
| non_striker        | The name of the batsman who is not facing the ball but is positioned at the non-striker's end.            |
| bowler             | The name of the bowler who delivers the ball to the batsman.                                             |
| batsman_runs       | The number of runs scored by the batsman on that particular ball.                                        |
| extra_runs         | Any additional runs scored that are not attributed to the batsman, such as wides or no balls.            |
| total_runs         | The total runs scored on that ball, including both the batsman's runs and any extras.                    |
| non_boundary       | Flag set when 4 or 6 runs were scored without the ball reaching the boundary (e.g. all run).             |
| is_wicket          | Indicates whether a wicket fell on that ball.                                                            |
| dismissal_kind     | If a wicket fell, this column describes the manner of dismissal (e.g., caught, bowled, run-out).         |
| player_dismissed   | The name of the batsman who got dismissed on that ball.                                                  |
| fielder            | The name of the fielder involved in the dismissal (if applicable).                                       |
| extras_type        | If there were any extras, this column specifies the type (e.g., wide, no ball).                          |
| batting_team       | The team that is currently batting.                                                                      |
| bowling_team       | The team that is currently bowling.                                                                      |


# Loading and Inspecting Data

In [8]:
# Read the ball-by-ball deliveries CSV from the working directory.
ball2ball = pd.read_csv('IPL Ball-by-Ball.csv')

# Preview the first five deliveries.
ball2ball.head(n=5)

Unnamed: 0,id,inning,over,ball,batsman,non_striker,bowler,batsman_runs,extra_runs,total_runs,non_boundary,is_wicket,dismissal_kind,player_dismissed,fielder,extras_type,batting_team,bowling_team
0,335982,1,6,5,RT Ponting,BB McCullum,AA Noffke,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
1,335982,1,6,6,BB McCullum,RT Ponting,AA Noffke,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
2,335982,1,7,1,BB McCullum,RT Ponting,Z Khan,0,0,0,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
3,335982,1,7,2,BB McCullum,RT Ponting,Z Khan,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
4,335982,1,7,3,RT Ponting,BB McCullum,Z Khan,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore


In [9]:
# Preview the last five deliveries to confirm the file loaded to the end.
ball2ball.tail(n=5)

Unnamed: 0,id,inning,over,ball,batsman,non_striker,bowler,batsman_runs,extra_runs,total_runs,non_boundary,is_wicket,dismissal_kind,player_dismissed,fielder,extras_type,batting_team,bowling_team
193463,1237181,1,12,5,RR Pant,SS Iyer,NM Coulter-Nile,0,0,0,0,0,,,,,Delhi Capitals,Mumbai Indians
193464,1237181,1,12,6,RR Pant,SS Iyer,NM Coulter-Nile,1,0,1,0,0,,,,,Delhi Capitals,Mumbai Indians
193465,1237181,1,13,1,RR Pant,SS Iyer,KH Pandya,0,1,1,0,0,,,,wides,Delhi Capitals,Mumbai Indians
193466,1237181,1,13,2,RR Pant,SS Iyer,KH Pandya,1,0,1,0,0,,,,,Delhi Capitals,Mumbai Indians
193467,1237181,1,13,3,SS Iyer,RR Pant,KH Pandya,1,0,1,0,0,,,,,Delhi Capitals,Mumbai Indians


In [17]:
# List the column labels of the loaded frame.
# The original cell read from `data`, which is never assigned in any visible
# cell and raises NameError on a fresh kernel; `ball2ball` is the frame
# loaded above.
# NOTE(review): assumes `data` was the same frame as `ball2ball` (the recorded
# columns and shape agree with it) -- confirm.
ball2ball.columns

Index(['id', 'inning', 'over', 'ball', 'batsman', 'non_striker', 'bowler',
       'batsman_runs', 'extra_runs', 'total_runs', 'non_boundary', 'is_wicket',
       'dismissal_kind', 'player_dismissed', 'fielder', 'extras_type',
       'batting_team', 'bowling_team'],
      dtype='object')

In [25]:
# Report the dimensions of the loaded frame.
# The original cell read from `data`, which is never assigned in any visible
# cell and raises NameError on a fresh kernel; `ball2ball` is the frame
# loaded above.
# NOTE(review): assumes `data` was the same frame as `ball2ball` -- confirm.
print(f'Shape: {ball2ball.shape}')
rows, columns = ball2ball.shape

print(f'Rows: {rows}\nColumns: {columns}')


Shape: (193468, 18)
Rows: 193468
Columns: 18


# Profile Report

In [27]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
from ydata_profiling import ProfileReport

# Build a profiling report object over the full ball-by-ball frame.
report = ProfileReport(ball2ball)

# Display the report inside the notebook (an embedded iframe, per the method name).
report.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

###### extra_runs == 7

In [48]:
# Deliveries that conceded more than six extras (the outlier flagged by the report).
extras_over_six = ball2ball['extra_runs'] > 6
ball2ball.loc[extras_over_six]

Unnamed: 0,id,inning,over,ball,batsman,non_striker,bowler,batsman_runs,extra_runs,total_runs,non_boundary,is_wicket,dismissal_kind,player_dismissed,fielder,extras_type,batting_team,bowling_team
98262,729309,2,4,2,M Vijay,Q de Kock,SL Malinga,0,7,7,0,0,,,,penalty,Delhi Daredevils,Mumbai Indians


###### High cardinality: batsman vs non_striker

- `batsman` has high cardinality: 537 distinct values
- `non_striker` has high cardinality: 530 distinct values

**In other words, a few batsmen never batted at the non-striker's end. Let's find out who.**

In [40]:
# Batsmen who faced at least one ball but never appear in the non_striker
# column, with the number of deliveries each of them faced.
print('Batsman')
never_non_striker = ~ball2ball['batsman'].isin(ball2ball['non_striker'])
ball2ball.loc[never_non_striker, ['batsman']].value_counts()

Batsman


batsman       
Arshdeep Singh    3
KK Ahmed          3
Shivam Sharma     3
DR Sams           2
IC Pandey         2
VS Yeligati       2
C Nanda           1
JL Denly          1
S Lamichhane      1
U Kaul            1
V Pratap Singh    1
dtype: int64