In [1]:
# Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
from pyspark.sql import SparkSession
from pyspark.sql.types import *
%matplotlib inline

In [2]:
# Spark session shared by every Spark SQL cell below ("local" master, app name 'Ops')
spark = (
    SparkSession.builder
    .master("local")
    .appName('Ops')
    .getOrCreate()
)

In [3]:
# Project root that contains the `dataset/` folder.
# Set the IPL_MSDR_PATH environment variable to run the notebook from another
# location; the previous hard-coded location stays as the default so existing
# setups keep working unchanged.
import os

path = os.environ.get("IPL_MSDR_PATH", "E:/Rutgers/Projects/MDSR/IPL-MSDR")

In [4]:
# Load the two raw IPL CSV files (match-level and ball-by-ball) into pandas
matches, deliveries = (
    pd.read_csv(path + csv_suffix)
    for csv_suffix in (
        '/dataset/original_ipldata/matches.csv',
        '/dataset/original_ipldata/deliveries.csv',
    )
)

In [5]:
# Schema of original data (matches.csv): column names, non-null counts and dtypes
# as loaded by pandas, before any cleaning.
matches.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 756 entries, 0 to 755
Data columns (total 18 columns):
id                 756 non-null int64
season             756 non-null int64
city               749 non-null object
date               756 non-null object
team1              756 non-null object
team2              756 non-null object
toss_winner        756 non-null object
toss_decision      756 non-null object
result             756 non-null object
dl_applied         756 non-null int64
winner             752 non-null object
win_by_runs        756 non-null int64
win_by_wickets     756 non-null int64
player_of_match    752 non-null object
venue              756 non-null object
umpire1            754 non-null object
umpire2            754 non-null object
umpire3            119 non-null object
dtypes: int64(5), object(13)
memory usage: 68.0+ KB


In [6]:
# Schema of original data (deliveries.csv): column names, non-null counts and dtypes
# as loaded by pandas, before any cleaning.
deliveries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179078 entries, 0 to 179077
Data columns (total 21 columns):
match_id            179078 non-null int64
inning              179078 non-null int64
batting_team        179078 non-null object
bowling_team        179078 non-null object
over                179078 non-null int64
ball                179078 non-null int64
batsman             179078 non-null object
non_striker         179078 non-null object
bowler              179078 non-null object
is_super_over       179078 non-null int64
wide_runs           179078 non-null int64
bye_runs            179078 non-null int64
legbye_runs         179078 non-null int64
noball_runs         179078 non-null int64
penalty_runs        179078 non-null int64
batsman_runs        179078 non-null int64
extra_runs          179078 non-null int64
total_runs          179078 non-null int64
player_dismissed    8834 non-null object
dismissal_kind      8834 non-null object
fielder             6448 non-null object
dtype

# Data Cleaning

In [7]:
# Discard the columns this notebook treats as of no use (umpires and match date)
matches = matches.drop(['umpire1', 'umpire2', 'umpire3', 'date'], axis='columns')

In [8]:
# Filling empty values: 0 for every gap in deliveries, the string 'None' for every gap in matches
deliveries = deliveries.fillna(0)
matches = matches.fillna('None')

In [9]:
# Schema of cleaned data (matches.csv): re-inspect columns and non-null counts
# after the drop / fillna steps above.
matches.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 756 entries, 0 to 755
Data columns (total 14 columns):
id                 756 non-null int64
season             756 non-null int64
city               756 non-null object
team1              756 non-null object
team2              756 non-null object
toss_winner        756 non-null object
toss_decision      756 non-null object
result             756 non-null object
dl_applied         756 non-null int64
winner             756 non-null object
win_by_runs        756 non-null int64
win_by_wickets     756 non-null int64
player_of_match    756 non-null object
venue              756 non-null object
dtypes: int64(5), object(9)
memory usage: 56.1+ KB


In [10]:
# Schema of cleaned data (deliveries.csv): re-inspect columns and non-null counts
# after the fillna step above.
deliveries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179078 entries, 0 to 179077
Data columns (total 21 columns):
match_id            179078 non-null int64
inning              179078 non-null int64
batting_team        179078 non-null object
bowling_team        179078 non-null object
over                179078 non-null int64
ball                179078 non-null int64
batsman             179078 non-null object
non_striker         179078 non-null object
bowler              179078 non-null object
is_super_over       179078 non-null int64
wide_runs           179078 non-null int64
bye_runs            179078 non-null int64
legbye_runs         179078 non-null int64
noball_runs         179078 non-null int64
penalty_runs        179078 non-null int64
batsman_runs        179078 non-null int64
extra_runs          179078 non-null int64
total_runs          179078 non-null int64
player_dismissed    179078 non-null object
dismissal_kind      179078 non-null object
fielder             179078 non-null object

In [11]:
# Saving cleaned data (matches.csv)
# index=False: the RangeIndex carries no information, and writing it would add an
# unnamed leading column that shows up as an extra column when the file is re-read.
matches.to_csv(path + '/dataset/clean_data/matches.csv', index=False)

In [12]:
# Saving cleaned data (deliveries.csv)
# index=False: the RangeIndex carries no information, and writing it would add an
# unnamed leading column that shows up as an extra column when the file is re-read.
deliveries.to_csv(path + '/dataset/clean_data/deliveries.csv', index=False)

# Basic Analysis

In [None]:
# Teams playing in the league.
# A team can be listed as either team1 or team2, so both columns are combined;
# team1 values come first so the original print order is preserved.
teams = pd.concat([matches['team1'], matches['team2']]).unique()
print("Total number of teams participated so far: " + str(len(teams)))
print("Teams participated so far: ")
for i in teams:
    print("- " + i)

In [None]:
# Distinct venues: print the count first, then one bullet per venue
venue_names = matches['venue'].unique()
print("Number of venues matches were played: " + str(venue_names.size))
for venue_name in venue_names:
    print("- " + venue_name)

In [None]:
# Cities the matches were played in.
# Missing cities were replaced with the placeholder string 'None' during cleaning;
# that placeholder is not a real city, so it is excluded from the count and the list
# (dropna() also covers the case where this cell is run on the uncleaned frame).
cities = matches.loc[matches['city'] != 'None', 'city'].dropna().unique()
print("Number of cities matches were played: " + str(len(cities)))
for i in cities:
    print("- " + i)

In [None]:
# Count of distinct names appearing in the `bowler` column
print("Total number of bowlers: " + str(deliveries['bowler'].unique().size))

In [None]:
# Count of distinct names appearing in the `batsman` column
print("Total number of batsmen: " + str(deliveries['batsman'].unique().size))

In [None]:
# Total number of participating players: anyone who appears as bowler, striker
# or non-striker on at least one delivery.
# Each column is de-duplicated with pandas first instead of indexing the frame
# row by row in Python, which is far slower on a ball-by-ball table and silently
# relies on the index being 0..n-1.
players = set()
for role in ('bowler', 'batsman', 'non_striker'):
    players.update(deliveries[role].unique())
print("Total number of player: " + str(len(players)))

# Spark Analysis

In [None]:
# Re-load the cleaned CSVs as Spark DataFrames (header row used for column names,
# column types inferred). From here on `matches` / `deliveries` are Spark objects,
# no longer the pandas frames used in the cells above.
matches, deliveries = (
    spark.read.csv(path + clean_csv, header=True, inferSchema=True)
    for clean_csv in (
        '/dataset/clean_data/matches.csv',
        '/dataset/clean_data/deliveries.csv',
    )
)

### Total number of matches per season

In [None]:
# Matches per season.
# createOrReplaceTempView replaces the deprecated registerTempTable; the view is
# still called 'seasons', a name other queries in this notebook also read from.
# GROUP BY already yields one row per season, so the former DISTINCT was redundant.
matches.createOrReplaceTempView('seasons')
seasons = spark.sql('''
    select season, count(*) as total_matches
    from seasons
    group by season
''')
seasons.show()

In [None]:
# Bar chart of matches per season (Spark result collected to pandas for seaborn)
fig, a = plt.subplots()
a = sns.barplot(
    data=seasons.toPandas(),
    x="season",
    y="total_matches",
    palette='viridis',
    ax=a,
)
a.set_ylabel('Total Matches')
a.set_xlabel('Season')
a.set_title('Number of matches in each season')

### Number of matches played by each team since season 1

In [None]:
# Matches played per team: every match contributes one row for team1 and one for
# team2 (UNION ALL keeps both), then rows are counted per team.
# createOrReplaceTempView replaces the deprecated registerTempTable; the DISTINCT
# that accompanied GROUP BY was redundant and is dropped.
matches.createOrReplaceTempView('team')
team = spark.sql('''
    select team, count(*) as total_matches
    from (select team1 as team from team
          union all
          select team2 as team from team) as appearances
    group by team
''')
team.show()

In [None]:
# Horizontal bar chart: total matches played, one bar per team
fig, a = plt.subplots(figsize=(5, 5))
a = sns.barplot(
    data=team.toPandas(),
    x="total_matches",
    y="team",
    palette='viridis',
    ax=a,
)
a.set_xlabel('Total Matches')
a.set_ylabel('Team')
a.set_title('Number of matches played by each team')

### Total seasons in which teams have played

In [None]:
# First / last season and number of distinct seasons per team.
# A team takes part in a season whether it is listed as team1 or team2, so both
# columns are unioned before aggregating (previously only team1 rows were used,
# which ignores every appearance as team2).
# createOrReplaceTempView replaces the deprecated registerTempTable.
matches.createOrReplaceTempView('team_season')
team_season = spark.sql('''
    select team,
           min(season) as first_season,
           max(season) as last_season,
           count(distinct season) as total_seasons
    from (select team1 as team, season from team_season
          union all
          select team2 as team, season from team_season) as appearances
    group by team
    order by total_seasons desc
''')
team_season.show()

### Total number of matches won by teams

In [None]:
# Wins per team, ascending by win count. Matches whose winner is the 'None'
# placeholder (no recorded winner) are left out.
# createOrReplaceTempView replaces the deprecated registerTempTable; the DISTINCT
# that accompanied GROUP BY was redundant and is dropped.
matches.createOrReplaceTempView('most_win')
most_win = spark.sql('''
    select winner as team, count(*) as total_matches
    from most_win
    where winner <> 'None'
    group by winner
    order by total_matches
''')
most_win.show()

In [None]:
# Horizontal bar chart: matches won, one bar per team
fig, a = plt.subplots(figsize=(5, 5))
a = sns.barplot(
    data=most_win.toPandas(),
    x="total_matches",
    y="team",
    palette='viridis',
    ax=a,
)
a.set_xlabel('Total Matches')
a.set_ylabel('Team')
a.set_title('Number of matches won by each team')

### Total matches won by teams in each season

In [None]:
# Wins per (season, team), highest win count first; matches with the 'None'
# placeholder as winner are left out.
# createOrReplaceTempView replaces the deprecated registerTempTable.
matches.createOrReplaceTempView('most_win_by_season')
most_win_by_season = spark.sql('''
    select season, winner as team, count(*) as total_matches_won
    from most_win_by_season
    where winner <> 'None'
    group by season, winner
    order by total_matches_won desc
''')
most_win_by_season.show()

### Players with maximum man of the match awards 

In [None]:
# Top 10 players by number of player-of-the-match awards.
# Matches without an award carry the 'None' placeholder written during cleaning;
# they are filtered out so the placeholder can never be ranked as a player
# (the winner queries in this notebook apply the same kind of filter).
# createOrReplaceTempView replaces the deprecated registerTempTable; the DISTINCT
# that accompanied GROUP BY was redundant and is dropped.
matches.createOrReplaceTempView('man_match')
man_match = spark.sql('''
    select player_of_match, count(*) as total_matches
    from man_match
    where player_of_match <> 'None'
    group by player_of_match
    order by total_matches desc
    limit 10
''')
man_match.show()

In [None]:
# Horizontal bar chart: award count for each of the top players
fig, a = plt.subplots(figsize=(5, 5))
a = sns.barplot(
    data=man_match.toPandas(),
    x="total_matches",
    y="player_of_match",
    palette='viridis',
    ax=a,
)
a.set_ylabel('Player')
a.set_xlabel('Total Matches')
a.set_title('Number of times player won man of the match')

### Number of matches per Venue

In [None]:
# Matches hosted per venue.
# createOrReplaceTempView replaces the deprecated registerTempTable; the DISTINCT
# that accompanied GROUP BY was redundant and is dropped.
matches.createOrReplaceTempView('venue')
venue = spark.sql('''
    select venue, count(*) as total_matches
    from venue
    group by venue
''')
venue.show()

In [None]:
# Tall horizontal bar chart (10x20) so every venue label gets its own row
fig, a = plt.subplots(figsize=(10, 20))
a = sns.barplot(
    data=venue.toPandas(),
    x="total_matches",
    y="venue",
    palette='viridis',
    ax=a,
)
a.set_xlabel('Total Matches')
a.set_ylabel('Venue')
a.set_title('Number of matches at each venue')

### Percentage toss decisions 

In [None]:
# Share (in percent) of each toss decision across all matches: per-decision count
# divided by the overall row count from a scalar subquery.
# createOrReplaceTempView replaces the deprecated registerTempTable; the DISTINCT
# that accompanied GROUP BY was redundant and is dropped.
matches.createOrReplaceTempView('toss')
toss = spark.sql('''
    select toss_decision,
           ((count(toss_decision) * 100) / (select count(*) from toss)) as percentage_count
    from toss
    group by toss_decision
''')
toss.show()

In [None]:
# Bar chart: percentage of matches for each toss decision
fig, a = plt.subplots(figsize=(5, 5))
a = sns.barplot(
    data=toss.toPandas(),
    x="toss_decision",
    y="percentage_count",
    palette='viridis',
    ax=a,
)
a.set_xlabel('Toss Decision')
a.set_ylabel('Percentage')
a.set_title('Percentage Plot of toss_decision')

### Percentage of team winning the toss as well as the match

In [None]:
# Per season: total matches, matches where the toss winner also won the match,
# and that share as a percentage.
# The query now reads only the 'toss_and_won' view registered in this cell, so it
# no longer depends on a 'seasons' view created by a different cell, and a single
# conditional aggregation replaces the self-join. A season with no qualifying match
# reports 0 instead of NULL.
# createOrReplaceTempView replaces the deprecated registerTempTable.
matches.createOrReplaceTempView('toss_and_won')
# Kept for backward compatibility: this view name was registered before as well,
# although the query below does not read from it.
matches.createOrReplaceTempView('toss_won_data')
toss_won_data = spark.sql('''
    select season,
           count(*) as total_matches,
           sum(case when toss_winner = winner then 1 else 0 end) as count_toss_and_won,
           (sum(case when toss_winner = winner then 1 else 0 end) / count(*) * 100) as percent_toss_and_won
    from toss_and_won
    group by season
    order by season
''')
toss_won_data.show()

In [None]:
# Bar chart: per-season percentage of matches won by the toss winner
fig, a = plt.subplots(figsize=(10, 5))
a = sns.barplot(
    data=toss_won_data.toPandas(),
    x="season",
    y="percent_toss_and_won",
    palette='viridis',
    ax=a,
)
a.set_xlabel('Season')
a.set_ylabel('Percentage')
a.set_title('Percentage Plot of Season and Toss_and_won')

### Percentage matches won by batting first 

In [None]:
# Per season: share of matches decided by a runs margin (win_by_runs > 0),
# i.e. won by the side batting first.
# The 'seasons' view is (re)registered here so the cell does not depend on another
# cell having been run first; registering it again is harmless.
# One conditional aggregation replaces the self-join, and a season with no such
# win reports 0 instead of NULL.
matches.createOrReplaceTempView('seasons')
win_batting_first = spark.sql('''
    select season,
           count(*) as total_matches,
           sum(case when win_by_runs > 0 then 1 else 0 end) as win_batting_first,
           (sum(case when win_by_runs > 0 then 1 else 0 end) / count(*) * 100) as percent_win_batting_first
    from seasons
    group by season
    order by season
''')
win_batting_first.show()

In [None]:
# Bar chart: per-season percentage of matches won by a runs margin
fig, a = plt.subplots(figsize=(10, 5))
a = sns.barplot(
    data=win_batting_first.toPandas(),
    x="season",
    y="percent_win_batting_first",
    palette='viridis',
    ax=a,
)
a.set_xlabel('Season')
a.set_ylabel('Percentage')
a.set_title('Percentage Plot of Season and won by batting')

### Percentage matches won by fielding first

In [None]:
# Per season: share of matches decided by a wickets margin (win_by_wickets > 0),
# i.e. won by the side fielding first.
# The 'seasons' view is (re)registered here so the cell does not depend on another
# cell having been run first; registering it again is harmless.
# One conditional aggregation replaces the self-join, and a season with no such
# win reports 0 instead of NULL.
matches.createOrReplaceTempView('seasons')
win_bowling_first = spark.sql('''
    select season,
           count(*) as total_matches,
           sum(case when win_by_wickets > 0 then 1 else 0 end) as win_bowling_first,
           (sum(case when win_by_wickets > 0 then 1 else 0 end) / count(*) * 100) as percent_win_bowling_first
    from seasons
    group by season
    order by season
''')
win_bowling_first.show()

In [None]:
# Bar chart: per-season percentage of matches won by a wickets margin
fig, a = plt.subplots(figsize=(10, 5))
a = sns.barplot(
    data=win_bowling_first.toPandas(),
    x="season",
    y="percent_win_bowling_first",
    palette='viridis',
    ax=a,
)
a.set_xlabel('Season')
a.set_ylabel('Percentage')
a.set_title('Percentage Plot of Season and won by wickets ')

# Final Analysis

In [13]:
# Reading data: cleaned CSVs as Spark DataFrames, header row as column names,
# column types inferred from the file contents
clean_reader = spark.read
deliveries = clean_reader.csv(
    path + '/dataset/clean_data/deliveries.csv',
    header=True,
    inferSchema=True,
)
matches = clean_reader.csv(
    path + '/dataset/clean_data/matches.csv',
    header=True,
    inferSchema=True,
)

In [14]:
# Creating temporary views of the data so they can be queried with Spark SQL.
# createOrReplaceTempView replaces the deprecated registerTempTable and makes
# re-running this cell safe (an existing view of the same name is replaced).
matches.createOrReplaceTempView('matches_db')
deliveries.createOrReplaceTempView('deliveries_db')

In [15]:
# Merging both tables: one row per delivery, carrying the columns of its match
# (inner join on matches.id = deliveries.match_id). The result is exposed to
# Spark SQL as 'analysis_db', the base view for all player metrics below.
# createOrReplaceTempView replaces the deprecated registerTempTable.
merged_db = spark.sql('''
    select m.*, d.*
    from matches_db as m
    inner join deliveries_db as d
        on m.id = d.match_id
''')
merged_db.createOrReplaceTempView('analysis_db')

## Batting Metrics 

In [18]:
# nm: no. of matches
# hha: hard hitting ability
# f: finisher
# fsa: fast scoring ability

In [19]:
# Calculating number of matches: distinct match ids in which each player appears
# as the striker (`batsman` column) on at least one delivery.
# Exposed as 'no_of_matches_table' for the ranking queries.
# createOrReplaceTempView replaces the deprecated registerTempTable.
nm = spark.sql('''
    select batsman, count(distinct(match_id)) as no_of_matches
    from analysis_db
    group by batsman
''')
nm.createOrReplaceTempView('no_of_matches_table')

### Hard Hitting Ability 

In [None]:
# Hard Hitting Ability = (4*Fours + 6*Sixes)/Balls Played by Batsman
#
# Fours and sixes are summed in a single pass with a conditional aggregate.
# The earlier version inner-joined a "fours" table with a "sixes" table, so a
# batsman who had hit only fours (or only sixes) was dropped by the join and
# ended up with 0 instead of their real ratio.
# Every batsman in no_of_matches_table is still returned; nvl keeps the 0 default
# for anyone without a computed value.
# NOTE(review): "balls played" is count(*) over every delivery row for the
# batsman — confirm that including all rows (e.g. wides) is intended.
# createOrReplaceTempView replaces the deprecated registerTempTable.
hha = spark.sql('''
    select nmt.batsman as Batsman,
           nvl(agg.hard_hitting_ability, 0) as Hard_Hitting_Ability
    from no_of_matches_table nmt
    left join (
        select batsman,
               sum(case when batsman_runs in (4, 6) then batsman_runs else 0 end)
                   / count(*) as hard_hitting_ability
        from analysis_db
        group by batsman
    ) agg
        on agg.batsman = nmt.batsman
''')

hha.createOrReplaceTempView('hard_hitting_ability_table')
hha.show(10)

In [None]:
# Number of rows (one per batsman) in the hard-hitting-ability view, printed as a
# one-row table.
count = spark.sql('select count(*) from hard_hitting_ability_table')
count.show()

In [None]:
# Rank batsmen by Hard_Hitting_Ability (highest first; ties share a rank),
# restricted to players with more than 9 matches.
# createOrReplaceTempView replaces the deprecated registerTempTable.
hha = spark.sql('''
    select rank() over (order by Hard_Hitting_Ability desc) as Rank, t1.*
    from hard_hitting_ability_table t1
    inner join no_of_matches_table t2
        on t1.batsman = t2.batsman
    where no_of_matches > 9
''')
hha.createOrReplaceTempView('hard_hitting_ability_rank')
hha.show(10)
# Row count of the ranked view; built here but not displayed in this cell.
count = spark.sql('select count(*) from hard_hitting_ability_rank')

In [None]:
# Convert rank to points: (240 - rank) / 240, so a better rank gives more points.
# NOTE(review): 240 is hard-coded — presumably the number of ranked players;
# confirm against the row count of hard_hitting_ability_rank.
# createOrReplaceTempView replaces the deprecated registerTempTable.
hha = spark.sql('''
    select t1.*, (240 - rank) / 240 as Points
    from hard_hitting_ability_rank t1
''')
hha.createOrReplaceTempView('hard_hitting_ability_points')
hha.show(10)

In [None]:
# Scale the points by this metric's weighting factor (1.25).
# createOrReplaceTempView replaces the deprecated registerTempTable.
hha = spark.sql('''
    select t1.*, Points * 1.25 as Weight
    from hard_hitting_ability_points t1
''')
hha.createOrReplaceTempView('hard_hitting_ability_weights')
hha.show(10)

### Finisher 

In [20]:
# Finisher = Not Out innings/Total Innings played
#
# matches_played : distinct matches in which the player faced a delivery
# times_out      : deliveries on which the player was dismissed while on strike
#
# The dismissal counts are LEFT-joined, with a missing count treated as 0.
# The earlier inner join removed every batsman who had never been dismissed,
# although those players have the highest possible ratio (1.0).
# The third sub-query that recomputed matches_played under another name was
# identical to the first one and has been folded into it.
# NOTE(review): dismissals are matched with player_dismissed = batsman, so a
# dismissal of the non-striker is not counted here — confirm that is intended.
# createOrReplaceTempView replaces the deprecated registerTempTable.
f = spark.sql('''
    select played.batsman as Batsman,
           (played.matches_played - nvl(outs.number_of_times_out, 0))
               / played.matches_played as Finisher
    from (
        select batsman, count(distinct(match_id)) as matches_played
        from analysis_db
        group by batsman
    ) played
    left join (
        select batsman, count(*) as number_of_times_out
        from analysis_db
        where player_dismissed = batsman
        group by batsman
    ) outs
        on played.batsman = outs.batsman
''')
f.createOrReplaceTempView('finisher_table')
f.show(10)

+--------------+-------------------+
|       Batsman|           Finisher|
+--------------+-------------------+
| Kuldeep Yadav|                0.5|
|    S Anirudha| 0.3333333333333333|
|    TM Dilshan|                0.2|
|       J Botha| 0.2857142857142857|
|    KA Pollard| 0.2727272727272727|
| LA Carseldine|                0.4|
|M Muralitharan| 0.3333333333333333|
|     CA Ingram| 0.3333333333333333|
|       A Hales|                0.0|
|      DR Smith|0.09090909090909091|
+--------------+-------------------+
only showing top 10 rows



In [21]:
# Rank batsmen by Finisher ratio (highest first; ties share a rank),
# restricted to players with more than 9 matches.
# createOrReplaceTempView replaces the deprecated registerTempTable.
f = spark.sql('''
    select rank() over (order by finisher desc) as Rank, t1.*
    from finisher_table t1
    inner join no_of_matches_table t2
        on t1.batsman = t2.batsman
    where no_of_matches > 9
''')
f.createOrReplaceTempView('finisher_rank')
f.show(10)
# Row count of the ranked view; built here but not displayed in this cell.
count = spark.sql('select count(*) from finisher_rank')

+----+--------------+------------------+
|Rank|       Batsman|          Finisher|
+----+--------------+------------------+
|   1| Iqbal Abdulla|0.9230769230769231|
|   2|      A Kumble|0.8666666666666667|
|   3|Sandeep Sharma|0.7857142857142857|
|   4|   S Sreesanth|              0.75|
|   5|     S Aravind|               0.7|
|   5|     JJ Bumrah|               0.7|
|   5|      VR Aaron|               0.7|
|   8|     YS Chahal|0.6666666666666666|
|   8|      I Sharma|0.6666666666666666|
|  10|  Bipul Sharma|0.6470588235294118|
+----+--------------+------------------+
only showing top 10 rows



In [22]:
# Convert rank to points: (240 - rank) / 240, so a better rank gives more points.
# NOTE(review): 240 is hard-coded — presumably the number of ranked players;
# confirm against the row count of finisher_rank.
# createOrReplaceTempView replaces the deprecated registerTempTable.
f = spark.sql('''
    select t1.*, (240 - rank) / 240 as Points
    from finisher_rank t1
''')
f.createOrReplaceTempView('finisher_points')
f.show(10)

+----+-----------------+------------------+------------------+
|Rank|          Batsman|          Finisher|            Points|
+----+-----------------+------------------+------------------+
|   1|    Iqbal Abdulla|0.9230769230769231|0.9958333333333333|
|   2|         A Kumble|0.8666666666666667|0.9916666666666667|
|   3|   Sandeep Sharma|0.7857142857142857|            0.9875|
|   4|      S Sreesanth|              0.75|0.9833333333333333|
|   5|        S Aravind|               0.7|0.9791666666666666|
|   5|        JJ Bumrah|               0.7|0.9791666666666666|
|   5|         VR Aaron|               0.7|0.9791666666666666|
|   8|        YS Chahal|0.6666666666666666|0.9666666666666667|
|   8|         I Sharma|0.6666666666666666|0.9666666666666667|
|  10|     Bipul Sharma|0.6470588235294118|0.9583333333333334|
|  10|          A Nehra|0.6470588235294118|0.9583333333333334|
|  12|         UT Yadav|0.6363636363636364|              0.95|
|  13|Washington Sundar|0.5833333333333334|0.9458333333

In [23]:
# Scale the points by this metric's weighting factor (1.25).
# createOrReplaceTempView replaces the deprecated registerTempTable.
f = spark.sql('''
    select *, Points * 1.25 as Weight
    from finisher_points
''')
f.createOrReplaceTempView('finisher_weights')
f.show(10)

+----+-----------------+------------------+------------------+------------------+
|Rank|          Batsman|          Finisher|            Points|            Weight|
+----+-----------------+------------------+------------------+------------------+
|   1|    Iqbal Abdulla|0.9230769230769231|0.9958333333333333|1.2447916666666667|
|   2|         A Kumble|0.8666666666666667|0.9916666666666667|1.2395833333333335|
|   3|   Sandeep Sharma|0.7857142857142857|            0.9875|          1.234375|
|   4|      S Sreesanth|              0.75|0.9833333333333333|1.2291666666666665|
|   5|        S Aravind|               0.7|0.9791666666666666|1.2239583333333333|
|   5|        JJ Bumrah|               0.7|0.9791666666666666|1.2239583333333333|
|   5|         VR Aaron|               0.7|0.9791666666666666|1.2239583333333333|
|   8|        YS Chahal|0.6666666666666666|0.9666666666666667|1.2083333333333333|
|   8|         I Sharma|0.6666666666666666|0.9666666666666667|1.2083333333333333|
|  10|     Bipul

### Fast Scoring Ability

In [27]:
# Fast Scoring Ability = Total Runs/Balls Played by Batsman
# Runs off the bat are summed and divided by the number of delivery rows for the
# batsman, in one grouped query (the wrapping sub-select added nothing).
# NOTE(review): count(*) counts every delivery row for the batsman — confirm that
# including all rows (e.g. wides) matches the intended "balls played".
# createOrReplaceTempView replaces the deprecated registerTempTable.
fsa = spark.sql('''
    select batsman as Batsman,
           sum(batsman_runs) / count(*) as Fast_Scoring_Ability
    from analysis_db
    group by batsman
''')
fsa.createOrReplaceTempView('fast_scoring_ability_table')
fsa.show(10)

+--------------+--------------------+
|       Batsman|Fast_Scoring_Ability|
+--------------+--------------------+
| Kuldeep Yadav|  0.8363636363636363|
|    S Anirudha|  1.1239669421487604|
|    TM Dilshan|  1.1012416427889207|
|    KA Pollard|  1.4175152749490836|
|M Muralitharan|  0.6666666666666666|
| LA Carseldine|  1.1408450704225352|
|       J Botha|  1.1236263736263736|
|     CA Ingram|  0.8076923076923077|
|      DR Smith|  1.3227953410981697|
|Jaskaran Singh|  0.7272727272727273|
+--------------+--------------------+
only showing top 10 rows



In [32]:
# Rank batsmen by Fast_Scoring_Ability (highest first; ties share a rank),
# restricted to players with more than 9 matches.
# createOrReplaceTempView replaces the deprecated registerTempTable.
fsa = spark.sql('''
    select rank() over (order by fast_scoring_ability desc) as Rank, t1.*
    from fast_scoring_ability_table t1
    inner join no_of_matches_table t2
        on t1.batsman = t2.batsman
    where no_of_matches > 9
''')
fsa.createOrReplaceTempView('fast_scoring_ability_rank')
fsa.show(10)
# Row count of the ranked view; built here but not displayed in this cell.
count = spark.sql('select count(*) from fast_scoring_ability_rank')

+----+-------------+--------------------+
|Rank|      Batsman|Fast_Scoring_Ability|
+----+-------------+--------------------+
|   1|   AD Russell|  1.7995018679950188|
|   2|    K Gowtham|  1.7209302325581395|
|   3|        M Ali|  1.6994535519125684|
|   4|    SP Narine|  1.6694386694386694|
|   5|    KK Cooper|  1.6571428571428573|
|   6|  BCJ Cutting|   1.643835616438356|
|   7|  Rashid Khan|   1.626865671641791|
|   8|      RR Pant|  1.6231884057971016|
|   9|   J Bairstow|  1.5972696245733788|
|  10|CR Brathwaite|  1.5666666666666667|
+----+-------------+--------------------+
only showing top 10 rows



In [33]:
# Convert rank to points: (240 - rank) / 240, so a better rank gives more points.
# NOTE(review): 240 is hard-coded — presumably the number of ranked players;
# confirm against the row count of fast_scoring_ability_rank.
# createOrReplaceTempView replaces the deprecated registerTempTable.
fsa = spark.sql('''
    select t1.*, (240 - rank) / 240 as Points
    from fast_scoring_ability_rank t1
''')
fsa.createOrReplaceTempView('fast_scoring_ability_points')
fsa.show(10)

+----+--------------+--------------------+------------------+
|Rank|       Batsman|Fast_Scoring_Ability|            Points|
+----+--------------+--------------------+------------------+
|   1|    AD Russell|  1.7995018679950188|0.9958333333333333|
|   2|     K Gowtham|  1.7209302325581395|0.9916666666666667|
|   3|         M Ali|  1.6994535519125684|            0.9875|
|   4|     SP Narine|  1.6694386694386694|0.9833333333333333|
|   5|     KK Cooper|  1.6571428571428573|0.9791666666666666|
|   6|   BCJ Cutting|   1.643835616438356|             0.975|
|   7|   Rashid Khan|   1.626865671641791|0.9708333333333333|
|   8|       RR Pant|  1.6231884057971016|0.9666666666666667|
|   9|    J Bairstow|  1.5972696245733788|            0.9625|
|  10| CR Brathwaite|  1.5666666666666667|0.9583333333333334|
|  11|    GJ Maxwell|  1.5554323725055432|0.9541666666666667|
|  12| Mohammad Nabi|  1.5368421052631578|              0.95|
|  13|     CH Morris|  1.5339233038348083|0.9458333333333333|
|  14|  

In [35]:
# Scale the points by this metric's weighting factor (1.25).
# createOrReplaceTempView replaces the deprecated registerTempTable.
fsa = spark.sql('''
    select t1.*, Points * 1.25 as Weight
    from fast_scoring_ability_points t1
''')
fsa.createOrReplaceTempView('fast_scoring_ability_weights')
fsa.show(10)

+----+--------------+--------------------+------------------+------------------+
|Rank|       Batsman|Fast_Scoring_Ability|            Points|            Weight|
+----+--------------+--------------------+------------------+------------------+
|   1|    AD Russell|  1.7995018679950188|0.9958333333333333|1.2447916666666667|
|   2|     K Gowtham|  1.7209302325581395|0.9916666666666667|1.2395833333333335|
|   3|         M Ali|  1.6994535519125684|            0.9875|          1.234375|
|   4|     SP Narine|  1.6694386694386694|0.9833333333333333|1.2291666666666665|
|   5|     KK Cooper|  1.6571428571428573|0.9791666666666666|1.2239583333333333|
|   6|   BCJ Cutting|   1.643835616438356|             0.975|           1.21875|
|   7|   Rashid Khan|   1.626865671641791|0.9708333333333333|1.2135416666666667|
|   8|       RR Pant|  1.6231884057971016|0.9666666666666667|1.2083333333333333|
|   9|    J Bairstow|  1.5972696245733788|            0.9625|          1.203125|
|  10| CR Brathwaite|  1.566

### Consistency

In [None]:
# Consistency = Total Runs/Number of Times Out


###  Running Between Wickets

In [None]:
# Running Between Wickets = (Total Runs – (4*Fours + 6*Sixes))/(Total Balls Played – Boundary Balls)


## Bowling Metrics 

### Economy 

In [None]:
# Economy = Runs Scored / (Number of balls bowled by bowler/6)


### Wicket Taking Ability

In [None]:
# Wicket Taking Ability = Number of balls bowled / Wickets Taken


### Consistency 

In [None]:
# Consistency = Runs Conceded / Wickets Taken


### Crucial Wicket Taking Ability

In [None]:
# Crucial Wicket Taking Ability = Number of times Four or Five Wickets Taken / Number of Innings Played


### Short Performance Index

In [None]:
# Short Performance Index = (Wickets Taken – 4* Number of Times Four Wickets Taken – 5* Number of Times Five Wickets Taken) / (Innings Played – Number of Times Four Wickets or Five Wickets Taken)
