<a href="https://colab.research.google.com/github/Venkat3103/2022Fall_projects/blob/main/Copy_of_Hypothesis_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hypothesis 3: Consistency in Team selection improves chances of winning the match


Please refer to the .py file for this hypothesis for the code quality review.

Author : Burzin Wadia
---
Net ID : bwadia2

Import Libraries

In [None]:
from typing import Tuple
import pandas as pd
import plotly.graph_objects as go
from pandas import DataFrame
from plotly.subplots import make_subplots

Importing csv files and defining parameters

In [None]:
# Dataframe 1: one row per team per match, listing the eleven players who played for that team in the match.
team_df = pd.read_csv(
    "https://raw.githubusercontent.com/Venkat3103/2022Fall_projects/main/teamsheet.csv"
)


# Dataframe 2: one row per match with the fixture, toss decision and match winner, tagged with the season year.
toss_df = pd.read_csv(
    "https://raw.githubusercontent.com/Venkat3103/2022Fall_projects/main/ipl_match_info.csv",
    usecols=['Team1', 'Team2', 'date', 'venue', 'toss_winner', 'toss_decision', 'match_winner', 'year'],
)

# IPL season (year) for which the statistics are computed.
season_to_check = 2020

# Minimum % of his team's matches a player must have played in the season to count towards team consistency.
consistency_value = 40

In [None]:
def SeasonData(df: DataFrame, season: int) -> DataFrame:
    """
    Return the team-sheet rows of one season, sorted by team and match date.

    The ``date`` column is parsed to datetimes and the ``year`` column is (re)derived from it before
    filtering. A stray ``"Unnamed: 0"`` column (left over from a CSV index) is dropped if present.
    The input DataFrame is NOT modified; all preprocessing happens on a copy.

    :param df: Team DataFrame with the columns ``Team``, ``date`` and ``player1`` .. ``player11``.
    :param season: Year to keep; rows whose match date falls in any other year are discarded.
    :return: New DataFrame holding only the rows of ``season``, sorted by ``Team`` then ``date``, with
             ``Team`` / ``playerN`` cast to ``str`` and ``year`` cast to ``int``. The original row
             labels are preserved.


    >>> d1= SeasonData(pd.DataFrame({"Unnamed: 0": [0,1,2], "Team":["Chennai Super Kings", "Sunrisers Hyderabad", "Rajasthan Royals" ] , "date": ["2018-04-07", "2019-05-17", "2018-04-01" ], "player1": ["A1","B1","C1"] ,"player2": ["A2","B2","C2"] , "player3": ["A3","B3","C3"] , "player4": ["A4","B4","C4"] , "player5": ["A5","B5","C5"] ,"player6": ["A6","B6","C6"],"player7": ["A7","B7","C7"] ,"player8": ["A8","B8","C8"] ,"player9": ["A9","B9","C9"] ,"player10": ["A10","B10","C10"] ,"player11": ["A11","B11","C11"],"year": [2018,2019,2018]}),2018)
    >>> d1["Team"].tolist()
    ['Chennai Super Kings', 'Rajasthan Royals']

    >>> d1["year"].tolist()
    [2018, 2018]

    >>> d2= SeasonData(pd.DataFrame({"Unnamed: 0": [0,1,2], "Team":["Chennai Super Kings", "Sunrisers Hyderabad", "Rajasthan Royals" ] , "date": ["2018-04-07", "2019-05-17", "2018-04-01" ], "player1": ["A1","B1","C1"] ,"player2": ["A2","B2","C2"] , "player3": ["A3","B3","C3"] , "player4": ["A4","B4","C4"] , "player5": ["A5","B5","C5"] ,"player6": ["A6","B6","C6"],"player7": ["A7","B7","C7"] ,"player8": ["A8","B8","C8"] ,"player9": ["A9","B9","C9"] ,"player10": ["A10","B10","C10"] ,"player11": ["A11","B11","C11"],"year": [2018,2019,2018]}),2019)
    >>> d2["Team"].tolist()
    ['Sunrisers Hyderabad']

    >>> d2[["player1","player2","player3"]]
      player1 player2 player3
    1      B1      B2      B3

    >>> d2= SeasonData(pd.DataFrame({"Unnamed: 0": [0,1,2], "Team":["Chennai Super Kings", "Sunrisers Hyderabad", "Rajasthan Royals" ] , "date": ["2018-04-07", "2019-05-17", "2018-04-01" ], "player1": ["A1","B1","C1"] ,"player2": ["A2","B2","C2"] , "player3": ["A3","B3","C3"] , "player4": ["A4","B4","C4"] , "player5": ["A5","B5","C5"] ,"player6": ["A6","B6","C6"],"player7": ["A7","B7","C7"] ,"player8": ["A8","B8","C8"] ,"player9": ["A9","B9","C9"] ,"player10": ["A10","B10","C10"] ,"player11": ["A11","B11","C11"],"year": [2018,2019,2018]}),2020)
    >>> d2["Team"].tolist()
    []
    """
    # Preprocess on a copy (assign/drop both return new frames) so the caller's DataFrame is untouched.
    parsed_dates = pd.to_datetime(df["date"])
    prepared = df.assign(date=parsed_dates, year=parsed_dates.dt.year)
    prepared = prepared.drop(columns="Unnamed: 0", errors="ignore")

    team_season = prepared[prepared["year"] == season].sort_values(by=["Team", "date"])

    # Normalise dtypes: team and the eleven player slots as strings, the season year as an integer.
    column_types = {"Team": "str", "year": "int"}
    column_types.update({f"player{slot}": "str" for slot in range(1, 12)})

    return team_season.astype(column_types)

In [None]:
# Team sheets restricted to the selected season (season_to_check).
team_season_data = SeasonData(team_df, season_to_check)
team_season_data

Unnamed: 0,Team,date,player1,player2,player3,player4,player5,player6,player7,player8,player9,player10,player11,year
441,Chennai Super Kings,2020-09-19,M Vijay,SR Watson,F du Plessis,AT Rayudu,RA Jadeja,SM Curran,MS Dhoni,KM Jadhav,PP Chawla,DL Chahar,L Ngidi,2020
285,Chennai Super Kings,2020-09-22,M Vijay,SR Watson,F du Plessis,SM Curran,RD Gaikwad,KM Jadhav,MS Dhoni,RA Jadeja,PP Chawla,DL Chahar,L Ngidi,2020
561,Chennai Super Kings,2020-09-25,M Vijay,SR Watson,F du Plessis,RD Gaikwad,KM Jadhav,MS Dhoni,RA Jadeja,SM Curran,PP Chawla,DL Chahar,JR Hazlewood,2020
1267,Chennai Super Kings,2020-10-02,F du Plessis,SR Watson,AT Rayudu,KM Jadhav,MS Dhoni,RA Jadeja,SM Curran,DJ Bravo,SN Thakur,PP Chawla,DL Chahar,2020
1705,Chennai Super Kings,2020-10-04,SR Watson,F du Plessis,AT Rayudu,KM Jadhav,MS Dhoni,RA Jadeja,SM Curran,DJ Bravo,PP Chawla,SN Thakur,DL Chahar,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580,Sunrisers Hyderabad,2020-10-27,DA Warner,WP Saha,MK Pandey,KS Williamson,V Shankar,Abdul Samad,JO Holder,Rashid Khan,Sandeep Sharma,T Natarajan,S Nadeem,2020
1895,Sunrisers Hyderabad,2020-10-31,DA Warner,WP Saha,MK Pandey,KS Williamson,Abhishek Sharma,JO Holder,Abdul Samad,Rashid Khan,S Nadeem,Sandeep Sharma,T Natarajan,2020
513,Sunrisers Hyderabad,2020-11-03,DA Warner,WP Saha,MK Pandey,KS Williamson,PK Garg,JO Holder,Abdul Samad,Rashid Khan,S Nadeem,Sandeep Sharma,T Natarajan,2020
807,Sunrisers Hyderabad,2020-11-06,DA Warner,SP Goswami,MK Pandey,KS Williamson,PK Garg,JO Holder,Abdul Samad,Rashid Khan,S Nadeem,Sandeep Sharma,T Natarajan,2020


In [None]:
def PlayerCount(season_df: DataFrame) -> DataFrame:
    """
    Count how many times each player appears in the line-ups of each team.

    The function takes the seasonal team DataFrame, stacks the eleven line-up columns
    (``player1`` .. ``player11``) into one long column and counts the occurrences of every
    (team, player) pair across all rows.

    :param season_df: Seasonal team DataFrame with the columns ``Team`` and ``player1`` .. ``player11``.
    :return: DataFrame with the columns ``Team``, ``player`` and ``count`` (one row per team/player pair),
             sorted by team then player, with a fresh 0..n-1 index.

    >>> f1 = PlayerCount(pd.DataFrame({ "Team":["Chennai Super Kings", "Sunrisers Hyderabad", "Rajasthan Royals", "Chennai Super Kings", "Chennai Super Kings", "Sunrisers Hyderabad" ] , "date": ["2018-04-07", "2019-05-17", "2018-04-01", '2018-04-03', "2018-07-01", "2018-09-12"  ], "player1": ["A1","B1","C1","A1","A1","B1"] ,"player2": ["A2","B2","C2","A2","A3","B1"] , "player3": ["A3","B3","C3","A1","A3","B2"] , "player4": ["A4","B4","C4","A4","A4","B4"] , "player5": ["A5","B5","C5","A5","A5","B4"] ,"player6": ["A6","B6","C6","A6","A7","B6"],"player7": ["A7","B7","C7","A7","A7","B7"] ,"player8": ["A8","B8","C8","A8","A8","B8"] ,"player9": ["A9","B9","C9","A9","A7","B7"] ,"player10": ["A10","B10","C10","A10","A10","B10"] ,"player11": ["A11","B11","C11","A7","A8","B7"],"year": [2018,2019,2018,2018,2018,2018]}))
    >>> f1[f1["count"] >4]
                      Team player  count
    8  Chennai Super Kings     A7      6

    >>> f1["count"].sum()
    66

    >>> f1["Team"].unique().tolist()
    ['Chennai Super Kings', 'Rajasthan Royals', 'Sunrisers Hyderabad']

    >>> f1.dtypes
    Team      object
    player    object
    count      int64
    dtype: object

    """
    player_columns = [f"player{slot}" for slot in range(1, 12)]

    # Long format: one row per (team, line-up slot) with the player's name in a single "player" column.
    appearances = season_df.melt(id_vars=["Team"], value_vars=player_columns, value_name="player")

    # Counting rows per group avoids summing over the grouping key itself, which newer pandas
    # versions reject when the key is also selected as a value column.
    count_df = appearances.groupby(["Team", "player"]).size().reset_index(name="count")

    return count_df

In [None]:
# Per-team appearance count of every player for the selected season.
player_count_df = PlayerCount(team_season_data)
player_count_df

Unnamed: 0,Team,player,count
0,Chennai Super Kings,AT Rayudu,12
1,Chennai Super Kings,DJ Bravo,6
2,Chennai Super Kings,DL Chahar,14
3,Chennai Super Kings,F du Plessis,13
4,Chennai Super Kings,Imran Tahir,3
...,...,...,...
146,Sunrisers Hyderabad,SP Goswami,2
147,Sunrisers Hyderabad,Sandeep Sharma,13
148,Sunrisers Hyderabad,T Natarajan,16
149,Sunrisers Hyderabad,V Shankar,7


In [None]:
def PlayerConsistency(df_count: DataFrame, team_name: str,
                      players_per_match: int = 11) -> Tuple[None, DataFrame]:
    """
    Compute and plot, for one team, the % of the team's matches each player was selected for.

    The input is the output of PlayerCount(). That frame only holds appearance counts, so the number
    of matches the team played is derived as ``total appearances / players_per_match`` (every team
    sheet lists ``players_per_match`` players). A player's consistency is then
    ``appearances / matches played * 100``, rounded to two decimals. A bar chart of these
    percentages is displayed with ``fig.show()``.

    :param df_count: DataFrame with the columns ``Team``, ``player`` and ``count`` (total matches the
                     player played for the team in the season).
    :param team_name: Name of the team whose player consistency is computed.
    :param players_per_match: Number of players listed per team sheet; 11 for the teamsheet data.
    :return: Tuple ``(None, plot_df)``: the first element is the return value of ``fig.show()``
             (always None), the second is the team's rows of ``df_count`` with an added
             ``percentage`` column, sorted by descending percentage.

    >>> df = pd.DataFrame({"Team":["Chennai Super Kings", "Sunrisers Hyderabad", "Rajasthan Royals", "Chennai Super Kings", "Chennai Super Kings", "Sunrisers Hyderabad" ], "player":["A1","B1","C1","A2","A3","B2"], "count":[1,2,1,3,2,1]})
    >>> x, y = PlayerConsistency(df, "Chennai Super Kings", players_per_match=2)
    >>> y["player"].tolist()
    ['A2', 'A3', 'A1']

    >>> y["percentage"].tolist()
    [100.0, 66.67, 33.33]

    >>> len(y)
    3

    >>> type(y)
    <class 'pandas.core.frame.DataFrame'>

    """

    plot_df = df_count[df_count["Team"] == team_name].copy()

    # Matches played by the team = total player appearances / players fielded per match.
    # (Dividing by the squad size instead yields values above 100% for small squads.)
    matches_played = plot_df["count"].sum() / players_per_match
    plot_df["percentage"] = (plot_df["count"] / matches_played * 100).round(2)

    plot_df.sort_values(by=['percentage'], ascending=False, inplace=True)

    x = plot_df["player"]
    y = plot_df["percentage"]

    fig = go.Figure(data=[go.Bar(
        x=x, y=y,
        text=y,
        textposition='auto',
        marker_color='rgb(55, 83, 109)'
    )])

    fig.update_layout(
        title_text=f'{team_name} : Players Selection Consistency % for each match',
        uniformtext=dict(mode="hide", minsize=10),
        xaxis_title="Team Player Name",
        yaxis_title="% of Matches Played")

    return fig.show(), plot_df

In [None]:
# Show the player-consistency chart and print the underlying table for every team of the season.
plot_list = player_count_df["Team"].unique().tolist()

for team in plot_list:
    # PlayerConsistency() displays the chart itself; the first returned element is always None.
    Pconsistency_plot, Pconsistency_df = PlayerConsistency(player_count_df, team)
    print(Pconsistency_df)

                   Team        player  count  percentage
11  Chennai Super Kings      MS Dhoni     14        70.0
2   Chennai Super Kings     DL Chahar     14        70.0
17  Chennai Super Kings     SM Curran     14        70.0
15  Chennai Super Kings     RA Jadeja     14        70.0
3   Chennai Super Kings  F du Plessis     13        65.0
0   Chennai Super Kings     AT Rayudu     12        60.0
19  Chennai Super Kings     SR Watson     11        55.0
18  Chennai Super Kings     SN Thakur      9        45.0
6   Chennai Super Kings     KM Jadhav      8        40.0
14  Chennai Super Kings     PP Chawla      7        35.0
1   Chennai Super Kings      DJ Bravo      6        30.0
16  Chennai Super Kings    RD Gaikwad      6        30.0
7   Chennai Super Kings     KV Sharma      5        25.0
13  Chennai Super Kings  N Jagadeesan      5        25.0
8   Chennai Super Kings       L Ngidi      4        20.0
9   Chennai Super Kings       M Vijay      3        15.0
5   Chennai Super Kings  JR Haz

              Team        player  count  percentage
38  Delhi Capitals       SS Iyer     17        85.0
36  Delhi Capitals      S Dhawan     17        85.0
29  Delhi Capitals      K Rabada     17        85.0
31  Delhi Capitals    MP Stoinis     17        85.0
21  Delhi Capitals      A Nortje     16        80.0
23  Delhi Capitals      AR Patel     15        75.0
34  Delhi Capitals      R Ashwin     15        75.0
35  Delhi Capitals       RR Pant     14        70.0
33  Delhi Capitals       PP Shaw     13        65.0
37  Delhi Capitals    SO Hetmyer     12        60.0
22  Delhi Capitals     AM Rahane      9        45.0
39  Delhi Capitals  TU Deshpande      5        25.0
27  Delhi Capitals      HV Patel      5        25.0
20  Delhi Capitals      A Mishra      3        15.0
32  Delhi Capitals       P Dubey      3        15.0
26  Delhi Capitals       DR Sams      3        15.0
24  Delhi Capitals      AT Carey      3        15.0
28  Delhi Capitals      I Sharma      1         5.0
25  Delhi Ca

               Team            player  count  percentage
49  Kings XI Punjab          KL Rahul     14        70.0
57  Kings XI Punjab      Ravi Bishnoi     14        70.0
55  Kings XI Punjab          N Pooran     14        70.0
53  Kings XI Punjab    Mohammed Shami     14        70.0
44  Kings XI Punjab        GJ Maxwell     13        65.0
51  Kings XI Punjab        MA Agarwal     11        55.0
50  Kings XI Punjab          M Ashwin      9        45.0
42  Kings XI Punjab         CJ Jordan      9        45.0
40  Kings XI Punjab    Arshdeep Singh      8        40.0
41  Kings XI Punjab          CH Gayle      7        35.0
52  Kings XI Punjab     Mandeep Singh      7        35.0
43  Kings XI Punjab          DJ Hooda      7        35.0
59  Kings XI Punjab       SS Cottrell      6        30.0
46  Kings XI Punjab       JDS Neesham      5        25.0
58  Kings XI Punjab           SN Khan      5        25.0
48  Kings XI Punjab           KK Nair      4        20.0
47  Kings XI Punjab         K G

                     Team             player  count  percentage
69  Kolkata Knight Riders             N Rana     14       73.68
71  Kolkata Knight Riders         PJ Cummins     14       73.68
63  Kolkata Knight Riders         EJG Morgan     14       73.68
64  Kolkata Knight Riders         KD Karthik     14       73.68
77  Kolkata Knight Riders       Shubman Gill     14       73.68
62  Kolkata Knight Riders           CV Varun     13       68.42
72  Kolkata Knight Riders        RA Tripathi     11       57.89
75  Kolkata Knight Riders          SP Narine     10       52.63
60  Kolkata Knight Riders         AD Russell     10       52.63
65  Kolkata Knight Riders       KL Nagarkoti     10       52.63
76  Kolkata Knight Riders        Shivam Mavi      8       42.11
68  Kolkata Knight Riders  M Prasidh Krishna      6       31.58
67  Kolkata Knight Riders        LH Ferguson      5       26.32
66  Kolkata Knight Riders      Kuldeep Yadav      5       26.32
78  Kolkata Knight Riders           T Ba

              Team           player  count  percentage
85  Mumbai Indians       KA Pollard     16      106.67
86  Mumbai Indians        KH Pandya     16      106.67
88  Mumbai Indians        Q de Kock     16      106.67
91  Mumbai Indians         SA Yadav     16      106.67
83  Mumbai Indians        JJ Bumrah     15      100.00
89  Mumbai Indians        RD Chahar     15      100.00
93  Mumbai Indians         TA Boult     15      100.00
80  Mumbai Indians        HH Pandya     14       93.33
81  Mumbai Indians     Ishan Kishan     14       93.33
90  Mumbai Indians        RG Sharma     12       80.00
84  Mumbai Indians     JL Pattinson     10       66.67
87  Mumbai Indians  NM Coulter-Nile      7       46.67
92  Mumbai Indians        SS Tiwary      7       46.67
82  Mumbai Indians          J Yadav      2       13.33
79  Mumbai Indians      DS Kulkarni      1        6.67


                 Team        player  count  percentage
108  Rajasthan Royals     SV Samson     14       77.78
98   Rajasthan Royals     JC Archer     14       77.78
104  Rajasthan Royals     R Tewatia     14       77.78
106  Rajasthan Royals       S Gopal     14       77.78
107  Rajasthan Royals     SPD Smith     14       77.78
99   Rajasthan Royals    JC Buttler     13       72.22
103  Rajasthan Royals       R Parag     12       66.67
105  Rajasthan Royals    RV Uthappa     12       66.67
101  Rajasthan Royals  Kartik Tyagi     10       55.56
96   Rajasthan Royals     BA Stokes      8       44.44
100  Rajasthan Royals    JD Unadkat      7       38.89
95   Rajasthan Royals    AS Rajpoot      6       33.33
109  Rajasthan Royals     TK Curran      5       27.78
110  Rajasthan Royals      VR Aaron      3       16.67
111  Rajasthan Royals   YBK Jaiswal      3       16.67
102  Rajasthan Royals     MK Lomror      3       16.67
97   Rajasthan Royals     DA Miller      1        5.56
94   Rajas

                            Team             player  count  percentage
129  Royal Challengers Bangalore          YS Chahal     15       83.33
127  Royal Challengers Bangalore            V Kohli     15       83.33
116  Royal Challengers Bangalore         D Padikkal     15       83.33
128  Royal Challengers Bangalore  Washington Sundar     15       83.33
113  Royal Challengers Bangalore     AB de Villiers     15       83.33
123  Royal Challengers Bangalore           NA Saini     13       72.22
114  Royal Challengers Bangalore           AJ Finch     12       66.67
124  Royal Challengers Bangalore             S Dube     11       61.11
119  Royal Challengers Bangalore            I Udana     10       55.56
115  Royal Challengers Bangalore          CH Morris      9       50.00
122  Royal Challengers Bangalore     Mohammed Siraj      9       50.00
118  Royal Challengers Bangalore    Gurkeerat Singh      8       44.44
120  Royal Challengers Bangalore        JR Philippe      5       27.78
112  R

                    Team           player  count  percentage
148  Sunrisers Hyderabad      T Natarajan     16       76.19
134  Sunrisers Hyderabad        DA Warner     16       76.19
139  Sunrisers Hyderabad        MK Pandey     16       76.19
143  Sunrisers Hyderabad      Rashid Khan     16       76.19
142  Sunrisers Hyderabad          PK Garg     14       66.67
147  Sunrisers Hyderabad   Sandeep Sharma     13       61.90
130  Sunrisers Hyderabad      Abdul Samad     12       57.14
138  Sunrisers Hyderabad    KS Williamson     12       57.14
135  Sunrisers Hyderabad      JM Bairstow     11       52.38
131  Sunrisers Hyderabad  Abhishek Sharma      8       38.10
145  Sunrisers Hyderabad         S Nadeem      7       33.33
149  Sunrisers Hyderabad        V Shankar      7       33.33
136  Sunrisers Hyderabad        JO Holder      7       33.33
137  Sunrisers Hyderabad         KK Ahmed      7       33.33
150  Sunrisers Hyderabad          WP Saha      4       19.05
132  Sunrisers Hyderabad

In [None]:
def TeamConsistency(player_count_df: pd.DataFrame, consistency_threshold: int,
                    players_per_match: int = 11) -> Tuple[None, DataFrame]:
    """
    Compute and plot each team's selection consistency for the season.

    For every team in the output of PlayerCount(), each player's consistency is the % of the team's
    matches he played, where matches played is derived as ``total appearances / players_per_match``.
    The team's consistency is the % of its players whose individual consistency is greater than or
    equal to ``consistency_threshold``. A bar chart of the team values is displayed with ``fig.show()``.

    The chart title reads the module-level variable ``season_to_check``.

    :param player_count_df: DataFrame with the columns ``Team``, ``player`` and ``count`` (total matches
                            the player played for the team in the season).
    :param consistency_threshold: Percentage (0-100) a player must reach to count as consistent.
    :param players_per_match: Number of players listed per team sheet; 11 for the teamsheet data.
    :return: Tuple ``(None, team_cons_df)``: the first element is the return value of ``fig.show()``
             (always None), the second is a DataFrame with the columns ``Team`` and
             ``Consistency of Players (%)``.

    """

    cons_dict = dict()

    # sort=False keeps the teams in order of first appearance in the input.
    for team, squad in player_count_df.groupby("Team", sort=False):
        # Matches played by the team = total player appearances / players fielded per match.
        matches_played = squad["count"].sum() / players_per_match
        percentage = (squad["count"] / matches_played * 100).round(2)

        consistent_players = (percentage >= consistency_threshold).sum()

        cons_dict[team] = round((consistent_players / squad["player"].count()) * 100, 2)

    team_cons_df = pd.DataFrame(list(cons_dict.items()), columns=["Team", "Consistency of Players (%)"])

    x = team_cons_df["Team"]
    y = team_cons_df["Consistency of Players (%)"]

    fig = go.Figure(data=[go.Bar(
        x=x, y=y,
        text=y,
        textposition='auto',
        marker_color='rgb(55, 83, 109)',
    )])

    fig.update_layout(
        title_text=f"Team Selection Consistency % for overall season {season_to_check}",
        uniformtext=dict(mode="hide", minsize=10),
        xaxis_title="Team Name",
        # Label the axis with the threshold actually applied, not the module-level default.
        yaxis_title=f"Overall Team's Consistency (Players Consistent >={consistency_threshold}%)"
    )

    return fig.show(), team_cons_df

In [None]:
# Bar plot of team consistency: for each team, the % of its players whose individual consistency is >= consistency_value.
consistency_plot, team_consistency = TeamConsistency(player_count_df, consistency_value)

In [None]:
def TossSeasonData(toss_df: DataFrame, season: int) -> DataFrame:
    """
    Return the match/toss rows of one season, sorted by match winner and match date.

    The ``date`` column is parsed to datetimes in the returned frame. The input DataFrame is NOT
    modified; the conversion happens on a copy.

    :param toss_df: DataFrame with information on Teams, Fixtures, Date, Toss Decision, Toss Winner and
                    Match Winner. Must contain the columns ``date``, ``year`` and ``match_winner``.
    :param season: Year to keep; rows whose ``year`` differs are discarded.
    :return: New DataFrame holding only the rows of ``season``, sorted by ``match_winner`` then ``date``.
             The original row labels are preserved.


    >>> t_df = pd.DataFrame({"Team1":["Mumbai Indians","Kolkata Knight Riders", "Kings XI Punjab","Royal Challengers Bangalore", "Royal Challengers Bangalore", "Delhi Capitals"] , "Team2":["Chennai Super Kings", "Chennai Super Kings", "Chennai Super Kings", "Chennai Super Kings", "Chennai Super Kings", "Rajasthan Royals"], "date":["2018-04-07","2018-04-10","2018-04-15", "2018-04-25", "2018-05-05", "2019-05-18"], "venue":["Wankhede Stadium","MA Chidambaram Stadium", "Punjab Cricket Association IS Bindra Stadium","M.Chinnaswamy Stadium","Maharashtra Cricket Association Stadium","Arun Jaitley Stadium"], "toss_winner":["Chennai Super Kings", "Chennai Super Kings","Chennai Super Kings","Chennai Super Kings","Chennai Super Kings","Chennai Super Kings"], "toss_decision":["field","field","field","field","field","field"], "match_winner":["Chennai Super Kings","Chennai Super Kings","Kings XI Punjab","Chennai Super Kings","Chennai Super Kings","Delhi Capitals"], "year":[2018,2018,2018,2018,2018,2019]})
    >>> len(TossSeasonData(t_df, 2018))
    5

    >>> len(TossSeasonData(t_df, 2019))
    1

    >>> t = TossSeasonData(t_df, 2018)
    >>> type(t["year"][0])
    <class 'numpy.int64'>

    """
    # assign() returns a new frame, so the caller's 'date' column keeps its original values.
    with_dates = toss_df.assign(date=pd.to_datetime(toss_df["date"]))

    match_season = with_dates[with_dates["year"] == season].sort_values(by=["match_winner", "date"])

    return match_season

In [None]:
# Match/toss records of toss_df restricted to the selected season (season_to_check).
toss_season_data = TossSeasonData(toss_df, season_to_check)

toss_season_data

Unnamed: 0,Team1,Team2,date,venue,toss_winner,toss_decision,match_winner,year
117,Mumbai Indians,Chennai Super Kings,2020-09-19,Sheikh Zayed Stadium,Chennai Super Kings,field,Chennai Super Kings,2020
131,Kings XI Punjab,Chennai Super Kings,2020-10-04,Dubai International Cricket Stadium,Kings XI Punjab,bat,Chennai Super Kings,2020
143,Chennai Super Kings,Sunrisers Hyderabad,2020-10-13,Dubai International Cricket Stadium,Chennai Super Kings,bat,Chennai Super Kings,2020
156,Royal Challengers Bangalore,Chennai Super Kings,2020-10-25,Dubai International Cricket Stadium,Royal Challengers Bangalore,bat,Chennai Super Kings,2020
161,Kolkata Knight Riders,Chennai Super Kings,2020-10-29,Dubai International Cricket Stadium,Chennai Super Kings,field,Chennai Super Kings,2020
166,Kings XI Punjab,Chennai Super Kings,2020-11-01,Sheikh Zayed Stadium,Chennai Super Kings,field,Chennai Super Kings,2020
122,Delhi Capitals,Chennai Super Kings,2020-09-25,Dubai International Cricket Stadium,Chennai Super Kings,field,Delhi Capitals,2020
130,Delhi Capitals,Kolkata Knight Riders,2020-10-03,Sharjah Cricket Stadium,Kolkata Knight Riders,field,Delhi Capitals,2020
133,Delhi Capitals,Royal Challengers Bangalore,2020-10-05,Dubai International Cricket Stadium,Royal Challengers Bangalore,field,Delhi Capitals,2020
137,Delhi Capitals,Rajasthan Royals,2020-10-09,Sharjah Cricket Stadium,Rajasthan Royals,field,Delhi Capitals,2020


In [None]:
def MatchesWon(toss_season_df: DataFrame) -> DataFrame:
    """
    Count the number of matches won by each team.

    Rows are grouped by the ``match_winner`` column and the size of every group is reported.

    :param toss_season_df: Season-filtered match DataFrame (output of TossSeasonData()); must contain
                           the column ``match_winner``.
    :return: DataFrame with the columns ``match_winner`` and ``Number of Matches Won``, one row per
             winning team, sorted by team name.

    >>> t_df = pd.DataFrame({"Team1":["Mumbai Indians","Kolkata Knight Riders", "Kings XI Punjab","Royal Challengers Bangalore", "Royal Challengers Bangalore", "Delhi Capitals"] , "Team2":["Chennai Super Kings", "Chennai Super Kings", "Chennai Super Kings", "Chennai Super Kings", "Chennai Super Kings", "Rajasthan Royals"], "date":["2018-04-07","2018-04-10","2018-04-15", "2018-04-25", "2018-05-05", "2019-05-18"], "venue":["Wankhede Stadium","MA Chidambaram Stadium", "Punjab Cricket Association IS Bindra Stadium","M.Chinnaswamy Stadium","Maharashtra Cricket Association Stadium","Arun Jaitley Stadium"], "toss_winner":["Chennai Super Kings", "Chennai Super Kings","Chennai Super Kings","Chennai Super Kings","Chennai Super Kings","Chennai Super Kings"], "toss_decision":["field","field","field","field","field","field"], "match_winner":["Chennai Super Kings","Chennai Super Kings","Kings XI Punjab","Chennai Super Kings","Chennai Super Kings","Delhi Capitals"], "year":[2018,2018,2018,2018,2018,2018]})
    >>> MatchesWon(t_df)
              match_winner  Number of Matches Won
    0  Chennai Super Kings                      4
    1       Delhi Capitals                      1
    2      Kings XI Punjab                      1

    >>> MatchesWon(t_df)["Number of Matches Won"].sum()
    6

    """
    wins_per_team = toss_season_df.groupby(["match_winner"])["match_winner"].count()

    # Rename the counts before resetting the index so they do not clash with the team-name column.
    return wins_per_team.rename("Number of Matches Won").reset_index()

In [None]:
# DataFrame with two columns: the team name (match_winner) and its number of wins in the selected season.
win_info = MatchesWon(toss_season_data)

win_info

Unnamed: 0,match_winner,Number of Matches Won
0,Chennai Super Kings,6
1,Delhi Capitals,8
2,Kings XI Punjab,5
3,Kolkata Knight Riders,6
4,Mumbai Indians,11
5,Rajasthan Royals,6
6,Royal Challengers Bangalore,6
7,Sunrisers Hyderabad,8


In [None]:
def hypo_plot(MatchWin_df: DataFrame, TeamConsistency_df: DataFrame):
    """
    Display a line plot of overall team selection consistency against the number of matches won.

    Both inputs are joined on the team name (inner join, so a team missing from either frame is left
    out) and ordered by ascending consistency. Consistency is drawn on the primary y-axis and matches
    won on the secondary y-axis, which lets one judge whether the data supports Hypothesis 3.

    The figure title reads the module-level variable ``season_to_check``.

    :param MatchWin_df: output DataFrame of MatchesWon(), with the columns ``match_winner`` and
                        ``Number of Matches Won``.
    :param TeamConsistency_df: output DataFrame of TeamConsistency(), with the columns ``Team`` and
                               ``Consistency of Players (%)``.
    :return: the return value of ``fig.show()``; the plot itself is displayed as a side effect.

    """
    merged = MatchWin_df.merge(TeamConsistency_df, left_on="match_winner", right_on="Team", how="inner")
    merged = merged.sort_values(by="Consistency of Players (%)")

    consistency_trace = go.Scatter(
        x=merged["Team"],
        y=merged["Consistency of Players (%)"],
        name="Overall Team Consistency",
    )
    wins_trace = go.Scatter(
        x=merged["match_winner"],
        y=merged["Number of Matches Won"],
        name="Matches Won",
    )

    # One subplot with two y-axes: consistency on the primary axis, wins on the secondary axis.
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(consistency_trace, secondary_y=False)
    fig.add_trace(wins_trace, secondary_y=True)

    fig.update_layout(
        title_text=f"Player's selection consistency VS Number of matches won by their team : Season {season_to_check}"
    )

    fig.update_xaxes(
        tickangle=45,
        title_text="Team",
        title_font={"size": 20},
        title_standoff=25)

    fig.update_yaxes(title_text="Team Consistency (%)", secondary_y=False)
    fig.update_yaxes(title_text="Number of Matches Won", secondary_y=True)

    return fig.show()

In [None]:
# Line plot relating team consistency to the number of matches won by each team over the season.
hypo_plot(win_info, team_consistency)