In [381]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("ggplot")

In [382]:
class Database:
    """Base database class providing common operations over a pandas DataFrame.

    The data lives in the protected attribute ``_df`` so that subclasses can
    populate it; this base class only starts it out empty.
    """

    def __init__(self) -> None:
        """Initialize the database with an empty DataFrame."""
        # Single underscore (protected, not name-mangled) so subclasses can
        # assign to it directly.
        self._df = pd.DataFrame()

    def get_number_of_records(self) -> int:
        """Return the number of records (rows) in the database.

        Returns:
            int: Number of rows in the underlying DataFrame.
        """
        return self._df.shape[0]

    @property
    def shape(self) -> tuple[int, int]:
        """Return the shape of the database.

        Returns:
            tuple: (number of rows, number of columns)
        """
        return self._df.shape

    @property
    def info(self) -> str:
        """Return the ``DataFrame.info()`` report as text.

        ``DataFrame.info()`` writes its report to a stream and returns None,
        so returning its result directly gave callers ``None`` (and printed
        the report as a side effect of reading the property). The report is
        captured in a buffer instead so it can be printed or embedded in a
        string by the caller.

        Returns:
            str: The column/dtype/memory summary produced by pandas.
        """
        import io  # local import: only this property needs it

        buffer = io.StringIO()
        self._df.info(buf=buffer)
        return buffer.getvalue()

    def get_dataframe(self) -> pd.DataFrame:
        """Return the underlying DataFrame.

        Returns:
            pd.DataFrame: The internal frame itself, not a copy; mutating the
            returned object also mutates the database.
        """
        return self._df

In [None]:
class DatabaseIPL(Database):
    """Database class specialized for IPL cricket delivery data."""

    # Columns of the deliveries data that hold player names.
    _PLAYER_COLUMNS = ("striker", "bowler", "fielder")

    def __init__(self, csv_files: list[str]) -> None:
        """Initialize the database by loading IPL data from CSV files.

        Args:
            csv_files: List of paths to CSV files containing IPL data.

        Raises:
            FileNotFoundError: If any of the given paths does not exist.
        """
        super().__init__()  # start from the empty frame set up by Database

        # Read every file first and concatenate once; growing the frame with
        # concat inside the loop re-copies all previously loaded rows each time.
        frames = []
        for file in csv_files:
            if not os.path.exists(file):
                raise FileNotFoundError(f"CSV file not found: {file}")
            frames.append(pd.read_csv(file))
        if frames:  # pd.concat raises on an empty list
            self._df = pd.concat(frames, ignore_index=True)

        self._create_city_col()

    def _create_city_col(self) -> None:
        """Add a ``city`` column derived from the ``venue`` column.

        The city is taken as the text after the last comma of the venue,
        stripped of surrounding whitespace. A venue without a comma yields the
        whole venue string. Does nothing when there is no ``venue`` column
        (for example when no files were loaded).

        NOTE(review): this helper was called but not defined in the class; the
        "<stadium>, <city>" layout is inferred from the venue values visible
        in the notebook output - confirm against the full data.
        """
        if "venue" not in self._df.columns:
            return
        self._df["city"] = (
            self._df["venue"].str.rsplit(",", n=1).str[-1].str.strip()
        )

    def get_all_players(self) -> list[str]:
        """Return every distinct player name found in the player columns.

        Names are collected from the striker, bowler and fielder columns (those
        that exist), missing values are dropped, and each name appears once in
        order of first appearance (strikers first, then bowlers, then fielders).

        NOTE(review): assumes the three columns spell a given player's name the
        same way; otherwise one player can appear under several names.

        Returns:
            list[str]: Unique player names; empty if no player column exists.
        """
        present = [col for col in self._PLAYER_COLUMNS if col in self._df.columns]
        if not present:
            return []
        names = pd.concat([self._df[col] for col in present], ignore_index=True)
        return names.dropna().unique().tolist()

    def get_player_count(self) -> int:
        """Return the number of distinct players in the database."""
        return len(self.get_all_players())

    def get_players_per_season(self) -> pd.DataFrame:
        """Return the number of distinct players seen in each season.

        NOTE(review): the previous body built a groupby on "season" and
        returned nothing; a per-season count of unique names is the assumed
        intent - confirm.

        Returns:
            pd.DataFrame: Columns ``season`` and ``player_count``, one row per
            season. Empty (with those columns) when the needed columns are
            missing.
        """
        present = [col for col in self._PLAYER_COLUMNS if col in self._df.columns]
        if "season" not in self._df.columns or not present:
            return pd.DataFrame(columns=["season", "player_count"])

        # Stack the player columns into one (season, player) table so a name
        # that appears in several roles is counted once per season.
        stacked = pd.concat(
            [
                self._df[["season", col]].rename(columns={col: "player"})
                for col in present
            ],
            ignore_index=True,
        ).dropna(subset=["player"])
        return (
            stacked.groupby("season", as_index=False)["player"]
            .nunique()
            .rename(columns={"player": "player_count"})
        )


In [384]:
# One deliveries file per season, 2022 through 2024.
file_paths = [
    f"../data/cricketData/ipl_{year}_deliveries.csv" for year in range(2022, 2025)
]

try:
    db = DatabaseIPL(file_paths)
    print(f"Number of records: {db.get_number_of_records():,d}")
    print(f"Database shape: {db.shape}")
    # DataFrame.info() writes its report to stdout and returns None, so it is
    # called as a statement instead of being interpolated into an f-string.
    # The header also used "\D", an invalid escape, where "\n" was intended.
    print("\nDatabase Info:")
    db.get_dataframe().info()
    print(f"Database All Players: {db.get_all_players()}")
    print(f"Number Of total Players: {db.get_player_count()}")
except FileNotFoundError as e:
    print(f"Error loading data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Number of records: 52,351
Database shape: (52351, 21)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52351 entries, 0 to 52350
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   match_id          52351 non-null  int64  
 1   season            52351 non-null  int64  
 2   match_no          52351 non-null  int64  
 3   date              52351 non-null  object 
 4   venue             52351 non-null  object 
 5   batting_team      52351 non-null  object 
 6   bowling_team      52351 non-null  object 
 7   innings           52351 non-null  int64  
 8   over              52351 non-null  float64
 9   striker           52351 non-null  object 
 10  bowler            52351 non-null  object 
 11  runs_of_bat       52351 non-null  int64  
 12  extras            52351 non-null  int64  
 13  wide              52351 non-null  int64  
 14  legbyes           52351 non-null  int64  
 15  byes              52351 non-null 

In [385]:
# Work with the database's DataFrame directly; get_dataframe() returns the
# internal frame itself (not a copy), so in-place changes to `df` affect `db`.
df = db.get_dataframe()

In [386]:
# Display the deliveries DataFrame using the notebook's rich rendering.
df

Unnamed: 0,match_id,season,match_no,date,venue,batting_team,bowling_team,innings,over,striker,...,runs_of_bat,extras,wide,legbyes,byes,noballs,wicket_type,player_dismissed,fielder,city
0,202201,2022,1,"Mar 26, 2022","Wankhede Stadium, Mumbai",CSK,KKR,1,0.1,Gaikwad,...,0,1,0,0,0,1,,,,Mumbai
1,202201,2022,1,"Mar 26, 2022","Wankhede Stadium, Mumbai",CSK,KKR,1,0.1,Gaikwad,...,0,0,0,0,0,0,,,,Mumbai
2,202201,2022,1,"Mar 26, 2022","Wankhede Stadium, Mumbai",CSK,KKR,1,0.2,Gaikwad,...,0,1,1,0,0,0,,,,Mumbai
3,202201,2022,1,"Mar 26, 2022","Wankhede Stadium, Mumbai",CSK,KKR,1,0.2,Gaikwad,...,0,0,0,0,0,0,,,,Mumbai
4,202201,2022,1,"Mar 26, 2022","Wankhede Stadium, Mumbai",CSK,KKR,1,0.3,Gaikwad,...,0,0,0,0,0,0,caught,Gaikwad,Nitish Rana,Mumbai
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52346,202474,2024,74,"May 26, 2024","MA Chidambaram Stadium, Chennai",KKR,SRH,2,9.5,Shreyas Iyer,...,1,0,0,0,0,0,,,,Chennai
52347,202474,2024,74,"May 26, 2024","MA Chidambaram Stadium, Chennai",KKR,SRH,2,9.6,Venkatesh Iyer,...,1,0,0,0,0,0,,,,Chennai
52348,202474,2024,74,"May 26, 2024","MA Chidambaram Stadium, Chennai",KKR,SRH,2,10.1,Venkatesh Iyer,...,1,0,0,0,0,0,,,,Chennai
52349,202474,2024,74,"May 26, 2024","MA Chidambaram Stadium, Chennai",KKR,SRH,2,10.2,Shreyas Iyer,...,1,0,0,0,0,0,,,,Chennai


In [387]:
# Distinct seasons present in the loaded data.
df["season"].unique()

array([2022, 2023, 2024])

In [388]:
# Number of distinct striker values (dropna=False matches len(unique())).
df["striker"].nunique(dropna=False)

277

In [389]:
# Number of distinct bowler values (dropna=False matches len(unique())).
df["bowler"].nunique(dropna=False)

215

In [390]:
# Number of distinct fielder values.
# NOTE(review): dropna=False keeps parity with len(unique()), so a missing
# fielder (NaN), if present, is counted as one value - confirm this is wanted.
df["fielder"].nunique(dropna=False)

381

In [391]:
# Everyone who appears as a striker or as a bowler, de-duplicated.
players = set(df["striker"].unique()).union(df["bowler"].unique())

len(players)

311

In [392]:
# Show the set of striker and bowler names built above.
players

{'Aaron',
 'Abbott',
 'Abdul Basith',
 'Abdul Samad',
 'Abhijeet Tomar',
 'Abhinav Manohar',
 'Abhishek Sharma',
 'Abishek Porel',
 'Adil Rashid',
 'Akash Deep',
 'Akash Madhwal',
 'Akash Singh',
 'Akeal Hosein',
 'Alzarri Joseph',
 'Aman Hakim Khan',
 'Angkrish Raghuvanshi',
 'Anmolpreet Singh',
 'Anshul Kamboj',
 'Anuj Rawat',
 'Anukul Roy',
 'Arjun Tendulkar',
 'Arshad Khan',
 'Arshdeep Singh',
 'Arshin Kulkarni',
 'Ashutosh Sharma',
 'Ashwin',
 'Atharva Taide',
 'Avesh Khan',
 'Axar',
 'Ayush Badoni',
 'Azmatullah',
 'Bairstow',
 'Basil Thampi',
 'Behrendorff',
 'Bhui',
 'Bhuvneshwar',
 'Billings',
 'Boult',
 'Brevis',
 'Bumrah',
 'Buttler',
 'Chahal',
 'Chahar',
 'Chakaravarthy',
 'Chameera',
 'Chawla',
 'Chetan Sakariya',
 'Conway',
 'Coulter-Nile',
 'Cummins',
 'Daniel Sams',
 'Darshan Nalkande',
 'Daryl Mitchell',
 'Dhawan',
 'Dhoni',
 'Dhruv Jurel',
 'Dwayne Bravo',
 'Fabian Allen',
 'Fazalhaq Farooqi',
 'Ferguson',
 'Ferreira',
 'Finch',
 'Fraser-McGurk',
 'Gaikwad',
 'Gerald

In [393]:
# Count distinct matches hosted by each venue, busiest venue first.
matches_by_venue = df.groupby("venue")["match_id"].nunique()
matches_by_venue.sort_values(ascending=False)


venue
Wankhede Stadium, Mumbai                                                          35
Dr DY Patil Sports Academy, Mumbai                                                20
Narendra Modi Stadium, Ahmedabad                                                  19
MA Chidambaram Stadium, Chennai                                                   17
Eden Gardens, Kolkata                                                             16
Brabourne Stadium, Mumbai                                                         16
M.Chinnaswamy Stadium, Bengaluru                                                  14
Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow             14
Maharashtra Cricket Association Stadium, Pune                                     13
Rajiv Gandhi International Stadium, Hyderabad                                     13
Arun Jaitley Stadium, Delhi                                                       12
Sawai Mansingh Stadium, Jaipur                             

In [394]:
# Distinct matches per (season, venue), ordered from most to fewest matches.
# Sorting is chained so the cell builds df_group in one expression instead of
# mutating it in place.
df_group = (
    df.groupby(["season", "venue"], as_index=False)["match_id"]
    .nunique()
    .sort_values("match_id", ascending=False)
)
df_group

Unnamed: 0,season,venue,match_id
5,2022,"Wankhede Stadium, Mumbai",21
1,2022,"Dr DY Patil Sports Academy, Mumbai",20
0,2022,"Brabourne Stadium, Mumbai",16
3,2022,"Maharashtra Cricket Association Stadium, Pune",13
25,2024,"MA Chidambaram Stadium, Chennai",9
13,2023,"Narendra Modi Stadium, Ahmedabad",9
12,2023,"MA Chidambaram Stadium, Chennai",8
27,2024,"Narendra Modi Stadium, Ahmedabad",8
24,2024,"M.Chinnaswamy Stadium, Bengaluru",7
22,2024,"Eden Gardens, Kolkata",7


In [395]:
# Build the chart title from the earliest and latest seasons in df_group.
seasons = df_group["season"]
title = f"Matches Per Venue From {seasons.min()} to {seasons.max()}"
title

'Matches Per Venue From 2022 to 2024'

In [396]:
# Per-season match counts for Wankhede Stadium only.
df_group.loc[df_group["venue"] == "Wankhede Stadium, Mumbai"]

Unnamed: 0,season,venue,match_id
5,2022,"Wankhede Stadium, Mumbai",21
17,2023,"Wankhede Stadium, Mumbai",7
30,2024,"Wankhede Stadium, Mumbai",7
