In [45]:
from pathlib import Path
import polars as pl
from polars import col, lit, when
import polars.selectors as cs
import orjson
import yaml

from cricket.conf.conf import Conf
from cricket.functions.functions import Functions as F

In [2]:
# Location of the processed cricket dataset, read from the project catalog and
# converted to a plain string so it can be interpolated into SQL text below.
filepath = str(Conf.catalog.processed.cricket_dataset)

In [29]:
# Expose the whole parquet dataset to the following SQL cells as the view `t1`.
# Single quotes in the path are doubled (the SQL string-literal escape) so a
# path containing an apostrophe cannot terminate the literal early.
F.sql(
    "select * from read_parquet('{}')".format(filepath.replace("'", "''"))
).to_view('t1')

┌─────────────┬──────────┬────────────┬─────────────┬──────────────┬──────────────┬──────────┬─────────────────┬────────────────────┬────────────────────┬────────────┬──────────┬───────┬───────┬─────────┬─────────────┬────────────┬────────────┬───────┬───────┬─────────────┬───────────┬───────────┬────────────────┬───────────────┬───────────┬──────────────────┬────────────────┬────────────┬─────────┬───────────────────┬──────────────┬─────────┬────────────┬──────────────┬─────────────────┬──────────────┬──────────┬───────────────┬──────────────┬──────────────────────────────────┬───────────────────────────────┬───────────────┬───────────────┬────────────────────┬──────────────┐
│ delivery_id │ match_id │ innings_id │   innings   │ batting_team │ bowling_team │ declared │     bowler      │       batter       │    non_striker     │ player_out │ delivery │ over  │ ball  │ wickets │ batter_runs │ extra_runs │ total_runs │  six  │ four  │ wicket_type │ batter_id │ bowler_id │ non_striker_id 

In [30]:
# Distinct (match_type, event) pairs for men's ODI matches whose event name
# contains both "world" and "cup", compared case-insensitively.
# Plain string literal: the query has no placeholders, so no f-prefix is used
# (an f-string would treat any future literal brace as an interpolation).
F.sql(
    """
    select
        distinct
        match_type,
        event

    from t1
    where
        gender = 'male'
        and event.lower().contains('world')
        and event.lower().contains('cup')
        and match_type = 'ODI'
    """
)

┌────────────┬──────────────────────────────────────────────┐
│ match_type │                    event                     │
│  varchar   │                   varchar                    │
├────────────┼──────────────────────────────────────────────┤
│ ODI        │ ICC World Cup                                │
│ ODI        │ ICC World Cup Qualifiers                     │
│ ODI        │ ICC Men's Cricket World Cup Super League     │
│ ODI        │ World Cup                                    │
│ ODI        │ ICC Cricket World Cup Qualifier              │
│ ODI        │ ICC Cricket World Cup Qualifier Play-off     │
│ ODI        │ ICC Men's Cricket World Cup League 2         │
│ ODI        │ ICC Cricket World Cup Qualifier (ICC Trophy) │
│ ODI        │ ICC Cricket World Cup                        │
└────────────┴──────────────────────────────────────────────┘

In [39]:
# All columns of every `t1` row whose event name contains
# "Cricket World Cup Super League" and whose wicket_type is the empty string.
# NOTE(review): an empty wicket_type presumably marks a delivery without a
# dismissal -- confirm against the dataset definition.
super_league_no_wicket_sql = """
    select
        *

    from t1
    where
        event like '%Cricket World Cup Super League%'
        and wicket_type = ''
    """
F.sql(super_league_no_wicket_sql)

┌─────────────┬──────────┬────────────┬─────────────┬──────────────┬──────────────┬──────────┬──────────────────────┬─────────────────┬─────────────────┬────────────┬──────────┬───────┬───────┬─────────┬─────────────┬────────────┬────────────┬───────┬───────┬─────────────┬───────────┬───────────┬────────────────┬───────────────┬────────────┬──────────────────┬────────────────┬────────────┬─────────┬────────────────┬──────────────┬─────────┬────────────┬────────────┬───────────────────┬─────────────┬────────────┬───────────────┬─────────────┬───────────────────────────────┬──────────────────────────────────────────┬───────────────┬───────────────┬────────────────────┬──────────────┐
│ delivery_id │ match_id │ innings_id │   innings   │ batting_team │ bowling_team │ declared │        bowler        │     batter      │   non_striker   │ player_out │ delivery │ over  │ ball  │ wickets │ batter_runs │ extra_runs │ total_runs │  six  │ four  │ wicket_type │ batter_id │ bowler_id │ non_striker

In [48]:
def _blank_strings_to_null(lazy_frame):
    """Return ``lazy_frame`` with empty strings in string columns set to null.

    The string-typed column names are resolved from the frame's schema, and
    each such column is rewritten so that values equal to ``''`` become null
    while every other value is kept unchanged. Non-string columns are not
    touched.
    """
    string_columns = lazy_frame.select(cs.string()).collect_schema().names()

    replacements = {}
    for name in string_columns:
        # Keyed by the column name so the rewritten expression overwrites the
        # original column instead of adding a new one.
        replacements[name] = when(col(name) == '').then(None).otherwise(col(name))

    return lazy_frame.with_columns(**replacements)


# Lazily scan the preprocessed deliveries file, normalise blank strings to
# null, then materialise the result for display.
(
    pl.scan_parquet("../data/04_preprocessed/deliveries.parquet")
    .pipe(_blank_strings_to_null)
    .collect()
)

match_id,innings,batting_team,bowling_team,declared,delivery,batter,bowler,non_striker,batter_runs,extra_runs,total_runs,wicket_type,player_out
str,str,str,str,i64,f64,str,str,str,i64,i64,i64,str,str
"""65252""","""1st innings""","""England""","""Namibia""",0,0.1,"""ME Trescothick""","""G Snyman""","""NV Knight""",0,0,0,,
"""65252""","""1st innings""","""England""","""Namibia""",0,0.2,"""ME Trescothick""","""G Snyman""","""NV Knight""",0,0,0,,
"""65252""","""1st innings""","""England""","""Namibia""",0,0.3,"""ME Trescothick""","""G Snyman""","""NV Knight""",0,0,0,,
"""65252""","""1st innings""","""England""","""Namibia""",0,0.4,"""ME Trescothick""","""G Snyman""","""NV Knight""",1,0,1,,
"""65252""","""1st innings""","""England""","""Namibia""",0,0.5,"""NV Knight""","""G Snyman""","""ME Trescothick""",0,0,0,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""1160957""","""2nd innings""","""Singapore""","""Thailand""",0,9.5,"""CR Kumarage""","""N Senamontree""","""CR Suryawanshi""",2,0,2,,
"""1160957""","""2nd innings""","""Singapore""","""Thailand""",0,9.6,"""CR Kumarage""","""N Senamontree""","""CR Suryawanshi""",0,0,0,,
"""1160957""","""2nd innings""","""Singapore""","""Thailand""",0,10.1,"""CR Suryawanshi""","""P Sungnard""","""CR Kumarage""",1,0,1,,
"""1160957""","""2nd innings""","""Singapore""","""Thailand""",0,10.2,"""CR Kumarage""","""P Sungnard""","""CR Suryawanshi""",0,0,0,,
