In [1]:
import pandas as pd
import json

**Processing Match Results**

In [10]:
with open('/content/t20_world_cup_results.json') as f:
    data = json.load(f)

# Access the 'matchSummary' key directly instead of using index 0
df_match = pd.DataFrame(data['matchSummary'])
df_match.head()

Unnamed: 0,team1,team2,winner,margin,ground,matchDate,scorecard
0,England,Pakistan,England,5 wickets,Melbourne,"Nov 13, 2022",T20I # 1879
1,England,India,England,10 wickets,Adelaide,"Nov 10, 2022",T20I # 1878
2,New Zealand,Pakistan,Pakistan,7 wickets,Sydney,"Nov 9, 2022",T20I # 1877
3,India,Zimbabwe,India,71 runs,Melbourne,"Nov 6, 2022",T20I # 1873
4,Bangladesh,Pakistan,Pakistan,5 wickets,Adelaide,"Nov 6, 2022",T20I # 1872


In [11]:

df_match.shape

(42, 7)

In [12]:
df_match.rename({'scorecard': 'match_id'}, axis = 1, inplace = True)
df_match.head()

Unnamed: 0,team1,team2,winner,margin,ground,matchDate,match_id
0,England,Pakistan,England,5 wickets,Melbourne,"Nov 13, 2022",T20I # 1879
1,England,India,England,10 wickets,Adelaide,"Nov 10, 2022",T20I # 1878
2,New Zealand,Pakistan,Pakistan,7 wickets,Sydney,"Nov 9, 2022",T20I # 1877
3,India,Zimbabwe,India,71 runs,Melbourne,"Nov 6, 2022",T20I # 1873
4,Bangladesh,Pakistan,Pakistan,5 wickets,Adelaide,"Nov 6, 2022",T20I # 1872


In [13]:
match_ids_dict = {}

for index, row in df_match.iterrows():
    key1 = row['team1'] + ' Vs ' + row['team2']
    key2 = row['team2'] + ' Vs ' + row['team1']
    match_ids_dict[key1] = row['match_id']
    match_ids_dict[key2] = row['match_id']

In [7]:
df_match.to_csv('/content/t20_world_cup_results.csv', index = False)

**Processing Batting Summary**

In [8]:
with open('/content/t20_world_cup_batting_summary.json') as f:
    data = json.load(f)
    all_records = []
    for rec in data:
        all_records.extend(rec['battingSummary'])

df_batting = pd.DataFrame(all_records)
df_batting.head(11)

Unnamed: 0,match,teamInnings,battingPos,batsmanName,dismissal,runs,balls,4s,6s,SR
0,Pakistan Vs England,Pakistan,1,Mohammad Rizwan †,b Curran,15,14,0,1,107.14
1,Pakistan Vs England,Pakistan,3,Babar Azam (c),c & b Rashid,32,28,2,0,114.28
2,Pakistan Vs England,Pakistan,5,Mohammad Haris,c Stokes b Rashid,8,12,1,0,66.66
3,Pakistan Vs England,Pakistan,7,Shan Masood,c Livingstone b Curran,38,28,2,1,135.71
4,Pakistan Vs England,Pakistan,9,Iftikhar Ahmed,c †Buttler b Stokes,0,6,0,0,0.0
5,Pakistan Vs England,Pakistan,11,Shadab Khan,c Woakes b Jordan,20,14,2,0,142.85
6,Pakistan Vs England,Pakistan,13,Mohammad Nawaz,c Livingstone b Curran,5,7,0,0,71.42
7,Pakistan Vs England,Pakistan,15,Mohammad Wasim,c Livingstone b Jordan,4,8,0,0,50.0
8,Pakistan Vs England,Pakistan,17,Shaheen Shah Afridi,not out,5,3,1,0,166.66
9,Pakistan Vs England,Pakistan,18,Haris Rauf,not out,1,1,0,0,100.0


In [9]:
df_batting['out/not_out'] = df_batting.dismissal.apply(lambda x: "out" if len(x)>0 else "not_out")
df_batting.head(11)

Unnamed: 0,match,teamInnings,battingPos,batsmanName,dismissal,runs,balls,4s,6s,SR,out/not_out
0,Pakistan Vs England,Pakistan,1,Mohammad Rizwan †,b Curran,15,14,0,1,107.14,out
1,Pakistan Vs England,Pakistan,3,Babar Azam (c),c & b Rashid,32,28,2,0,114.28,out
2,Pakistan Vs England,Pakistan,5,Mohammad Haris,c Stokes b Rashid,8,12,1,0,66.66,out
3,Pakistan Vs England,Pakistan,7,Shan Masood,c Livingstone b Curran,38,28,2,1,135.71,out
4,Pakistan Vs England,Pakistan,9,Iftikhar Ahmed,c †Buttler b Stokes,0,6,0,0,0.0,out
5,Pakistan Vs England,Pakistan,11,Shadab Khan,c Woakes b Jordan,20,14,2,0,142.85,out
6,Pakistan Vs England,Pakistan,13,Mohammad Nawaz,c Livingstone b Curran,5,7,0,0,71.42,out
7,Pakistan Vs England,Pakistan,15,Mohammad Wasim,c Livingstone b Jordan,4,8,0,0,50.0,out
8,Pakistan Vs England,Pakistan,17,Shaheen Shah Afridi,not out,5,3,1,0,166.66,out
9,Pakistan Vs England,Pakistan,18,Haris Rauf,not out,1,1,0,0,100.0,out


In [10]:
df_batting['match_id'] = df_batting['match'].map(match_ids_dict)
df_batting.head()

Unnamed: 0,match,teamInnings,battingPos,batsmanName,dismissal,runs,balls,4s,6s,SR,out/not_out,match_id
0,Pakistan Vs England,Pakistan,1,Mohammad Rizwan †,b Curran,15,14,0,1,107.14,out,T20I # 1879
1,Pakistan Vs England,Pakistan,3,Babar Azam (c),c & b Rashid,32,28,2,0,114.28,out,T20I # 1879
2,Pakistan Vs England,Pakistan,5,Mohammad Haris,c Stokes b Rashid,8,12,1,0,66.66,out,T20I # 1879
3,Pakistan Vs England,Pakistan,7,Shan Masood,c Livingstone b Curran,38,28,2,1,135.71,out,T20I # 1879
4,Pakistan Vs England,Pakistan,9,Iftikhar Ahmed,c †Buttler b Stokes,0,6,0,0,0.0,out,T20I # 1879


In [11]:
df_batting.drop(columns=["dismissal"], inplace=True)
df_batting.head(10)

Unnamed: 0,match,teamInnings,battingPos,batsmanName,runs,balls,4s,6s,SR,out/not_out,match_id
0,Pakistan Vs England,Pakistan,1,Mohammad Rizwan †,15,14,0,1,107.14,out,T20I # 1879
1,Pakistan Vs England,Pakistan,3,Babar Azam (c),32,28,2,0,114.28,out,T20I # 1879
2,Pakistan Vs England,Pakistan,5,Mohammad Haris,8,12,1,0,66.66,out,T20I # 1879
3,Pakistan Vs England,Pakistan,7,Shan Masood,38,28,2,1,135.71,out,T20I # 1879
4,Pakistan Vs England,Pakistan,9,Iftikhar Ahmed,0,6,0,0,0.0,out,T20I # 1879
5,Pakistan Vs England,Pakistan,11,Shadab Khan,20,14,2,0,142.85,out,T20I # 1879
6,Pakistan Vs England,Pakistan,13,Mohammad Nawaz,5,7,0,0,71.42,out,T20I # 1879
7,Pakistan Vs England,Pakistan,15,Mohammad Wasim,4,8,0,0,50.0,out,T20I # 1879
8,Pakistan Vs England,Pakistan,17,Shaheen Shah Afridi,5,3,1,0,166.66,out,T20I # 1879
9,Pakistan Vs England,Pakistan,18,Haris Rauf,1,1,0,0,100.0,out,T20I # 1879


In [12]:
df_batting['batsmanName'] = df_batting['batsmanName'].apply(lambda x: x.replace('â€', ''))
df_batting['batsmanName'] = df_batting['batsmanName'].apply(lambda x: x.replace('\xa0', ''))
df_batting.head()

Unnamed: 0,match,teamInnings,battingPos,batsmanName,runs,balls,4s,6s,SR,out/not_out,match_id
0,Pakistan Vs England,Pakistan,1,Mohammad Rizwan†,15,14,0,1,107.14,out,T20I # 1879
1,Pakistan Vs England,Pakistan,3,Babar Azam(c),32,28,2,0,114.28,out,T20I # 1879
2,Pakistan Vs England,Pakistan,5,Mohammad Haris,8,12,1,0,66.66,out,T20I # 1879
3,Pakistan Vs England,Pakistan,7,Shan Masood,38,28,2,1,135.71,out,T20I # 1879
4,Pakistan Vs England,Pakistan,9,Iftikhar Ahmed,0,6,0,0,0.0,out,T20I # 1879


In [13]:
df_batting.shape

(699, 11)

In [15]:
df_batting.to_csv('/content/t20_world_cup_batting_summary.csv', index = False)

**Process Bowling Summary**

In [5]:
with open('/content/t20_world_cup_bowling_summary.json') as f:
    data = json.load(f)
    all_records = []

    # Extract all bowling summaries from each record
    for rec in data:
        if 'bowlingSummary' in rec and isinstance(rec['bowlingSummary'], list):
            all_records.extend(rec['bowlingSummary'])

# Optionally create a DataFrame
df_bowling = pd.DataFrame(all_records)

# Convert DataFrame back to list of dictionaries if needed
bowling_list_of_dicts = df_bowling.to_dict(orient='records')

# Show the list of dictionaries (first few items)
print(bowling_list_of_dicts[:2])

[{'match': 'Pakistan Vs England', 'bowlingTeam': 'England', 'bowlerName': 'Ben Stokes', 'overs': '4', 'maiden': '0', 'runs': '32', 'wickets': '1', 'economy': '8.00', '0s': '6', '4s': '1', '6s': '0', 'wides': '2', 'noBalls': '1'}, {'match': 'Pakistan Vs England', 'bowlingTeam': 'England', 'bowlerName': 'Chris Woakes', 'overs': '3', 'maiden': '0', 'runs': '26', 'wickets': '0', 'economy': '8.66', '0s': '7', '4s': '2', '6s': '1', 'wides': '2', 'noBalls': '0'}]


In [6]:
df_bowling = pd.DataFrame(all_records)
print(df_bowling.shape)
df_bowling.head()

(500, 13)


Unnamed: 0,match,bowlingTeam,bowlerName,overs,maiden,runs,wickets,economy,0s,4s,6s,wides,noBalls
0,Pakistan Vs England,England,Ben Stokes,4,0,32,1,8.0,6,1,0,2,1
1,Pakistan Vs England,England,Chris Woakes,3,0,26,0,8.66,7,2,1,2,0
2,Pakistan Vs England,England,Sam Curran,4,0,12,3,3.0,15,0,0,0,0
3,Pakistan Vs England,England,Adil Rashid,4,1,22,2,5.5,10,1,0,1,0
4,Pakistan Vs England,England,Chris Jordan,4,0,27,2,6.75,9,3,0,0,0


In [14]:
df_bowling['match_id'] = df_bowling['match'].map(match_ids_dict)
df_bowling.head()

Unnamed: 0,match,bowlingTeam,bowlerName,overs,maiden,runs,wickets,economy,0s,4s,6s,wides,noBalls,match_id
0,Pakistan Vs England,England,Ben Stokes,4,0,32,1,8.0,6,1,0,2,1,T20I # 1879
1,Pakistan Vs England,England,Chris Woakes,3,0,26,0,8.66,7,2,1,2,0,T20I # 1879
2,Pakistan Vs England,England,Sam Curran,4,0,12,3,3.0,15,0,0,0,0,T20I # 1879
3,Pakistan Vs England,England,Adil Rashid,4,1,22,2,5.5,10,1,0,1,0,T20I # 1879
4,Pakistan Vs England,England,Chris Jordan,4,0,27,2,6.75,9,3,0,0,0,T20I # 1879


In [16]:
df_bowling.to_csv('/content/t20_world_cup_bowling_summary.csv', index = False)

**Process Players Information**

In [2]:
with open('/content/t20_world_cup_player_info.json') as f:
    data = json.load(f)

In [3]:
df_players = pd.DataFrame(data)

print(df_players.shape)
df_players.head(10)

(1199, 6)


Unnamed: 0,name,team,battingStyle,bowlingStyle,playingRole,description
0,Mohammad Rizwan †,Pakistan,Right hand Bat,Right arm Medium,Wicketkeeper Batter,"For several years, it appeared Mohammad Rizwan..."
1,Babar Azam (c),Pakistan,Right hand Bat,Right arm Offbreak,Batter,"A right-hand, top-order batsman known for his ..."
2,Mohammad Haris,Pakistan,Right hand Bat,Right arm Offbreak,Wicketkeeper Batter,
3,Shan Masood,Pakistan,Left hand Bat,Right arm Medium fast,Opening Batter,A solid and technically sound left-hand opener...
4,Iftikhar Ahmed,Pakistan,Right hand Bat,Right arm Offbreak,Middle order Batter,"In 2023, making what seemed to be his nth come..."
5,Shadab Khan,Pakistan,Right hand Bat,Legbreak,Allrounder,"A prodigious turner of the ball, teenage legsp..."
6,Mohammad Nawaz,Pakistan,Left hand Bat,Slow Left arm Orthodox,Allrounder,Mohammad Nawaz started out in cricket at the a...
7,Mohammad Wasim,Pakistan,Right hand Bat,Right arm Fast medium,Bowling Allrounder,
8,Shaheen Shah Afridi,Pakistan,Left hand Bat,Left arm Fast,Bowler,"A baby face perched on a two-metre body, Shahe..."
9,Haris Rauf,Pakistan,Right hand Bat,Right arm Fast,Bowler,"By and large, the quixotic idea of a Pakistan ..."


In [4]:
df_players['name'] = df_players['name'].apply(lambda x: x.replace('â€', ''))
df_players['name'] = df_players['name'].apply(lambda x: x.replace('†', ''))
df_players['name'] = df_players['name'].apply(lambda x: x.replace('\xa0', ''))
df_players.head(10)

Unnamed: 0,name,team,battingStyle,bowlingStyle,playingRole,description
0,Mohammad Rizwan,Pakistan,Right hand Bat,Right arm Medium,Wicketkeeper Batter,"For several years, it appeared Mohammad Rizwan..."
1,Babar Azam(c),Pakistan,Right hand Bat,Right arm Offbreak,Batter,"A right-hand, top-order batsman known for his ..."
2,Mohammad Haris,Pakistan,Right hand Bat,Right arm Offbreak,Wicketkeeper Batter,
3,Shan Masood,Pakistan,Left hand Bat,Right arm Medium fast,Opening Batter,A solid and technically sound left-hand opener...
4,Iftikhar Ahmed,Pakistan,Right hand Bat,Right arm Offbreak,Middle order Batter,"In 2023, making what seemed to be his nth come..."
5,Shadab Khan,Pakistan,Right hand Bat,Legbreak,Allrounder,"A prodigious turner of the ball, teenage legsp..."
6,Mohammad Nawaz,Pakistan,Left hand Bat,Slow Left arm Orthodox,Allrounder,Mohammad Nawaz started out in cricket at the a...
7,Mohammad Wasim,Pakistan,Right hand Bat,Right arm Fast medium,Bowling Allrounder,
8,Shaheen Shah Afridi,Pakistan,Left hand Bat,Left arm Fast,Bowler,"A baby face perched on a two-metre body, Shahe..."
9,Haris Rauf,Pakistan,Right hand Bat,Right arm Fast,Bowler,"By and large, the quixotic idea of a Pakistan ..."


In [5]:
df_players[df_players['team'] == 'India']

Unnamed: 0,name,team,battingStyle,bowlingStyle,playingRole,description
29,KL Rahul,India,Right hand Bat,,Wicketkeeper Batter,KL Rahul player profile
30,Rohit Sharma(c),India,Right hand Bat,Right arm Offbreak,Top order Batter,Rohit Sharma player profile
31,Virat Kohli,India,Right hand Bat,Right arm Medium,Top order Batter,Virat Kohli player profile
32,Suryakumar Yadav,India,Right hand Bat,"Right arm Medium, Right arm Offbreak",Batter,Suryakumar Yadav player profile
33,Hardik Pandya,India,Right hand Bat,Right arm Medium fast,Allrounder,Hardik Pandya player profile
...,...,...,...,...,...,...
759,Arshdeep Singh,India,Left hand Bat,Left arm Medium fast,Bowler,Arshdeep Singh player profile
760,Mohammed Shami,India,Right hand Bat,Right arm Fast,Bowler,Mohammed Shami was India's leading fast bowler...
761,Hardik Pandya,India,Right hand Bat,Right arm Medium fast,Allrounder,Hardik Pandya player profile
762,Ravichandran Ashwin,India,Right hand Bat,Right arm Offbreak,Bowling Allrounder,R Ashwin player profile


In [6]:
df_players.to_csv('/content/t20_world_cup_player_info.csv', index = False)