# Renaming the CSV files in the dataset
---

In [1]:
import re
from os import listdir, rename
from os.path import join

import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

##### URL: https://www.kaggle.com/datasets/deepcontractor/ipl-2021-ball-by-ball-dataset
##### *delete [ALL_2021_IPL_MATCHES_BALL_BY_BALL.csv]
#### *Manually rename the files that share the same date (after running this notebook)
#### For reference, use https://www.cricbuzz.com/cricket-series/3472/indian-premier-league-2021/matches
#### *Point Table is added manually

In [2]:
# Relative folder ('dataset/raw') whose entries are listed and renamed by the cells below.
path = join('dataset', 'raw')

In [3]:
# Collect only the match CSV files and sort them, so the listing is
# deterministic across operating systems and stray entries in the folder
# (e.g. '.ipynb_checkpoints') cannot break the file-name parsing later on.
files = sorted(
    join(path, entry)
    for entry in listdir(path)
    if entry.lower().endswith('.csv')
)
files[:5]

['dataset\\raw\\CSK vs DC 2021-04-10.csv',
 'dataset\\raw\\CSK vs DC 2021-10-04.csv',
 'dataset\\raw\\CSK vs KKR 2021-04-21.csv',
 'dataset\\raw\\CSK vs KKR 2021-10-15.csv',
 'dataset\\raw\\CSK vs MI 2021-05-01.csv']

In [4]:
# Team codes used in the raw file names that differ from the official codes.
team_aliases = {'SH': 'SRH', 'PK': 'PBKS'}

# Maps the position of each file in `files` to the teams and date parsed
# from its name, which is expected to look like 'CSK vs DC 2021-04-10.csv'.
files_list = dict()

for index, file in enumerate(files):
    # Take the last path component whichever separator the OS uses, instead
    # of assuming Windows back-slashes and a fixed directory depth.
    file_name = re.split(r'[\\/]', file)[-1]
    # Tokens: [team1, 'vs', team2, date, extension]
    split_name = re.split(r'[ (.)]', file_name)
    team_1_code, team_2_code, match_date = split_name[0], split_name[2], split_name[3]
    files_list[index] = {
        # Translate whole team tokens only, never arbitrary substrings of the path.
        'Team1': team_aliases.get(team_1_code, team_1_code),
        'Team2': team_aliases.get(team_2_code, team_2_code),
        'dates': match_date
    }

files_list[0]

{'Team1': 'CSK', 'Team2': 'DC', 'dates': '2021-04-10'}

In [5]:
# Page that is downloaded and parsed by the following cells.
url = 'https://www.business-standard.com/sports/ipl-2021/schedule'

In [6]:
# Download the schedule page. A timeout keeps the notebook from hanging on a
# stalled connection, and raise_for_status() stops here on an HTTP error
# instead of silently parsing an error page into an empty team list.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [7]:
# All <h2 class="title3"> elements of the page, in document order.
names = soup.find_all('h2', {'class': 'title3'})

# Headings at even positions go to team_1, headings at odd positions to team_2.
team_1 = [heading.text for heading in names[0::2]]
team_2 = [heading.text for heading in names[1::2]]

In [8]:
# Pair the two team lists element-wise into '<team_1> vs <team_2>' labels.
team_3 = [f'{first} vs {second}' for first, second in zip(team_1, team_2)]

team_3[:5]

['MI vs RCB', 'CSK vs DC', 'SRH vs KKR', 'RR vs PBKS', 'KKR vs MI']

In [9]:
# One row per raw file. The astype result must be assigned: calling
# DataFrame.astype without keeping its return value changes nothing.
files_df = pd.DataFrame(list(files_list.values())).astype({
    'Team1': 'category',
    'Team2': 'category',
})

# Remember the original position of every file so the rows can be put back
# in the order of `files` after the chronological sort.
files_df['check_index'] = np.arange(len(files_df))

# Index by real timestamps for the chronological sort, but keep the 'dates'
# column as the original 'YYYY-MM-DD' string because it is embedded verbatim
# in the new file names further down.
files_df = files_df.set_index(pd.to_datetime(files_df['dates']))

# A stable sort keeps matches played on the same date in their original
# relative order, so the result is reproducible between runs.
files_df = files_df.sort_index(kind='stable')

# The scraped fixtures are attached purely by position, so the schedule must
# contain exactly one entry per file.
if len(team_3) != len(files_df):
    raise ValueError(
        f'Schedule has {len(team_3)} fixtures but {len(files_df)} files were found'
    )
files_df['TvT'] = team_3

files_df.head()

Unnamed: 0_level_0,Team1,Team2,dates,check_index,TvT
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-04-09,MI,RCB,2021-04-09,28,MI vs RCB
2021-04-10,CSK,DC,2021-04-10,0,CSK vs DC
2021-04-11,KKR,SRH,2021-04-11,22,SRH vs KKR
2021-04-12,PBKS,RR,2021-04-12,37,RR vs PBKS
2021-04-13,MI,KKR,2021-04-13,25,KKR vs MI


In [10]:
# Build the new file name of every match in the current row order of
# files_df: 'Match NN (<fixture>) <date>.csv', numbered from 01.
# Columns are addressed by label rather than by integer position, so the
# names stay correct if the column order of files_df ever changes.
match_name = [
    f"Match {match_no:02d} ({fixture}) {match_date}.csv"
    for match_no, (fixture, match_date) in enumerate(
        zip(files_df['TvT'], files_df['dates']), start=1
    )
]
match_name[:5]

['Match 01 (MI vs RCB) 2021-04-09.csv',
 'Match 02 (CSK vs DC) 2021-04-10.csv',
 'Match 03 (SRH vs KKR) 2021-04-11.csv',
 'Match 04 (RR vs PBKS) 2021-04-12.csv',
 'Match 05 (KKR vs MI) 2021-04-13.csv']

In [11]:
# Attach the generated names, then order the rows by 'check_index' again.
files_df = (
    files_df
    .assign(**{'Match Name': match_name})
    .sort_values('check_index')
)
files_df.head()

Unnamed: 0_level_0,Team1,Team2,dates,check_index,TvT,Match Name
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-10,CSK,DC,2021-04-10,0,CSK vs DC,Match 02 (CSK vs DC) 2021-04-10.csv
2021-10-04,CSK,DC,2021-10-04,1,DC vs CSK,Match 50 (DC vs CSK) 2021-10-04.csv
2021-04-21,CSK,KKR,2021-04-21,2,KKR vs CSK,Match 15 (KKR vs CSK) 2021-04-21.csv
2021-10-15,CSK,KKR,2021-10-15,3,CSK vs KKR,Match 60 (CSK vs KKR) 2021-10-15.csv
2021-05-01,CSK,MI,2021-05-01,4,MI vs CSK,Match 27 (MI vs CSK) 2021-05-01.csv


In [12]:
# Prefix every new match name with the dataset folder to get the target paths.
new_names = files_df['Match Name'].to_list()
new_file_names = []
for match_file in new_names:
    new_file_names.append(join(path, match_file))
new_file_names[:5]

['dataset\\raw\\Match 02 (CSK vs DC) 2021-04-10.csv',
 'dataset\\raw\\Match 50 (DC vs CSK) 2021-10-04.csv',
 'dataset\\raw\\Match 15 (KKR vs CSK) 2021-04-21.csv',
 'dataset\\raw\\Match 60 (CSK vs KKR) 2021-10-15.csv',
 'dataset\\raw\\Match 27 (MI vs CSK) 2021-05-01.csv']

In [13]:
# Rename each original file to the new name at the same position.
for old_path, new_path in zip(files, new_file_names):
    rename(old_path, new_path)

---