# Introduction

- Webscrape https://www.tennislive.net for pro-level match data
- Output two .csv files 
    - Single row dataframe for summary statistics dashboard
    - Point-by-point (PBP) level .csv file for the Match Viewer website, so that timestamps can be attached using the website tagger

# Import Packages

In [1]:
import requests

from bs4 import BeautifulSoup

import pandas as pd

In [2]:
# Download one match page; its HTML is parsed in the next cell.
page = requests.get('https://www.tennislive.net/atp/match/bernard-tomic-VS-govind-nanda/m25-tulsa-2024/')

In [3]:
# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup pick whichever
# HTML parser happens to be installed, so the parsed tree could differ between environments.
# 'html.parser' is the parser extract_match_data uses for the same lookups further down.
soup = BeautifulSoup(page.text, 'html.parser')

### Check Status Code
- 200 = success
- 404 = failed

In [4]:
page.status_code

200

# Single Row For Summary Statistics Dashboard

### Match Info

In [5]:
# NOTE: `table` is not referenced by the lookups below, which search the whole page.
table = soup.find('table', class_='table_pmatches')

# Date (text before the first space) and round come from the `w50` cells
date = soup.find('td', class_='w50').text.strip().split(' ')[0]
round_info = soup.find_all('td', class_='w50')[1].text.strip()

# The first two `w130` cells are read as player 1 and player 2
player_cells = soup.find_all('td', class_='w130')
player1 = player_cells[0].text.strip()
player2 = player_cells[1].text.strip()

score = soup.find('span', id='score').text.strip()
tournament = soup.find('td', class_='w200').find('a').text.strip()

# Wrap every value in a one-element list so the DataFrame gets exactly one row
data = {
    column: [value]
    for column, value in (
        ('Date', date),
        ('Round', round_info),
        ('Player 1', player1),
        ('Player 2', player2),
        ('Score', score),
        ('Tournament', tournament),
    )
}

df_match_info = pd.DataFrame(data)

In [6]:
df_match_info

Unnamed: 0,Date,Round,Player 1,Player 2,Score,Tournament
0,21.06.24,1/4,Bernard Tomic,Govind Nanda,"6-3, 6-1",M25 Tulsa


### Match Statistics

In [7]:
# Function to extract data for a given statistic
def extract_statistic(statistic):
    """Look up one statistic on the page held in the global `soup`.

    The `td` whose string equals `statistic` locates the table row; the cells
    at index 1 and 2 of that row are read, keeping only the text that comes
    before the first space in each.

    Returns:
        tuple: (player 1 value, player 2 value), both as strings.
    """
    def first_token(cell):
        return cell.text.strip().split(' ')[0]

    stat_cells = soup.find('td', string=statistic).parent.find_all('td')
    return first_token(stat_cells[1]), first_token(stat_cells[2])

# Labels that extract_statistic looks up verbatim on the page
statistics = [
    '1st SERVE %',
    '1st SERVE POINTS WON',
    '2nd SERVE POINTS WON',
    'BREAK POINTS WON',
    'TOTAL RETURN POINTS WON',
    'TOTAL POINTS WON',
    'DOUBLE FAULTS',
    'ACES',
]

# One `<name>_player1` / `<name>_player2` pair of entries per statistic
data = {}

for stat in statistics:
    player1_stat, player2_stat = extract_statistic(stat)

    # Build a snake_case column prefix from the label, one substitution at a time
    stat_name = stat.lower()
    for old, new in ((' ', '_'), ('%', 'percentage'), ('/', '_'), ('(', ''), (')', '')):
        stat_name = stat_name.replace(old, new)

    data.update({
        f'{stat_name}_player1': player1_stat,
        f'{stat_name}_player2': player2_stat,
    })

# A list holding the single dict yields a one-row DataFrame
df_stats = pd.DataFrame([data])

In [8]:
df_stats

Unnamed: 0,1st_serve_percentage_player1,1st_serve_percentage_player2,1st_serve_points_won_player1,1st_serve_points_won_player2,2nd_serve_points_won_player1,2nd_serve_points_won_player2,break_points_won_player1,break_points_won_player2,total_return_points_won_player1,total_return_points_won_player2,total_points_won_player1,total_points_won_player2,double_faults_player1,double_faults_player2,aces_player1,aces_player2
0,33/70,28/48,27/33,16/28,16/37,8/20,5/7,1/10,24/48,27/70,67/118,51/118,2,1,7,0


### Combine the DataFrames by column binding them


In [9]:
# Combine the DataFrames by column binding them
# (both frames hold a single row with index 0, so axis=1 places them side by side)
df_combined = pd.concat([df_match_info, df_stats], axis=1)

df_combined

Unnamed: 0,Date,Round,Player 1,Player 2,Score,Tournament,1st_serve_percentage_player1,1st_serve_percentage_player2,1st_serve_points_won_player1,1st_serve_points_won_player2,...,break_points_won_player1,break_points_won_player2,total_return_points_won_player1,total_return_points_won_player2,total_points_won_player1,total_points_won_player2,double_faults_player1,double_faults_player2,aces_player1,aces_player2
0,21.06.24,1/4,Bernard Tomic,Govind Nanda,"6-3, 6-1",M25 Tulsa,33/70,28/48,27/33,16/28,...,5/7,1/10,24/48,27/70,67/118,51/118,2,1,7,0


In [10]:

# Match information, statistics and the combined single-row frame, all in one cell.

# --- Match information ---
date = soup.find('td', class_='w50').text.strip().split(' ')[0]
round_info = soup.find_all('td', class_='w50')[1].text.strip()
player_cells = soup.find_all('td', class_='w130')
player1 = player_cells[0].text.strip()
player2 = player_cells[1].text.strip()
score = soup.find('span', id='score').text.strip()
tournament = soup.find('td', class_='w200').find('a').text.strip()

# Each value is wrapped in a one-element list so the frame has exactly one row
match_data = {
    column: [value]
    for column, value in (
        ('Date', date),
        ('Round', round_info),
        ('Player 1', player1),
        ('Player 2', player2),
        ('Score', score),
        ('Tournament', tournament),
    )
}
df_match_info = pd.DataFrame(match_data)


# --- Match statistics ---
def extract_statistic(statistic):
    """Return the (player 1, player 2) values of one statistic from the global `soup`.

    The row is located through the `td` whose string equals `statistic`; the
    cells at index 1 and 2 are read and cut at their first space.
    """
    def first_token(cell):
        return cell.text.strip().split(' ')[0]

    stat_cells = soup.find('td', string=statistic).parent.find_all('td')
    return first_token(stat_cells[1]), first_token(stat_cells[2])


# Labels looked up verbatim on the page
statistics = [
    '1st SERVE %',
    '1st SERVE POINTS WON',
    '2nd SERVE POINTS WON',
    'BREAK POINTS WON',
    'TOTAL RETURN POINTS WON',
    'TOTAL POINTS WON',
    'DOUBLE FAULTS',
    'ACES',
]

# Character substitutions that turn a label into a snake_case column prefix
column_name_table = str.maketrans({' ': '_', '%': 'percentage', '/': '_', '(': '', ')': ''})

stat_data = {}
for stat in statistics:
    player1_stat, player2_stat = extract_statistic(stat)
    stat_name = stat.lower().translate(column_name_table)
    stat_data[f'{stat_name}_player1'] = player1_stat
    stat_data[f'{stat_name}_player2'] = player2_stat

# A list holding the single dict yields a one-row DataFrame
df_stats = pd.DataFrame([stat_data])

# --- Column-bind match info and statistics ---
df_combined = pd.concat([df_match_info, df_stats], axis=1)

df_combined

Unnamed: 0,Date,Round,Player 1,Player 2,Score,Tournament,1st_serve_percentage_player1,1st_serve_percentage_player2,1st_serve_points_won_player1,1st_serve_points_won_player2,...,break_points_won_player1,break_points_won_player2,total_return_points_won_player1,total_return_points_won_player2,total_points_won_player1,total_points_won_player2,double_faults_player1,double_faults_player2,aces_player1,aces_player2
0,21.06.24,1/4,Bernard Tomic,Govind Nanda,"6-3, 6-1",M25 Tulsa,33/70,28/48,27/33,16/28,...,5/7,1/10,24/48,27/70,67/118,51/118,2,1,7,0


# Extract Data from Multiple Links 

In [11]:
import pandas as pd

import requests

from bs4 import BeautifulSoup

In [12]:

# Stats-table labels scraped for every match, in output-column order.
_MATCH_STATISTICS = (
    '1st SERVE %',
    '1st SERVE POINTS WON',
    '2nd SERVE POINTS WON',
    'BREAK POINTS WON',
    'TOTAL RETURN POINTS WON',
    'TOTAL POINTS WON',
    'DOUBLE FAULTS',
    'ACES',
)

# Leading columns of every result row.
_MATCH_INFO_COLUMNS = ['Date', 'Round', 'Player 1', 'Player 2', 'Score', 'Tournament']


def _stat_column_name(statistic):
    """Turn a stats-table label into a snake_case column prefix."""
    return (
        statistic.lower()
        .replace(' ', '_')
        .replace('%', 'percentage')
        .replace('/', '_')
        .replace('(', '')
        .replace(')', '')
    )


def _reverse_score(score):
    """Swap the two game counts of every set in a comma-separated score string."""
    reversed_sets = []
    for set_score in score.split(','):
        games = set_score.strip().split('-')
        if len(games) >= 2:
            reversed_sets.append(f'{games[1]}-{games[0]}')
        else:
            # Nothing to swap (e.g. an empty score); keep the text as scraped
            reversed_sets.append(set_score.strip())
    return ', '.join(reversed_sets)


def extract_match_data(player_name, urls):
    """Scrape one summary row per match URL, oriented around `player_name`.

    For every URL the page is downloaded and the date, round, players, score,
    tournament and the statistics in `_MATCH_STATISTICS` are read. When the
    first listed player is not `player_name`, the two players, the per-set
    score and every statistic are swapped so that `player_name` ends up in
    the "player 1" columns.

    NOTE: the swap is triggered whenever the first listed player differs from
    `player_name`, so `player_name` must match the page text exactly.

    Args:
        player_name (str): Player name exactly as shown on the match page.
        urls (iterable of str): Match page URLs.

    Returns:
        pandas.DataFrame: One row per URL, sorted by `Date` in ascending
        order. `Date` is parsed from `dd.mm.yy`; statistics stay strings as
        scraped. An empty `urls` gives an empty frame with the same columns.

    Raises:
        requests.HTTPError: If a page responds with an error status.
    """
    stat_columns = [
        f'{_stat_column_name(stat)}_{side}'
        for stat in _MATCH_STATISTICS
        for side in ('player1', 'player2')
    ]
    columns = _MATCH_INFO_COLUMNS + stat_columns

    data_list = []

    for url in urls:
        # Fetch HTML content from the URL; fail fast instead of parsing an error page
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract match information
        info_cells = soup.find_all('td', class_='w50')
        player_cells = soup.find_all('td', class_='w130')
        date = info_cells[0].text.strip().split(' ')[0]
        round_info = info_cells[1].text.strip()
        player1 = player_cells[0].text.strip()
        player2 = player_cells[1].text.strip()
        score = soup.find('span', id='score').text.strip()
        tournament = soup.find('td', class_='w200').find('a').text.strip()

        # Put `player_name` first: swap names and reverse every set of the score
        swap_sides = player1 != player_name
        if swap_sides:
            player1, player2 = player2, player1
            score = _reverse_score(score)

        # Extract statistics, swapping each pair when the players were swapped
        stat_data = {}
        for stat in _MATCH_STATISTICS:
            cells = soup.find('td', string=stat).parent.find_all('td')
            player1_stat = cells[1].text.strip().split(' ')[0]
            player2_stat = cells[2].text.strip().split(' ')[0]
            if swap_sides:
                player1_stat, player2_stat = player2_stat, player1_stat
            stat_name = _stat_column_name(stat)
            stat_data[f'{stat_name}_player1'] = player1_stat
            stat_data[f'{stat_name}_player2'] = player2_stat

        # Append match data to the list
        data_list.append({
            'Date': date,
            'Round': round_info,
            'Player 1': player1,
            'Player 2': player2,
            'Score': score,
            'Tournament': tournament,
            **stat_data,
        })

    # Passing `columns` keeps the schema stable even when no URL was given
    df = pd.DataFrame(data_list, columns=columns)

    # Sort DataFrame by Date in ascending order (oldest match first)
    df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%y')

    return df.sort_values(by='Date', ascending=True).reset_index(drop=True)


### INPUT (`player_name`) and (`urls`) HERE

In [13]:
# Example usage:
# `player_name` is compared with the first listed player on each page to decide
# whether the players, score and statistics need to be swapped.
player_name = "Rudy Quan"

# One match page per entry; the result is sorted by date, so the order here does not matter.
# In the fourth URL Rudy Quan is the second listed player.
urls = [
    'https://www.tennislive.net/atp/match/rudy-quan-VS-shintaro-imai/little-rock-challenger-2024/',
    'https://www.tennislive.net/atp/match/rudy-quan-VS-filip-peliwo/little-rock-challenger-2024/',
    'https://www.tennislive.net/atp/match/rudy-quan-VS-stefan-kozlov/little-rock-challenger-2024/',
    'https://www.tennislive.net/atp/match/yuta-shimizu-VS-rudy-quan/little-rock-challenger-2024/',
    'https://www.tennislive.net/atp/match/rudy-quan-VS-andres-andrade/little-rock-challenger-2024/'
]

df = extract_match_data(player_name, urls)
df

Unnamed: 0,Date,Round,Player 1,Player 2,Score,Tournament,1st_serve_percentage_player1,1st_serve_percentage_player2,1st_serve_points_won_player1,1st_serve_points_won_player2,...,break_points_won_player1,break_points_won_player2,total_return_points_won_player1,total_return_points_won_player2,total_points_won_player1,total_points_won_player2,double_faults_player1,double_faults_player2,aces_player1,aces_player2
0,2024-05-26,q 1,Rudy Quan,Shintaro Imai,"4-6, 6-4, 6-2",Little Rock,55/76,54/118,36/55,34/54,...,6/19,4/6,55/118,30/76,101/194,93/194,2,9,0,15
1,2024-05-27,qual.,Rudy Quan,Filip Peliwo,"6-4, 6-2",Little Rock,36/51,23/49,20/36,12/23,...,7/7,4/8,32/49,25/51,58/100,42/100,1,4,0,1
2,2024-05-28,1st round,Rudy Quan,Stefan Kozlov,"6-1, 7-5",Little Rock,42/62,41/58,25/42,19/41,...,6/11,3/6,34/58,27/62,69/120,51/120,1,3,0,0
3,2024-05-30,2nd round,Rudy Quan,Andres Andrade,"6-3, 3-6, 6-3",Little Rock,59/84,44/89,32/59,25/44,...,8/17,6/9,48/89,42/84,90/173,83/173,7,10,1,3
4,2024-05-31,1/4,Rudy Quan,Yuta Shimizu,"4-6, 3-6",Little Rock,64/80,37/55,29/64,21/37,...,4/7,7/16,25/55,44/80,61/135,74/135,2,1,0,4


# Point by Point (PBP) Dataframe

In [14]:
# Download the match page used for the point-by-point examples below.
page = requests.get('https://www.tennislive.net/atp/match/yuta-shimizu-VS-rudy-quan/little-rock-challenger-2024/')

In [15]:
# NOTE(review): 'html' names a markup type rather than a specific parser, so BeautifulSoup
# chooses among the installed parsers — TODO confirm which parser the table indexing below expects.
soup = BeautifulSoup(page.text, 'html')

### Check Status Code
- 200 = success
- 404 = failed

In [16]:
page.status_code

200

### Find HTML Tags that Have Point By Point (PBP) Data

The `table` tag with the class `table_stats_match` contains all the info needed for the point by point data.

In [17]:
# Every `table_stats_match` table on the page, as a list
table = soup.find_all('table', class_ = 'table_stats_match')
# (uncomment to inspect the table that the next cell uses for the first set)
# table[1]

### First Set HTML

In [18]:
# The table at index 1 is used as the first set.
# `class_=None` presumably keeps only the `tr` rows that have no class attribute — TODO confirm.
set_1 = soup.find_all('table', class_='table_stats_match')[1].find_all('tr', class_=None)
set_1

[<tr><td class="mp_serve" width="40%"></td><td class="mp_info_txt" width="18%">0-0</td><td class="mp_serve" width="40%">Rudy Quan <img alt="Rudy Quan serve" height="8" src="https://www.tennislive.net/styles/images/tennis_ball.gif" title="Rudy Quan serve" width="8"/></td></tr>,
 <tr><td class="mp_15" colspan="3" width="99%">0-0, 0-15, 15-15, 15-30, 30-30, 30-40, 40-40, A-40<span title="Break point">[BP]</span>, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A</td></tr>,
 <tr><td class="mp_serve" width="40%">Yuta Shimizu <img alt="Yuta Shimizu serve" height="8" src="https://www.tennislive.net/styles/images/tennis_ball.gif" title="Yuta Shimizu serve" width="8"/></td><td class="mp_info_txt" width="18%">0-1</td><td class="mp_serve" width="40%"></td></tr>,
 <tr><td class="mp_15" colspan="3" width="99%">0-0, 0-15, 0-30, 15-30, 15-40<span title="Break point">[BP]</span></td></tr>,
 <tr><td class="mp_serve" width="40%"></td><td class="mp_info_txt" width="18%">0-2</td

### Example of Getting Server of First Game

In [19]:
# The alt text reads "<name> serve": drop only the last word so that names with
# more than two words are kept whole.
' '.join(set_1[0].find('img')['alt'].split()[:-1])

'Rudy Quan'

### Example of Extracting PBP From First Game

In [20]:
set_1[1].find('td')

<td class="mp_15" colspan="3" width="99%">0-0, 0-15, 15-15, 15-30, 30-30, 30-40, 40-40, A-40<span title="Break point">[BP]</span>, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A, 40-40, 40-A</td>

Here we replace `[BP]`, which signifies a "break point" in the game, with an empty string to keep the point data consistent. Then we can simply split the string into the individual points.

In [21]:
set_1[1].find('td').text.replace('[BP]', '').split(', ')

['0-0',
 '0-15',
 '15-15',
 '15-30',
 '30-30',
 '30-40',
 '40-40',
 'A-40',
 '40-40',
 '40-A',
 '40-40',
 '40-A',
 '40-40',
 '40-A',
 '40-40',
 '40-A',
 '40-40',
 '40-A',
 '40-40',
 '40-A']

### Example of Creating a PBP DataFrame from Set 1

In [22]:
# initialize lists to hold each of the designated values
server_names = []
point_scores = []
game_scores = []

# iterate over all the `tr` tags in set_1
# step over by 2 since each server row is followed by that game's PBP row
for i in range(0, len(set_1), 2):
    # The alt text reads "<name> serve": drop only the last word so that names
    # with more than two words are not cut short
    server_name = ' '.join(set_1[i].find('img')['alt'].split()[:-1])
    game_score = set_1[i].find('td', class_='mp_info_txt').text.strip()
    points = set_1[i + 1].find('td').text.replace('[BP]', '').split(', ')

    # create a row for each point in the game
    point_scores.extend(points)
    server_names.extend([server_name] * len(points))
    game_scores.extend([game_score] * len(points))


In [23]:
# construct the dataframe
df = pd.DataFrame({
    'point_score': point_scores,
    'server_name': server_names,
    'game_score': game_scores
})

# replace the last known score with 0-0 since the score
# doesn't update between sets.
# Only the `game_score` column is touched: DataFrame.replace would also rewrite
# any `point_score` value that happens to equal the last game score.
last_game_score = df['game_score'].iloc[-1]
df.loc[df['game_score'] == last_game_score, 'game_score'] = '0-0'
df

Unnamed: 0,point_score,server_name,game_score
0,0-0,Rudy Quan,0-0
1,0-15,Rudy Quan,0-0
2,15-15,Rudy Quan,0-0
3,15-30,Rudy Quan,0-0
4,30-30,Rudy Quan,0-0
...,...,...,...
75,15-0,Rudy Quan,0-0
76,30-0,Rudy Quan,0-0
77,30-15,Rudy Quan,0-0
78,40-15,Rudy Quan,0-0


### Find and Denote Each Player to Correctly Swap the Scores

We do this because the PBP is always written in the order `player1`-`player2`. To make the dataframe easier to read, we swap the point and game scores whenever the server is `player2`, so that the server's points always come first.

In [24]:
# NOTE: `table` is not referenced by the lookups below, which search the whole page.
table = soup.find('table', class_='table_pmatches')

# The first two `w130` cells are read as player 1 and player 2, in page order
name_cells = soup.find_all('td', class_='w130')
player1 = name_cells[0].text.strip()
player2 = name_cells[1].text.strip()

(player1, player2)

('Yuta Shimizu', 'Rudy Quan')

In [25]:
df.iloc[3]

point_score        15-30
server_name    Rudy Quan
game_score           0-0
Name: 3, dtype: object

In [26]:
'-'.join(df['point_score'][3].split('-')[::-1])

'30-15'

In [27]:
# Reverse "a-b" to "b-a" in both score columns on the rows where player2 is serving.
# NOTE: this rewrites `df` in place, so running the cell a second time flips the scores back.
served_by_player2 = df['server_name'] == player2
for score_column in ('point_score', 'game_score'):
    flipped = df[score_column].map(lambda score: '-'.join(score.split('-')[::-1]))
    df[score_column] = flipped.where(served_by_player2, df[score_column])

In [28]:
df

Unnamed: 0,point_score,server_name,game_score
0,0-0,Rudy Quan,0-0
1,15-0,Rudy Quan,0-0
2,15-15,Rudy Quan,0-0
3,30-15,Rudy Quan,0-0
4,30-30,Rudy Quan,0-0
...,...,...,...
75,0-15,Rudy Quan,0-0
76,0-30,Rudy Quan,0-0
77,15-30,Rudy Quan,0-0
78,15-40,Rudy Quan,0-0


### Generalize the Scraping to Apply to All Sets

We'll add a new column to keep track of the set number as well to help with readability in the dataframe.

In [29]:
# 2 set example
# NOTE(review): same URL as the page fetched at the start of the PBP section; the
# extract_point_by_point function below downloads the page itself from the URL it is given.
page = requests.get('https://www.tennislive.net/atp/match/yuta-shimizu-VS-rudy-quan/little-rock-challenger-2024/')

In [30]:
# NOTE(review): 'html' names a markup type rather than a specific parser, so BeautifulSoup
# chooses among the installed parsers — TODO confirm which parser is expected here.
soup = BeautifulSoup(page.text, 'html')

In [31]:
def extract_point_by_point(url):
    """Scrape one match page into a point-by-point DataFrame.

    Every `table_stats_match` table except the first is treated as one set.
    Inside a table, rows are read in pairs: a server row (server name taken
    from the image alt text, game score from the `mp_info_txt` cell) followed
    by a row listing that game's points.

    Args:
        url (str): Match page URL.

    Returns:
        pandas.DataFrame: One row per point with the columns `point_score`,
        `server_name`, `game_score` and `set_num`. On rows where the second
        listed player serves, both scores are reversed ("a-b" -> "b-a").

    Raises:
        requests.HTTPError: If the page responds with an error status.
    """
    # get HTML from url and convert to BeautifulSoup
    page = requests.get(url)
    page.raise_for_status()  # stop early instead of parsing an error page
    # NOTE(review): 'html' leaves the parser choice to BeautifulSoup; the table
    # indexing below may depend on that choice — confirm before pinning a parser.
    soup = BeautifulSoup(page.text, 'html')

    # get all set data (the first matching table is skipped)
    content = soup.find_all('table', class_='table_stats_match')[1:]

    # initialize list to hold data for all sets
    set_dfs = []
    last_set_index = len(content) - 1

    for j, set_table in enumerate(content):
        # initialize lists to hold each of the designated values
        server_names = []
        point_scores = []
        game_scores = []

        # get all table data for the current set
        current_set = set_table.find_all('tr', class_=None)

        # determine starting tag by how the table is formatted: when the second
        # row is not a points row, the server/points pairs start one row later
        start = 0 if current_set[1].find('td', class_='mp_15') else 1

        # iterate over all the `tr` tags in the current set
        # step over by 2 since each server row is followed by that game's PBP row
        for i in range(start, len(current_set), 2):
            # alt text reads "<name> serve": everything but the last word is the name
            server_name = ' '.join(current_set[i].find('img')['alt'].split()[0:-1])
            game_score = current_set[i].find('td', class_='mp_info_txt').text.strip()
            points = current_set[i + 1].find('td').text.replace('[BP]', '').split(', ')

            # one entry per point of the game
            server_names.extend([server_name] * len(points))
            point_scores.extend(points)
            game_scores.extend([game_score] * len(points))

        # construct the dataframe
        df = pd.DataFrame({
            'point_score': point_scores,
            'server_name': server_names,
            'game_score': game_scores,
            'set_num': [j + 1] * len(point_scores)
        })

        # rows that share the game score of the table's last row
        in_last_game = df['game_score'] == df['game_score'].iloc[-1]

        # update set number to match if not final set
        if j != last_set_index:
            df.loc[in_last_game, 'set_num'] += 1

        # replace the last known score with 0-0 since the score doesn't update between sets, unless tiebreak
        if df['game_score'].iloc[-1] != '6-6':
            df.loc[in_last_game, 'game_score'] = '0-0'

        # add the df to set_dfs
        set_dfs.append(df)

    df = pd.concat(set_dfs).reset_index(drop=True)

    # find the second listed player; scores are written player1-player2
    player2 = soup.find_all('td', class_='w130')[1].text.strip()

    def _flip(score):
        return '-'.join(score.split('-')[::-1])

    # flip point_score and game_score of player2 so the server's side reads first
    served_by_player2 = df['server_name'] == player2
    for column in ('point_score', 'game_score'):
        df[column] = df[column].map(_flip).where(served_by_player2, df[column])

    return df

In [32]:
url = 'https://www.tennislive.net/atp/match/yuta-shimizu-VS-rudy-quan/little-rock-challenger-2024/'
extract_point_by_point(url)

Unnamed: 0,point_score,server_name,game_score,set_num
0,0-0,Rudy Quan,0-0,1
1,15-0,Rudy Quan,0-0,1
2,15-15,Rudy Quan,0-0,1
3,30-15,Rudy Quan,0-0,1
4,30-30,Rudy Quan,0-0,1
...,...,...,...,...
128,0-15,Rudy Quan,0-0,2
129,0-30,Rudy Quan,0-0,2
130,0-40,Rudy Quan,0-0,2
131,15-40,Rudy Quan,0-0,2


### More Examples

A few other match URLs are run below to check that the function handles different numbers of sets and tiebreaks.

In [33]:
url = 'https://www.tennislive.net/atp/match/jorge-plans-VS-mikel-lopez-hernaez/m25-bakio-2024/'
extract_point_by_point(url)

Unnamed: 0,point_score,server_name,game_score,set_num
0,0-0,Jorge Plans,0-0,1
1,15-0,Jorge Plans,0-0,1
2,30-0,Jorge Plans,0-0,1
3,30-15,Jorge Plans,0-0,1
4,30-30,Jorge Plans,0-0,1
...,...,...,...,...
198,4-8,Mikel Lopez Hernaez,0-0,3
199,4-9,Mikel Lopez Hernaez,0-0,3
200,5-9,Mikel Lopez Hernaez,0-0,3
201,6-9,Mikel Lopez Hernaez,0-0,3


In [34]:
url = 'https://www.tennislive.net/atp/match/mitchell-krueger-VS-abedallah-shelbayh/little-rock-challenger-2024/'
extract_point_by_point(url)

Unnamed: 0,point_score,server_name,game_score,set_num
0,0-0,Abedallah Shelbayh,0-0,1
1,0-15,Abedallah Shelbayh,0-0,1
2,15-15,Abedallah Shelbayh,0-0,1
3,30-15,Abedallah Shelbayh,0-0,1
4,40-15,Abedallah Shelbayh,0-0,1
...,...,...,...,...
157,0-15,Mitchell Krueger,0-0,3
158,15-15,Mitchell Krueger,0-0,3
159,30-15,Mitchell Krueger,0-0,3
160,30-30,Mitchell Krueger,0-0,3


# Example Dataframe

In [35]:
import pandas as pd

# Column names of the target summary table, one per line
columns = [
    "Date",
    "Match",
    "Duration",
    "Player Name",
    "Total serves",
    "Aces",
    "1st Serve In %",
    "2nd Serve In %",
    "1st Serve Won %",
    "2nd Serve Won %",
    "Double Faults",
    "1st serve Ad",
    "1st serve De",
    "1st serve Ad %",
    "1st serve De %",
    "2nd serve Ad %",
    "2nd serve De %",
    "Double Fault Ad %",
    "Double Fault De %",
    "Average Rally Count",
    "3 Shot Rally Count",
    "Break Points",
    "Break Points Won",
    "Break Points Won %",
    "Total Points Won on Serve",
    "Break Points Saved %",
    "Total Groundstrokes",
    "Groundstrokes Won",
    "Total Returns",
    "Total Returns Won",
    "Volley Count",
    "Volley Winner Count",
    "At Net Count",
    "Total Slices",
    "Number of Dropshots",
    "Forehand/Backhand Errors (Count)",
]

# A single placeholder row (index 0) in which every column is still missing
df = pd.DataFrame(index=[0], columns=columns)



In [36]:
list(df)

['Date',
 'Match',
 'Duration',
 'Player Name',
 'Total serves',
 'Aces',
 '1st Serve In %',
 '2nd Serve In %',
 '1st Serve Won %',
 '2nd Serve Won %',
 'Double Faults',
 '1st serve Ad',
 '1st serve De',
 '1st serve Ad %',
 '1st serve De %',
 '2nd serve Ad %',
 '2nd serve De %',
 'Double Fault Ad %',
 'Double Fault De %',
 'Average Rally Count',
 '3 Shot Rally Count',
 'Break Points',
 'Break Points Won',
 'Break Points Won %',
 'Total Points Won on Serve',
 'Break Points Saved %',
 'Total Groundstrokes',
 'Groundstrokes Won',
 'Total Returns',
 'Total Returns Won',
 'Volley Count',
 'Volley Winner Count',
 'At Net Count',
 'Total Slices',
 'Number of Dropshots',
 'Forehand/Backhand Errors (Count)']