In [1]:
import requests

url = "https://api.sportradar.com/tennis/trial/v3/en/double_competitors_rankings.json?api_key=T4dKFeUEd6HOp7vCzavuI5zobB0AIcrGzEXzujJs"

headers = {"accept": "application/json"}

response = requests.get(url, headers=headers)

print(response.text)

{"generated_at":"2025-04-18T20:38:29+00:00","rankings":[{"type_id":2,"name":"ATP","year":2025,"week":16,"gender":"men","competitor_rankings":[{"rank":1,"movement":0,"points":9440,"competitions_played":23,"competitor":{"id":"sr:competitor:49363","name":"Pavic, Mate","country":"Croatia","country_code":"HRV","abbreviation":"PAV"}},{"rank":1,"movement":0,"points":9440,"competitions_played":23,"competitor":{"id":"sr:competitor:51836","name":"Arevalo-Gonzalez, Marcelo","country":"El Salvador","country_code":"SLV","abbreviation":"ARE"}},{"rank":3,"movement":0,"points":7590,"competitions_played":26,"competitor":{"id":"sr:competitor:14898","name":"Heliovaara, Harri","country":"Finland","country_code":"FIN","abbreviation":"HEL"}},{"rank":4,"movement":0,"points":7590,"competitions_played":27,"competitor":{"id":"sr:competitor:637970","name":"Patten, Henry","country":"Great Britain","country_code":"GBR","abbreviation":"PAT"}},{"rank":5,"movement":0,"points":6460,"competitions_played":20,"competitor

In [2]:
# #2. Transform nested JSON structures into a flat relational schema for analysis.
import json
import pandas as pd

data=json.loads(response.text)

# #Flatten the competitor rankings JSON data into a list of dictionaries.
def transform_competitor_rankings(data):
    competitor_rankings=[]
    for ranking in data['rankings']:
        for comp in ranking['competitor_rankings']:
            competitor_rankings.append({
            'rank': comp['rank'],
            'movement': comp['movement'],
            'points': comp['points'],
            'competitions_played': comp['competitions_played'],
            'competitor_id': comp['competitor']['id']
        })
    df = pd.DataFrame(competitor_rankings)
    return df

competitor_rankings_df=transform_competitor_rankings(data).drop_duplicates()

def transform_competitor_table(data):
    competitors = {}
    for ranking in data["rankings"]:
        for comp in ranking["competitor_rankings"]:
            competitor_id=comp['competitor']['id']
            if competitor_id not in competitors:
                competitors[competitor_id]={
                'competitor_id': comp['competitor']["id"],
                'name': comp['competitor']["name"],
                'country': comp['competitor'].get('country',None),
                'country_code': comp['competitor'].get("country_code",None),
                'abbreviation': comp['competitor'].get("abbreviation",None),
                }
            
    return pd.DataFrame(competitors.values())

competitors_df=transform_competitor_table(data).drop_duplicates()

print("Missing Values in Competitor_rankings_df:\n", competitor_rankings_df.isnull().sum())
print("Missing Values in Competitors:\n", competitors_df.isnull().sum())

competitor_rankings_df.to_csv("competitor_rankings.csv", index=False)
competitors_df.to_csv("competitor.csv", index=False)

Missing Values in Competitor_rankings_df:
 rank                   0
movement               0
points                 0
competitions_played    0
competitor_id          0
dtype: int64
Missing Values in Competitors:
 competitor_id     0
name              0
country           0
country_code     62
abbreviation      0
dtype: int64


In [3]:
#3. Create a SQL database with well-designed schema (e.g., defining appropriate data types and primary keys).
import mysql.connector

mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password='',
    port=3307
)

cursor_db=mydb.cursor(buffered=True)

cursor_db.execute("CREATE DATABASE IF NOT EXISTS double_competitor_rankings_db")

cursor_db.execute("USE double_competitor_rankings_db")
cursor_db.execute("""
CREATE TABLE IF NOT EXISTS Competitors (
    competitor_id VARCHAR(50) PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    country VARCHAR(100) NOT NULL,
    country_code CHAR(3) NOT NULL,
    abbreviation VARCHAR(10) NOT NULL
)
""")

cursor_db.execute (""" 
CREATE TABLE IF NOT EXISTS Competitor_Rankings (
    rank_id INT PRIMARY KEY AUTO_INCREMENT,
    rank INT NOT NULL,
    movement INT NOT NULL,
    points INT NOT NULL,
    competitions_played INT NOT NULL,
    competitor_id VARCHAR(50),
    FOREIGN KEY (competitor_id) REFERENCES Competitors(competitor_id)
)
""")
# Applying Indexing for Faster Queries
try:
    cursor_db.execute("CREATE INDEX idx_competitor_rankings ON Competitor_Rankings(competitor_id)")
    print("Index idx_competitor_rankings created")
except mysql.connector.errors.ProgrammingError as e:
    if "Duplicate key name" in str(e):
        print("Index idx_competitor_rankings already exists, skipping creation")
    else:
        raise  # Re-raises the exception if it's not a duplicate key error

try:
    cursor_db.execute("CREATE INDEX idx_Competitors ON Competitors(competitor_id)")
    print("Index idx_Competitors created")
except mysql.connector.errors.ProgrammingError as e:
    if "Duplicate key name" in str(e):
        print("Index idx_Competitors already exists, skipping creation")
    else:
        raise

#Insert competitors Data
competitors_df = pd.read_csv("competitor.csv")  
competitors_df = competitors_df.dropna(subset=['country_code'])
competitors_df['country_code'] = competitors_df['country_code'].fillna('UNK')
competitors_df = competitors_df.where(pd.notnull(competitors_df), None)
competitors_df = competitors_df.drop_duplicates(subset=['competitor_id'])

for _, row in competitors_df.iterrows():
    cursor_db.execute("""
INSERT iGNORE INTO Competitors (competitor_id, name, country, country_code, abbreviation)
VALUES (%s, %s, %s, %s, %s)
""",
(
    row['competitor_id'],
    row['name'],
    row['country'],
    row['country_code'],
    row['abbreviation']
))

competitor_rankings_df = pd.read_csv("competitor_rankings.csv")
cursor_db.execute("SELECT competitor_id FROM Competitors")
valid_ids = set(row[0] for row in cursor_db.fetchall())
competitor_rankings_df = competitor_rankings_df[
    competitor_rankings_df['competitor_id'].isin(valid_ids)
]
for _, row in competitor_rankings_df.iterrows():
    cursor_db.execute(("""
INSERT INTO Competitor_Rankings (rank, movement, points, competitions_played, competitor_id)
VALUES (%s, %s, %s, %s, %s)
"""
),
(row['rank'],
        row['movement'],
        row['points'],
        row['competitions_played'],
        row['competitor_id']
))

print("Competitor_Rankings inserted successfully!")


# Insert into MySQL

mydb.commit()
print("Competitors inserted successfully!")





Index idx_competitor_rankings already exists, skipping creation
Index idx_Competitors already exists, skipping creation
Competitor_Rankings inserted successfully!
Competitors inserted successfully!


In [4]:
# 1. Get all competitors with their rank and points.
from tabulate import tabulate
cursor_db.execute("""
SELECT
    c.competitor_id,
    c.name,
    c.country,
    cr.rank,
    cr.points
FROM
    Competitors c
JOIN Competitor_Rankings cr ON c.competitor_id = cr.competitor_id
"""
)
results_21=cursor_db.fetchall()
print(tabulate(results_21, headers=[i[0] for i in cursor_db.description],  tablefmt='psql'))

+-----------------------+--------------------------------------+--------------------------+--------+----------+
| competitor_id         | name                                 | country                  |   rank |   points |
|-----------------------+--------------------------------------+--------------------------+--------+----------|
| sr:competitor:100069  | Sachko, Vitaliy                      | Ukraine                  |    137 |      591 |
| sr:competitor:100069  | Sachko, Vitaliy                      | Ukraine                  |    137 |      591 |
| sr:competitor:100317  | Turker, Mert Naci                    | Turkiye                  |    480 |      117 |
| sr:competitor:100317  | Turker, Mert Naci                    | Turkiye                  |    480 |      117 |
| sr:competitor:100341  | Bolkvadze, Mariam                    | Georgia                  |    398 |      169 |
| sr:competitor:100341  | Bolkvadze, Mariam                    | Georgia                  |    398 |    

In [5]:
# 2. Find competitors ranked in the top 5
cursor_db.execute("""
SELECT 
    c.name,
    cr.rank,
    cr.points
FROM 
    Competitors c
JOIN 
    Competitor_Rankings cr ON c.competitor_id = cr.competitor_id
WHERE 
    cr.rank <= 5
ORDER BY 
    cr.rank ASC;
"""
)
results_22=cursor_db.fetchall()
print(tabulate(results_22, headers=[i[0] for i in cursor_db.description],  tablefmt='psql'))

+---------------------------+--------+----------+
| name                      |   rank |   points |
|---------------------------+--------+----------|
| Arevalo-Gonzalez, Marcelo |      1 |     9440 |
| Siniakova, Katerina       |      1 |    10665 |
| Arevalo-Gonzalez, Marcelo |      1 |     9440 |
| Pavic, Mate               |      1 |     9440 |
| Siniakova, Katerina       |      1 |    10665 |
| Pavic, Mate               |      1 |     9440 |
| Townsend, Taylor          |      2 |     8825 |
| Townsend, Taylor          |      2 |     8825 |
| Heliovaara, Harri         |      3 |     7590 |
| Routliffe, Erin           |      3 |     7840 |
| Heliovaara, Harri         |      3 |     7590 |
| Routliffe, Erin           |      3 |     7840 |
| Patten, Henry             |      4 |     7590 |
| Ostapenko, Jelena         |      4 |     6775 |
| Patten, Henry             |      4 |     7590 |
| Ostapenko, Jelena         |      4 |     6775 |
| Dabrowski, Gabriela       |      5 |     5983 |


In [6]:
# 3. List competitors with no rank movement (stable rank)
cursor_db.execute("""
SELECT 
    c.name,
    cr.rank,
    cr.movement
FROM 
    Competitors c
JOIN 
    Competitor_Rankings cr ON c.competitor_id = cr.competitor_id
WHERE 
    cr.movement = 0
""")
results_23=cursor_db.fetchall()
print(tabulate(results_23, headers=[i[0] for i in cursor_db.description],  tablefmt='psql'))

+---------------------------------+--------+------------+
| name                            |   rank |   movement |
|---------------------------------+--------+------------|
| Adams, Julia                    |    404 |          0 |
| Adams, Julia                    |    404 |          0 |
| Huang, Yujia                    |    358 |          0 |
| Huang, Yujia                    |    358 |          0 |
| Pawlikowska,  Zuzanna           |    396 |          0 |
| Pawlikowska,  Zuzanna           |    396 |          0 |
| Bianchi, Juan Jose              |    429 |          0 |
| Bianchi, Juan Jose              |    429 |          0 |
| McAdoo, Rasheeda                |    163 |          0 |
| McAdoo, Rasheeda                |    163 |          0 |
| Ruse, Elena-Gabriela            |     48 |          0 |
| Ruse, Elena-Gabriela            |     48 |          0 |
| Bouzkova, Marie                 |     76 |          0 |
| Bouzkova, Marie                 |     76 |          0 |
| Ricca, Giorg

In [7]:
# 4. Get the total points of competitors from a specific country (e.g., Croatia)
cursor_db.execute("""
SELECT 
    c.country,
    SUM(cr.points) AS total_points
FROM 
    Competitors c
JOIN 
    Competitor_Rankings cr ON c.competitor_id = cr.competitor_id
WHERE 
    c.country = 'Croatia'
GROUP BY 
    c.country;
""")
results_24=cursor_db.fetchall()
print(tabulate(results_24, headers=[i[0] for i in cursor_db.description],  tablefmt='psql'))

+-----------+----------------+
| country   |   total_points |
|-----------+----------------|
| Croatia   |          33914 |
+-----------+----------------+


In [8]:
# 5. Count the number of competitors per country

cursor_db.execute("""
SELECT 
    country,
    COUNT(*) AS num_competitors
FROM 
    Competitors
GROUP BY 
    country
ORDER BY 
    num_competitors DESC""")
results_25=cursor_db.fetchall()
print(tabulate(results_25, headers=[i[0] for i in cursor_db.description],  tablefmt='psql'))

+--------------------------+-------------------+
| country                  |   num_competitors |
|--------------------------+-------------------|
| USA                      |               103 |
| Japan                    |                56 |
| France                   |                53 |
| Great Britain            |                50 |
| Australia                |                48 |
| Italy                    |                43 |
| Czechia                  |                40 |
| Netherlands              |                32 |
| China                    |                31 |
| Argentina                |                31 |
| Germany                  |                30 |
| India                    |                29 |
| Spain                    |                25 |
| Brazil                   |                23 |
| Switzerland              |                21 |
| Romania                  |                17 |
| Poland                   |                16 |
| Ukraine           

In [10]:
# 6. Find competitors with the highest points in the current week
cursor_db.execute("""
SELECT DISTINCT
    c.name,
    cr.points
FROM 
    Competitors c
JOIN 
    Competitor_Rankings cr ON c.competitor_id = cr.competitor_id
WHERE 
    cr.points = (SELECT MAX(points) FROM Competitor_Rankings);
""")
results_26=cursor_db.fetchall()
print(tabulate(results_26, headers=[i[0] for i in cursor_db.description],  tablefmt='psql'))

+---------------------+----------+
| name                |   points |
|---------------------+----------|
| Siniakova, Katerina |    10665 |
+---------------------+----------+
