In [1]:
import requests
import pandas as pd
import json
import numpy as np
import os
from datetime import datetime
import importlib

# SkillCorner Python Client
from skillcorner.client import SkillcornerClient

# Import credentials from credentials.py file
# Use importlib to reload in case credentials were updated
import credentials
importlib.reload(credentials)
from credentials import username, password

# Remove leading/trailing whitespace from both credentials, then build the
# SkillCorner API client with the cleaned values.
username, password = username.strip(), password.strip()
client = SkillcornerClient(username=username, password=password)

# 1.0 Get J1 League Matches for 2024 Season

In [2]:
# Fetch every competition this account can see and tabulate it
competitions = client.get_competitions()
competitions_df = pd.DataFrame(competitions)

print("Available competitions:")
print(competitions_df[['name', 'id']].to_string())

# Candidate J1 League rows: case-insensitive regex match on the competition
# name; rows with a missing name count as non-matches (na=False).
is_j1_name = competitions_df['name'].str.contains('J1|Japan', case=False, na=False)
j1_competitions = competitions_df[is_j1_name]
print(f"\n\nJ1 League competitions found:")
print(j1_competitions[['name', 'id']])

Available competitions:
                                                   name   id
0              AFC Olympic Qualification Women Playoffs  117
1                                     AFC U17 Asian Cup  493
2                                        AFF Suzuki Cup  115
3                               ASEAN Club Championship  445
4                                             Asian Cup   35
5                               Asian Cup Qualification  521
6                                      CAFA Nations Cup  602
7                                      Champions League  147
8                                  Champions League Two  448
9                        EAFF E-1 Football Championship  182
10                                  Gulf Cup of Nations  225
11                                       U-23 Asian Cup  588
12                                WC Qualification Asia  181
13                           West Asia Championship U23  373
14                             Women's Champions League  567


# Set the J1 League competition ID (update this based on the output above)
# You may need to manually set this after seeing the available competitions
# Take the id of the first matching row, or None when the name filter found nothing.
j1_competition_id = None if j1_competitions.empty else j1_competitions.iloc[0]['id']

# Compare against None explicitly: a plain truthiness test would treat a
# valid id of 0 as "not found".
if j1_competition_id is not None:
    print(f"Using competition ID: {j1_competition_id}")
else:
    print("Please manually set j1_competition_id based on the competitions list above")

In [3]:
# Get all matches for J1 League 2024 season
# NOTE(review): the competition id is hard-coded here (not taken from
# j1_competition_id) and no season filter is passed, so the "2024 season"
# wording relies on what this id returns -- confirm.
params = {'competition': 87}

matches = client.get_matches(params=params)
matches_df = pd.DataFrame(matches)

print(f"Found {len(matches_df)} matches for J1 League 2024 season")
print("\nSample matches:")
preview_columns = ['id', 'date_time', 'home_team', 'away_team']
print(matches_df[preview_columns].head(10))

Found 380 matches for J1 League 2024 season

Sample matches:
        id             date_time  \
0  1900931  2024-12-08T05:00:00Z   
1  1901679  2024-12-08T05:00:00Z   
2  1901680  2024-12-08T05:00:00Z   
3  1901681  2024-12-08T05:00:00Z   
4  1901682  2024-12-08T05:00:00Z   
5  1901683  2024-12-08T05:00:00Z   
6  1901684  2024-12-08T05:00:00Z   
7  1901685  2024-12-08T05:00:00Z   
8  1901686  2024-12-08T05:00:00Z   
9  1901687  2024-12-08T05:00:00Z   

                                       home_team  \
0          {'id': 1473, 'short_name': 'Sapporo'}   
1  {'id': 1460, 'short_name': 'Kashima Antlers'}   
2       {'id': 1459, 'short_name': 'Urawa Reds'}   
3            {'id': 1465, 'short_name': 'Tokyo'}   
4         {'id': 1469, 'short_name': 'Kawasaki'}   
5          {'id': 1458, 'short_name': 'Marinos'}   
6      {'id': 2660, 'short_name': 'Kyoto Sanga'}   
7      {'id': 1463, 'short_name': 'Gamba Osaka'}   
8       {'id': 929, 'short_name': 'Vissel Kobe'}   
9       {'id': 1470, '

In [4]:
# Parse the kick-off timestamps; values that cannot be parsed become NaT
# (errors="coerce") and are ignored by min()/max().
match_dates = pd.to_datetime(matches_df["date_time"], errors="coerce")
earliest_match, latest_match = match_dates.min(), match_dates.max()
print(f"Min date: {earliest_match}")
print(f"Max date: {latest_match}")

Min date: 2024-02-23 05:00:00+00:00
Max date: 2024-12-08 05:00:00+00:00


In [None]:
# 2.0 Extract Tracking Data for All Matches

In [5]:
# Directory that receives one tracking file per match; created if missing
output_dir = 'data/tracking_j1_2024'
os.makedirs(output_dir, exist_ok=True)

print(
    f"Will save tracking data to: {output_dir}",
    f"Total matches to process: {len(matches_df)}",
    sep="\n",
)

Will save tracking data to: data/tracking_j1_2024
Total matches to process: 380


In [8]:
# Extract tracking data for all matches
def _team_label(team):
    """Return a printable team name from a match record's team field.

    The match listing printed earlier in this notebook shows team dicts that
    carry only 'id' and 'short_name', so looking up 'name' alone always
    yielded 'Unknown'. 'name' is still preferred when present; 'short_name'
    is the fallback.
    """
    if not isinstance(team, dict):
        return 'Unknown'
    return team.get('name') or team.get('short_name') or 'Unknown'


def _result_record(match, home_team, away_team, status, **extra):
    """Build one row of the extraction summary (extra holds 'filepath' or 'error')."""
    record = {
        'match_id': match['id'],
        'home_team': home_team,
        'away_team': away_team,
        'date': match.get('date_time'),
        'status': status,
    }
    record.update(extra)
    return record


results = []
total_matches = len(matches_df)

# enumerate() drives the progress counter so it does not depend on the
# DataFrame's index labels.
for position, (_, match) in enumerate(matches_df.iterrows(), start=1):
    match_id = match['id']
    home_team = _team_label(match.get('home_team'))
    away_team = _team_label(match.get('away_team'))

    # Check if file already exists
    filepath = os.path.join(output_dir, f"{match_id}_tracking_extrapolated.jsonl")

    if os.path.exists(filepath):
        print(f"\n[{position}/{total_matches}] Match ID: {match_id} - Already exists, skipping")
        results.append(_result_record(match, home_team, away_team, 'success', filepath=filepath))
        continue

    print(f"\n[{position}/{total_matches}] Processing Match ID: {match_id}")
    print(f"   {home_team} vs {away_team}")

    # Download to a temporary name and rename only once the download call
    # has returned. An interrupted run therefore never leaves a partial file
    # under the final name, which the existence check above would otherwise
    # report as an already-extracted match.
    partial_path = f"{filepath}.part"

    try:
        # Save tracking data using SkillCorner client
        # The data_version=3 parameter gets the extrapolated tracking data
        client.save_match_tracking_data(
            match_id,
            params={'data_version': 3},
            filepath=partial_path
        )
        os.replace(partial_path, filepath)

        print(f"   ✓ Saved to {filepath}")
        results.append(_result_record(match, home_team, away_team, 'success', filepath=filepath))

    except Exception as e:
        # Best effort: record the failure and carry on with the next match.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"   ✗ Error: {str(e)}")
        results.append(_result_record(match, home_team, away_team, 'failed', error=str(e)))

# Create results summary
results_df = pd.DataFrame(results)

# Count from the list so the summary also works when no match was processed
# (an empty DataFrame has no 'status' column).
successful_count = sum(record['status'] == 'success' for record in results)
failed_count = sum(record['status'] == 'failed' for record in results)

print("\n" + "="*70)
print("EXTRACTION COMPLETE")
print("="*70)
print(f"Total matches: {len(results_df)}")
print(f"Successful: {successful_count}")
print(f"Failed: {failed_count}")



[1/380] Match ID: 1900931 - Already exists, skipping

[2/380] Match ID: 1901679 - Already exists, skipping

[3/380] Match ID: 1901680 - Already exists, skipping

[4/380] Match ID: 1901681 - Already exists, skipping

[5/380] Match ID: 1901682 - Already exists, skipping

[6/380] Match ID: 1901683 - Already exists, skipping

[7/380] Match ID: 1901684 - Already exists, skipping

[8/380] Match ID: 1901685 - Already exists, skipping

[9/380] Match ID: 1901686 - Already exists, skipping

[10/380] Match ID: 1901687 - Already exists, skipping

[11/380] Match ID: 1888503 - Already exists, skipping

[12/380] Match ID: 1886285 - Already exists, skipping

[13/380] Match ID: 1886286 - Already exists, skipping

[14/380] Match ID: 1886287 - Already exists, skipping

[15/380] Match ID: 1886288 - Already exists, skipping

[16/380] Match ID: 1886289 - Already exists, skipping

[17/380] Match ID: 1886290 - Already exists, skipping

[18/380] Match ID: 1886291 - Already exists, skipping

[19/380] Match ID:

KeyboardInterrupt: 

In [None]:
# Persist the per-match extraction outcome as CSV (row index omitted)
summary_file = 'data/j1_2024_extraction_summary.csv'
results_df.to_csv(path_or_buf=summary_file, index=False)
print(f"\n✓ Extraction summary saved to {summary_file}")

# Show the summary table as the cell's output
results_df

### Extracting phases of play data


In [None]:
# Phases-of-play dynamic-events endpoint for match 1900931, requested as CSV.
# The URL is only defined here; nothing in this cell sends the request.
request_URL = (
    "https://skillcorner.com/api/match/1900931/dynamic_events/phases_of_play/"
    "?file_format=csv&ignore_dynamic_events_check=false"
)


# 3.0 Extract Match Metadata for All Matches

In [None]:
# Extract match metadata (player info, team info, etc.)
metadata_dir = 'data/metadata_j1_2024'
os.makedirs(metadata_dir, exist_ok=True)

metadata_results = []
total_matches = len(matches_df)

# enumerate() drives the progress counter so it does not depend on the
# DataFrame's index labels.
for position, (_, match) in enumerate(matches_df.iterrows(), start=1):
    match_id = match['id']
    filepath = os.path.join(metadata_dir, f"{match_id}_metadata.json")

    # Skip matches whose metadata is already on disk, mirroring the tracking
    # extraction loop, so a re-run does not fetch every match again.
    if os.path.exists(filepath):
        print(f"[{position}/{total_matches}] Match ID: {match_id} - Already exists, skipping")
        metadata_results.append({
            'match_id': match_id,
            'status': 'success',
            'filepath': filepath
        })
        continue

    print(f"[{position}/{total_matches}] Getting metadata for Match ID: {match_id}")

    # Write to a temporary name first so a failure while serialising never
    # leaves a truncated JSON file under the final name.
    partial_path = f"{filepath}.part"

    try:
        # Get match metadata
        match_metadata = client.get_match(match_id)

        # Save metadata to JSON file
        with open(partial_path, 'w', encoding='utf-8') as f:
            json.dump(match_metadata, f, indent=2)
        os.replace(partial_path, filepath)

        print(f"   ✓ Saved metadata to {filepath}")

        metadata_results.append({
            'match_id': match_id,
            'status': 'success',
            'filepath': filepath
        })

    except Exception as e:
        # Best effort: record the failure and continue with the next match.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"   ✗ Error: {str(e)}")
        metadata_results.append({
            'match_id': match_id,
            'status': 'failed',
            'error': str(e)
        })

print(f"\n✓ Metadata extraction complete")
print(f"Successful: {sum(r['status'] == 'success' for r in metadata_results)}")
print(f"Failed: {sum(r['status'] == 'failed' for r in metadata_results)}")

# 4.0 Load and Process Sample Tracking Data

In [None]:
# Load a sample tracking file (following the tutorial pattern)
# Requires results_df from the tracking extraction cell.
successful_matches = results_df[results_df['status'] == 'success']

if len(successful_matches) > 0:
    sample_match = successful_matches.iloc[0]
    sample_file = sample_match['filepath']
    match_id = sample_match['match_id']

    print(f"Loading sample tracking data from:")
    print(f"  Match ID: {match_id}")
    print(f"  File: {sample_file}")

    # Load tracking data (following tutorial pattern): one JSON object per line
    raw_data = pd.read_json(sample_file, lines=True)

    # Process tracking data (following tutorial): one output row per entry of
    # each record's 'player_data' list, with the listed record-level fields
    # repeated on every row.
    tracking_df = pd.json_normalize(
        raw_data.to_dict('records'),
        'player_data',
        ['frame', 'timestamp', 'period', 'possession', 'ball_data']
    )

    # Extract possession info (None when the possession field is not a dict)
    tracking_df['possession_player_id'] = tracking_df['possession'].apply(lambda x: x.get('player_id') if isinstance(x, dict) else None)
    tracking_df['possession_group'] = tracking_df['possession'].apply(lambda x: x.get('group') if isinstance(x, dict) else None)

    # Expand ball_data
    # NOTE(review): columns are matched by position, so this assumes every
    # ball_data dict lists its keys in x, y, z, is_detected order -- confirm.
    tracking_df[['ball_x', 'ball_y', 'ball_z', 'is_detected_ball']] = pd.json_normalize(tracking_df.ball_data)

    # Clean up
    tracking_df = tracking_df.drop(columns=['possession', 'ball_data'])
    tracking_df['match_id'] = match_id

    # Rows are per player per frame, not per frame, so report them as such.
    print(f"\n✓ Loaded {len(tracking_df)} tracking rows (one per player per frame)")
    print(f"\nSample data:")
    # A bare expression inside an if-block is never rendered by Jupyter;
    # print the preview explicitly.
    print(tracking_df.head())
else:
    print("No successful tracking data extractions found")

# 5.0 Process Match Metadata (Player Info)

In [1]:
# Load corresponding metadata file
# Depends on earlier cells: the imports cell (os, json, pd, np), metadata_dir
# from the metadata extraction cell and match_id from the sample-loading cell.
# Running it on a fresh kernel without them fails with a NameError.
metadata_file = os.path.join(metadata_dir, f"{match_id}_metadata.json")

if os.path.exists(metadata_file):
    with open(metadata_file, 'r') as f:
        match_metadata = json.load(f)

    # Process metadata (following tutorial pattern)
    raw_match_df = pd.json_normalize(match_metadata, max_level=2)
    raw_match_df['home_team_side'] = raw_match_df['home_team_side'].astype(str)

    # Extract player information: one row per entry of 'players', with the
    # listed match-level fields repeated on every row.
    players_df = pd.json_normalize(
        raw_match_df.to_dict('records'),
        record_path='players',
        meta=['home_team_score', 'away_team_score', 'date_time',
              'home_team_side',
              'home_team.name', 'home_team.id',
              'away_team.name', 'away_team.id']
    )

    # Helper function for time conversion
    def time_to_seconds(time_str):
        """Convert an 'H:M:S' string to seconds.

        A missing value (None or NaN) is treated as 90 minutes.
        """
        if pd.isna(time_str):
            return 90 * 60
        h, m, s = map(int, time_str.split(':'))
        return h * 3600 + m * 60 + s

    # Process players who actually played (drop rows with neither a start nor
    # an end time). copy() makes the result an independent frame, so the
    # column assignments below do not raise SettingWithCopyWarning.
    played_mask = ~((players_df.start_time.isna()) & (players_df.end_time.isna()))
    players_df = players_df[played_mask].copy()
    players_df['total_time'] = players_df['end_time'].apply(time_to_seconds) - players_df['start_time'].apply(time_to_seconds)

    # Add flags
    is_home_player = players_df.team_id == players_df['home_team.id']
    players_df['is_gk'] = players_df['player_role.acronym'] == 'GK'
    players_df['home_away_player'] = np.where(is_home_player, 'Home', 'Away')
    players_df['team_name'] = np.where(is_home_player,
                                        players_df['home_team.name'],
                                        players_df['away_team.name'])

    print(f"✓ Loaded metadata for {len(players_df)} players")
    print(f"\nPlayers overview:")
    # A bare expression inside an if-block is never rendered by Jupyter;
    # print the overview explicitly.
    print(players_df[['short_name', 'number', 'team_name', 'player_role.name', 'total_time']].head(10))
else:
    print(f"Metadata file not found: {metadata_file}")

NameError: name 'os' is not defined

# All J1 League 2024 tracking data has been extracted!

The data is now saved in:
- **Tracking data**: `data/tracking_j1_2024/` (one .jsonl file per match)
- **Metadata**: `data/metadata_j1_2024/` (one .json file per match)
- **Summary**: `data/j1_2024_extraction_summary.csv`

You can now process each match's tracking data following the patterns shown in the SkillCorner tutorial.