In [1]:
# ===================================================================
# CELL 1: Setup and Configuration (UPDATED)
# ===================================================================

import json
import os
import time
from datetime import datetime

import pandas as pd
import requests

print("="*70)
print("TFT DATA COLLECTION - RIOT GAMES API")
print("="*70)
print(f"Execution Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*70)

# ===================================================================
# CONFIGURATION PARAMETERS
# ===================================================================

# API Configuration
# The key is read from the RIOT_API_KEY environment variable so the secret
# never lives in the notebook source, its saved outputs, or version control.
# Any key that was ever committed in plain text should be regenerated.
# The placeholder fallback keeps the "not configured" warning below reachable.
API_KEY = os.environ.get("RIOT_API_KEY", "PASTE-YOUR-API-KEY-HERE")
REGION = "sea"  # Southeast Asia routing value

# Player Information
GAME_NAME = "Wintermelon"
TAG_LINE = "Ella"

# Your PUUID (already retrieved)
MY_PUUID = "JIByjp0oJk1zzKDopJHvnUsjtXcHvec-B04Kt5l-Tb7rxjzKupQ_R6X1WHHHAqHxTr1RBkE6wmvByQ"

print(f"\n‚úÖ Player Configuration:")
print(f"   Name: {GAME_NAME}#{TAG_LINE}")
print(f"   Region: {REGION}")
print(f"   PUUID: {MY_PUUID}")

# Data Collection Parameters
NUM_MATCHES = 100  # Number of recent matches to fetch
RATE_LIMIT_DELAY = 1.2  # Seconds between API calls (stay under 100 req/2min)

print(f"\nüìä Collection Settings:")
print(f"   Matches to fetch: {NUM_MATCHES}")
print(f"   Rate limit delay: {RATE_LIMIT_DELAY}s per request")
print(f"   Estimated time: ~{NUM_MATCHES * RATE_LIMIT_DELAY / 60:.1f} minutes")

# Verify API key is set (an empty environment variable counts as "not set")
if not API_KEY or "PASTE-YOUR" in API_KEY:
    print("\n‚ö†Ô∏è WARNING: API_KEY not updated!")
    print("   Get a fresh key from: https://developer.riotgames.com/")
    print("   Then set the RIOT_API_KEY environment variable and re-run this cell")
else:
    # Only the length is printed so the key itself never appears in cell output.
    print(f"\n‚úÖ API Key configured (length: {len(API_KEY)} chars)")

print("="*70)

TFT DATA COLLECTION - RIOT GAMES API
Execution Time: 2026-02-16 22:32:28

‚úÖ Player Configuration:
   Name: Wintermelon#Ella
   Region: sea
   PUUID: JIByjp0oJk1zzKDopJHvnUsjtXcHvec-B04Kt5l-Tb7rxjzKupQ_R6X1WHHHAqHxTr1RBkE6wmvByQ

üìä Collection Settings:
   Matches to fetch: 100
   Rate limit delay: 1.2s per request
   Estimated time: ~2.0 minutes

‚úÖ API Key configured (length: 42 chars)


In [2]:
# ===================================================================
# CELL 2: Fetch Match ID List
# ===================================================================

# Section banner for the match-history step, emitted in a single call.
print("\n" + "=" * 70, "STEP 1: FETCHING MATCH HISTORY", "=" * 70, sep="\n")

def get_match_ids(puuid, api_key, region, count=100, timeout=10):
    """
    Fetch list of recent match IDs for a player.

    Parameters:
    -----------
    puuid : str
        Player's unique identifier
    api_key : str
        Valid Riot API key
    region : str
        Region routing value
    count : int
        Number of recent matches to fetch (max 100)
    timeout : float
        Seconds to wait for the HTTP response before giving up

    Returns:
    --------
    list
        List of match ID strings. An empty list is returned on any failure
        (non-200 status, network error, or a body that is not valid JSON);
        the reason is printed.
    """
    url = f"https://{region}.api.riotgames.com/tft/match/v1/matches/by-puuid/{puuid}/ids"
    headers = {"X-Riot-Token": api_key}
    params = {"count": count}

    print(f"\nFetching match history for PUUID...")
    print(f"  Endpoint: {url}")
    print(f"  Requesting: {count} matches")

    try:
        # Without a timeout a stalled connection would block the kernel forever.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"\n‚ùå Request failed: {exc}")
        return []

    if response.status_code == 200:
        try:
            return response.json()
        except ValueError as exc:
            # A 200 with a non-JSON body (e.g. a proxy error page) is a failure.
            print(f"\n‚ùå Could not decode response body: {exc}")
            return []

    print(f"\n‚ùå Error {response.status_code}: {response.text}")
    if response.status_code == 401:
        print("\n‚ö†Ô∏è 401 Unauthorized - Your API key expired!")
        print("   Get a new key at: https://developer.riotgames.com/")
        print("   Update API_KEY in Cell 1 and re-run")
    return []

# Retrieve the most recent match IDs for the configured player.
match_ids = get_match_ids(MY_PUUID, API_KEY, REGION, NUM_MATCHES)

if not match_ids:
    # An empty list means the request failed; later cells have nothing to fetch.
    print("\n‚ùå Failed to retrieve match IDs")
    print("‚ö†Ô∏è Cannot proceed without match IDs")
else:
    print(f"\n‚úÖ Successfully retrieved {len(match_ids)} match IDs")
    print("\nSample match IDs:")
    # Show the first five IDs, then the last one when the list is longer.
    for position, sample_id in enumerate(match_ids[:5], start=1):
        print(f"   {position}. {sample_id}")
    if len(match_ids) > 5:
        print("   ...")
        print(f"   {len(match_ids)}. {match_ids[-1]}")

print("=" * 70)


STEP 1: FETCHING MATCH HISTORY

Fetching match history for PUUID...
  Endpoint: https://sea.api.riotgames.com/tft/match/v1/matches/by-puuid/JIByjp0oJk1zzKDopJHvnUsjtXcHvec-B04Kt5l-Tb7rxjzKupQ_R6X1WHHHAqHxTr1RBkE6wmvByQ/ids
  Requesting: 100 matches

‚úÖ Successfully retrieved 100 match IDs

Sample match IDs:
   1. SG2_133897261
   2. SG2_133864236
   3. SG2_133849997
   4. SG2_133352026
   5. SG2_133178865
   ...
   100. SG2_128196943


In [3]:
# ===================================================================
# CELL 3: Fetch Detailed Match Data
# ===================================================================

# Section banner for the detail-fetch step, with a rough duration estimate
# based on one rate-limit delay per match.
print(
    "\n" + "=" * 70,
    "STEP 2: FETCHING DETAILED MATCH DATA",
    "=" * 70,
    f"This will fetch data for {len(match_ids)} matches...",
    f"Estimated time: ~{len(match_ids) * RATE_LIMIT_DELAY / 60:.1f} minutes",
    "=" * 70,
    sep="\n",
)

def get_match_details(match_id, api_key, region, max_retries=5, timeout=10):
    """
    Fetch detailed match data including all participants.

    Parameters:
    -----------
    match_id : str
        Unique match identifier
    api_key : str
        Valid Riot API key
    region : str
        Region routing value
    max_retries : int
        Maximum number of retries after a 429 (rate limited) response
    timeout : float
        Seconds to wait for each HTTP response before giving up

    Returns:
    --------
    dict or None
        Parsed JSON body of the match on success. None on any non-200,
        non-429 status, on a network error, on an undecodable body, or when
        the request is still rate limited after `max_retries` retries.
    """
    url = f"https://{region}.api.riotgames.com/tft/match/v1/matches/{match_id}"
    headers = {"X-Riot-Token": api_key}

    # A bounded loop replaces unbounded recursion so sustained rate limiting
    # cannot exhaust the call stack or spin forever.
    for attempt in range(max_retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            return None

        if response.status_code == 200:
            try:
                return response.json()
            except ValueError:
                return None

        if response.status_code != 429:
            return None

        if attempt == max_retries:
            break

        # Prefer the server-provided Retry-After delay; fall back to 2 seconds
        # when the header is missing or not a number.
        try:
            wait_seconds = max(0.0, float(response.headers.get("Retry-After")))
        except (TypeError, ValueError):
            wait_seconds = 2.0

        print(f"\n‚ö†Ô∏è Rate limited. Waiting {wait_seconds:g} seconds...")
        time.sleep(wait_seconds)

    return None

# Fetch all match data
all_participants = []
successful = 0
failed = 0

# Match-level fields copied onto every participant row so each row is
# self-describing once the data is flattened into a table.
match_info_fields = (
    'game_datetime',
    'game_length',
    'game_version',
    'tft_set_number',
    'tft_game_type',
    'queue_id',
)

start_time = time.time()
total_matches = len(match_ids)

for i, match_id in enumerate(match_ids):
    # Fetch match data
    match_data = get_match_details(match_id, API_KEY, REGION)
    info = match_data.get('info') if match_data else None

    if info:
        successful += 1

        # .get() is used so a match that lacks an optional field yields None
        # for that column instead of aborting the whole collection run.
        match_fields = {'match_id': match_id}
        match_fields.update({field: info.get(field) for field in match_info_fields})

        # Extract all participants; each participant dict already carries its
        # own 'puuid', and the match-level fields are appended after it.
        for participant in info.get('participants', []):
            all_participants.append({**participant, **match_fields})
    else:
        failed += 1

    # Rate limiting
    time.sleep(RATE_LIMIT_DELAY)

    # Progress indicator, printed after the request and delay so the counters
    # and the ETA reflect fully completed iterations.
    completed = i + 1
    progress_pct = completed / total_matches * 100
    elapsed = time.time() - start_time
    eta = (elapsed / completed) * (total_matches - completed)

    print(f"Progress: {completed}/{total_matches} ({progress_pct:.1f}%) | "
          f"Elapsed: {elapsed/60:.1f}min | ETA: {eta/60:.1f}min | "
          f"Success: {successful} | Failed: {failed}   ", end='\r')

print()  # New line after progress
total_time = time.time() - start_time

print(f"\n" + "="*70)
print("DATA COLLECTION COMPLETE")
print("="*70)
print(f"‚úÖ Successful matches: {successful}/{len(match_ids)}")
print(f"‚ùå Failed matches: {failed}/{len(match_ids)}")
print(f"‚è±Ô∏è  Total time: {total_time/60:.2f} minutes")
print(f"üìä Total participant records: {len(all_participants)}")
print(f"   Expected: {successful * 8} (8 players per match)")

if len(all_participants) == successful * 8:
    print(f"\n‚úÖ Data integrity check: PASSED")
else:
    print(f"\n‚ö†Ô∏è Data integrity check: Some participant data missing")

print("="*70)


STEP 2: FETCHING DETAILED MATCH DATA
This will fetch data for 100 matches...
Estimated time: ~2.0 minutes


DATA COLLECTION COMPLETE
‚úÖ Successful matches: 100/100
‚ùå Failed matches: 0/100
‚è±Ô∏è  Total time: 2.72 minutes
üìä Total participant records: 800
   Expected: 800 (8 players per match)

‚úÖ Data integrity check: PASSED


In [4]:
# ===================================================================
# CELL 4: Create and Validate Dataset
# ===================================================================

print("\n" + "="*70)
print("STEP 3: CREATING DATASET")
print("="*70)

# Convert to DataFrame (one row per participant per match)
df_complete = pd.DataFrame(all_participants)

# Fail early with a clear message instead of an opaque KeyError further down
# when the collection step produced nothing usable.
if df_complete.empty or 'placement' not in df_complete.columns:
    raise ValueError(
        "No participant records with a 'placement' field were collected; "
        "re-run the fetch cells before building the dataset."
    )

print(f"\n‚úÖ DataFrame created successfully")

# Add derived columns
df_complete['win'] = df_complete['placement'] == 1
df_complete['top_4'] = df_complete['placement'] <= 4

print(f"‚úÖ Added derived columns: 'win' and 'top_4'")

# Checked once up front so every use of the column below is guarded.
has_puuid = 'puuid' in df_complete.columns

# Dataset validation
print(f"\n" + "="*70)
print("DATASET STRUCTURE")
print("="*70)
print(f"  Total rows: {len(df_complete)}")
print(f"  Total columns: {len(df_complete.columns)}")
print(f"  Unique matches: {df_complete['match_id'].nunique()}")
if has_puuid:
    print(f"  Unique players (PUUIDs): {df_complete['puuid'].nunique()}")

# Verify PUUID column exists
print(f"\n" + "="*70)
print("PUUID VERIFICATION")
print("="*70)

if has_puuid:
    print(f"‚úÖ 'puuid' column is present in dataset")

    # Count YOUR matches
    my_data = df_complete[df_complete['puuid'] == MY_PUUID]
    print(f"‚úÖ Your matches found: {len(my_data)} rows")

    if len(my_data) > 0:
        print(f"\n" + "="*70)
        print("YOUR PERFORMANCE PREVIEW")
        print("="*70)
        print(f"  Total Games: {len(my_data)}")
        print(f"  Average Placement: {my_data['placement'].mean():.2f}")
        print(f"  Median Placement: {my_data['placement'].median():.1f}")
        print(f"  Win Rate: {my_data['win'].mean()*100:.1f}%")
        print(f"  Top 4 Rate: {my_data['top_4'].mean()*100:.1f}%")
        print(f"  Best Placement: {my_data['placement'].min()}")
        print(f"  Worst Placement: {my_data['placement'].max()}")

        # Placement distribution: count each finishing position once, then
        # report all eight places so positions never achieved show as 0.
        placement_counts = my_data['placement'].value_counts()
        print(f"\n  Placement Distribution:")
        for place in range(1, 9):
            count = int(placement_counts.get(place, 0))
            pct = count / len(my_data) * 100
            print(f"    Place {place}: {count} ({pct:.1f}%)")

else:
    print(f"‚ùå 'puuid' column not found in dataset. Cannot filter your matches.")

print("="*70)


STEP 3: CREATING DATASET

‚úÖ DataFrame created successfully
‚úÖ Added derived columns: 'win' and 'top_4'

DATASET STRUCTURE
  Total rows: 800
  Total columns: 24
  Unique matches: 100
  Unique players (PUUIDs): 604

PUUID VERIFICATION
‚úÖ 'puuid' column is present in dataset
‚úÖ Your matches found: 100 rows

YOUR PERFORMANCE PREVIEW
  Total Games: 100
  Average Placement: 4.39
  Median Placement: 5.0
  Win Rate: 22.0%
  Top 4 Rate: 46.0%
  Best Placement: 1
  Worst Placement: 8

  Placement Distribution:
    Place 1: 22 (22.0%)
    Place 2: 8 (8.0%)
    Place 3: 10 (10.0%)
    Place 4: 6 (6.0%)
    Place 5: 13 (13.0%)
    Place 6: 15 (15.0%)
    Place 7: 16 (16.0%)
    Place 8: 10 (10.0%)


In [5]:
# ===================================================================
# CELL 5: Save Datasets
# ===================================================================

print("\n" + "="*70)
print("STEP 4: SAVING DATASETS")
print("="*70)

# Save complete dataset (all 8 players from all matches)
filename_complete = 'tft_complete_dataset.csv'
df_complete.to_csv(filename_complete, index=False)
n_unique_matches = df_complete['match_id'].nunique()
print(f"\n‚úÖ Saved: {filename_complete}")
print(f"   Size: {len(df_complete)} rows √ó {len(df_complete.columns)} columns")
print(f"   Contains: All 8 players from {n_unique_matches} matches")

# Filter and save YOUR data only
df_my_matches = df_complete[df_complete['puuid'] == MY_PUUID].copy()
df_my_matches = df_my_matches.reset_index(drop=True)

# Every statistic below is undefined for an empty frame (mean() is NaN and
# int(NaN) raises), so stop with an explicit message instead.
if df_my_matches.empty:
    raise ValueError("No rows match MY_PUUID; cannot build the personal dataset.")

filename_filtered = 'tft_my_matches_only.csv'
df_my_matches.to_csv(filename_filtered, index=False)
print(f"\n‚úÖ Saved: {filename_filtered}")
print(f"   Size: {len(df_my_matches)} rows √ó {len(df_my_matches.columns)} columns")
print(f"   Contains: ONLY your match data (filtered by PUUID)")
print(f"   ‚≠ê USE THIS FILE FOR YOUR ANALYSIS ‚≠ê")

# Compute the performance figures once so the metadata file, the text file and
# the printed summary always agree with the data that was actually collected.
avg_placement = float(df_my_matches['placement'].mean())
median_placement = float(df_my_matches['placement'].median())
win_rate_pct = float(df_my_matches['win'].mean() * 100)
top4_rate_pct = float(df_my_matches['top_4'].mean() * 100)

# Baselines for an 8-player lobby with uniformly random placement.
expected_win_rate_pct = 100 / 8
expected_top4_rate_pct = 50.0

# Save metadata for documentation
metadata = {
    'collection_info': {
        'date': datetime.now().isoformat(),
        # total_time is the duration measured by the collection loop itself;
        # measuring from start_time here would include idle time between cells.
        'execution_time_minutes': round(total_time / 60, 2),
        'total_matches_fetched': successful,
        'failed_matches': failed
    },
    'player_info': {
        'puuid': MY_PUUID,
        'player_name': f"{GAME_NAME}#{TAG_LINE}",
        'region': REGION
    },
    'dataset_structure': {
        'complete_dataset_rows': len(df_complete),
        'my_matches_rows': len(df_my_matches),
        'unique_matches': n_unique_matches,
        'unique_players': df_complete['puuid'].nunique(),
        'total_columns': len(df_complete.columns)
    },
    'performance_summary': {
        'total_games': len(df_my_matches),
        'average_placement': round(avg_placement, 2),
        'median_placement': median_placement,
        'win_rate': round(win_rate_pct, 1),
        'top_4_rate': round(top4_rate_pct, 1),
        'best_placement': int(df_my_matches['placement'].min()),
        'worst_placement': int(df_my_matches['placement'].max())
    },
    'columns': list(df_complete.columns),
    'api_details': {
        'api_version': 'v1',
        'rate_limit_delay': RATE_LIMIT_DELAY,
        'endpoint_used': f'https://{REGION}.api.riotgames.com/tft/match/v1/'
    }
}

metadata_filename = 'dataset_metadata.json'
with open(metadata_filename, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2)
print(f"\n‚úÖ Saved: {metadata_filename}")
print(f"   Contains: Collection metadata and performance summary")

# Save your PUUID for reference
# An explicit encoding keeps non-ASCII player names portable across platforms.
puuid_filename = 'my_puuid.txt'
with open(puuid_filename, 'w', encoding='utf-8') as f:
    f.write(f"TFT Player Information\n")
    f.write(f"=" * 50 + "\n")
    f.write(f"Player: {GAME_NAME}#{TAG_LINE}\n")
    f.write(f"Region: {REGION}\n")
    f.write(f"PUUID: {MY_PUUID}\n")
    f.write(f"Retrieved: {datetime.now().isoformat()}\n")
    f.write(f"\nPerformance Summary:\n")
    f.write(f"  Total Games: {len(df_my_matches)}\n")
    f.write(f"  Average Placement: {avg_placement:.2f}\n")
    f.write(f"  Win Rate: {win_rate_pct:.1f}%\n")
    f.write(f"  Top 4 Rate: {top4_rate_pct:.1f}%\n")
print(f"\n‚úÖ Saved: {puuid_filename}")
print(f"   Contains: Your PUUID and quick reference info")

# Create a summary report (row counts are taken from the frames, not hard-coded)
print(f"\n" + "="*70)
print("FILES CREATED - SUMMARY")
print("="*70)
print(f"""
üìÅ Data Files:
   1. {filename_complete}
      - {len(df_complete)} rows (all 8 players √ó {n_unique_matches} matches)
      - Use for: Comparative analysis with other players

   2. {filename_filtered} ‚≠ê PRIMARY FILE
      - {len(df_my_matches)} rows (only YOUR matches)
      - Use for: Your performance analysis

üìÑ Documentation Files:
   3. {metadata_filename}
      - Collection metadata
      - Performance summary
      - API details

   4. {puuid_filename}
      - Your PUUID reference
      - Quick stats summary

‚úÖ All files saved successfully!
""")

# The closing verdict depends on the measured win rate rather than being fixed.
if win_rate_pct > expected_win_rate_pct:
    win_verdict = "You're performing ABOVE AVERAGE for wins! üèÜ"
else:
    win_verdict = "Your win rate is at or below the expected baseline."

print("="*70)
print("DATA COLLECTION COMPLETE!")
print("="*70)
print(f"""
üéØ Next Steps:
   1. Use '{filename_filtered}' for your analysis
   2. Your performance stats:
      - Win Rate: {win_rate_pct:.1f}% (Expected: {expected_win_rate_pct:.1f}%)
      - Top 4 Rate: {top4_rate_pct:.1f}% (Expected: {expected_top4_rate_pct:.1f}%)
      - Average Placement: {avg_placement:.2f}/8

   3. {win_verdict}
""")
print("="*70)


STEP 4: SAVING DATASETS

‚úÖ Saved: tft_complete_dataset.csv
   Size: 800 rows √ó 24 columns
   Contains: All 8 players from 100 matches

‚úÖ Saved: tft_my_matches_only.csv
   Size: 100 rows √ó 24 columns
   Contains: ONLY your match data (filtered by PUUID)
   ‚≠ê USE THIS FILE FOR YOUR ANALYSIS ‚≠ê

‚úÖ Saved: dataset_metadata.json
   Contains: Collection metadata and performance summary

‚úÖ Saved: my_puuid.txt
   Contains: Your PUUID and quick reference info

FILES CREATED - SUMMARY

üìÅ Data Files:
   1. tft_complete_dataset.csv
      - 800 rows (all 8 players √ó 100 matches)
      - Use for: Comparative analysis with other players
   
   2. tft_my_matches_only.csv ‚≠ê PRIMARY FILE
      - 100 rows (only YOUR matches)
      - Use for: Your performance analysis
   
üìÑ Documentation Files:
   3. dataset_metadata.json
      - Collection metadata
      - Performance summary
      - API details
   
   4. my_puuid.txt
      - Your PUUID reference
      - Quick stats summary

‚úÖ All