# Wage Data Exploration

This notebook explores the Premier League wage data collected for the football prediction project.

## Objectives:
- Load wage data from available sources
- Explore data structure and quality
- Identify patterns and insights
- Prepare data for modeling

In [1]:
import pandas as pd
import numpy as np
import json
import os
from pathlib import Path

# Resolve the project layout relative to the directory the notebook runs from.
# The project root is taken to be three levels above the working directory.
notebook_dir = Path().resolve()
project_root = notebook_dir.parent.parent.parent

# Raw data lives under data/<environment>/raw for each environment.
data_dev_path = project_root.joinpath('data', 'dev', 'raw')
data_prod_path = project_root.joinpath('data', 'prod', 'raw')

print(f"Project root: {project_root}")
print(f"Dev data path: {data_dev_path}")
print(f"Prod data path: {data_prod_path}")

  from pandas.core import (


Project root: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football
Dev data path: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\dev\raw
Prod data path: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\raw


## Available Wage Data Files

Let's check what wage data files are available in both dev and prod environments.

In [2]:
def _list_wage_files(directory, label):
    """Print and return the wage-related file names found in ``directory``.

    Args:
        directory: ``Path`` of the raw-data folder to inspect.
        label: Environment name used in the printed messages ("Dev" / "Prod").

    Returns:
        list[str]: File names containing "wage" (case-insensitive), sorted
        alphabetically. Empty when the directory does not exist.
    """
    if not directory.exists():
        print(f"{label} data path does not exist")
        return []

    # os.listdir returns entries in arbitrary order; sorting keeps the listing
    # (and any later "first matching file" choice) stable across runs.
    wage_files = sorted(f for f in os.listdir(directory) if 'wage' in f.lower())

    print(f"{label} environment wage files:")
    for wage_file in wage_files:
        print(f"  - {wage_file}")
    return wage_files


# Check available wage files in dev environment
dev_wage_files = _list_wage_files(data_dev_path, "Dev")

print()

# Check available wage files in prod environment
prod_wage_files = _list_wage_files(data_prod_path, "Prod")

Dev environment wage files:
  - premier_league_wages_2019_2024.json
  - premier_league_wages_2019_2024_v2.json
  - premier_league_wages_2019_2025_v2.json

Prod environment wage files:
  - premier_league_wages.json
  - premier_league_wages_dataframe.csv
  - premier_league_wages_dataframe.json
  - premier_league_wages_summary.json


## Load Wage Data

Let's load the most comprehensive wage dataset available.

In [3]:
# Function to load JSON data
def load_json_data(file_path):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Any exception raised while opening or parsing the file is reported with
    ``print`` and swallowed.

    Args:
        file_path: Location of the JSON file (anything ``open`` accepts).

    Returns:
        The decoded JSON value, or ``None`` when the file could not be loaded.
    """
    parsed = None
    try:
        with open(file_path, mode='r', encoding='utf-8') as json_file:
            parsed = json.load(json_file)
    except Exception as load_error:
        print(f"Error loading {file_path}: {load_error}")
    return parsed

# Load wage data, preferring the production files (most complete) over dev
wage_data = None
wage_df = None

# Candidate files in priority order. Each entry pairs a path with the suffix
# used in the progress message so prod and dev sources stay distinguishable.
wage_candidates = [
    (data_prod_path / name, "")
    for name in ('premier_league_wages_dataframe.json', 'premier_league_wages.json')
    if name in prod_wage_files
]
wage_candidates.extend(
    (data_dev_path / name, " dev")
    for name in dev_wage_files
    if name.endswith('.json')
)

# Walk the whole list: a file that exists but cannot be read (or is empty)
# must not block the fallbacks that come after it.
for wage_file_path, source_suffix in wage_candidates:
    print(f"Loading wage data from{source_suffix}: {wage_file_path}")
    wage_data = load_json_data(wage_file_path)

    if wage_data:
        wage_df = pd.DataFrame(wage_data)
        print(f"Successfully loaded wage data: {wage_df.shape}")
        break

if wage_df is None:
    print("No wage data could be loaded!")
else:
    print(f"Wage data loaded successfully with shape: {wage_df.shape}")

Loading wage data from: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\raw\premier_league_wages_dataframe.json
Successfully loaded wage data: (3341, 12)
Wage data loaded successfully with shape: (3341, 12)


## Data Overview

Let's explore the structure and content of the wage data.

In [4]:
# Summarise the loaded frame: dimensions, column names, dtypes and a preview.
if wage_df is None:
    print("No wage data available for analysis")
else:
    print("=== Wage Data Overview ===")
    print(f"Shape: {wage_df.shape}")

    column_names = list(wage_df.columns)
    print(f"\nColumns: {column_names}")

    print(f"\nData types:")
    print(wage_df.dtypes)

    print(f"\nFirst few rows:")
    display(wage_df.head())

=== Wage Data Overview ===
Shape: (3341, 12)

Columns: ['team_name', 'season', 'player_name', 'age', 'annual_wages', 'weekly_wages', 'team_id', 'tables_found', 'table_source', 'nationality', 'position', 'notes']

Data types:
team_name       object
season          object
player_name     object
age             object
annual_wages    object
weekly_wages    object
team_id         object
tables_found    object
table_source    object
nationality     object
position        object
notes           object
dtype: object

First few rows:


Unnamed: 0,team_name,season,player_name,age,annual_wages,weekly_wages,team_id,tables_found,table_source,nationality,position,notes
0,Arsenal,2019-2020,Mesut Özil,30,"£ 18,200,000 (€ 21,704,678, $22,117,026)","£ 350,000 (€ 417,398, $425,327)",18bb7c10,wages,wages,de GER,MF,
1,Arsenal,2019-2020,Pierre-Emerick Aubameyang,30,"£ 13,000,000 (€ 15,503,341, $15,797,876)","£ 250,000 (€ 298,141, $303,805)",18bb7c10,wages,wages,ga GAB,FW,
2,Arsenal,2019-2020,Alexandre Lacazette,28,"£ 9,470,000 (€ 11,293,588, $11,508,145)","£ 182,115 (€ 217,184, $221,310)",18bb7c10,wages,wages,fr FRA,FW,
3,Arsenal,2019-2020,Héctor Bellerín,24,"£ 5,720,000 (€ 6,821,470, $6,951,065)","£ 110,000 (€ 131,182, $133,674)",18bb7c10,wages,wages,es ESP,"DF,MF",
4,Arsenal,2019-2020,David Luiz,32,"£ 5,250,000 (€ 6,260,965, $6,379,911)","£ 100,962 (€ 120,403, $122,691)",18bb7c10,wages,wages,br BRA,DF,


In [5]:
# Report per-column missing counts/percentages, then overall descriptive stats.
if wage_df is not None:
    print("=== Missing Values ===")
    missing_info = wage_df.isna().sum()
    missing_pct = missing_info.div(len(wage_df)).mul(100)

    missing_df = missing_info.to_frame('Missing Count').assign(
        **{'Missing Percentage': missing_pct}
    )

    # Only columns that actually have gaps are worth listing
    has_missing = missing_df['Missing Count'].gt(0)
    print(missing_df.loc[has_missing])

    print("\n=== Basic Statistics ===")
    display(wage_df.describe(include='all'))

=== Missing Values ===
              Missing Count  Missing Percentage
annual_wages             30            0.897935
weekly_wages             30            0.897935
nationality             179            5.357677
position                123            3.681532
notes                  2221           66.477103

=== Basic Statistics ===


Unnamed: 0,team_name,season,player_name,age,annual_wages,weekly_wages,team_id,tables_found,table_source,nationality,position,notes
count,3341,3341,3341,3341,3311,3311,3341,3341,3341,3162,3218,1120
unique,27,6,1327,25,502,501,27,1,1,86,19,1
top,Chelsea,2024-2025,Ben Davies,24,"£ 2,600,000 (€ 3,100,668, $3,159,575)","£ 50,000 (€ 59,628, $60,761)",cff3d9bb,wages,wages,eng ENG,DF,Unverified estimation
freq,180,579,7,282,72,72,180,3341,3341,1050,937,1120


## Data Quality Assessment

Let's assess the quality and completeness of the wage data.

In [6]:
# Print the cardinality of every column; low-cardinality columns also get
# their values listed (first ten for object columns, sorted otherwise).
if wage_df is not None:
    print("=== Unique Values ===")

    for col in wage_df.columns:
        column_values = wage_df[col]
        unique_count = column_values.nunique()
        print(f"{col}: {unique_count} unique values")

        if unique_count < 20:
            if column_values.dtype == 'object':
                print(f"  Sample values: {list(column_values.unique()[:10])}")
            else:
                print(f"  Values: {sorted(column_values.unique())}")
        print()

=== Unique Values ===
team_name: 27 unique values

season: 6 unique values
  Sample values: ['2019-2020', '2020-2021', '2021-2022', '2022-2023', '2023-2024', '2024-2025']

player_name: 1327 unique values

age: 25 unique values

annual_wages: 502 unique values

weekly_wages: 501 unique values

team_id: 27 unique values

tables_found: 1 unique values
  Sample values: ['wages']

table_source: 1 unique values
  Sample values: ['wages']

nationality: 86 unique values

position: 19 unique values
  Sample values: ['MF', 'FW', 'DF,MF', 'DF', 'GK', 'MF,FW', 'DF,FW', None, 'MF,DF', 'FW,MF']

notes: 1 unique values
  Sample values: [None, 'Unverified estimation']



In [7]:
if wage_df is not None:
    print("=== Data Sample ===")
    # Show a random sample of the data. The seed is fixed so the displayed
    # rows are the same on every Restart & Run All.
    sample_size = min(10, len(wage_df))
    display(wage_df.sample(sample_size, random_state=42))

=== Data Sample ===


Unnamed: 0,team_name,season,player_name,age,annual_wages,weekly_wages,team_id,tables_found,table_source,nationality,position,notes
2341,Tottenham,2020-2021,Ben Davies,27,"£ 3,120,000 (€ 3,720,802, $3,791,490)","£ 60,000 (€ 71,554, $72,913)",361ca564,wages,wages,wls WAL,DF,
3295,Luton Town,2023-2024,Issa Kaboré,22,"£ 1,040,000 (€ 1,231,034, $1,322,920)","£ 20,000 (€ 23,674, $25,441)",e297cd13,wages,wages,bf BFA,DF,Unverified estimation
3283,Nott'ham Forest,2024-2025,Zach Abbott,18,"£ 156,000 (€ 184,821, $190,515)","£ 3,000 (€ 3,554, $3,664)",e4a775cb,wages,wages,,CB,Unverified estimation
923,Crystal Palace,2020-2021,Scott Dann,33,"£ 3,120,000 (€ 3,720,802, $3,791,490)","£ 60,000 (€ 71,554, $72,913)",47c64c55,wages,wages,eng ENG,DF,
3039,West Brom,2020-2021,Kamil Grosicki,32,"£ 1,300,000 (€ 1,550,334, $1,579,788)","£ 25,000 (€ 29,814, $30,381)",60c6b05f,wages,wages,pl POL,"MF,FW",
2693,Wolves,2019-2020,Harry Burgoyne,22,"£ 130,000 (€ 155,033, $157,979)","£ 2,500 (€ 2,981, $3,038)",8cec06e1,wages,wages,eng ENG,,Unverified estimation
1676,Manchester Utd,2019-2020,Jesse Lingard,26,"£ 3,900,000 (€ 4,651,002, $4,739,363)","£ 75,000 (€ 89,442, $91,142)",19538871,wages,wages,eng ENG,"MF,FW",
2512,Watford,2021-2022,Oghenekaro Etebo,25,"£ 860,000 (€ 1,025,606, $1,045,090)","£ 16,538 (€ 19,723, $20,098)",2abfe087,wages,wages,ng NGA,MF,Unverified estimation
2951,Leeds United,2020-2021,Raphinha,23,"£ 3,302,000 (€ 3,937,849, $4,012,660)","£ 63,500 (€ 75,728, $77,167)",5bfb9659,wages,wages,br BRA,"MF,FW",
225,Aston Villa,2021-2022,Jacob Ramsey,20,"£ 390,000 (€ 465,100, $473,936)","£ 7,500 (€ 8,944, $9,114)",8602292d,wages,wages,eng ENG,MF,


## Next Steps

Based on the wage data exploration, we can now:

1. **Data Cleaning**: Address any missing values, inconsistencies, or formatting issues
2. **Feature Engineering**: Create derived features from wage data
3. **Data Integration**: Combine wage data with match results and team performance
4. **Analysis**: Explore relationships between wages and team performance

In [8]:
import re

def separate_currencies(wage_string):
    """
    Separates a mixed-currency wage string into individual currency amounts.

    Format expected: "£ amount (€ amount, $amount)"
    Example: "£ 18,200,000 (€ 21,704,678, $22,117,026)"

    Amounts may use commas as thousands separators and may carry a decimal
    part (e.g. "1,500.50"). Any amount of whitespace is tolerated after the
    currency symbols and around the parentheses.

    Args:
        wage_string: Raw wage text, or a missing value (None / NaN / '').

    Returns:
        dict: {'pounds': float, 'euros': float, 'dollars': float}; all three
        values are None when the input is missing or cannot be parsed. A
        message is printed for non-missing input that cannot be parsed.
    """
    currency_keys = ('pounds', 'euros', 'dollars')

    if pd.isna(wage_string) or wage_string == '':
        return dict.fromkeys(currency_keys)

    # Remove extra spaces and normalize
    wage_string = str(wage_string).strip()

    # One amount: digits with optional thousands commas and optional decimals
    amount = r'([\d,]+(?:\.\d+)?)'
    # £ amount (€ amount, $amount)
    pattern = rf'£\s*{amount}\s*\(\s*€\s*{amount}\s*,\s*\$\s*{amount}\s*\)'
    match = re.search(pattern, wage_string)

    if match is None:
        print(f"Could not parse: {wage_string}")
        return dict.fromkeys(currency_keys)

    try:
        # Convert to float by removing the thousands separators
        amounts = [float(raw.replace(',', '')) for raw in match.groups()]
    except ValueError as e:
        # e.g. a group consisting only of commas
        print(f"Error parsing '{wage_string}': {e}")
        return dict.fromkeys(currency_keys)

    return dict(zip(currency_keys, amounts))

# Smoke-test the parser on the first few non-missing annual wage strings
if wage_df is None or 'annual_wages' not in wage_df.columns:
    print("No wage data available for testing")
else:
    print("=== Testing Currency Separation Function ===")

    # First five wage strings that are actually present
    sample_wages = wage_df['annual_wages'].dropna().head(5).tolist()

    for sample_number, wage_str in enumerate(sample_wages, start=1):
        print(f"\nSample {sample_number}: {wage_str}")
        result = separate_currencies(wage_str)
        print(f"  Result: {result}")

=== Testing Currency Separation Function ===

Sample 1: £ 18,200,000 (€ 21,704,678, $22,117,026)
  Result: {'pounds': 18200000.0, 'euros': 21704678.0, 'dollars': 22117026.0}

Sample 2: £ 13,000,000 (€ 15,503,341, $15,797,876)
  Result: {'pounds': 13000000.0, 'euros': 15503341.0, 'dollars': 15797876.0}

Sample 3: £ 9,470,000 (€ 11,293,588, $11,508,145)
  Result: {'pounds': 9470000.0, 'euros': 11293588.0, 'dollars': 11508145.0}

Sample 4: £ 5,720,000 (€ 6,821,470, $6,951,065)
  Result: {'pounds': 5720000.0, 'euros': 6821470.0, 'dollars': 6951065.0}

Sample 5: £ 5,250,000 (€ 6,260,965, $6,379,911)
  Result: {'pounds': 5250000.0, 'euros': 6260965.0, 'dollars': 6379911.0}


## Apply Currency Separation to DataFrame

Now let's apply the function to create separate currency columns in our dataframe.

In [9]:
# Defined up front so that cells testing `wage_df_clean is not None` work even
# when no wage data was loaded (otherwise the name would not exist at all).
wage_df_clean = None

if wage_df is not None and 'annual_wages' in wage_df.columns:
    print("=== Applying Currency Separation to DataFrame ===")

    # Create a copy to work with so the raw frame stays untouched
    wage_df_clean = wage_df.copy()

    # Apply the separation function to all rows
    print("Processing currency separation...")
    currency_data = wage_df_clean['annual_wages'].apply(separate_currencies)

    # Convert the list of dictionaries to separate columns. Reusing the source
    # index makes the assignments below align row-for-row whatever index the
    # frame carries; naming the columns keeps the lookups valid for an empty frame.
    currency_df = pd.DataFrame(
        currency_data.tolist(),
        index=wage_df_clean.index,
        columns=['pounds', 'euros', 'dollars'],
    )

    # Add the new columns to the main dataframe
    wage_df_clean['annual_wages_pounds'] = currency_df['pounds']
    wage_df_clean['annual_wages_euros'] = currency_df['euros']
    wage_df_clean['annual_wages_dollars'] = currency_df['dollars']

    print(f"\nOriginal dataframe shape: {wage_df.shape}")
    print(f"Cleaned dataframe shape: {wage_df_clean.shape}")

    # Show the new columns
    print("\n=== New Currency Columns ===")
    new_cols = ['annual_wages', 'annual_wages_pounds', 'annual_wages_euros', 'annual_wages_dollars']
    display(wage_df_clean[new_cols].head(10))

    # Check for any parsing issues (rows whose source value was already
    # missing are counted here too)
    null_pounds = wage_df_clean['annual_wages_pounds'].isnull().sum()
    null_euros = wage_df_clean['annual_wages_euros'].isnull().sum()
    null_dollars = wage_df_clean['annual_wages_dollars'].isnull().sum()

    print("\n=== Parsing Results ===")
    print(f"Total rows: {len(wage_df_clean)}")
    print(f"Null pounds: {null_pounds}")
    print(f"Null euros: {null_euros}")
    print(f"Null dollars: {null_dollars}")

    if null_pounds > 0 or null_euros > 0 or null_dollars > 0:
        print("\nRows with parsing issues:")
        issues_mask = (wage_df_clean['annual_wages_pounds'].isnull() |
                       wage_df_clean['annual_wages_euros'].isnull() |
                       wage_df_clean['annual_wages_dollars'].isnull())
        # Single .loc selection instead of chained indexing
        display(wage_df_clean.loc[issues_mask, new_cols].head())

    print("\n=== Summary Statistics for Currency Columns ===")
    currency_stats = wage_df_clean[['annual_wages_pounds', 'annual_wages_euros', 'annual_wages_dollars']].describe()
    display(currency_stats)

else:
    print("No wage data available for currency separation")

=== Applying Currency Separation to DataFrame ===
Processing currency separation...

Original dataframe shape: (3341, 12)
Cleaned dataframe shape: (3341, 15)

=== New Currency Columns ===


Unnamed: 0,annual_wages,annual_wages_pounds,annual_wages_euros,annual_wages_dollars
0,"£ 18,200,000 (€ 21,704,678, $22,117,026)",18200000.0,21704678.0,22117026.0
1,"£ 13,000,000 (€ 15,503,341, $15,797,876)",13000000.0,15503341.0,15797876.0
2,"£ 9,470,000 (€ 11,293,588, $11,508,145)",9470000.0,11293588.0,11508145.0
3,"£ 5,720,000 (€ 6,821,470, $6,951,065)",5720000.0,6821470.0,6951065.0
4,"£ 5,250,000 (€ 6,260,965, $6,379,911)",5250000.0,6260965.0,6379911.0
5,"£ 5,200,000 (€ 6,201,336, $6,319,150)",5200000.0,6201336.0,6319150.0
6,"£ 5,200,000 (€ 6,201,336, $6,319,150)",5200000.0,6201336.0,6319150.0
7,"£ 5,200,000 (€ 6,201,336, $6,319,150)",5200000.0,6201336.0,6319150.0
8,"£ 5,200,000 (€ 6,201,336, $6,319,150)",5200000.0,6201336.0,6319150.0
9,"£ 4,780,000 (€ 5,700,459, $5,808,757)",4780000.0,5700459.0,5808757.0



=== Parsing Results ===
Total rows: 3341
Null pounds: 30
Null euros: 30
Null dollars: 30

Rows with parsing issues:


Unnamed: 0,annual_wages,annual_wages_pounds,annual_wages_euros,annual_wages_dollars
150,,,,
282,,,,
310,,,,
311,,,,
312,,,,



=== Summary Statistics for Currency Columns ===


Unnamed: 0,annual_wages_pounds,annual_wages_euros,annual_wages_dollars
count,3311.0,3311.0,3311.0
mean,3156124.0,3735860.0,3905733.0
std,3166710.0,3745138.0,3927922.0
min,20000.0,23851.0,24304.0
25%,1040000.0,1240267.0,1322920.0
50%,2340000.0,2719949.0,2843618.0
75%,4160000.0,4868452.0,5055320.0
max,31200000.0,37208020.0,37914900.0


In [None]:
# Inspect the cleaned frame. The dangling attribute access was a syntax error.
wage_df_clean

Unnamed: 0,team_name,season,player_name,age,annual_wages,weekly_wages,team_id,tables_found,table_source,nationality,position,notes,annual_wages_pounds,annual_wages_euros,annual_wages_dollars
0,Arsenal,2019-2020,Mesut Özil,30,"£ 18,200,000 (€ 21,704,678, $22,117,026)","£ 350,000 (€ 417,398, $425,327)",18bb7c10,wages,wages,de GER,MF,,18200000.0,21704678.0,22117026.0
1,Arsenal,2019-2020,Pierre-Emerick Aubameyang,30,"£ 13,000,000 (€ 15,503,341, $15,797,876)","£ 250,000 (€ 298,141, $303,805)",18bb7c10,wages,wages,ga GAB,FW,,13000000.0,15503341.0,15797876.0
2,Arsenal,2019-2020,Alexandre Lacazette,28,"£ 9,470,000 (€ 11,293,588, $11,508,145)","£ 182,115 (€ 217,184, $221,310)",18bb7c10,wages,wages,fr FRA,FW,,9470000.0,11293588.0,11508145.0
3,Arsenal,2019-2020,Héctor Bellerín,24,"£ 5,720,000 (€ 6,821,470, $6,951,065)","£ 110,000 (€ 131,182, $133,674)",18bb7c10,wages,wages,es ESP,"DF,MF",,5720000.0,6821470.0,6951065.0
4,Arsenal,2019-2020,David Luiz,32,"£ 5,250,000 (€ 6,260,965, $6,379,911)","£ 100,962 (€ 120,403, $122,691)",18bb7c10,wages,wages,br BRA,DF,,5250000.0,6260965.0,6379911.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3336,Ipswich Town,2024-2025,Nathan Broadhead,26,"£ 312,000 (€ 369,641, $381,031)","£ 6,000 (€ 7,108, $7,328)",b74092de,wages,wages,wls WAL,"MF,FW",Unverified estimation,312000.0,369641.0,381031.0
3337,Ipswich Town,2024-2025,Omari Hutchinson,20,"£ 312,000 (€ 369,641, $381,031)","£ 6,000 (€ 7,108, $7,328)",b74092de,wages,wages,,RW,Unverified estimation,312000.0,369641.0,381031.0
3338,Ipswich Town,2024-2025,Cieran Slicker,22,"£ 156,000 (€ 184,821, $190,515)","£ 3,000 (€ 3,554, $3,664)",b74092de,wages,wages,,GK,Unverified estimation,156000.0,184821.0,190515.0
3339,Ipswich Town,2024-2025,Elkan Baggott,22,"£ 78,000 (€ 91,585, $102,108)","£ 1,500 (€ 1,761, $1,964)",b74092de,wages,wages,,CB,Unverified estimation,78000.0,91585.0,102108.0


## Save Processed Wage Data

Now let's save the cleaned wage data to the processed directory in multiple formats for downstream use.

In [11]:
import os
from pathlib import Path

# Output location for the processed wage files (created if it is missing)
processed_dir = project_root / 'data' / 'prod' / 'processed'
processed_dir.mkdir(parents=True, exist_ok=True)

print(f"Processed data directory: {processed_dir}")

if wage_df_clean is None:
    print("❌ No wage data available to save")
else:
    print(f"Saving cleaned wage data with shape: {wage_df_clean.shape}")

    # All outputs share one base name and differ only by extension
    base_filename = "premier_league_wages_processed"
    csv_path = processed_dir / f"{base_filename}.csv"
    json_path = processed_dir / f"{base_filename}.json"
    parquet_path = processed_dir / f"{base_filename}.parquet"
    pkl_path = processed_dir / f"{base_filename}.pkl"
    xlsx_path = processed_dir / f"{base_filename}.xlsx"

    # 1. CSV format
    wage_df_clean.to_csv(csv_path, index=False)
    print(f"✓ Saved CSV: {csv_path}")

    # 2. JSON format (one record per row)
    wage_df_clean.to_json(json_path, orient='records', indent=2)
    print(f"✓ Saved JSON: {json_path}")

    # 3. Parquet format
    wage_df_clean.to_parquet(parquet_path, index=False)
    print(f"✓ Saved Parquet: {parquet_path}")

    # 4. Pickle format
    wage_df_clean.to_pickle(pkl_path)
    print(f"✓ Saved Pickle: {pkl_path}")

    # 5. Excel format (written through the openpyxl engine)
    wage_df_clean.to_excel(xlsx_path, index=False, engine='openpyxl')
    print(f"✓ Saved Excel: {xlsx_path}")

    print(f"\n📊 Successfully saved wage data in 5 formats to: {processed_dir}")
    print(f"   Total records: {len(wage_df_clean):,}")
    print(f"   Columns: {len(wage_df_clean.columns)}")

Processed data directory: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\processed
Saving cleaned wage data with shape: (3341, 15)
✓ Saved CSV: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\processed\premier_league_wages_processed.csv
✓ Saved JSON: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\processed\premier_league_wages_processed.json
✓ Saved Parquet: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\processed\premier_league_wages_processed.parquet
✓ Saved Pickle: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\processed\premier_league_wages_processed.pkl
✓ Saved Excel: C:\Users\50230\OneDrive\Escritorio\Proyectos y trabajos\Personales\Pronósticos Football\data\prod\processed\premier_league_wages_processed.xlsx

📊 Successfully saved 