In [None]:
# Enable IPython's autoreload extension in mode 2 so edited project modules
# are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# The `from src...` imports in later cells need the directory that contains
# `src/` as the working directory. Guarding the chdir keeps this cell
# idempotent: re-running it no longer climbs one more level each time.
if not os.path.isdir("src"):
    os.chdir("..")

In [None]:
from src.soccer_agent import SoccerAgent

In [None]:
# Loading a .env file is disabled here.
# NOTE(review): presumably SoccerAgent obtains its own credentials — confirm.
#from dotenv import load_dotenv
#load_dotenv()
# Build the agent instance used by the following cell.
soccer_agent = SoccerAgent()


In [None]:
# Ask the agent for a general performance analysis of one player; the return
# value is shown as the cell output.
soccer_agent.analyze_player_general_performance("Lionel Messi")

In [None]:
from src.data_collector import DataAggregator
data_aggregator = DataAggregator()

# Fetch the competitions table once (the original fetched it twice and threw
# away the first, non-deduplicated listing because only a cell's last
# expression is displayed).
competitions = data_aggregator.statsbomb.get_competitions()

# Distinct competition ids as plain Python ints.
[int(i) for i in competitions["competition_id"].unique()]

In [None]:
# get_events returns four objects for the given id, unpacked here.
# NOTE(review): 3895302 is presumably a StatsBomb match id, and the four values
# presumably events / related events / freeze frames / tactics — confirm.
events, related, freeze, tactics = data_aggregator.statsbomb.get_events(3895302)


In [None]:
# Look up career data for one player by name and display the result.
data_aggregator.statsbomb.get_player_career_data("Lionel Messi")

In [None]:
# Distinct season ids present in the competitions table.
data_aggregator.statsbomb.get_competitions()["season_id"].unique()

In [None]:
import pandas as pd

# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)

events, related, freeze, tactics = data_aggregator.statsbomb.get_events(3895302)

# Only the last expression of a cell is rendered automatically, so a bare
# `events.head()` in the middle of the cell showed nothing; display() makes
# the preview actually appear.
display(events.head())

# Events of a single player, used as sample input by the analysis functions
# defined further down.
df_example = data_aggregator.statsbomb.get_player_stats(3895302, "Nick Woltemade")

In [None]:
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple

from typing import Dict, List, Tuple
from collections import defaultdict

def calculate_player_statistics(df: pd.DataFrame) -> Dict:
    """
    Calculate summary statistics for one player from an events dataframe.

    Args:
        df: One row per event. ``type_name`` is always read, and
            ``outcome_name`` is read whenever pass or shot rows exist. All
            other columns (``pass_length``, ``pass_recipient_name``,
            ``body_part_name``, ``x``/``y``/``end_x``/``end_y``, ``duration``,
            ``timestamp``, ``tactics_formation``, ``match_id``,
            ``position_name``, ...) are optional and only used when present.

    Returns:
        Dictionary of statistics. Keys belonging to a category (passes,
        shots, carries, ...) are only present when the frame contains events
        of that category; ``was_substituted`` is always present.

    Raises:
        KeyError: if ``type_name`` is missing, or if ``outcome_name`` is
            missing while pass or shot events exist.
    """
    stats = {}
    has_rows = not df.empty

    def timestamp_to_seconds(timestamp_str):
        """Convert 'HH:MM:SS[.fff]' to seconds; unparseable values map to 0."""
        try:
            time_parts = str(timestamp_str).split(':')
            if len(time_parts) < 3:
                return 0
            hours = int(time_parts[0])
            minutes = int(time_parts[1])
            # float() keeps the fraction at its true scale ('.250' -> 0.25 s).
            # The previous int(fraction) / 1_000_000 was only right for
            # six-digit fractions and shrank millisecond values 1000-fold.
            seconds = float(time_parts[2])
        except (TypeError, ValueError):
            return 0
        return hours * 3600 + minutes * 60 + seconds

    # Player and team information
    if has_rows and 'player_name' in df.columns:
        stats['player_name'] = df['player_name'].iloc[0]
    if has_rows and 'team_name' in df.columns:
        stats['team_name'] = df['team_name'].iloc[0]

    # 1. Pass statistics
    pass_events = df[df['type_name'] == 'Pass']
    if not pass_events.empty:
        pass_outcomes = pass_events['outcome_name']
        stats['total_passes'] = len(pass_events)
        # A pass without a recorded outcome is counted as successful.
        stats['successful_passes'] = int(pass_outcomes.isna().sum())
        # NOTE: despite the key name this counts the 'Incomplete' outcome.
        stats['intercepted_passes'] = int((pass_outcomes == 'Incomplete').sum())
        stats['pass_accuracy'] = stats['successful_passes'] / stats['total_passes']

        if 'pass_length' in pass_events.columns:
            stats['avg_pass_distance'] = pass_events['pass_length'].mean()

        if 'pass_recipient_name' in pass_events.columns:
            recipient_counts = pass_events['pass_recipient_name'].value_counts()
            if not recipient_counts.empty:
                stats['favorite_pass_recipient'] = recipient_counts.index[0]
                stats['passes_to_favorite_recipient'] = recipient_counts.iloc[0]

    # 2. Shot statistics
    shot_events = df[df['type_name'] == 'Shot']
    if not shot_events.empty:
        shot_outcomes = shot_events['outcome_name']
        # On target = scored or kept out by the goalkeeper. Counting only
        # 'Goal' (as before) reported goals under the name shots_on_target.
        # Both capitalisations of the post variant are accepted.
        on_target_outcomes = ['Goal', 'Saved', 'Saved to Post', 'Saved To Post']
        stats['total_shots'] = len(shot_events)
        stats['goals'] = int((shot_outcomes == 'Goal').sum())
        stats['shots_on_target'] = int(shot_outcomes.isin(on_target_outcomes).sum())
        stats['shot_accuracy'] = stats['shots_on_target'] / stats['total_shots']

    # 3. Ball carry statistics
    carry_events = df[df['type_name'] == 'Carry']
    if not carry_events.empty:
        stats['total_carries'] = len(carry_events)

        coordinate_columns = ['x', 'y', 'end_x', 'end_y']
        if all(column in carry_events.columns for column in coordinate_columns):
            # Only carries with a complete start and end point contribute.
            complete = carry_events.dropna(subset=coordinate_columns)
            if not complete.empty:
                delta_x = complete['end_x'].astype(float) - complete['x'].astype(float)
                delta_y = complete['end_y'].astype(float) - complete['y'].astype(float)
                carry_distances = np.sqrt(delta_x ** 2 + delta_y ** 2)
                stats['avg_carry_distance'] = carry_distances.mean()
                stats['total_carry_distance'] = carry_distances.sum()

        if 'duration' in carry_events.columns:
            total_carry_time = carry_events['duration'].sum()
            stats['total_carry_time_seconds'] = total_carry_time
            stats['avg_carry_time_seconds'] = total_carry_time / len(carry_events)

    # 4. Body part usage for passes
    if not pass_events.empty and 'body_part_name' in pass_events.columns:
        body_part_counts = pass_events['body_part_name'].value_counts()
        if not body_part_counts.empty:
            stats['favorite_body_part_for_pass'] = body_part_counts.index[0]
            stats['passes_with_favorite_body_part'] = body_part_counts.iloc[0]
            stats['body_part_breakdown'] = body_part_counts.to_dict()

    # 5. Position analysis (mean and spread of event start coordinates)
    if 'x' in df.columns and 'y' in df.columns:
        stats['avg_position_x'] = df['x'].mean()
        stats['avg_position_y'] = df['y'].mean()
        stats['position_range_x'] = df['x'].max() - df['x'].min()
        stats['position_range_y'] = df['y'].max() - df['y'].min()

    # 6. Time-based statistics: span between first and last recorded action
    if 'timestamp' in df.columns:
        time_seconds = df['timestamp'].apply(timestamp_to_seconds)

        if len(df) > 1:
            stats['first_action_time'] = time_seconds.min()
            stats['last_action_time'] = time_seconds.max()
            stats['total_playing_time_seconds'] = stats['last_action_time'] - stats['first_action_time']
            playing_minutes = stats['total_playing_time_seconds'] / 60
            stats['actions_per_minute'] = len(df) / playing_minutes if playing_minutes > 0 else 0
        else:
            stats['total_playing_time_seconds'] = 0
            stats['actions_per_minute'] = 0

    # 7. Pressure events
    pressure_events = df[df['type_name'] == 'Pressure']
    if not pressure_events.empty:
        stats['total_pressures'] = len(pressure_events)

    # 8. Substitution information
    substitution_events = df[df['type_name'] == 'Substitution']
    stats['was_substituted'] = not substitution_events.empty
    if stats['was_substituted'] and 'substitution_replacement_name' in substitution_events.columns:
        stats['replaced_by'] = substitution_events['substitution_replacement_name'].iloc[0]

    # 9. Formation (value on the first row, None for an empty frame)
    if 'tactics_formation' in df.columns:
        stats['formation'] = df['tactics_formation'].iloc[0] if has_rows else None

    # 10. Match context. Guarded on has_rows: .iloc[0] on an empty frame
    # raised IndexError before.
    if has_rows and 'match_id' in df.columns:
        stats['match_id'] = df['match_id'].iloc[0]
    if has_rows and 'position_name' in df.columns:
        stats['position'] = df['position_name'].iloc[0]

    return stats

def calculate_period_statistics(df: pd.DataFrame) -> Dict:
    """
    Calculate the player statistics separately for every match period.

    Each period's timestamps are treated as restarting at 00:00:00, so the
    timing figures are relative to the start of that period.

    Args:
        df: Events dataframe. Without a ``period`` column only the player and
            team names are returned.

    Returns:
        Dictionary with ``player_name`` / ``team_name`` (when available) and
        one ``period_<n>`` entry per distinct period value. Each entry holds
        the output of ``calculate_player_statistics`` for that period plus
        ``period_*`` timing keys when a ``timestamp`` column exists.
    """
    # Nominal length in minutes: two 45-minute halves and two 15-minute
    # extra-time halves. Previously every period was reported as 45 minutes.
    # Any other period value keeps the old default of 45.
    nominal_period_minutes = {1: 45, 2: 45, 3: 15, 4: 15}
    default_period_minutes = 45

    def period_timestamp_to_seconds(timestamp_str):
        """Convert 'HH:MM:SS[.fff]' to seconds; unparseable values map to 0."""
        try:
            time_parts = str(timestamp_str).split(':')
            if len(time_parts) < 3:
                return 0
            hours = int(time_parts[0])
            minutes = int(time_parts[1])
            # float() keeps the fraction at its true scale ('.250' -> 0.25 s);
            # int(fraction) / 1_000_000 was only right for six-digit fractions.
            seconds = float(time_parts[2])
        except (TypeError, ValueError):
            return 0
        # The hour field is expected to be 0 inside a period; it is included
        # so an unexpected value is not silently dropped.
        return hours * 3600 + minutes * 60 + seconds

    period_stats = {}

    if 'player_name' in df.columns and not df.empty:
        period_stats['player_name'] = df['player_name'].iloc[0]
    if 'team_name' in df.columns and not df.empty:
        period_stats['team_name'] = df['team_name'].iloc[0]

    if 'period' not in df.columns:
        return period_stats

    for period in df['period'].unique():
        period_data = df[df['period'] == period]
        entry = calculate_player_statistics(period_data)
        period_stats[f'period_{period}'] = entry

        if 'timestamp' not in period_data.columns or period_data.empty:
            continue

        entry['period_duration_minutes'] = nominal_period_minutes.get(period, default_period_minutes)

        time_seconds = period_data['timestamp'].apply(period_timestamp_to_seconds)

        if len(period_data) > 1:
            first_second = time_seconds.min()
            last_second = time_seconds.max()
            playing_minutes = (last_second - first_second) / 60
            entry['period_first_action_minute'] = first_second / 60
            entry['period_last_action_minute'] = last_second / 60
            entry['period_playing_time_minutes'] = playing_minutes
            entry['period_actions_per_minute'] = len(period_data) / playing_minutes if playing_minutes > 0 else 0
        else:
            # A single event spans no time.
            entry['period_playing_time_minutes'] = 0
            entry['period_actions_per_minute'] = 0

    return period_stats

def calculate_advanced_metrics(df: pd.DataFrame) -> Dict:
    """
    Calculate advanced metrics for one player from an events dataframe.

    Args:
        df: One row per event. ``type_name`` is always read; ``outcome_name``
            is read when pass, dribble or aerial-duel rows exist. All other
            columns are optional and the metrics depending on them are
            skipped (or reported as 0) when they are absent.
            NOTE(review): the pressure metric assumes the rows are in
            chronological order — confirm against the caller.

    Returns:
        Dictionary of metrics. Count-style keys such as ``ball_recoveries``,
        ``fouls_committed``, ``fouls_won``, ``defensive_actions`` and
        ``set_piece_actions`` are always present; the others only when the
        relevant events/columns exist.

    Raises:
        KeyError: if ``type_name`` (or ``outcome_name`` where needed) is
            missing.
    """
    advanced_stats = {}
    turnover_types = ['Ball Recovery', 'Interception']

    def completed_count(events):
        """Number of rows without a recorded outcome (counted as successful)."""
        return int(events['outcome_name'].isna().sum())

    def period_timestamp_to_seconds(timestamp_value):
        """Seconds into the period from 'HH:MM:SS[.fff]' (hours ignored); 0 if unparseable."""
        try:
            time_parts = str(timestamp_value).split(':')
            if len(time_parts) < 3:
                return 0
            return int(time_parts[1]) * 60 + float(time_parts[2])
        except (TypeError, ValueError):
            return 0

    # Player and team information
    if 'player_name' in df.columns and not df.empty:
        advanced_stats['player_name'] = df['player_name'].iloc[0]
    if 'team_name' in df.columns and not df.empty:
        advanced_stats['team_name'] = df['team_name'].iloc[0]

    # 1. Pass accuracy by distance band (<=15, 15-30, >30)
    pass_events = df[df['type_name'] == 'Pass']
    if not pass_events.empty and 'pass_length' in pass_events.columns:
        lengths = pass_events['pass_length']
        distance_bands = (
            ('short', lengths <= 15),
            ('medium', (lengths > 15) & (lengths <= 30)),
            ('long', lengths > 30),
        )
        for label, in_band in distance_bands:
            band = pass_events[in_band]
            if not band.empty:
                advanced_stats[f'{label}_pass_accuracy'] = completed_count(band) / len(band)
                advanced_stats[f'{label}_passes_count'] = len(band)

    # 2. Progressive actions: events moving the ball more than 5 units forward
    coordinate_columns = ['x', 'y', 'end_x', 'end_y']
    if all(column in df.columns for column in coordinate_columns):
        complete = df.dropna(subset=coordinate_columns)
        distance_gained = complete['end_x'].astype(float) - complete['x'].astype(float)
        progressive_gain = distance_gained[distance_gained > 5]

        advanced_stats['progressive_actions'] = len(progressive_gain)
        advanced_stats['total_distance_gained'] = float(progressive_gain.sum())
        # NOTE: averaged over ALL events in df, not only the progressive ones.
        advanced_stats['avg_distance_gained_per_action'] = (
            advanced_stats['total_distance_gained'] / len(df) if len(df) > 0 else 0
        )

    # 3. Pressure effectiveness
    pressure_events = df[df['type_name'] == 'Pressure']
    if not pressure_events.empty:
        # A pressure is successful when the NEXT event in the frame is a
        # turnover. The previous loop looked for the turnover inside the
        # Pressure-only subset, where it can never occur, so the count was
        # always 0.
        event_types = df['type_name'].reset_index(drop=True)
        next_is_turnover = event_types.shift(-1).isin(turnover_types)
        successful_pressures = int(((event_types == 'Pressure') & next_is_turnover).sum())
        advanced_stats['pressure_success_rate'] = successful_pressures / len(pressure_events)
        advanced_stats['successful_pressures'] = successful_pressures

    # 4. Ball recoveries and interceptions
    advanced_stats['ball_recoveries'] = int(df['type_name'].isin(turnover_types).sum())

    # 5. Dribble analysis
    dribble_events = df[df['type_name'] == 'Dribble']
    if not dribble_events.empty:
        successful_dribbles = int((dribble_events['outcome_name'] == 'Complete').sum())
        advanced_stats['dribble_success_rate'] = successful_dribbles / len(dribble_events)
        advanced_stats['successful_dribbles'] = successful_dribbles
        advanced_stats['total_dribbles'] = len(dribble_events)

    # 6. Foul analysis
    advanced_stats['fouls_committed'] = int((df['type_name'] == 'Foul Committed').sum())
    advanced_stats['fouls_won'] = int((df['type_name'] == 'Foul Won').sum())

    # 7. Aerial duels
    duel_events = df[df['type_name'] == 'Duel']
    if not duel_events.empty and 'duel_type_name' in duel_events.columns:
        aerial_duels = duel_events[duel_events['duel_type_name'] == 'Aerial']
        if not aerial_duels.empty:
            aerial_won = int((aerial_duels['outcome_name'] == 'Won').sum())
            advanced_stats['aerial_duels_won'] = aerial_won
            advanced_stats['aerial_duels_total'] = len(aerial_duels)
            advanced_stats['aerial_duel_success_rate'] = aerial_won / len(aerial_duels)

    # 8. Cross analysis
    if not pass_events.empty and 'pass_cross' in pass_events.columns:
        crosses = pass_events[pass_events['pass_cross'] == True]
        if not crosses.empty:
            successful_crosses = completed_count(crosses)
            advanced_stats['cross_accuracy'] = successful_crosses / len(crosses)
            advanced_stats['successful_crosses'] = successful_crosses
            advanced_stats['total_crosses'] = len(crosses)

    # 9. Key passes and assists
    if not pass_events.empty and 'pass_shot_assist' in pass_events.columns:
        advanced_stats['key_passes'] = int((pass_events['pass_shot_assist'] == True).sum())
    if not pass_events.empty and 'pass_goal_assist' in pass_events.columns:
        advanced_stats['assists'] = int((pass_events['pass_goal_assist'] == True).sum())

    # 10. Shot analysis
    shot_events = df[df['type_name'] == 'Shot']
    if not shot_events.empty:
        if 'x' in shot_events.columns and 'y' in shot_events.columns:
            # NOTE(review): this is the mean x coordinate of the shots, not a
            # distance to goal, and the box test uses x > 83 only (no y
            # bounds). Both depend on the pitch coordinate system, which is
            # not visible here — confirm before relying on these values.
            advanced_stats['avg_shot_distance'] = shot_events['x'].mean()
            advanced_stats['shots_from_inside_box'] = int((shot_events['x'] > 83).sum())
            advanced_stats['shots_from_outside_box'] = int((shot_events['x'] <= 83).sum())

        if 'technique_name' in shot_events.columns:
            advanced_stats['shot_techniques'] = shot_events['technique_name'].value_counts().to_dict()

    # 11. Possession analysis
    if 'possession' in df.columns:
        possession_counts = df['possession'].value_counts()
        advanced_stats['possession_sequences'] = len(possession_counts)
        advanced_stats['avg_actions_per_possession'] = (
            len(df) / len(possession_counts) if len(possession_counts) > 0 else 0
        )

    # 12. Actions per 15-minute phase within each period
    if 'timestamp' in df.columns and 'period' in df.columns:
        time_seconds = df['timestamp'].apply(period_timestamp_to_seconds)
        phase_bounds = (
            ('0_15min', None, 900),
            ('15_30min', 900, 1800),
            ('30_45min', 1800, 2700),
            ('45_60min', 2700, None),  # stoppage time beyond minute 45
        )
        for period in df['period'].unique():
            period_seconds = time_seconds[df['period'] == period]
            for label, lower, upper in phase_bounds:
                in_phase = pd.Series(True, index=period_seconds.index)
                if lower is not None:
                    in_phase &= period_seconds > lower
                if upper is not None:
                    in_phase &= period_seconds <= upper
                advanced_stats[f'period_{period}_actions_{label}'] = int(in_phase.sum())

    # 13. Defensive actions
    advanced_stats['defensive_actions'] = int(
        df['type_name'].isin(['Tackle', 'Interception', 'Clearance', 'Block']).sum()
    )

    # 14. Performance under pressure (skipped when the column is absent; the
    # unguarded lookup used to raise KeyError)
    if 'under_pressure' in df.columns:
        under_pressure_events = df[df['under_pressure'] == True]
        if not under_pressure_events.empty:
            advanced_stats['actions_under_pressure'] = len(under_pressure_events)
            advanced_stats['pressure_percentage'] = len(under_pressure_events) / len(df) * 100

            pressure_pass_events = under_pressure_events[under_pressure_events['type_name'] == 'Pass']
            if not pressure_pass_events.empty:
                advanced_stats['pass_accuracy_under_pressure'] = (
                    completed_count(pressure_pass_events) / len(pressure_pass_events)
                )

    # 15. Set piece involvement (0 when the play-pattern column is absent)
    if 'play_pattern_name' in df.columns:
        set_piece_patterns = ['From Free Kick', 'From Corner', 'From Throw In']
        advanced_stats['set_piece_actions'] = int(df['play_pattern_name'].isin(set_piece_patterns).sum())
    else:
        advanced_stats['set_piece_actions'] = 0

    return advanced_stats

def calculate_comprehensive_player_analysis(df: pd.DataFrame) -> Dict:
    """
    Run every analysis stage on one events dataframe and bundle the results.

    Returns a dictionary with ``basic_stats``, ``period_stats`` and
    ``advanced_stats``, the player and team name taken from the first row
    (when those columns exist and the frame has rows), and
    ``summary_insights`` derived from everything collected before it.
    """
    analysis = {}
    analysis['basic_stats'] = calculate_player_statistics(df)
    analysis['period_stats'] = calculate_period_statistics(df)
    analysis['advanced_stats'] = calculate_advanced_metrics(df)

    # Identity fields come from the first event row.
    if not df.empty:
        for column in ('player_name', 'team_name'):
            if column in df.columns:
                analysis[column] = df[column].iloc[0]

    # Computed last because the insights read the entries filled in above.
    analysis['summary_insights'] = generate_summary_insights(analysis)
    return analysis

def generate_summary_insights(analysis: Dict) -> Dict:
    """
    Derive headline insights from a combined analysis dictionary.

    Args:
        analysis: Must contain ``basic_stats``, ``advanced_stats`` and
            ``period_stats`` dictionaries; ``player_name`` / ``team_name``
            are copied over when present.

    Returns:
        Dictionary that always contains ``key_strengths`` (possibly empty)
        and, when the required inputs exist, ``performance_score`` (0..1),
        ``playing_style`` and ``performance_trend``.

    Raises:
        KeyError: if one of the three required sub-dictionaries is missing.
    """
    insights = {}

    for key in ('player_name', 'team_name'):
        if key in analysis:
            insights[key] = analysis[key]

    basic = analysis['basic_stats']
    advanced = analysis['advanced_stats']
    period_stats = analysis['period_stats']

    # Performance rating: 40% pass accuracy, 30% activity, 30% progressive share
    if 'pass_accuracy' in basic and 'actions_per_minute' in basic:
        # The progressive share must be relative to the number of actions.
        # The old denominator, len(analysis), was the number of keys in the
        # analysis dict (3-5), which has nothing to do with the event count.
        # actions_per_minute is events / (playing seconds / 60), so
        # multiplying back recovers the event count.
        total_actions = basic['actions_per_minute'] * basic.get('total_playing_time_seconds', 0) / 60
        if total_actions > 0:
            progressive_share = min(advanced.get('progressive_actions', 0) / total_actions, 1.0)
        else:
            progressive_share = 0.0

        performance_score = (basic['pass_accuracy'] * 0.4 +
                             min(basic['actions_per_minute'] / 10, 1.0) * 0.3 +
                             progressive_share * 0.3)
        insights['performance_score'] = min(performance_score, 1.0)

    # Playing style classification
    if 'pass_accuracy' in basic and 'avg_pass_distance' in basic:
        if basic['pass_accuracy'] > 0.85 and basic['avg_pass_distance'] < 20:
            insights['playing_style'] = 'Possession-based midfielder'
        elif basic['avg_pass_distance'] > 30:
            insights['playing_style'] = 'Long-range passer'
        elif advanced.get('dribble_success_rate', 0) > 0.7:
            insights['playing_style'] = 'Dribbling specialist'
        else:
            insights['playing_style'] = 'Balanced player'

    # Period comparison, based on the pass volume of the two halves
    if len(period_stats) >= 2:
        first_half_passes = period_stats.get('period_1', {}).get('total_passes', 0)
        second_half_passes = period_stats.get('period_2', {}).get('total_passes', 0)

        if first_half_passes > 0 and second_half_passes > 0:
            performance_change = (second_half_passes - first_half_passes) / first_half_passes
            if performance_change > 0.2:
                insights['performance_trend'] = 'Improved in second half'
            elif performance_change < -0.2:
                insights['performance_trend'] = 'Declined in second half'
            else:
                insights['performance_trend'] = 'Consistent performance'

    # Key strengths
    strengths = []
    if basic.get('pass_accuracy', 0) > 0.85:
        strengths.append('High pass accuracy')
    if advanced.get('progressive_actions', 0) > 10:
        strengths.append('Progressive play')
    if advanced.get('pressure_success_rate', 0) > 0.6:
        strengths.append('Effective pressing')

    insights['key_strengths'] = strengths

    return insights

In [None]:
# Run the full analysis pipeline on the sample player frame and display the
# resulting dictionary.
calculate_comprehensive_player_analysis(df_example)

In [None]:
# Display only the advanced metrics for the sample player frame.
calculate_advanced_metrics(df_example)

In [None]:
# Display the sample player frame.
# NOTE(review): this renders the whole frame; a `.head()` preview would keep
# the saved notebook smaller.
df_example

In [None]:
# Inspect the raw per-event timestamp values.
df_example["timestamp"]
# Disabled alternative: convert the column to seconds via pandas timedeltas.
#df_copy['time_seconds'] = pd.to_timedelta(df_copy['timestamp']).dt.total_seconds()

In [None]:
# Inspect the raw per-event timestamp values.
# NOTE(review): identical to the preceding cell — candidate for removal.
df_example["timestamp"]
# Disabled alternative: convert the column to seconds via pandas timedeltas.
#df_copy['time_seconds'] = pd.to_timedelta(df_copy['timestamp']).dt.total_seconds()

In [None]:
# Inspect the raw per-event timestamp values.
# NOTE(review): identical to the preceding cell — candidate for removal.
df_example["timestamp"]
# Disabled alternative: convert the column to seconds via pandas timedeltas.
#df_copy['time_seconds'] = pd.to_timedelta(df_copy['timestamp']).dt.total_seconds()

In [None]:
# Inspect the raw per-event timestamp values.
# NOTE(review): identical to the preceding cell — candidate for removal.
df_example["timestamp"]
# Disabled alternative: convert the column to seconds via pandas timedeltas.
#df_copy['time_seconds'] = pd.to_timedelta(df_copy['timestamp']).dt.total_seconds()

In [None]:
# Preview the first row of the sample frame to see which columns it has.
df_example.head(1)

In [None]:
from dotenv import load_dotenv
import os
load_dotenv()  # read RAPID_API_KEY (and anything else) from a local .env file
import requests
url = "https://api-football-v1.p.rapidapi.com/v3/players/profiles"

# Query-string filter: the player id to look up.
payload = {
    "player": "276"
}
headers = {
    'x-rapidapi-key': os.getenv('RAPID_API_KEY'),
    'x-rapidapi-host': "api-football-v1.p.rapidapi.com"
}

# The filter was built but never sent, so the call returned unfiltered
# profiles. For a GET request it belongs in `params` (query string), not in
# `data` (request body). The timeout stops the cell from hanging forever.
response = requests.get(url, headers=headers, params=payload, timeout=30)
response

In [None]:
import requests

url = "https://api-football-v1.p.rapidapi.com/v3/leagues"

querystring = {"search": "la liga"}

headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "api-football-v1.p.rapidapi.com"
}

# The timeout stops the cell from hanging on a stalled connection;
# raise_for_status surfaces HTTP errors (bad key, quota) here instead of as a
# confusing KeyError in the next cell.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

dict_response = response.json()

In [None]:

# League id of the first search hit.
dict_response["response"][0]["league"]["id"]

In [None]:
import requests

url = "https://api-football-v1.p.rapidapi.com/v3/players"

# Search players by name within one league id.
querystring = {"search": "messi", "league": "140"}

headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "api-football-v1.p.rapidapi.com"
}

# The timeout avoids hanging forever; raise_for_status reports HTTP errors
# here rather than when the body is indexed in the next cell.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

In [None]:
# First entry of the player search result.
response.json()["response"][0]

In [None]:
import requests

url = "https://transfermarket.p.rapidapi.com/search"

querystring = {"query": "claudio pizarro", "domain": "en"}

headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "transfermarket.p.rapidapi.com"
}

# The timeout avoids hanging forever; raise_for_status reports HTTP errors
# before the body is decoded.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

print(response.json())

In [None]:
# Player entries of the search result.
response.json()["players"]

In [None]:
import requests

url = "https://transfermarket.p.rapidapi.com/players/get-profile"

querystring = {"id": "532", "domain": "en"}

headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "transfermarket.p.rapidapi.com"
}

# The timeout avoids hanging forever; raise_for_status reports HTTP errors
# before the body is decoded.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

print(response.json())

In [None]:
# Seasons listed in the profile response.
response.json()["performanceSeasons"]

In [None]:
# Field names available in the player profile section.
response.json()["playerProfile"].keys()

In [None]:
import requests

url = "https://transfermarket.p.rapidapi.com/players/get-transfer-history"

querystring = {"id": "532", "domain": "en"}

headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "transfermarket.p.rapidapi.com"
}

# The timeout avoids hanging forever; raise_for_status reports HTTP errors
# before the body is decoded.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

print(response.json())

In [None]:
import pandas as pd

# One record per transfer, built directly as dicts instead of five parallel
# lists that have to be kept in step. Decoding the body once also avoids
# re-parsing it. A missing key still raises KeyError, as before.
transfer_history = response.json()["transferHistory"]
df_transfers = pd.DataFrame(
    [
        {
            "old_club": transfer["oldClubName"],
            "new_club": transfer["newClubName"],
            "old_club_id": transfer["oldClubID"],
            "new_club_id": transfer["newClubID"],
            "date": transfer["date"],
        }
        for transfer in transfer_history
    ],
    # Explicit columns keep the frame's shape stable for an empty history.
    columns=["old_club", "new_club", "old_club_id", "new_club_id", "date"],
)

In [None]:
# Distinct club ids (as plain ints) that appear on either side of a transfer.
clubs = list({
    int(club_id)
    for column in ("old_club_id", "new_club_id")
    for club_id in df_transfers[column].unique()
})

In [None]:
import requests

url = "https://transfermarket.p.rapidapi.com/players/get-performance-summary"

# Performance summary of one player for a single season.
querystring = {"id": "532", "domain": "en", "seasonID": "2001"}

headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "transfermarket.p.rapidapi.com"
}

# The timeout avoids hanging forever; raise_for_status reports HTTP errors
# before the body is decoded.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

response.json()

In [None]:
# Output column -> key inside each entry's "performance" object.
performance_fields = {
    "yellow_cards": "yellowCards",
    "yellow_red_cards": "yellowRedCards",
    "red_cards": "redCards",
    "minutes_played": "minutesPlayed",
    "penalty_goals": "penaltyGoals",
    "minutes_per_goal": "minutesPerGoal",
    "matches": "matches",
    "goals": "goals",
    "assists": "assists",
    "to_nil": "toNil",
    "conceded_goals": "concededGoals",
}

# One record per competition, built as dicts instead of thirteen parallel
# lists. "competitionPerformanceSummery" is the key as spelled in the
# response. A missing key still raises KeyError, as before.
performance_records = []
for entry in response.json()["competitionPerformanceSummery"]:
    record = {
        "competition": entry["competition"]["name"],
        "competition_id": entry["competition"]["id"],
    }
    for column, source_key in performance_fields.items():
        record[column] = entry["performance"][source_key]
    performance_records.append(record)

df_performance = pd.DataFrame(
    performance_records,
    # Explicit columns keep the frame's shape stable for an empty summary.
    columns=["competition", "competition_id", *performance_fields],
)

df_performance

In [None]:
# Putting together the data: search a player, then collect the profile,
# transfer history and per-season performance into DataFrames.
import numpy as np

# No trailing slash: paths are joined with "/" below, and the old value
# produced URLs with a double slash ("...com//search").
url_base = "https://transfermarket.p.rapidapi.com"
request_timeout_seconds = 30

player_name = "claudio pizarro"

# Built once; the same headers serve every request in this cell.
headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "transfermarket.p.rapidapi.com"
}


def _get_json(path, params):
    """GET ``url_base/path`` and return the decoded JSON body.

    Raises requests.HTTPError on a non-2xx answer, so a bad key or an
    exhausted quota is reported at the failing request.
    """
    reply = requests.get(f"{url_base}/{path}", headers=headers, params=params,
                         timeout=request_timeout_seconds)
    reply.raise_for_status()
    return reply.json()


# --- player id from the name search (first hit) ---
response = _get_json("search", {"query": player_name, "domain": "en"})
player_id = response["players"][0]["id"]

# --- seasons the player has performance data for ---
response_profile = _get_json("players/get-profile", {"id": player_id, "domain": "en"})
seasons_played = [season_entry["key"] for season_entry in response_profile["performanceSeasons"]]

# --- transfer history ---
response_transfer_history = _get_json("players/get-transfer-history", {"id": player_id, "domain": "en"})

df_transfers = pd.DataFrame(
    [
        {
            "old_club": transfer["oldClubName"],
            "new_club": transfer["newClubName"],
            "old_club_id": transfer["oldClubID"],
            "new_club_id": transfer["newClubID"],
            "date": transfer["date"],
        }
        for transfer in response_transfer_history["transferHistory"]
    ],
    columns=["old_club", "new_club", "old_club_id", "new_club_id", "date"],
)

# Distinct club ids (as ints) on either side of any transfer.
clubs = list(set(list(int(i) for i in df_transfers["old_club_id"].unique()) + list(int(i) for i in df_transfers["new_club_id"].unique())))

# --- per-season performance ---
# Output column -> key inside each entry's "performance" object.
performance_fields = {
    "yellow_cards": "yellowCards",
    "yellow_red_cards": "yellowRedCards",
    "red_cards": "redCards",
    "minutes_played": "minutesPlayed",
    "penalty_goals": "penaltyGoals",
    "minutes_per_goal": "minutesPerGoal",
    "matches": "matches",
    "goals": "goals",
    "assists": "assists",
    "to_nil": "toNil",
    "conceded_goals": "concededGoals",
}
performance_columns = [
    "competition", "competition_id", *performance_fields,
    "club_id", "club_name", "is_national_team", "season",
]

# Rows are collected in a plain list and the frame is built once at the end.
# Calling pd.concat inside the loop re-copied all earlier rows every season.
performance_records = []
for season in seasons_played:
    response = _get_json(
        "players/get-performance-summary",
        {"id": player_id, "domain": "en", "seasonID": season},
    )

    # "competitionPerformanceSummery" is the key as spelled in the response.
    for entry in response["competitionPerformanceSummery"]:
        first_club = entry["clubs"][0]
        record = {
            "competition": entry["competition"]["name"],
            "competition_id": entry["competition"]["id"],
        }
        for column, source_key in performance_fields.items():
            record[column] = entry["performance"][source_key]
        record["club_id"] = first_club["id"]
        record["club_name"] = first_club["name"]
        # Plain bool; np.where on a scalar stored a 0-d array in each cell.
        record["is_national_team"] = first_club["nationalTeam"] == "x"
        record["season"] = season
        performance_records.append(record)

df_final_performance = pd.DataFrame(performance_records, columns=performance_columns)

In [None]:
# Display the transfer history frame.
df_transfers


In [None]:
# Get transfer history
# The endpoint lives under "players/", as in the other transfer-history calls
# in this notebook; the previous path omitted that segment. rstrip avoids a
# double slash whether or not url_base ends with "/".
url = f"{url_base.rstrip('/')}/players/get-transfer-history"

querystring = {"id": player_id, "domain": "en"}

headers = {
    "x-rapidapi-key": os.getenv('RAPID_API_KEY'),
    "x-rapidapi-host": "transfermarket.p.rapidapi.com"
}

# The timeout avoids hanging forever; raise_for_status reports HTTP errors
# before the body is decoded.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

print(response.json())

In [None]:
# Sample player-search record, used to check dictionary access by key.
a = dict(
    id='28003',
    playerName='Lionel Messi',
    firstName='Lionel',
    lastName='Messi',
    alias='',
    nationImage='https://tmssl.akamaized.net//images/flagge/verysmall/9.png?lm=1520611569',
    club='Inter Miami CF',
)
a.get('club')