In [1]:
import pandas as pd

# Load your CSV file
df = pd.read_csv('/kaggle/input/cricket-features/Merged_Fantasy_Team_Data.csv')

In [2]:
import numpy as np
import pandas as pd

# Function to calculate the sum of overs for bowling, ensuring proper numeric conversion
def get_total_overs(overs_value):
    if isinstance(overs_value, list):
        return np.sum([float(ov) for ov in overs_value])  # Ensure the values are numeric
    else:
        try:
            return float(overs_value)  # Convert single value to float
        except ValueError:
            return 0  # Return 0 if conversion fails

# Combined function to calculate both batting and bowling statistics for the last five matches
def calculate_player_stats(player_name, current_match_id):
    # Filter player data for matches prior to the current match
    player_data = df[(df['fullName'] == player_name) & (df['match_id'] < current_match_id)]
    
    # Batting-related calculations
    batting_data = player_data[player_data['batting_position'] != 0]
    
    # Last five batting matches
    last_five_batting = batting_data.tail(5)
    mean_runs = last_five_batting['runs'].mean() if not last_five_batting.empty else 0
    
    total_runs = batting_data['runs'].sum()
    total_balls = batting_data['balls'].sum()  # Assuming 'balls' column exists
    
    if total_balls > 0:
        overall_strike_rate = (total_runs / total_balls) * 100
    else:
        overall_strike_rate = 0

    # Bowling-related calculations
    bowling_data = player_data[player_data['Overs_Bowled'] != 0]
    
    # Calculate total overs bowled by processing overs as lists or individual values
    bowling_data['Total_Overs_Bowled'] = bowling_data['Overs_Bowled'].apply(get_total_overs)
    
    # Last five bowling matches
    last_five_bowling = bowling_data.tail(5)
    mean_wickets = last_five_bowling['wickets'].mean() if not last_five_bowling.empty else 0
    
    total_runs_conceded = bowling_data['conceded'].sum()  # Assuming 'conceded' holds runs conceded by the bowler
    total_balls_bowled = bowling_data['total_balls'].sum()

    if total_balls_bowled > 0:
        overall_economy_rate = (total_runs_conceded / total_balls_bowled)*6
    else:
        overall_economy_rate = 0
    
    return mean_runs, overall_strike_rate, mean_wickets, overall_economy_rate


# Create new columns for batting and bowling statistics
df['mean_last_5_runs'] = 0.0
df['overall_strike_rate'] = 0.0
df['mean_last_5_wickets'] = 0.0
df['overall_economy_rate'] = 0.0

# Calculate stats for each player and update the DataFrame
for index, row in df.iterrows():
    player = row['fullName']
    current_match_id = row['match_id']
    
    # Get calculated statistics
    mean_runs, overall_strike_rate, mean_wickets, overall_economy_rate = calculate_player_stats(player, current_match_id)
    
    # Assign values to the corresponding columns
    df.at[index, 'mean_last_5_runs'] = mean_runs
    df.at[index, 'overall_strike_rate'] = overall_strike_rate
    df.at[index, 'mean_last_5_wickets'] = mean_wickets
    df.at[index, 'overall_economy_rate'] = overall_economy_rate

# Save the updated DataFrame to a new CSV file
df.to_csv('updated_fantasy_team_data_combined.csv', index=False)


In [3]:
de=df[df['fullName']=="Ajit Agarkar"]

In [4]:
de.to_csv("deve.csv",index=False)

In [20]:
import pandas as pd
df = pd.read_csv('./updated_fantasy_team_data_combined.csv')

In [21]:
# 1. Remove the 'match_name' column
df = df.drop(columns=['match_name','Bowling_FP_y','Fielding_FP_y','Fielding_FP_y'])


In [22]:
#2
def get_player_role(row):
    # Conditions for Batsman
    if row['batting_position'] > 0 and row['overs'] == 0 and row['wickets'] == 0:
        return 'Batsman'
    
    # Conditions for Bowler
    elif row['overs'] > 0 or row['wickets'] > 0:
        return 'Bowler'
    
    # Conditions for All-rounder
    elif row['batting_position'] > 0 and (row['overs'] > 0 or row['wickets'] > 0):
        return 'All-rounder'
    
    # Conditions for Wicketkeeper
    elif row['stumping_FP'] > 0:
        return 'Wicketkeeper'
    
    # Conditions for Fielder (default role if none of the above match)
    elif row['catching_FP'] > 0 or row['direct_runout_FP'] > 0 or row['stumping_FP'] > 0:
        return 'Fielder'
    
    # If no role matches, return 'Unknown'
    return 'Unknown'

# Apply the logic to the dataset
df['player_role'] = df.apply(get_player_role, axis=1)


In [23]:
# Calculate the team total fantasy points per match
df['team_total_fp'] = df.groupby(['team', 'match_id'])['Total_FP'].transform('sum')

def calculate_team_and_player_averages(team, player_name, current_match_id):
    # Filter data for the team and player for matches before the current match
    team_data = df[(df['team'] == team) & (df['match_id'] < current_match_id)]
    player_data = df[(df['fullName'] == player_name) & (df['match_id'] < current_match_id)]
    
    # Team form calculation (average of last 5 matches)
    last_five_team_matches = team_data.tail(5)
    team_form = last_five_team_matches['team_total_fp'].mean() if not last_five_team_matches.empty else 0
    
    # Batting moving average (last 5 matches)
    last_five_batting = player_data[player_data['batting_position'] != 0].tail(5)
    Batting_MA_5 = last_five_batting['Batting_FP_x'].mean() if not last_five_batting.empty else 0
    
    # Bowling moving average (last 5 matches)
    last_five_bowling = player_data[player_data['Overs_Bowled'] != 0].tail(5)
    Bowling_MA_5 = last_five_bowling['Bowling_FP_x'].mean() if not last_five_bowling.empty else 0
    
    # Fielding moving average (last 5 matches)
    last_five_fielding = player_data.tail(5)  # assuming fielding fantasy points apply to all rows
    Fielding_MA_5 = last_five_fielding['Fielding_FP_x'].mean() if not last_five_fielding.empty else 0
    
    return team_form, Batting_MA_5, Bowling_MA_5, Fielding_MA_5

# Create new columns for team and player moving averages
df['team_form'] = 0.0
df['Batting_MA_5'] = 0.0
df['Bowling_MA_5'] = 0.0
df['Fielding_MA_5'] = 0.0

# Calculate stats for each player and update the DataFrame
for index, row in df.iterrows():
    team = row['team']
    player = row['fullName']
    current_match_id = row['match_id']
    
    # Get calculated averages
    team_form, Batting_MA_5, Bowling_MA_5, Fielding_MA_5 = calculate_team_and_player_averages(team, player, current_match_id)
    
    # Assign values to the corresponding columns
    df.at[index, 'team_form'] = team_form
    df.at[index, 'Batting_MA_5'] = Batting_MA_5
    df.at[index, 'Bowling_MA_5'] = Bowling_MA_5
    df.at[index, 'Fielding_MA_5'] = Fielding_MA_5


In [24]:
# Display the updated DataFrame with new features
df.head(20)

Unnamed: 0,match_id,season,home_team,away_team,venue,fullName,batting_position,runs,balls,fours,...,mean_last_5_runs,overall_strike_rate,mean_last_5_wickets,overall_economy_rate,player_role,team_total_fp,team_form,Batting_MA_5,Bowling_MA_5,Fielding_MA_5
0,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Ajit Agarkar,0,0,0,0,...,0.0,0.0,0.0,0.0,Bowler,645,0.0,0.0,0.0,0.0
1,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Ashley Noffke,8,9,10,1,...,0.0,0.0,0.0,0.0,Bowler,199,0.0,0.0,0.0,0.0
2,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Ashok Dinda,0,0,0,0,...,0.0,0.0,0.0,0.0,Bowler,645,0.0,0.0,0.0,0.0
3,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Balachandra Akhil,7,0,2,0,...,0.0,0.0,0.0,0.0,Batsman,199,0.0,0.0,0.0,0.0
4,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Brendon McCullum,2,158,73,10,...,0.0,0.0,0.0,0.0,Batsman,645,0.0,0.0,0.0,0.0
5,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Cameron White,5,6,10,0,...,0.0,0.0,0.0,0.0,Bowler,199,0.0,0.0,0.0,0.0
6,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",David Hussey,4,12,12,1,...,0.0,0.0,0.0,0.0,Batsman,645,0.0,0.0,0.0,0.0
7,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Ishant Sharma,0,0,0,0,...,0.0,0.0,0.0,0.0,Bowler,645,0.0,0.0,0.0,0.0
8,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Jacques Kallis,4,8,7,0,...,0.0,0.0,0.0,0.0,Bowler,199,0.0,0.0,0.0,0.0
9,335982,2008,RCB,KKR,"M.Chinnaswamy Stadium, Bengaluru",Laxmi Shukla,0,0,0,0,...,0.0,0.0,0.0,0.0,Bowler,645,0.0,0.0,0.0,0.0


In [25]:
df[df['fullName']=="Sourav Ganguly"].to_csv('lol.csv',index=False)

In [None]:
# Save the updated DataFrame to a new CSV file
df.to_csv('3.csv', index=False)