In [1]:
import pandas as pd

# Build running league-table ranks for the training matches.
#
# Reads train.csv and match_reports.csv, joins them on "id", walks the matches
# in chronological order while accumulating league points per team (reset at
# every calendar-year change), and stores each side's position in the running
# table as `home_team_rank` / `away_team_rank`. The result is written to
# train_ranked.csv.

# Load match reports data
match_reports = pd.read_csv("match_reports.csv")
train = pd.read_csv("train.csv")

# Merge data on match id (inner join: matches without a report row are dropped)
data = pd.merge(train, match_reports, on="id")

# Convert match_date to datetime BEFORE sorting. Sorting the raw strings is
# lexicographic and only matches chronological order for ISO-like formats;
# the yearly reset and the running totals below both rely on true date order.
data['match_date'] = pd.to_datetime(data['match_date'])

# Sort chronologically. A stable sort keeps matches played on the same date in
# their original (merged) order, so the running table is reproducible.
data = data.sort_values(by="match_date", kind="mergesort")

# Define point rules
points = {"win": 3, "draw": 1, "lose": 0}

# Running points per team for the year currently being processed
team_points = {}
last_year = data['match_date'].dt.year.min()

# Ranks are collected here and attached as columns once after the loop,
# instead of writing into the DataFrame cell by cell.
home_team_ranks = []
away_team_ranks = []

for _, row in data.iterrows():
    # Get team ids
    home_team_id = row['home_team']
    away_team_id = row['away_team']

    # Check if a new year has started
    current_year = row['match_date'].year
    if current_year != last_year:
        # Reset team points for the new year
        team_points = {}
        last_year = current_year

    # Calculate result.
    # NOTE(review): if either score is NaN both comparisons are False and the
    # match is counted as a draw -- confirm scores are always present.
    if row['home_team_score'] > row['away_team_score']:
        home_team_points = points["win"]
        away_team_points = points["lose"]
    elif row['home_team_score'] < row['away_team_score']:
        home_team_points = points["lose"]
        away_team_points = points["win"]
    else:
        home_team_points = points["draw"]
        away_team_points = points["draw"]

    # Update team points
    team_points[home_team_id] = team_points.get(home_team_id, 0) + home_team_points
    team_points[away_team_id] = team_points.get(away_team_id, 0) + away_team_points

    # Rank teams by points, highest first. sorted() is stable, so teams level
    # on points keep the order in which they first appeared this year and
    # still receive distinct consecutive ranks.
    # NOTE(review): the table already includes the points from THIS match, so
    # the rank reflects the current result. Confirm that is intended if these
    # columns are used as pre-match features.
    standings = sorted(team_points, key=team_points.get, reverse=True)
    home_team_ranks.append(standings.index(home_team_id) + 1)
    away_team_ranks.append(standings.index(away_team_id) + 1)

# Attach the ranks in row order. Stored as float64 because the previous
# row-by-row `.loc` assignment appears to have produced float-valued columns;
# TODO confirm and switch to an integer dtype if downstream code allows.
data['home_team_rank'] = pd.Series(home_team_ranks, index=data.index, dtype="float64")
data['away_team_rank'] = pd.Series(away_team_ranks, index=data.index, dtype="float64")

# Save the updated train data
data.to_csv("train_ranked.csv", index=False)


In [1]:
import pandas as pd

# Build running league-table ranks for the test matches.
#
# Reads test.csv and match_reports.csv, joins them on "id", walks the matches
# in chronological order while accumulating league points per team (reset at
# every calendar-year change), and stores each side's position in the running
# table as `home_team_rank` / `away_team_rank`. The result is written to
# test_ranked.csv.

# Load match reports data
match_reports = pd.read_csv("match_reports.csv")
test = pd.read_csv("test.csv")

# Merge data on match id (inner join: matches without a report row are dropped)
data = pd.merge(test, match_reports, on="id")

# Convert match_date to datetime BEFORE sorting. Sorting the raw strings is
# lexicographic and only matches chronological order for ISO-like formats;
# the yearly reset and the running totals below both rely on true date order.
data['match_date'] = pd.to_datetime(data['match_date'])

# Sort chronologically. A stable sort keeps matches played on the same date in
# their original (merged) order, so the running table is reproducible.
data = data.sort_values(by="match_date", kind="mergesort")

# Define point rules
points = {"win": 3, "draw": 1, "lose": 0}

# Running points per team for the year currently being processed
team_points = {}
last_year = data['match_date'].dt.year.min()

# Ranks are collected here and attached as columns once after the loop,
# instead of writing into the DataFrame cell by cell.
home_team_ranks = []
away_team_ranks = []

for _, row in data.iterrows():
    # Get team ids
    home_team_id = row['home_team']
    away_team_id = row['away_team']

    # Check if a new year has started
    current_year = row['match_date'].year
    if current_year != last_year:
        # Reset team points for the new year
        team_points = {}
        last_year = current_year

    # Calculate result.
    # NOTE(review): if either score is NaN both comparisons are False and the
    # match is counted as a draw -- confirm scores are always present.
    if row['home_team_score'] > row['away_team_score']:
        home_team_points = points["win"]
        away_team_points = points["lose"]
    elif row['home_team_score'] < row['away_team_score']:
        home_team_points = points["lose"]
        away_team_points = points["win"]
    else:
        home_team_points = points["draw"]
        away_team_points = points["draw"]

    # Update team points
    team_points[home_team_id] = team_points.get(home_team_id, 0) + home_team_points
    team_points[away_team_id] = team_points.get(away_team_id, 0) + away_team_points

    # Rank teams by points, highest first. sorted() is stable, so teams level
    # on points keep the order in which they first appeared this year and
    # still receive distinct consecutive ranks.
    # NOTE(review): the table already includes the points from THIS match, so
    # the rank reflects the current result. Confirm that is intended if these
    # columns are used as pre-match features.
    standings = sorted(team_points, key=team_points.get, reverse=True)
    home_team_ranks.append(standings.index(home_team_id) + 1)
    away_team_ranks.append(standings.index(away_team_id) + 1)

# Attach the ranks in row order. Stored as float64 because the previous
# row-by-row `.loc` assignment appears to have produced float-valued columns;
# TODO confirm and switch to an integer dtype if downstream code allows.
data['home_team_rank'] = pd.Series(home_team_ranks, index=data.index, dtype="float64")
data['away_team_rank'] = pd.Series(away_team_ranks, index=data.index, dtype="float64")

# Save the updated test data
data.to_csv("test_ranked.csv", index=False)
