In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from sklearn.preprocessing import StandardScaler

In [40]:
# Load the held-out test set and apply the preprocessing steps below before scoring.
test_df = pd.read_csv("../data/jobfair_test.csv")

# Missing competition levels are filled with 0.
# The filled column is assigned back rather than calling
# `test_df[col].fillna(0, inplace=True)`: that chained in-place form acts on a
# temporary selection, emits a FutureWarning on recent pandas, and leaves
# `test_df` unchanged once Copy-on-Write is enabled.
test_df["global_competition_level"] = test_df["global_competition_level"].fillna(0)

# Group the rows by league so that clubs of one league sit in consecutive rows
# (the prediction cell slices the frame in fixed-size consecutive chunks),
# then renumber the index to match the new row order.
test_df = test_df.sort_values(by="league_id").reset_index(drop=True)

# Columns removed before the frame is handed to the scaler/model.
# NOTE(review): presumably this mirrors the training-time feature set — confirm.
columns_to_drop = ["season", "league_id", "registration_country", "registration_platform_specific"]
test_df = test_df.drop(columns=columns_to_drop)

# Ordinal encoding of the payment segment labels.
# Labels that are not keys of this mapping become NaN after `.map`.
encoding_map = {
    '0) NonPayer': 0,
    '1) ExPayer': 1,
    '2) Minnow': 2,
    '3) Dolphin': 3,
    '4) Whale': 4
}
test_df["dynamic_payment_segment"] = test_df["dynamic_payment_segment"].map(encoding_map)

test_df.head()

Unnamed: 0,club_id,dynamic_payment_segment,cohort_season,avg_age_top_11_players,avg_stars_top_11_players,avg_stars_top_14_players,avg_training_factor_top_11_players,days_active_last_28_days,league_match_watched_count_last_28_days,session_count_last_28_days,playtime_last_28_days,league_match_won_count_last_28_days,training_count_last_28_days,global_competition_level,tokens_spent_last_28_days,tokens_stash,rests_stash,morale_boosters_stash
0,14643992,0,1,23,3.126109,2.999238,0.627454,4,0,6,1173927,6,2,0.0,2,53,42,25
1,14621039,0,1,22,4.715927,4.563305,0.456146,22,0,48,46566612,18,87,6.0,155,9,52,62
2,14636682,0,1,22,3.309782,3.134048,0.640674,5,0,7,1932989,8,4,0.0,13,54,49,39
3,14632000,0,1,24,3.249491,3.048162,0.660963,2,0,12,6533791,7,5,0.0,9,58,44,16
4,14646103,0,1,25,4.752679,4.637295,0.441367,15,0,50,20609753,12,30,0.0,111,15,76,53


In [42]:
# Display the (rows, columns) of the preprocessed frame as a quick sanity check.
test_df.shape

(60270, 18)

**LOAD MODEL AND SCALER**

In [43]:
# Locations of the persisted artefacts, relative to the notebook directory.
# NOTE(review): presumably an XGBoost regressor and the scaler fitted at
# training time — confirm against the training notebook.
model_path = "../models/xgbr_model.pkl"
scaler_path = "../models/scaler.pkl"

# joblib files are pickles: loading them can execute arbitrary code, so only
# load artefacts that come from a trusted source.
model = joblib.load(model_path)
scaler = joblib.load(scaler_path)

**RUN PREDICTIONS**

In [50]:
# Number of consecutive rows treated as one league. `test_df` no longer has a
# league column here, so leagues are recovered purely from row position.
LEAGUE_SIZE = 14

# Positional chunking is only meaningful when the rows split into whole leagues;
# otherwise chunks would straddle league boundaries and ranks would be wrong
# without any visible error.
assert len(test_df) % LEAGUE_SIZE == 0, (
    f"expected a multiple of {LEAGUE_SIZE} rows, got {len(test_df)}"
)

# Scale and score every club in one batch. Both steps work row by row, so the
# values match per-league calls while avoiding thousands of tiny transform /
# predict invocations.
features = test_df.drop(columns="club_id")
if len(features):
    predicted_scores = model.predict(scaler.transform(features))
else:
    predicted_scores = []  # nothing to score; keeps the result an empty dict

club_ids = test_df["club_id"].tolist()

# Maps club_id -> rank within its league (1 = lowest predicted value).
rank_predictions = {}

for start in range(0, len(club_ids), LEAGUE_SIZE):
    stop = start + LEAGUE_SIZE
    league = zip(club_ids[start:stop], predicted_scores[start:stop])

    # `sorted` is stable, so clubs with equal predictions keep their row order.
    ordered = sorted(league, key=lambda pair: pair[1])

    for rank, (club_id, _score) in enumerate(ordered, start=1):
        rank_predictions[club_id] = rank

In [52]:
# Turn the club_id -> rank mapping into a two-column table, one row per club,
# in the order the clubs were inserted into the mapping.
results_df = pd.DataFrame(
    {
        'club_id': list(rank_predictions.keys()),
        'league_rank': list(rank_predictions.values()),
    }
)

In [53]:
# Preview the first rows of the club_id / league_rank table.
results_df.head()

Unnamed: 0,club_id,league_rank
0,14640324,1
1,14657802,2
2,14621039,3
3,14646103,4
4,14641979,5
