## Exploratory analysis

In [7]:
import pandas as pd

In [2]:
df_a = pd.read_csv("group_stage_stats.csv")
df_a

Unnamed: 0,Player,PTS,Number of Matches,Kills,Assists,KDA,KD,KAS,Knocks,Kills/Knocks,...,Flashes Thrown,Swimming Distance (km),Walk Distance (km),Ride Distance (km),Time Survived,Avg Time Survived,Heals,Health Recovered,Boosts,Vehicle Destroys
0,DAY_TiGGER,93.07,12,25,14,3.5,2.3,0.8,22,1.1,...,6,0.0,15.5,34.4,16371,1364,28,1640.4,58,4
1,DAY_Flash,81.19,12,23,10,3.0,2.1,0.8,20,1.1,...,6,0.0,15.3,31.7,15618,1301,42,2287.7,32,2
2,TWIS_Lu,80.18,12,20,10,2.5,1.7,0.8,21,1.0,...,9,0.1,15.4,39.7,18371,1530,50,2691.1,42,0
3,TE_Clories,92.67,12,23,6,2.6,2.1,0.8,21,1.1,...,9,0.0,12.7,35.0,16462,1371,31,1993.2,49,5
4,BB_Bestoloch,81.66,12,19,11,3.0,1.9,0.8,13,1.5,...,0,0.0,13.6,40.1,17138,1428,38,2530.6,39,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,VP_Beami,16.96,12,2,6,0.7,0.2,0.3,6,0.3,...,0,0.0,9.4,67.7,13480,1123,77,3969.3,29,0
92,VP_Lukarux,18.22,12,4,1,0.4,0.3,0.3,4,1.0,...,0,0.0,9.8,67.7,12732,1061,35,1668.6,44,2
93,PeRo_Nannnnn,19.82,12,4,1,0.5,0.4,0.3,2,2.0,...,2,0.0,11.2,38.5,13396,1116,16,827.7,26,0
94,BST_SzylzEN,9.76,12,2,2,0.3,0.2,0.3,4,0.5,...,3,0.0,12.7,43.7,15186,1265,9,736.1,49,1


Correlation between KDA and KD

In [3]:
df_a["KDA"].corr(df_a["KD"]).item() * 100

92.76549930604604

In [4]:
df_a["KDA"].corr(df_a["PTS"]).item() * 100

92.43097925715648

## Model

Feature selection

In [5]:
# 选择特征列 (X) 和目标列 (y)
X = df_a[['KDA', 'Avg Time Survived']]  # 选择多列作为特征
y = df_a['PTS']                  # 选择单列作为目标

print("特征数据 (X):\n", X)
print("目标数据 (y):\n", y)

特征数据 (X):
     KDA  Avg Time Survived
0   3.5               1364
1   3.0               1301
2   2.5               1530
3   2.6               1371
4   3.0               1428
..  ...                ...
91  0.7               1123
92  0.4               1061
93  0.5               1116
94  0.3               1265
95  0.3               1169

[96 rows x 2 columns]
目标数据 (y):
 0     93.07
1     81.19
2     80.18
3     92.67
4     81.66
      ...  
91    16.96
92    18.22
93    19.82
94     9.76
95    11.89
Name: PTS, Length: 96, dtype: float64


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

Modeling and evaluation

In [9]:
from sklearn.linear_model import LinearRegression

# 初始化模型
model = LinearRegression()

# 训练模型
model.fit(X_train, y_train)  # 如果未划分训练集，直接用 model.fit(X, y)

In [10]:
print("回归系数:", model.coef_)    # 对应 feature1 和 feature3 的权重
print("截距:", model.intercept_)  # 模型偏置项

# 预测测试集
y_pred = model.predict(X_test)
print("预测值:", y_pred)

回归系数: [2.4052196e+01 2.3760562e-02]
截距: -20.880228879561344
预测值: [27.00381081 31.06686692 39.42303269 16.39254086 43.20636489 30.34324438
 35.4204526  43.90082402 95.71186368 60.48894993 28.14431779 47.72627451
 60.94580344 85.20644166 40.00949469 50.95771094 59.49100632 58.58810496
 27.85919104 21.83696318 45.51654224 38.03411442 74.94943092 31.39951478
 24.1125743  68.99848474 42.49895086 53.32621509 35.59758221]


In [14]:
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("均方误差 (MSE):", mse)
print("R² 分数:", r2)
n = len(y)
k = X.shape[1]
adjusted_r_squared = 1 - (1 - r2) * (n - 1) / (n - k - 1)
print(adjusted_r_squared)

均方误差 (MSE): 52.444002684494016
R² 分数: 0.8800997861971492
0.8775212869755825


### Prediction

In [22]:
player_prediction_player = df_a['Player'].to_numpy()
player_prediction_player

array(['DAY_TiGGER', 'DAY_Flash', 'TWIS_Lu', 'TE_Clories', 'BB_Bestoloch',
       'DAY_Belmoth', 'DAY_Thanad0l', 'TKL_Cold119', 'TKL_Spaceman',
       'FLC_TGLTN', 'FLC_Kickstart', 'TE_TanVuu', 'BGP_2heart',
       'BB_Molodoct', 'TWIS_xmpl', 'FOR_KISS', 'PeRo_Aixleft',
       'FLC_Shrimzy', 'BB_f1lfirst', 'BB_ADOUZ1E', 'GEN_BeaN', 'NH_16KK',
       'BST_Dr4FTk1NG', 'TWIS_BatulinS', 'DNF_DIEL', 'DNF_Salute',
       'TWIS_Perfect1ks', 'TKL_Donzz', 'GK_spyrro', 'NH_Dec12th',
       'NH_ZiLong1', 'T1_Heather', 'FUR_guizeraa', 'EA_Xizzy', '17_Xbei',
       'TKL_SuZe', 'BST_v1n1zxz', 'DNF_Heaven', '17_77owo', 'TE_Delwyn',
       'NH_Inuyasha', 'BGP_Parkpro', 'EA_Nourinz', 'T5_Flukky',
       'FLC_hwinn', 'VP_NIXZYEE', 'NAVI_Feyerist', '4AM_ATongMuu',
       'FOR_Olympus', 'PeRo_MMing', 'BGP_AKaN', 'FOR_Baren', 'FOR_Scappy',
       '17_Lilghost', '17_xwudd', 'NAVI_Hakatory', 'NAVI_SoseD',
       'NAVI_staed', 'T1_EEND', 'GEN_Tosi', 'T1_Type', '4AM_Longz',
       'GK_pwddddddddd', 'T5_RangerX

In [20]:
player_prediction_pts_X = df_a[['KDA', 'Avg Time Survived']]
player_prediction_pts = model.predict(player_prediction_pts_X)
player_prediction_pts

array([95.71186368, 82.18885028, 75.60392098, 74.23121122, 85.20644166,
       83.30559669, 69.94350439, 65.95172997, 79.04165042, 68.99848474,
       60.48894993, 53.32621509, 59.49100632, 78.70900255, 46.8600886 ,
       74.94943092, 67.686251  , 63.09505969, 60.94580344, 59.49100632,
       38.8527792 , 51.94269965, 58.58810496, 56.84277826, 60.87452175,
       56.47341495, 50.95771094, 70.4003579 , 38.3538074 , 44.03258172,
       47.72627451, 40.00949469, 53.13613059, 43.20636489, 42.88992553,
       47.15602102, 51.96646022, 38.58601018, 49.78589135, 42.49895086,
       45.51654224, 47.7554379 , 43.90082402, 45.36857603, 46.94217596,
       35.59758221, 33.82094289, 35.37833431, 52.5658771 , 39.13250311,
       41.98917339, 52.32827148, 47.04262104, 27.85919104, 33.02603867,
       24.38474615, 28.86253749, 47.38067175, 28.33440228, 36.70892579,
       36.64304694, 42.23758468, 29.23730364, 47.29858439, 28.14431779,
       37.87859616, 40.89618754, 38.03411442, 31.39951478, 35.42

In [27]:
players_data_pred = pd.DataFrame({'Player': player_prediction_player, 'PTS_pred': player_prediction_pts})
pd.set_option('display.max_rows', None)
players_data_pred.sort_values('PTS_pred', ascending=False)

Unnamed: 0,Player,PTS_pred
0,DAY_TiGGER,95.711864
4,BB_Bestoloch,85.206442
5,DAY_Belmoth,83.305597
1,DAY_Flash,82.18885
8,TKL_Spaceman,79.04165
13,BB_Molodoct,78.709003
2,TWIS_Lu,75.603921
15,FOR_KISS,74.949431
3,TE_Clories,74.231211
27,TKL_Donzz,70.400358
