In [None]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import plot_importance
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the signed-players spreadsheet, parse the 'date' column as datetimes,
# then strip newline and space characters out of every string value
# (the original author's fix for malformed names).
data = (
    pd.read_excel('SignedPlayersDatasetVALORANT.xlsx')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .replace('\n', '', regex=True)
    .replace(' ', '', regex=True)
)

In [None]:
# Columns fed to the model as inputs, in the order the model receives them
features = [
    # per-round rates
    'kills_per_round',
    'assists_per_round',
    'average_damage_per_round',
    'first_kills_per_round',
    'first_deaths_per_round',
    # percentages
    'headshot_percentage',
    'clutch_success_percentage',
    # raw totals
    'total_kills',
    'total_deaths',
    'total_assists',
    'total_first_kills',
    'total_first_deaths',
]

# Column the model predicts
target_variable = 'average_combat_score'

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    data[features],
    data[target_variable],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Load the previously saved model from disk
filename = 'model w tuning.sav'
# NOTE: pickle.load can execute arbitrary code while deserializing;
# only load model files that come from a trusted source.
# The context manager closes the file handle even if unpickling fails.
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Predict the target for the held-out rows; the features are handed to the
# model as a plain NumPy array rather than a DataFrame
X_test_array = X_test.to_numpy()
y_pred = model.predict(X_test_array)

In [None]:
# Actual vs predicted average combat score on the held-out test set

# Create a larger figure
plt.figure(figsize=(18, 18))

# One point per test row. `y_pred` holds the model's predictions for X_test
# (the previous name `predictions_optimized` was never defined in this notebook).
plt.scatter(y_test, y_pred)

# Dashed y = x reference line: points lying on it are perfect predictions
acs_min, acs_max = y_test.min(), y_test.max()
plt.plot([acs_min, acs_max], [acs_min, acs_max], linestyle='--', color='red', linewidth=2)
plt.xlabel("Actual Average Combat Score")
plt.ylabel("Predicted Average Combat Score")
plt.title("Actual vs Predicted Average Combat Score")

plt.show()

In [None]:
# Mean squared error between the true test targets and the model's predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

In [None]:
# Residual plot: prediction error (actual - predicted) against the predicted value.
# Uses `y_pred` from the prediction cell; the previous name
# `predictions_optimized` was never defined in this notebook.
residuals = y_test - y_pred
plt.scatter(y_pred, residuals)
# Zero line: points above it were under-predicted, points below over-predicted
plt.axhline(y=0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.show()

In [None]:
# Distribution of residuals: histogram with a kernel-density overlay
sns.histplot(residuals, kde=True)
plt.xlabel('Residuals')
plt.ylabel('Frequency')
# Render the figure explicitly, like the other plotting cells, so the cell
# does not end by echoing the repr of the last label call
plt.show()

In [None]:
# Other graphs: per-player highest-ACS comparison and feature importance

In [None]:
# Actual vs predicted graph for each player's highest ACS

# Predict for every row of the dataset with the loaded `model` (the previous
# name `opt` was never defined in this notebook). Features are passed as a
# NumPy array, matching how the model is called on the test set.
# NOTE(review): this covers all rows of `data`, not just the X_test hold-out,
# so the plot is not an out-of-sample evaluation.
df_model_predict = data.assign(predictions=model.predict(data[features].to_numpy()))

# Keep one row per player: sorting by ACS descending and dropping later
# duplicates leaves each player's highest-ACS row
df_only_first = (
    df_model_predict
    .sort_values('average_combat_score', ascending=False)
    .drop_duplicates(subset='player', keep='first')
)

actual = df_only_first['average_combat_score']
high_predictions = df_only_first['predictions']

# Create a larger figure
plt.figure(figsize=(18, 18))

# Plot actual vs predicted values, with a dashed y = x reference line
plt.scatter(actual, high_predictions)
actual_min, actual_max = actual.min(), actual.max()
plt.plot([actual_min, actual_max], [actual_min, actual_max], linestyle='--', color='red', linewidth=2)
plt.xlabel("Actual Highest Average Combat Score")
plt.ylabel("Predicted Highest Average Combat Score")
plt.title("Actual vs Predicted Highest Average Combat Score")

# Label each point with the player name, shifted upward so the text does not
# sit on top of the marker
offset = 10  # vertical label offset in points; adjust to taste
for row in df_only_first.itertuples():
    plt.annotate(row.player, (row.average_combat_score, row.predictions), textcoords="offset points", xytext=(0, offset))

# Show the larger plot
plt.show()

In [None]:
# Feature-importance chart for the loaded model (the previous name `opt` was
# never defined in this notebook).
# NOTE(review): assumes the pickled object is an XGBoost model/booster that
# plot_importance accepts — confirm against how the .sav file was saved.
plot_importance(model)
plt.show()