In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the match dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("high_diamond_ranked_10min.csv")

# Structural overview of the raw columns. `info()` prints on its own, but
# `describe()` only returns a frame: Jupyter renders just the last expression
# of a cell, so the summary must be printed explicitly or it is discarded.
df.info()
print(df.describe())

# Blue-minus-red differentials, appended as new columns (after the summary
# above, so the statistics describe the raw data only).
df = df.assign(
    goldDif=df["blueTotalGold"] - df["redTotalGold"],
    xpDif=df["blueTotalExperience"] - df["redTotalExperience"],
    killDif=df["blueKills"] - df["redKills"],
)

# Model inputs: raw per-team columns followed by the derived differentials.
features = [
    "blueTotalGold",
    "blueTotalExperience",
    "blueKills",
    "blueDeaths",
    "blueEliteMonsters",
    "redTotalGold",
    "redTotalExperience",
    "redKills",
    "redDeaths",
    "redEliteMonsters",
    "goldDif",
    "xpDif",
    "killDif",
]

# Feature matrix and prediction target.
X = df[features]
y = df["blueWins"]


# Correlation heatmap of every model feature together with the target column.
plt.figure(figsize=(7,7))
sns.heatmap(df[features + ["blueWins"]].corr(), annot=True, fmt=".1f", cmap="coolwarm")
plt.title("Feature correlation with blueWins")
plt.show()

# Distribution of the kill differential, coloured by the value of blueWins.
plt.figure(figsize=(7,7))
sns.histplot(df, x="killDif", hue="blueWins", bins=30, kde=True, palette="coolwarm")
plt.title("Kill differential by blueWins")
plt.show()

# Columns for the pairwise scatter matrix. The derived columns are named with a
# single "f" ("goldDif", "xpDif", "killDif"); the previous "...Diff" spellings
# do not exist in the frame and made `df[plot_features]` raise KeyError.
plot_features = [
    "goldDif",
    "xpDif",
    "killDif",
    "blueTotalGold",
    "redTotalGold",
    "blueWins"
]


# "blueWins" is included in the selection because pairplot needs the hue column.
sns.pairplot(df[plot_features], hue="blueWins", diag_kind="kde", palette="coolwarm")
plt.show()

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_trn, X_tst, y_trn, y_tst = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same transform to
# both partitions so no test-set statistics enter the scaling.
scaler = StandardScaler().fit(X_trn)
X_trn_scaler = scaler.transform(X_trn)
X_tst_scaler = scaler.transform(X_tst)

# Logistic regression is trained and evaluated on the standardised features.
log = LogisticRegression(max_iter=500).fit(X_trn_scaler, y_trn)
log_pred = log.predict(X_tst_scaler)
log_acc = accuracy_score(y_tst, log_pred)

print("Logistic Regression Data")
print("Accuracy:", log_acc)

# The random forest is trained and evaluated on the unscaled features.
rf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_trn, y_trn)
rf_pred = rf.predict(X_tst)
rf_acc = accuracy_score(y_tst, rf_pred)

print("Random Forest Classifier")
print("Accuracy:", rf_acc)