In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load your data
df = pd.read_csv("counts_features.csv")

# Define merged and kept features
df_new_features = pd.DataFrame()
df_new_features["steamid"] = df["steamid"]
df_new_features["playtime_forever"] = df["playtime_forever"]
df_new_features["achievement_count"] = df["achievement_count"]
df_new_features["efficiency_score"] = df["efficiency_score"]
df_new_features["suspect_flag"] = df["suspect_flag"]

# 1. Combat group = combat + combat_skill + zombie_killer
df_new_features["combat_total"] = (
    df["combat_count"] +
    df["combat_skill_count"] +
    df["zombie_killer_count"]
)

# 2. Crafting (alone)
df_new_features["crafting_count"] = df["crafting_count"]

# 3. Exploration (alone)
df_new_features["exploration_count"] = df["exploration_count"]

# 4. Survival (alone)
df_new_features["survival_count"] = df["survival_count"]

# 5. Death (alone)
df_new_features["death_count"] = df["death_count"]

# 6. PvP (alone)
df_new_features["player_killer_count"] = df["player_killer_count"]

# Show the new DataFrame structure
print("\nNew feature DataFrame (counts only):")
print(df_new_features.head())
# df_new_features = df_new_features[~df_new_features['suspect_flag']]
df_new = df_new_features.copy()
# ----------------------
# Prepare dataframe
# ----------------------
df_eda = df_new.copy()

# Make ratio features for *all count features*
count_cols = [
    "combat_total", "crafting_count", "exploration_count",
    "survival_count", "death_count", "player_killer_count"
]

for col in count_cols:
    rcol = col + "_ratio"
    df_eda[rcol] = np.where(
        df_eda["achievement_count"] > 0,
        df_eda[col] / df_eda["achievement_count"],
        0.0
    )

# Convert survival_min to string labels for plotting
df_eda["survival_min_label"] = df_eda["survival_min"].astype(str) + " min"

# ----------------------
# 1) Class distribution
# ----------------------
plt.figure(figsize=(7,4))
sns.countplot(data=df_eda, x="survival_min_label", order=sorted(df_eda["survival_min_label"].unique()))
plt.title("Survival Time Distribution")
plt.xlabel("Survival Time")
plt.ylabel("Player Count")
plt.show()

# ----------------------
# 2) Feature distributions per class
# ----------------------
feat_for_box = count_cols + [c+"_ratio" for c in count_cols] + ["efficiency_score"]

for feat in feat_for_box:
    plt.figure(figsize=(7,4))
    sns.boxplot(data=df_eda, x="survival_min_label", y=feat)
    plt.title(f"{feat} by Survival Time")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# ----------------------
# 3) Correlation heatmap
# ----------------------
# Encode survival_min as numbers for correlation
df_corr = df_eda.copy()
df_corr["survival_min_num"] = df_corr["survival_min"].astype(int)

corr_feats = feat_for_box + ["survival_min_num"]
corr_matrix = df_corr[corr_feats].corr()

plt.figure(figsize=(12,8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm")
plt.title("Feature Correlation with Survival Time")
plt.show()


New feature DataFrame (counts only):
             steamid  playtime_forever  achievement_count  efficiency_score  \
0  76561198037802252               442                  3          0.006787   
1  76561197969100147               478                  0          0.000000   
2  76561198833431225               484                 43          0.088843   
3  76561198033263509              4184                 43          0.010277   
4  76561198040600573               430                 43          0.100000   

   suspect_flag  combat_total  crafting_count  exploration_count  \
0         False             0               2                  0   
1         False             0               0                  0   
2          True            11               8                  6   
3         False            11               8                  6   
4          True            11               8                  6   

   survival_count  death_count  player_killer_count  
0               0       

KeyError: 'survival_min'