In [None]:
import json
import os
from typing import Optional

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_neo4j import Neo4jGraph
from tqdm import tqdm

# --- Configuration ---
load_dotenv()

In [None]:
# Read the sample per-player action table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../data/sample/sample_action_data.csv')
df.head(n=5)

In [None]:
# Display the (rows, columns) dimensions of the loaded frame.
df.shape

In [None]:
# Neo4j connection settings, read from the process environment.
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

# The Groq key is mandatory. os.environ only accepts str values, so copying a
# missing (None) value into it would fail with an opaque
# "str expected, not NoneType" TypeError; fail with an actionable message instead.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it or add it to the .env file before running this notebook."
    )

# LangSmith tracing is optional: it is switched on only when a key is available,
# so a missing LANGSMITH_API_KEY no longer aborts the notebook.
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
if LANGSMITH_API_KEY:
    os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY
    os.environ["LANGCHAIN_PROJECT"] = "Game Bot Detection Framework"
    os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [None]:
# Neo4j handle used by the feature-extraction query; credentials come from the
# NEO4J_* settings defined in the configuration cell.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

In [None]:
# Chat model used to score players.
# NOTE(review): no sampling temperature is set here, so the library default
# applies — consider pinning it if run-to-run reproducibility of scores matters.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
)

In [None]:
def extract_player_action_features(player_id: str) -> dict:
    """
    Extracts action features from the knowledge graph for a given player.

    Args:
        player_id: Identifier of the player. It is sent to Neo4j as a query
            parameter and converted with ``toInteger`` before being matched
            against ``Player.Actor``.

    Returns:
        The first result row as a dict keyed by the aliases in the RETURN
        clause (``actor`` plus the action metrics), or an empty dict when the
        query yields no rows.
    """
    # The id is bound as $player_id instead of being interpolated into the
    # Cypher text, so quotes or Cypher fragments in the value cannot alter the
    # query (no injection, no syntax errors on odd input).
    query = """
    MATCH (p:Player {Actor: toInteger($player_id)})-[:PERFORMED]->(a:Action)
    RETURN
        p.Actor as actor,
        a.collect_max_count as collect_max_count,
        a.Sit_ratio as Sit_ratio,
        a.Sit_count as Sit_count,
        a.sit_count_per_day as sit_count_per_day,
        a.Exp_get_ratio as Exp_get_ratio,
        a.Exp_get_count as Exp_get_count,
        a.exp_get_count_per_day as exp_get_count_per_day,
        a.Item_get_ratio as Item_get_ratio,
        a.Item_get_count as Item_get_count,
        a.item_get_count_per_day as item_get_count_per_day,
        a.Money_get_ratio as Money_get_ratio,
        a.Money_get_count as Money_get_count,
        a.money_get_count_per_day as money_get_count_per_day,
        a.Abyss_get_ratio as Abyss_get_ratio,
        a.Abyss_get_count as Abyss_get_count,
        a.abyss_get_count_per_day as abyss_get_count_per_day,
        a.Exp_repair_count as Exp_repair_count,
        a.Exp_repair_count_per_day as Exp_repair_count_per_day,
        a.Use_portal_count as Use_portal_count,
        a.Use_portal_count_per_day as Use_portal_count_per_day,
        a.Killed_bypc_count as Killed_bypc_count,
        a.Killed_bypc_count_per_day as Killed_bypc_count_per_day,
        a.Killed_bynpc_count as Killed_bynpc_count,
        a.Killed_bynpc_count_per_day as Killed_bynpc_count_per_day,
        a.Teleport_count as Teleport_count,
        a.Teleport_count_per_day as Teleport_count_per_day,
        a.Reborn_count as Reborn_count,
        a.Reborn_count_per_day as Reborn_count_per_day
    """
    # str() keeps the toInteger('<id>') semantics of a string argument even if
    # a caller passes an int.
    results = graph.query(query, params={"player_id": str(player_id)})
    # Only the first matching row is used; an empty result means "unknown player".
    return results[0] if results else {}


In [None]:
# Smoke-test the graph lookup with one hard-coded player id and display the
# returned feature dict (empty if the player is not in the graph).
action_features = extract_player_action_features("1312")
action_features

In [None]:
# Scoring rubric sent as the system message. It contains no template
# placeholders, only the fixed instructions and output contract.
SYSTEM_PROMPT = """
## Role: Game Action Anomaly Detector
You're an AI trained to detect bot activity in MMORPGs using behavioral telemetry. Analyze these key per-day metrics with these priorities:

**Core Features (Weighted):**
1. Resource Collection (40%)
   - `exp_get_count_per_day` (Bot >500, Human avg 150-400)
   - `item_get_count_per_day` (Bot >900, Human avg 100-300)
   - `collect_max_count=0` with high resource gains

2. Activity Patterns (30%)
   - `sit_ratio <0.1` (Bot-like hyperactivity) or `>5` (Idle farming)
   - `teleport_count_per_day >50` without portal use
   - 24h perfect action consistency (±2% variance)

3. Combat Signals (20%)
   - `reborn_count_per_day=0` with high-risk actions
   - `killed_bynpc_count_per_day <1` with farming
   - Disproportionate PvP deaths (`killed_bypc_count_per_day`)

4. Anti-Detection (10%)
   - Exact resource ratios (Item/Exp = ±0.5%)
   - Fixed hourly action counts
   - Simultaneous max values in 3+ categories

**Decision Framework**
- Immediate bot flags (Score 90+):
if (exp_get > 650/day AND sit_ratio <0.1)
OR (collect_max=0 AND any_resource >300/day)
OR (item_get >5000/day AND deaths=0)

- Human-like patterns:
- Natural variance (>15%) in hourly actions
- Deaths proportional to resource gains
- Portal/teleport ratio 0.8-1.2

**Scoring Protocol**
1. Start at 0, add points per threshold breach:
 - Major breach (+25): exp_get>500, item_get>900
 - Moderate (+15): collect_max=0, teleport>50
 - Minor (+10): perfect ratios, zero deaths

2. Apply multipliers:
 - 1.2x for financial metrics (Money/Item)
 - 0.8x if deaths >5/day with high gains

3. Confidence levels:
 - High: >2 major breaches
 - Medium: 1 major + 2 minor
 - Low: Single anomaly

Please provide your analysis as a single JSON object with the following keys:
- anomaly_score: A score between 0 and 100 (0 indicates human-like behavior, 100 indicates anomalous behavior)
- confidence_level: "High", "Medium" or "Low"
- behavior_profile: "Bot-like", "Human-like" or "Uncertain"
- reasoning: A brief reasoning for the score

Examples from dataset:
- Bot 6187: 959exp/day + 0 collects + 0 deaths → Score 97
- Human 1312: 316exp/day + 13 PvP deaths → Score 22
- Bot 8085: 671exp/day + 24 teleports → Score 92
""".strip()

# Per-player request sent as the human message. Every {placeholder} must be
# supplied when the prompt is formatted; the names match the aliases returned
# by the player feature query.
USER_PROMPT = """
Analyze this actor:

Actor: {actor}
- collect_max_count: {collect_max_count}
- Sit_ratio: {Sit_ratio}
- Sit_count: {Sit_count}
- sit_count_per_day: {sit_count_per_day}
- Exp_get_ratio: {Exp_get_ratio}
- Exp_get_count: {Exp_get_count}
- exp_get_count_per_day: {exp_get_count_per_day}
- Item_get_ratio: {Item_get_ratio}
- Item_get_count: {Item_get_count}
- item_get_count_per_day: {item_get_count_per_day}
- Money_get_ratio: {Money_get_ratio}
- Money_get_count: {Money_get_count}
- money_get_count_per_day: {money_get_count_per_day}
- Abyss_get_ratio: {Abyss_get_ratio}
- Abyss_get_count: {Abyss_get_count}
- abyss_get_count_per_day: {abyss_get_count_per_day}
- Exp_repair_count: {Exp_repair_count}
- Exp_repair_count_per_day: {Exp_repair_count_per_day}
- Use_portal_count: {Use_portal_count}
- Use_portal_count_per_day: {Use_portal_count_per_day}
- Killed_bypc_count: {Killed_bypc_count}
- Killed_bypc_count_per_day: {Killed_bypc_count_per_day}
- Killed_bynpc_count: {Killed_bynpc_count}
- Killed_bynpc_count_per_day: {Killed_bynpc_count_per_day}
- Teleport_count: {Teleport_count}
- Teleport_count_per_day: {Teleport_count_per_day}
- Reborn_count: {Reborn_count}
- Reborn_count_per_day: {Reborn_count_per_day}
""".strip()

# Real system/human roles are used instead of inline "<|system|>" / "<|user|>"
# markers, which would otherwise be sent to the model as literal text inside a
# single human message.
prompt = ChatPromptTemplate.from_messages([
    ("system", SYSTEM_PROMPT),
    ("human", USER_PROMPT),
])

In [None]:
def _extract_json_object(text: str) -> Optional[dict]:
    """Return the first JSON object embedded in ``text``, or ``None`` if there is none."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value, so
            # prose or stray braces after the object do not break parsing.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


def _coerce_anomaly_score(raw_score) -> Optional[int]:
    """Convert a model-supplied score (number or numeric string) to ``int``, else ``None``."""
    # bool is an int subclass; a true/false "score" is not a usable number.
    if raw_score is None or isinstance(raw_score, bool):
        return None
    try:
        return int(round(float(raw_score)))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text, containers, NaN or infinity.
        return None


def assess_bot_likelihood(player_data) -> tuple[Optional[int], str, str]:
    """
    Assesses the likelihood of a player being a bot using the LLM.

    Args:
        player_data: Mapping whose keys fill the placeholders of the
            module-level ``prompt`` (it is expanded with ``**player_data``).

    Returns:
        A tuple ``(anomaly_score, reasoning, full_analysis)``:
        - ``anomaly_score``: the ``anomaly_score`` field of the model's JSON
          reply as an ``int``, or ``None`` when no JSON object could be parsed
          or the field is missing / not numeric.
        - ``reasoning``: the ``reasoning`` field of the reply; the raw reply
          when no JSON object could be parsed.
        - ``full_analysis``: the raw model reply.
    """
    formatted_prompt = prompt.format_messages(**player_data)
    response = llm.invoke(formatted_prompt)
    full_analysis = response.content

    # Only plain-text replies are searched for JSON; anything else is treated
    # as unparseable.
    parsed = _extract_json_object(full_analysis) if isinstance(full_analysis, str) else None
    if parsed is None:
        # Best effort: surface the raw reply as the reasoning so nothing is lost.
        return None, full_analysis, full_analysis

    # Models occasionally pad key names with whitespace; normalise before lookup.
    fields = {key.strip(): value for key, value in parsed.items()}
    anomaly_score = _coerce_anomaly_score(fields.get("anomaly_score"))
    reasoning = fields.get("reasoning", "Could not parse reasoning.")
    return anomaly_score, reasoning, full_analysis


In [None]:
def generate_bot_report(player_ids: list[str]) -> list[dict]:
    """
    Builds one report entry per player id, in input order.

    Each entry is a dict with the keys ``player_id``, ``anomaly_score``,
    ``reasoning`` and ``full_analysis``. Players for which the graph lookup
    returns nothing get a placeholder entry instead of an LLM assessment.
    """

    def build_entry(pid):
        # Look the player up first; only players with data are sent to the LLM.
        features = extract_player_action_features(pid)
        if not features:
            return {
                "player_id": pid,
                "anomaly_score": None,
                "reasoning": "No data found for this player.",
                "full_analysis": None
            }

        score, explanation, raw_reply = assess_bot_likelihood(features)
        return {
            "player_id": pid,
            "anomaly_score": score,
            "reasoning": explanation,
            "full_analysis": raw_reply
        }

    # tqdm wraps the id list so progress is shown while players are processed.
    return [build_entry(pid) for pid in tqdm(player_ids)]

In [None]:
# Replace with actual player IDs
player_ids_to_check = ['428616', '433137', '447250', '416536', '441912']
bot_report = generate_bot_report(player_ids_to_check)

# Print a short, human-readable summary for each assessed player.
for record in tqdm(bot_report):
    print(
        f"Player ID: {record['player_id']}",
        f"Anomaly Score: {record['anomaly_score']}",
        f"Reasoning: {record['reasoning']}",
        "-" * 40,
        sep="\n",
    )