In [1]:
import pandas as pd
import re

from neo4j import GraphDatabase

import os
from dotenv import load_dotenv

from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
load_dotenv()

True

In [2]:
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")

In [3]:
class Neo4jHandler:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def get_player_classifications_as_df(self):
        query = """
        MATCH (p:Player)-[r:HAS_CLASSIFICATION]->(c:Classification)
        RETURN p.Actor AS player_id, c.type AS classification
        """
        with self.driver.session() as session:
            result = session.run(query)
            data = [{"player_id": record["player_id"], "classification": record["classification"]} for record in result]
            
            # Parse classification, confidence, and reasoning from the LLM
            parsed_data = []
            for entry in data:
                llm_response = entry["classification"]  # Assuming the LLM response is stored here, change if it is stored in p properties
                classification = re.search(r"Classification:\s*(\w+)", llm_response)
                confidence = re.search(r"Confidence:\s*(\d+)", llm_response)
                reasoning = re.search(r"Reasoning:\s*(.+)", llm_response)
                
                parsed_data.append({
                    "Actor": entry["player_id"],
                    "Classification": classification.group(1) if classification else None,
                    "Confidence": int(confidence.group(1)) if confidence else None,
                    "Reasoning": reasoning.group(1).strip() if reasoning else None,
                })

            return pd.DataFrame(parsed_data)





In [4]:

handler = Neo4jHandler(uri, username, password)
try:
    df = handler.get_player_classifications_as_df()
finally:
    handler.close()

In [5]:
df

Unnamed: 0,Actor,Classification,Confidence,Reasoning
0,378697,Bot,80.0,** The Player Action Analysis score of 87 indi...
1,249936,Human,60.0,While the player exhibits some anomalous behav...
2,344011,Bot,95.0,** The player's behavior is highly anomalous a...
3,44575,Human,90.0,The player's behavior is consistently rated as...
4,210721,Human,80.0,The player's behavior is deemed legitimate bas...
5,391531,Human,80.0,"The player's behavior, as analyzed through ano..."
6,441832,Bot,95.0,** The player's behavior is highly indicative ...
7,471097,Bot,95.0,The player's behavior shows strong indicators ...
8,432593,Bot,98.0,The player's behavior is highly indicative of ...
9,187491,Human,60.0,"The scores from Anomaly Detection, Social Dive..."


In [6]:
social_data = pd.read_csv('../data/sample/sample_social_data.csv')

social_data = social_data[social_data['Actor'].isin(df['Actor'])]
social_data.drop(columns=['A_Acc', 'Social_diversity'], inplace=True)
eval_df = social_data.merge(df, on='Actor', how='left')
eval_df.dropna(inplace=True)
eval_df

Unnamed: 0,Actor,Type,Classification,Confidence,Reasoning
0,16723,Human,Human,70.0,Although the Anomaly Detection and Player Acti...
1,44575,Bot,Human,90.0,The player's behavior is consistently rated as...
2,163093,Human,Human,90.0,The player's behavior is deemed legitimate due...
3,187491,Bot,Human,60.0,"The scores from Anomaly Detection, Social Dive..."
4,210721,Human,Human,80.0,The player's behavior is deemed legitimate bas...
5,249936,Human,Human,60.0,While the player exhibits some anomalous behav...
6,328320,Human,Human,80.0,** While the Anomaly Detection Score is modera...
7,344011,Bot,Bot,95.0,** The player's behavior is highly anomalous a...
8,362870,Human,Human,70.0,The Anomaly Detection score of 80 suggests sus...
9,374321,Bot,Bot,98.0,"The player's behavior is highly anomalous, wit..."


In [7]:
eval_df['Type'] = eval_df['Type'].apply(lambda x: 1 if x == 'Bot' else 0)
eval_df['Classification'] = eval_df['Classification'].apply(lambda x: 1 if x == 'Bot' else 0)

In [8]:
y_true = eval_df['Type']
y_pred = eval_df['Classification']

# Calculate evaluation metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
confusion = confusion_matrix(y_true, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("Confusion Matrix:\n", confusion)

Accuracy: 0.8333
Precision: 0.8750
Recall: 0.7778
F1 Score: 0.8235
Confusion Matrix:
 [[8 1]
 [2 7]]
