# Employee Review Demo

A demo of using [SurrealDB](https://surrealdb.com/) to query Kaggle's [Employee Review](https://www.kaggle.com/datasets/fiodarryzhykau/employee-review) dataset.

In [1]:
%pip -q install surrealdb pandas

Note: you may need to restart the kernel to use updated packages.


## Set up the database

Define a schemafull `member` table for employees, then add one record per employee.

In [2]:
from surrealdb import Surreal

db = Surreal("http://localhost:8000")
await db.connect()

# TODO: remove insecure creds
await db.signin({"user": "root", "pass": "testing"})
await db.use("test", "test")

print(db.client_state)

ConnectionState.CONNECTED


In [3]:
# Name of the table that holds one record per employee; later cells reuse it.
member_tb = "member"
# SurrealQL that defines the schemafull table, its two string fields, and a
# unique index on employeeId. The lines starting with '#' inside the string are
# part of the query text that is sent to the database.
createMemberQuery = f"""
# Create primary table for holding member info
DEFINE TABLE {member_tb} SCHEMAFULL;

# Name
DEFINE FIELD name ON TABLE {member_tb} TYPE string;

# Employee ID as unique index
DEFINE FIELD employeeId ON TABLE {member_tb} TYPE string
    ASSERT string::is::numeric(employeeId) && string::len(employeeId) == 5;
DEFINE INDEX employeeIdIndex ON TABLE {member_tb} COLUMNS employeeId UNIQUE;
"""

# Print the generated statements so they can be reviewed before being executed.
print(createMemberQuery)


# Create primary table for holding member info
DEFINE TABLE member SCHEMAFULL;

# Name
DEFINE FIELD name ON TABLE member TYPE string;

# Employee ID as unique index
DEFINE FIELD employeeId ON TABLE member TYPE string
    ASSERT string::is::numeric(employeeId) && string::len(employeeId) == 5;
DEFINE INDEX employeeIdIndex ON TABLE member COLUMNS employeeId UNIQUE;



In [4]:
# If the command to create the member table looks good, go ahead and create it.
await db.query(createMemberQuery)
# Verify the table was created
await db.query("INFO FOR TABLE member;")

[{'result': {'events': {},
   'fields': {'employeeId': 'DEFINE FIELD employeeId ON member TYPE string ASSERT string::is::numeric(employeeId) AND string::len(employeeId) == 5',
    'name': 'DEFINE FIELD name ON member TYPE string'},
   'indexes': {'employeeIdIndex': 'DEFINE INDEX employeeIdIndex ON member FIELDS employeeId UNIQUE'},
   'lives': {},
   'tables': {}},
  'status': 'OK',
  'time': '2.514229ms'}]

In [5]:
# Preview the CSV
import pandas as pd

# Load the CSV file
file_path = 'employee_review_mturk_dataset_test_v6_kaggle.csv'
data = pd.read_csv(file_path)

# Collect each unique name in the CSV; one member record is created per name
unique_names = data['person_name'].unique()
print("Found", len(unique_names), "unique names")

# Display the first few rows of the dataframe to understand its structure.
# This must be the cell's last expression: a bare expression in the middle of
# a cell is evaluated but never rendered.
data.head()


Found 75 unique names


In [7]:
from random import randint

# Roster of entries
records_added = 0
failed_records: list[dict[str, str]] = []

# Add the person to the member table
print("Adding members to table:", member_tb)
for name in unique_names:
    # Create a random employee id of 5 digits
    employee_id = ''.join(str(randint(0, 9)) for _ in range(5))
    try:
        new_record = await db.create(member_tb, {
        'name': name,
        'employeeId': employee_id,
        })
        records_added = records_added + 1
    except Exception as e:
        failed_records.append({name, e})

print("Added records:", records_added)
print("Errors adding:", len(failed_records))
failed_records

Adding members to table: member
Added records: 75
Errors adding: 0


[]

In [10]:
# View some of the results
await db.query(f"SELECT * FROM {member_tb} LIMIT 5;")

[{'result': [{'employeeId': '73286',
    'id': 'member:08m4a63m65d0xl26h3ss',
    'name': 'George Jones'},
   {'employeeId': '13111',
    'id': 'member:0iyikxq41nlze0257cpq',
    'name': 'Daisy Pearce'},
   {'employeeId': '47773',
    'id': 'member:0ogoc9d0eg71st1kiziu',
    'name': 'Archie Dawson'},
   {'employeeId': '98876',
    'id': 'member:0pnzfj9d49iw302bcfnq',
    'name': 'Max Miller'},
   {'employeeId': '91642',
    'id': 'member:1toiwk6yk42jjytiaaj9',
    'name': 'Jack Walsh'}],
  'status': 'OK',
  'time': '106.302µs'}]

## Add performance feedback

- Assume that the last 5 names in the database are managers.
- Randomly assign a manager to each feedback
- Add the feedback to an edge table using [RELATE](https://docs.surrealdb.com/docs/surrealql/statements/relate)
- Demonstrate graph queries on the edge table

In [40]:
# Create a schemafull table for feedback
feedback_tb = "feedback"
# SurrealQL that defines the feedback table: record links to the reviewed
# member and to the rater, a datetime, the feedback text, and an index on the
# member field. The '#' line inside the string is part of the query text.
createFeedbackQuery = f"""
DEFINE TABLE {feedback_tb} SCHEMAFULL;
DEFINE FIELD member ON TABLE {feedback_tb} TYPE record<member>;
DEFINE FIELD rater ON TABLE {feedback_tb} TYPE record<member>;
DEFINE FIELD date on TABLE {feedback_tb} TYPE datetime;
DEFINE FIELD body ON TABLE {feedback_tb} TYPE string;
# Create index for faster query
DEFINE INDEX memberIndex ON TABLE {feedback_tb} COLUMNS member;
"""

# Print the generated statements so they can be reviewed before being executed.
print(createFeedbackQuery)


DEFINE TABLE feedback SCHEMAFULL;
DEFINE FIELD member ON TABLE feedback TYPE record<member>;
DEFINE FIELD rater ON TABLE feedback TYPE record<member>;
DEFINE FIELD date on TABLE feedback TYPE datetime;
DEFINE FIELD body ON TABLE feedback TYPE string;
# Create index for faster query
DEFINE INDEX memberIndex ON TABLE feedback COLUMNS member;



In [12]:
# If the command to create the table looks good, go ahead and create it.
await db.query(createFeedbackQuery)
# Verify the table was created
print(await db.query(f"INFO FOR TABLE {feedback_tb};"))

[{'result': {'events': {}, 'fields': {'body': 'DEFINE FIELD body ON feedback TYPE string', 'date': 'DEFINE FIELD date ON feedback TYPE datetime', 'member': 'DEFINE FIELD member ON feedback TYPE record<member>', 'rater': 'DEFINE FIELD rater ON feedback TYPE record<member>'}, 'indexes': {}, 'lives': {}, 'tables': {}}, 'status': 'OK', 'time': '209.823µs'}]


In [23]:
# Assume managers are the last MANAGER_COUNT names in the list.
# MANAGER_COUNT must be at least 1: a value of 0 would slice the whole array.
MANAGER_COUNT = 5
managers = unique_names[-MANAGER_COUNT:]
print(managers)

['Jaxson Giles' 'Heidi Wallace' 'Zachary Doyle' 'Lauren Baker'
 'George Jones']


In [39]:
from random import choice

# Go through each feedback and add it to the database with a randomly selected manager
feedback_df = data[['person_name','feedback']]
# Roster of entries
records_added = 0
failed_records: list[dict[str, str]] = []

# Add feedback
for _, row in feedback_df.iterrows():
    # Randomly generate datetime for when feedback happened
    feedbackDate = str(randint(2019, 2023)) + "-" + str(randint(10,12)) + "-" + str(randint(10,28))
    feedbackDate = feedbackDate + "T00:00:00Z"
    try:
        # Randomly select rater and get their member id
        rater = choice(managers)
        raterId = await db.query(f"""SELECT id FROM member WHERE name = "{rater}";""")
        raterId = raterId[0]["result"][0]["id"]
        # Get id of member. Doing this with employeeId would be better, but dataset limits
        memberId = await db.query(f"""SELECT id FROM member WHERE name = "{row.person_name}";""")
        memberId = memberId[0]["result"][0]["id"]
        new_record = await db.create(feedback_tb, {
        'member': memberId,
        'rater' : raterId,
        'date'  : feedbackDate,
        'body'  : row.feedback,
        })
        records_added = records_added + 1
    except Exception as e:
        failed_records.append({name, e})

print("Added records:", records_added)
print("Errors adding:", len(failed_records))
failed_records

    

Added records: 225
Errors adding: 0


[]

### Query Feedback Records

- Notice that the index makes queries much faster

In [None]:
# Get all of the records for a member

crane_feedback = await db.query("SELECT * FROM feedback WHERE member=")