# Employee Review Demo

A demo of using [SurrealDB](https://surrealdb.com/) to query Kaggle's [Employee Review](https://www.kaggle.com/datasets/fiodarryzhykau/employee-review) dataset.

In [81]:
# Install the SurrealDB Python client and pandas into the running kernel's
# environment (-q keeps pip's output quiet).
# NOTE(review): versions are unpinned — TODO pin them once the versions this
# notebook was written against are confirmed.
%pip -q install surrealdb pandas

Note: you may need to restart the kernel to use updated packages.


## Set up the database

Simply define a schemafull table for employees and add them.

In [82]:
from surrealdb import Surreal

db = Surreal("http://localhost:8000")
await db.connect()

# TODO: remove insecure creds
await db.signin({"user": "root", "pass": "testing"})
await db.use("test", "test")

print(db.client_state)

ConnectionState.CONNECTED


In [83]:
# Name of the table that stores one record per employee; later cells reuse
# this variable when inserting into and querying the table.
member_tb = "member"
# SurrealQL that defines the schemafull member table: a string `name`, a
# string `employeeId` that must be numeric and exactly 5 characters long, and
# a UNIQUE index on `employeeId`.
# The `#` lines inside the string are part of the query text itself (they show
# up in the printed query), not Python comments.
createMemberQuery = f"""
# Create primary table for holding member info
DEFINE TABLE {member_tb} SCHEMAFULL;

# Name
DEFINE FIELD name ON TABLE {member_tb} TYPE string;

# Employee ID as unique index
DEFINE FIELD employeeId ON TABLE {member_tb} TYPE string
    ASSERT string::is::numeric(employeeId) && string::len(employeeId) == 5;
DEFINE INDEX employeeIdIndex ON TABLE {member_tb} COLUMNS employeeId UNIQUE;
"""

# Nothing is executed here: the rendered query is only printed so it can be
# reviewed before a later cell sends it to the database.
print(createMemberQuery)


# Create primary table for holding member info
DEFINE TABLE member SCHEMAFULL;

# Name
DEFINE FIELD name ON TABLE member TYPE string;

# Employee ID as unique index
DEFINE FIELD employeeId ON TABLE member TYPE string
    ASSERT string::is::numeric(employeeId) && string::len(employeeId) == 5;
DEFINE INDEX employeeIdIndex ON TABLE member COLUMNS employeeId UNIQUE;



In [84]:
# If the command to create the member table looks good, go ahead and create it.
await db.query(createMemberQuery)
# Verify the table was created
await db.query("INFO FOR TABLE member;")

[{'result': {'events': {},
   'fields': {'employeeId': 'DEFINE FIELD employeeId ON member TYPE string ASSERT string::is::numeric(employeeId) AND string::len(employeeId) == 5',
    'name': 'DEFINE FIELD name ON member TYPE string'},
   'indexes': {'employeeIdIndex': 'DEFINE INDEX employeeIdIndex ON member FIELDS employeeId UNIQUE'},
   'lives': {},
   'tables': {}},
  'status': 'OK',
  'time': '2.878352ms'}]

In [85]:
# Preview the CSV
import pandas as pd

# Load the CSV file
file_path = 'employee_review_mturk_dataset_test_v6_kaggle.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataframe to understand its structure.
# A bare `data.head()` in the middle of a cell is evaluated and thrown away
# (only a cell's final expression is rendered), so ask for the display
# explicitly. `display` is provided by the IPython/Jupyter kernel.
display(data.head())

# Collect each distinct person name in the CSV; later cells create one member
# record per name.
unique_names = data['person_name'].unique()
print("Found", len(unique_names), "unique names")


Found 75 unique names


In [86]:
# Roster of entries
records_added = 0
failed_records: list[dict[str, str]] = []

# Add the person to the member table
print("Adding members to table:", member_tb)
for name in unique_names:
    # Create a random employee id of 5 digits
    employee_id = ''.join(str(random.randint(0, 9)) for _ in range(5))
    try:
        new_record = await db.create(member_tb, {
        'name': name,
        'employeeId': employee_id,
        })
        records_added = records_added + 1
    except Exception as e:
        failed_records.append({name, e})

print("Added records:", records_added)
print("Errors adding:", len(failed_records))
failed_records

Adding members to table: member
Added records: 75
Errors adding: 0
[]


In [108]:
# View some of the results
await db.query(f"SELECT * FROM {member_tb} LIMIT 5;")

[{'result': [{'employeeId': '06886',
    'id': 'member:0n8ude84gxl4imdtkhnq',
    'name': 'Zoe White'},
   {'employeeId': '13786',
    'id': 'member:147f4liwpa7cv7095t5a',
    'name': 'Milania Hodge'},
   {'employeeId': '49998',
    'id': 'member:16cwc0g8i420kcr0z2j3',
    'name': 'Ella Green'},
   {'employeeId': '68527',
    'id': 'member:170lruskvgodnhbga8je',
    'name': 'Brayden Harding'},
   {'employeeId': '98370',
    'id': 'member:1xncyyimk2xnn9y4oros',
    'name': 'Max Miller'}],
  'status': 'OK',
  'time': '220.406µs'}]

## Add performance feedback

- Assume that the last 5 names in the database are managers.
- Randomly assign a manager to each feedback
- Add the feedback to an edge table using [RELATE](https://docs.surrealdb.com/docs/surrealql/statements/relate)
- Demonstrate graph queries on the edge table

In [116]:
# Create a schemafull table for feedback: each record holds the feedback text
# (`body`) and a `date` of type datetime.
feedback_tb = "feedback"
createFeedbackQuery = f"""
DEFINE TABLE {feedback_tb} SCHEMAFULL;
DEFINE FIELD body ON TABLE {feedback_tb} TYPE string;
DEFINE FIELD date on TABLE {feedback_tb} TYPE datetime;
"""

# Printed for review only; a later cell sends it to the database.
print(createFeedbackQuery)

# Create a schemafull edge table for gaveFeedback
# This will enforce that there is a link to the feedback itself.
# The linked table is taken from `feedback_tb` (instead of a hard-coded
# "feedback") so the record type cannot drift from the table defined above.
# NOTE(review): this table is meant to be the RELATE edge described in the
# section intro — confirm whether the SurrealDB version in use also needs
# `in`/`out` fields defined on a SCHEMAFULL edge table.
gaveFeedback_tb = "gaveFeedback"
createGaveFeedbackQuery = f"""
DEFINE TABLE {gaveFeedback_tb} SCHEMAFULL;
DEFINE FIELD feedbackId ON TABLE {gaveFeedback_tb} TYPE record<{feedback_tb}>;
"""

print(createGaveFeedbackQuery)


DEFINE TABLE feedback SCHEMAFULL;
DEFINE FIELD body ON TABLE feedback TYPE string;
DEFINE FIELD date on TABLE feedback TYPE datetime;


DEFINE TABLE gaveFeedback SCHEMAFULL;
DEFINE FIELD feedbackId ON TABLE gaveFeedback TYPE record<feedback>;



In [117]:
# If the command to create the table looks good, go ahead and create it.
await db.query(createFeedbackQuery)
await db.query(createGaveFeedbackQuery)
# Verify the table was created
print(await db.query(f"INFO FOR TABLE {feedback_tb};"))
print(await db.query(f"INFO FOR TABLE {gaveFeedback_tb};"))

[{'result': {'events': {}, 'fields': {'body': 'DEFINE FIELD body ON feedback TYPE string', 'content': 'DEFINE FIELD content ON feedback TYPE string', 'date': 'DEFINE FIELD date ON feedback TYPE datetime'}, 'indexes': {}, 'lives': {}, 'tables': {}}, 'status': 'OK', 'time': '130.444µs'}]
[{'result': {'events': {}, 'fields': {'feedbackId': 'DEFINE FIELD feedbackId ON gaveFeedback TYPE record<feedback>'}, 'indexes': {}, 'lives': {}, 'tables': {}}, 'status': 'OK', 'time': '136.456µs'}]


In [115]:
# Keep only the two columns needed to load feedback: who it is about and the
# feedback text itself.
feedback_columns = ['person_name', 'feedback']
feedback_df = data[feedback_columns]

# By convention in this demo, the final five unique names are the managers.
last_five = slice(-5, None)
managers = unique_names[last_five]
print(managers)

['Jaxson Giles' 'Heidi Wallace' 'Zachary Doyle' 'Lauren Baker'
 'George Jones']


In [100]:
# Roster of entries
records_added = 0
failed_records: list[dict[str, str]] = []

# Add feedback
for _, row in feedback_df.iterrows():
    # Create a random employee id of 5 digits
    employee_id = ''.join(str(random.randint(0, 9)) for _ in range(5))
    try:
        new_record = await db.create(member_tb, {
        'name': name,
        'employeeId': employee_id,
        })
        records_added = records_added + 1
    except Exception as e:
        failed_records.append({name, e})

print("Added records:", records_added)
print("Errors adding:", len(failed_records))
failed_records

    

person_name                                         Lacey Howard
feedback       Lacey's performance has been sub standard in t...
Name: 0, dtype: object
person_name                                            Amy Jones
feedback       Amy struggles at her work a lot. Shes always o...
Name: 1, dtype: object
person_name                                            Amy Jones
feedback       Amy Jones is a nice person and she is dedicate...
Name: 2, dtype: object
person_name                                            Amy Jones
feedback       Amy Jones needs to become a better player. She...
Name: 3, dtype: object
person_name                                            Amy Jones
feedback       Amy is able to focus on the task at hand only ...
Name: 4, dtype: object
person_name                                           Rylan Mack
feedback       Rylan Mack is a danger to his position. He has...
Name: 5, dtype: object
person_name                                           Rylan Mack
feedback       Ry