In [1]:
import json

In [2]:
import re

def calculate_structure_score(data, flow_type=None):
    """Score how completely ``data`` follows the layout expected for a flow.

    One point is awarded for every required row (a key found inside any of
    the nested dict sections of ``data``), one point for each of the five
    top-level columns, and one further point each for ``flow`` and
    ``subflow``.  The total is normalised to a percentage of the flow's
    maximum.  Diagnostic lines are printed while scoring.

    Args:
        data: The record to score, a dict of top-level sections.
        flow_type: ``"product_defect"`` or ``"storewide_query"``.  When it is
            empty or ``None`` the record's own ``"flow"`` entry is used.

    Returns:
        The structure score as a percentage, or ``0`` (after printing
        ``"Unknown flow type"``) when the flow type is not recognised.
    """
    # Respect the caller's flow type; only fall back to the record itself
    # when none was supplied (previously the argument was always overwritten).
    if not flow_type:
        flow_type = data.get("flow", "")

    if flow_type == "product_defect":
        required_rows = [
            'customer_name', 'email', 'member_level', 'phone', 'username',
            'street_address', 'full_address', 'city', 'state', 'zip_code',
            'num_products', 'order_id', 'packaging', 'payment_method',
            'products', 'purchase_date', 'names', 'amounts',
        ]
        # 18 rows + 5 columns + 2 extra points for flow/subflow
        max_structure_score = 25
    elif flow_type == "storewide_query":
        required_rows = [
            'customer_name', 'email', 'member_level', 'phone', 'username',
            'street_address', 'full_address', 'city', 'state', 'zip_code',
            'names', 'amounts',
        ]
        # 12 rows + 5 columns + 2 extra points for flow/subflow
        max_structure_score = 19
    else:
        print("Unknown flow type")
        return 0

    # Rows live inside nested sections.  Restricting the search to dicts
    # avoids substring matches against string values (e.g. the subflow name)
    # and a TypeError on scalar values such as numbers.
    sections = [section for section in data.values() if isinstance(section, dict)]

    structure_score = 0

    # Check for the presence of rows
    for row in required_rows:
        if any(row in section for section in sections):
            print(f"Found row: {row}")
            structure_score += 1
        else:
            print(f"Missing row: {row}")

    # Check for the presence of the 5 required columns
    required_columns = ['personal', 'order', 'product', 'flow', 'subflow']
    for column in required_columns:
        if column in data:
            structure_score += 1
        else:
            print(f"Missing column: {column}")

    # flow and subflow are counted a second time here.  The maxima above
    # include these two extra points, so the weighting is kept as-is to leave
    # existing scores unchanged.
    if 'flow' in data:
        structure_score += 1
    if 'subflow' in data:
        structure_score += 1

    print(f"Structure Score: {structure_score:.2f}")

    # Normalize structure score
    structure_score = (structure_score / max_structure_score) * 100
    print(f"Max Structure Score: {max_structure_score}")
    return structure_score


def calculate_entity_score(data, flow_type):
    """Score the individual entities of ``data`` as a percentage.

    One point is awarded per passing check on the ``personal``, ``order``
    and ``product`` sections plus the ``flow`` and ``subflow`` values; a
    message is printed for every check that passes.  The total is divided by
    the maximum for ``flow_type`` (20 for ``product_defect``, 14 for
    ``storewide_query``).

    Note that every order check is applied regardless of the flow, so a
    ``storewide_query`` record that also carries order-only fields (for
    example ``order_id``) can score above 100.

    Args:
        data: The record to score; missing sections are treated as empty.
        flow_type: ``'product_defect'`` or ``'storewide_query'``.

    Returns:
        The entity score as a percentage, or ``0`` (after printing
        ``"Unknown flow type"``) when ``flow_type`` is not recognised.
    """
    max_scores = {'product_defect': 20, 'storewide_query': 14}

    # Fail early and in the same way as the structure scorer instead of
    # hitting a KeyError on the normalisation step at the very end.
    if flow_type not in max_scores:
        print("Unknown flow type")
        return 0

    entity_score = 0

    personal = data.get('personal', {})
    if 'customer_name' in personal:
        entity_score += 1
        print("Customer name is present and correct.")
    if 'email' in personal and '@' in personal['email']:
        entity_score += 1
        print("Email is present and correctly formatted.")
    if 'member_level' in personal:
        entity_score += 1
        print("Member level is present.")
    # A phone number counts when it contains exactly ten digits, whatever
    # punctuation surrounds them.
    if 'phone' in personal and len(''.join(filter(str.isdigit, personal['phone']))) == 10:
        entity_score += 1
        print("Phone number is present and correctly formatted.")
    if 'username' in personal:
        entity_score += 1
        print("Username is present.")

    order = data.get('order', {})
    # Address parts can only be cross-checked when full_address exists;
    # without this guard a record with a city or state but no full_address
    # raised KeyError.
    has_full_address = 'full_address' in order
    if 'street_address' in order and has_full_address and order['street_address'] in order['full_address']:
        entity_score += 1
        print("Street address matches full address.")
    if has_full_address and 'zip_code' in order and order['zip_code'] in order['full_address']:
        entity_score += 1
        print("ZIP code matches full address.")
    if 'city' in order and has_full_address and order['city'] in order['full_address']:
        entity_score += 1
        print("City matches full address.")
    if 'num_products' in order:
        entity_score += 1
        print("Number of products is present.")
    if 'order_id' in order and len(order['order_id']) == 10 and order['order_id'].isdigit():
        entity_score += 1
        print("Order ID is correctly formatted.")
    if 'packaging' in order:
        entity_score += 1
        print("Packaging information is present.")
    if 'payment_method' in order:
        entity_score += 1
        print("Payment method is present.")
    if 'products' in order:
        entity_score += 1
        print("Products information is present.")
    if 'purchase_date' in order:
        entity_score += 1
        print("Purchase date is present.")
    if 'state' in order and has_full_address and order['state'] in order['full_address']:
        entity_score += 1
        print("State matches full address.")
    if 'zip_code' in order:
        entity_score += 1
        print("ZIP code is present.")

    product = data.get('product', {})
    if flow_type == 'product_defect':
        if 'names' in product:
            entity_score += 1
            print("Product names are present.")
        if 'amounts' in product:
            entity_score += 1
            print("Product amounts are present.")
    else:
        # storewide_query: the product lists must be present but empty.
        if 'names' in product and not product['names']:
            entity_score += 1
            print("Product names are correctly empty for storewide query.")
        if 'amounts' in product and not product['amounts']:
            entity_score += 1
            print("Product amounts are correctly empty for storewide query.")

    if data.get('flow') in ['product_defect', 'storewide_query']:
        entity_score += 1
        print("Flow is correctly specified.")
    if data.get('subflow') in ['refund_status', 'timing_4', 'return_size']:
        entity_score += 1
        print("Subflow is correctly specified.")

    return entity_score / max_scores[flow_type] * 100


def evaluate_data(data):
    """Compute the structure, entity and combined scores for one record.

    The flow type is read from ``data['flow']`` and passed to both scorers.
    The combined score is the arithmetic mean of the two percentages.

    Returns:
        A dict with the keys ``structure_score``, ``entity_score`` and
        ``total_score``.
    """
    flow = data['flow']

    scores = {
        'structure_score': calculate_structure_score(data, flow),
        'entity_score': calculate_entity_score(data, flow),
    }
    # Both parts carry equal weight in the overall result.
    scores['total_score'] = (scores['structure_score'] + scores['entity_score']) / 2
    return scores


# Example data
# example_data_1: a "product_defect" record with every personal, order and
# product field filled in.
example_data_1 = {
  "personal": {
    "customer_name": "alessandro phoenix",
    "email": "aphoenix939@email.com",
    "member_level": "gold",
    "phone": "(727) 760-7806",
    "username": "aphoenix939"
  },
  "order": {
    "street_address": "8865 lexington ave",
    "full_address": "8865 lexington ave  la fayette, tx 86229",
    "city": "la fayette",
    "num_products": "1",
    "order_id": "7916676427",
    "packaging": "yes",
    "payment_method": "credit card",
    # products is stored as a string holding a list literal, not as a list
    "products": "[{'brand': 'michael_kors', 'product_type': 'shirt', 'amount': 69, 'image_url': 'images/michael_kors-shirt.jpeg'}]",
    "purchase_date": "2019-11-20",
    "state": "tx",
    "zip_code": "86229"
  },
  "product": {
    "names": [
      "michael_kors shirt"
    ],
    "amounts": [
      69
    ]
  },
  "flow": "product_defect",
  "subflow": "refund_status"
}

# example_data_2: a "storewide_query" record; it has no email or username,
# only address fields in the order section, and empty product lists.
example_data_2 = {
  "personal": {
    "customer_name": "joyce wu",
    "member_level": "bronze",
    "phone": "(859) 787-9085",
  },
  "order": {
    "street_address": "9998 lincoln ave",
    "full_address": "9998 lincoln ave  raleigh, mi 14573",
    "city": "raleigh",
    "state": "mi",
    "zip_code": "14573"
  },
  "product": {
    "names": [],
    "amounts": []
  },
  "flow": "storewide_query",
  "subflow": "timing_4"
}


In [3]:
# Evaluation: score both example records, then print the two result dicts.
# The scoring functions print their own per-check diagnostics while running,
# so that output appears before the dicts printed below.
evaluation_1 = evaluate_data(example_data_1)
print('\n')
evaluation_2 = evaluate_data(example_data_2)

print(evaluation_1)
print('\n')
print(evaluation_2)

Found row: customer_name
Found row: email
Found row: member_level
Found row: phone
Found row: username
Found row: street_address
Found row: full_address
Found row: city
Found row: state
Found row: zip_code
Found row: num_products
Found row: order_id
Found row: packaging
Found row: payment_method
Found row: products
Found row: purchase_date
Found row: names
Found row: amounts
Structure Score: 25.00
Max Structure Score: 25
Customer name is present and correct.
Email is present and correctly formatted.
Member level is present.
Phone number is present and correctly formatted.
Username is present.
Street address matches full address.
ZIP code matches full address.
City matches full address.
Number of products is present.
Order ID is correctly formatted.
Packaging information is present.
Payment method is present.
Products information is present.
Purchase date is present.
State matches full address.
ZIP code is present.
Product names are present.
Product amounts are present.
Flow is correctl

In [1]:
import difflib
import re

# calculate structure score based on flow type
def calculate_structure_score(data, flow_type):
    """Return the share of five structural checks that ``data`` passes.

    Each passing check is worth 5 of 25 points; the result is expressed as a
    percentage.  The checks are: ``personal.customer_name`` exists,
    ``order.street_address`` exists, ``product.names`` exists, ``flow``
    equals ``flow_type``, and ``subflow`` exists.
    """
    points_per_check = 5
    max_structure_score = 25

    checks = (
        'personal' in data and 'customer_name' in data['personal'],
        'order' in data and 'street_address' in data['order'],
        'product' in data and 'names' in data['product'],
        'flow' in data and data['flow'] == flow_type,
        'subflow' in data,
    )
    structure_score = points_per_check * sum(1 for passed in checks if passed)

    # Normalise to a percentage of the maximum.
    return (structure_score / max_structure_score) * 100

# calculate entity score based on correctness of information
def calculate_entity_score(data, flow_type):
    """Score four entity checks on ``data`` as a percentage.

    Each check is worth 5 of 20 points:

    * ``personal.customer_name`` equals ``'alessandro phoenix'``
    * ``personal.email`` contains ``'@'``
    * ``order.products`` contains ``'michael_kors'``
    * ``flow`` equals ``'product_defect'``

    The expected values are hard-coded for the example record in this file.
    Missing sections or fields simply fail their check instead of raising
    ``KeyError``, matching the tolerance of ``calculate_structure_score``.

    Args:
        data: The record to score.
        flow_type: Accepted for signature compatibility; not used by the
            checks.

    Returns:
        The entity score as a percentage between 0 and 100.
    """
    entity_score = 0
    max_entity_score = 20  # Example max score for entity

    # Treat absent (or empty) sections as empty dicts so every check below
    # can be evaluated safely.
    personal = data.get('personal') or {}
    order = data.get('order') or {}

    # points based on the accuracy of entities
    if personal.get('customer_name') == 'alessandro phoenix':
        entity_score += 5
    if 'email' in personal and '@' in personal['email']:
        entity_score += 5
    if 'products' in order and 'michael_kors' in order['products']:
        entity_score += 5
    if data.get('flow') == 'product_defect':
        entity_score += 5

    return (entity_score / max_entity_score) * 100  # Normalize the score

#  function for comparing generated and correct answers
def compare_generated_vs_ground_truth(generated, correct):
    """Return how similar ``generated`` is to ``correct`` as a percentage.

    The value is ``difflib.SequenceMatcher(...).ratio()`` scaled from the
    0-1 range to 0-100.
    """
    matcher = difflib.SequenceMatcher(a=generated, b=correct)
    return 100 * matcher.ratio()

def format_disruption_level(generated, correct, section):
    """Classify how far ``generated`` deviates from ``correct``.

    The similarity percentage is bucketed into a label:

    * 80 or more      -> "Slight Disruption" (this includes exact matches)
    * 50 up to 80     -> "Medium Disruption"
    * below 50        -> "Major Disruption"

    A summary line naming ``section`` is printed.

    Returns:
        A ``(comparison_score, disruption_level)`` tuple.
    """
    comparison_score = compare_generated_vs_ground_truth(generated, correct)

    if comparison_score >= 80:
        disruption_level = "Slight Disruption"
    elif comparison_score >= 50:
        disruption_level = "Medium Disruption"
    elif comparison_score < 50:
        disruption_level = "Major Disruption"
    else:
        # Only reachable when no comparison holds (e.g. NaN); keep it blank.
        disruption_level = ""

    print(f"{section} Comparison Score: {comparison_score:.2f}% ({disruption_level})")
    return comparison_score, disruption_level

# evaluation to include format disruption scoring
def evaluate_data_with_format(data, generated_data):
    """Score ``data`` and, if its structure is perfect, compare field formats.

    The structure and entity scores are computed on ``data`` and averaged.
    When the structure score is exactly 100, every field of
    ``generated_data`` that also exists in a dict-valued section of ``data``
    is compared with its counterpart.  Before comparison both values are
    turned into text (lists are space-joined) and every character that is
    neither a word character nor whitespace is removed.

    NOTE(review): the scores are computed on ``data`` (the reference
    record), not on ``generated_data`` -- confirm this is intended.

    Returns:
        A dict with ``structure_score``, ``entity_score``, ``total_score``
        and ``format_comparison_scores``.  The last entry maps
        ``"section.field"`` to its comparison score and disruption level, or
        is ``None`` when the format comparison was skipped.
    """
    flow_type = data['flow']
    structure_score = calculate_structure_score(data, flow_type)
    entity_score = calculate_entity_score(data, flow_type)

    report = {
        'structure_score': structure_score,
        'entity_score': entity_score,
        'total_score': (structure_score + entity_score) / 2,
        'format_comparison_scores': None,
    }

    # The format comparison only runs for a structurally complete record.
    if structure_score != 100:
        print("Structure score is not 100%. Skipping format comparison.")
        return report

    def _as_comparable_text(value):
        # Lists become space-separated text; punctuation is then dropped.
        if isinstance(value, list):
            value = ' '.join(str(item) for item in value)
        return re.sub(r'[^\w\s]', '', str(value))

    field_scores = {}
    for key in generated_data:
        # Only dict-valued sections such as 'personal' are compared.
        if key not in data or not isinstance(data[key], dict):
            continue

        reference_section = data[key]
        generated_section = generated_data[key]
        for subkey in generated_section:
            if subkey not in reference_section:
                continue

            section = f"{key}.{subkey}"
            score, level = format_disruption_level(
                _as_comparable_text(generated_section[subkey]),
                _as_comparable_text(reference_section[subkey]),
                section,
            )
            field_scores[section] = {
                "comparison_score": score,
                "disruption_level": level,
            }

    report['format_comparison_scores'] = field_scores
    return report

# Reference ("correct") record that the generated record is compared against.
example_data_1 = {
  "personal": {
    "customer_name": "alessandro phoenix",
    "email": "aphoenix939@email.com",
    "member_level": "gold",
    "phone": "(727) 760-7806",
    "username": "aphoenix939"
  },
  "order": {
    "street_address": "8865 lexington ave",
    "full_address": "8865 lexington ave  la fayette, tx 86229",
    "city": "la fayette",
    "num_products": "1",
    "order_id": "7916676427",
    "packaging": "yes",
    "payment_method": "credit card",
    # products is stored as a string holding a list literal, not as a list
    "products": "[{'brand': 'michael_kors', 'product_type': 'shirt', 'amount': 69, 'image_url': 'images/michael_kors-shirt.jpeg'}]",
    "purchase_date": "2019-11-20",
    "state": "tx",
    "zip_code": "86229"
  },
  "product": {
    "names": [
      "michael_kors shirt"
    ],
    "amounts": [
      69
    ]
  },
  "flow": "product_defect",
  "subflow": "refund_status"
}

# Example generated record with formatting issues of varying severity.
generated_data_1 = {
  "personal": {
    "customer_name": "alessandro phoenix",
    "email": "aphoenix939@email-com",  # wrong email format ('-' instead of '.')
    "member_level": "gold",
    "phone": "(727) 760-7806",
    "username": "aphoenix939"
  },
  "order": {
    "street_address": "8865 lexington ave",
    "full_address": "8865 lexington ave lafayette tx",  # city run together, comma and zip code missing
    "city": "miami",  # wrong city
    "num_products": "1",
    "order_id": "86229", # order id and zip code values are swapped
    "packaging": "yes",
    "payment_method": "credit_card",  # underscore instead of a space
    "products": "[{'brand': 'michael kors', 'product_type': 'shirt', 'amount': 69, 'image_url': 'images/michael_kors-shirt.jpeg'}]",
    "purchase_date": "2019-11-20",
    "state": "tx",
    "zip_code": "7916676427"
  },
  "product": {
    "names": [
      "michael kors shirt"
    ],
    "amounts": [
      69
    ]
  },
  "flow": "product_defect",
  "subflow": "refund_status"
}

# Run the evaluation with the reference record as `data` and the generated
# record as `generated_data`, then print the resulting dict.
evaluation_1 = evaluate_data_with_format(example_data_1, generated_data_1)

print("\nFinal Evaluation with Format Comparison:")
print(evaluation_1)


personal.customer_name Comparison Score: 100.00% (Slight Disruption)
personal.email Comparison Score: 100.00% (Slight Disruption)
personal.member_level Comparison Score: 100.00% (Slight Disruption)
personal.phone Comparison Score: 100.00% (Slight Disruption)
personal.username Comparison Score: 100.00% (Slight Disruption)
order.street_address Comparison Score: 100.00% (Slight Disruption)
order.full_address Comparison Score: 88.57% (Slight Disruption)
order.city Comparison Score: 13.33% (Major Disruption)
order.num_products Comparison Score: 100.00% (Slight Disruption)
order.order_id Comparison Score: 26.67% (Major Disruption)
order.packaging Comparison Score: 100.00% (Slight Disruption)
order.payment_method Comparison Score: 90.91% (Slight Disruption)
order.products Comparison Score: 98.82% (Slight Disruption)
order.purchase_date Comparison Score: 100.00% (Slight Disruption)
order.state Comparison Score: 100.00% (Slight Disruption)
order.zip_code Comparison Score: 13.33% (Major Disrupti