In [270]:
import json

# Load data from JSON
with open('apiResponse/all_responses_200_sample.json', 'r', encoding='utf-8') as file:
    api_data = json.load(file)

with open('Samples/sample_siirtokarjalaiset_annotated.json', 'r', encoding='utf-8') as file:
    hand_data = json.load(file)



In [271]:
def clean_api_response_data(file_path, items_to_remove):
    # Convert all items in the list to lowercase for comparison
    items_to_remove = [item.lower().strip() for item in items_to_remove]
    
    # Load the json data
    with open(file_path, 'r') as f:
        data = json.load(f)

    removed_count = 0
    
    for entry in data:
        api_response = entry.get("api_response", "")
        api_items = api_response.split("\n")
        
        cleaned_api_items = []
        for item in api_items:
            if ": " not in item:  # Check if ": " exists in the line
                continue  # If not, skip to the next iteration

            # Split into key and values
            key, value = item.split(": ", 1)
            
            # Split values based on comma, while stripping any extra spaces
            values_list = [v.strip() for v in value.split(",")]
            cleaned_values_list = [v for v in values_list if v.lower() not in items_to_remove and v.strip() != '']
                
            # Update count of removed items
            removed_count += len(values_list) - len(cleaned_values_list)
                
            # Recombine to a single string
            cleaned_value = ", ".join(cleaned_values_list)
            cleaned_api_items.append(f"{key}: {cleaned_value}")
                
        # Recombine the cleaned API items to the api_response format
        entry["api_response"] = "\n".join(cleaned_api_items)
    
    # Save the cleaned data back to the file
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)

    return f"Removed {removed_count} items. Cleaned data saved back to the file."

# Example usage
file_path = 'apiResponse/all_responses_200_sample.json'
items_to_remove = ["none", "N/A", "whateverelse", "-", " ", "  "]
print(clean_api_response_data(file_path, items_to_remove))

Removed 0 items. Cleaned data saved back to the file.


In [272]:
#FuxxyWUzzy algorithm
from fuzzywuzzy import fuzz

def parse_response(response_str):
    lines = response_str.split('\n')
    parsed_response = {}
    for line in lines:
        key, _, value = line.partition(': ')
        parsed_response[key] = value.strip() if value.strip() else None
    return parsed_response


# Checking how similar the words are
def are_similar(str1, str2, threshold=70, context=None):
    similarity = fuzz.token_set_ratio(str1, str2)
    is_similar = similarity > threshold
    
    # Check if similarity is below 100% and store it if so
    if is_similar and similarity < 100:
        store_not_exact_matches(str1, str2, similarity, context)
        
    return is_similar


def store_not_exact_matches(str1, str2, similarity, context):
    data = {
        "string_1": str1,
        "string_2": str2,
        "similarity": similarity,
        "context": context  # This will provide additional information
    }
    with open("non_exact_matches.json", "a", encoding="utf-8") as file:
        json.dump(data, file, ensure_ascii=False, indent=4)
        file.write(",\n")






In [273]:
## to pass context to non_exact_matches use this script!

def compare_values(api_values, annotated_values, context):
    api_list = api_values.lower().split(', ') if api_values else []
    annotated_list = annotated_values.lower().split(', ') if annotated_values else []
    
    matches = set()
    mismatches = set(api_list).union(set(annotated_list))  
    
    for api_val in api_list:
        for ann_val in annotated_list:
            if are_similar(api_val, ann_val, context=context):
                matches.add(api_val)
                mismatches.discard(api_val)
                mismatches.discard(ann_val)
    
    return matches, mismatches


# Parse JSON strings
api_responses = api_data
hand_annotated = hand_data
# Loop over all elements in api_responses and hand_annotated to compare them
results = []
total_matches = 0
total_mismatches = 0

for api_resp in api_responses:
    hand_ann = next((item for item in hand_annotated if item["index"] == api_resp["person_index"]), None)
    if not hand_ann:
        continue
    
    parsed_api_response = parse_response(api_resp['api_response'])
    
    comparison_results = {
        "index": hand_ann['index'],
        "person_name": hand_ann['primary_person_name'],
        "spouse_name": hand_ann['spouse_name'],
        "detail": []
    }
    
    for key in ["person_hobbies", "person_social_orgs", "spouse_hobbies", "spouse_social_orgs"]:
        split_keys = key.split("_")
        api_key = split_keys[0].capitalize() + "".join(word.capitalize() for word in split_keys[1:])
        
        # Safely get the index values for context
        api_person_index = api_resp.get('person_index', None)
        annotated_person_index = hand_ann.get('index', None)
        

        # Build the context
        context = {
            "api_person_index": api_person_index,
            "annotated_person_index": annotated_person_index,
            "category_type": key
        }
        
        matches, mismatches = compare_values(parsed_api_response.get(api_key, ""), hand_ann[key], context)
        
        detail = {
            "type": key,
            "matches": list(matches),
            "mismatches": list(mismatches)
        }
        comparison_results["detail"].append(detail)
        
        total_matches += len(matches)
        total_mismatches += len(mismatches)
    
    results.append(comparison_results)

output_json = json.dumps(results, indent=4, ensure_ascii=False)

# To store the results in a file:
with open("matches_results.json", "w") as file:
    file.write(output_json)

# Printing total matches and mismatches
print(f"Total Matches: {total_matches}")
print(f"Total Mismatches: {total_mismatches}")

# Calculating and printing the match percentage
total_comparisons = total_matches + total_mismatches
if total_comparisons > 0: 
    match_percentage = (total_matches / total_comparisons) * 100
    print(f"Match Percentage: {match_percentage:.2f}%")
else:
    print("No comparisons were made (Total Comparisons: 0).")

Total Matches: 390
Total Mismatches: 461
Match Percentage: 45.83%


In [274]:
### Precision, recall and F Score 

# to pass context to non_exact_matches use the previous script

def compare_values(api_dict, hand_ann_dict):
    def safe_extract(api_response, keyword):
        try:
            return api_response.split(keyword)[1].split("\n")[0].strip().split(', ')
        except IndexError:
            return []

    api_name = safe_extract(api_dict["api_response"], "PersonName:")
    api_hobbies = safe_extract(api_dict["api_response"], "PersonHobbies:")
    api_social_orgs = safe_extract(api_dict["api_response"], "PersonSocialOrgs:")
    api_spouse_hobbies = safe_extract(api_dict["api_response"], "SpouseHobbies:")
    api_spouse_social_orgs = safe_extract(api_dict["api_response"], "SpouseSocialOrgs:")

    hand_name = hand_ann_dict["primary_person_name"]
    hand_hobbies = hand_ann_dict["person_hobbies"].split(', ')
    hand_social_orgs = hand_ann_dict["person_social_orgs"].split(', ')
    hand_spouse_hobbies = hand_ann_dict["spouse_hobbies"].split(', ')
    hand_spouse_social_orgs = hand_ann_dict["spouse_social_orgs"].split(', ')

    # Assuming are_similar function is predefined

    def calculate_metrics(api_values, hand_values):
        TP = set()
        FP = set(api_values)  # Temporarily assume all api_values are FP
        FN = set(hand_values)  # Temporarily assume all annotated_values are FN
        
        for api_val in api_values:
            for ann_val in hand_values:
                if are_similar(api_val, ann_val):
                    TP.add(api_val)  # Add to True Positives
                    FP.discard(api_val)  # Remove from False Positives
                    FN.discard(ann_val)  # Remove from False Negatives
                    break  # Stop looking for additional matches
                    
        precision = len(TP) / (len(TP) + len(FP)) if TP or FP else 0
        recall = len(TP) / (len(TP) + len(FN)) if TP or FN else 0
        f_score = (2 * precision * recall) / (precision + recall) if precision + recall > 0 else 0
    
        return TP, FP, FN, precision, recall, f_score

    # Compute metrics for person and spouse separately
    person_metrics = calculate_metrics(api_social_orgs, hand_social_orgs)
    person_hobbies = calculate_metrics(api_hobbies, hand_hobbies)
    spouse_metrics = calculate_metrics(api_spouse_social_orgs, hand_spouse_social_orgs)
    spouse_hobbies = calculate_metrics(api_spouse_hobbies, hand_spouse_hobbies)

    

    return person_metrics, spouse_metrics, person_hobbies, spouse_hobbies

    



api_responses = api_data  # This should be your API data
hand_annotated = hand_data  # This should be your hand annotated data

results = []


total_person_TP = total_person_FP = total_person_FN = 0
total_spouse_TP = total_spouse_FP = total_spouse_FN = 0
total_person_hobbies_TP = total_person_hobbies_FP = total_person_hobbies_FN = 0
total_spouse_hobbies_TP = total_spouse_hobbies_FP = total_spouse_hobbies_FN = 0


for api_resp in api_responses:
    hand_ann = next((item for item in hand_annotated if item["index"] == api_resp["person_index"]), None)
    if not hand_ann:
        continue
    
    # Call compare_values and unpack all four return values here:
    person_metrics, spouse_metrics, person_hobbies_metrics, spouse_hobbies_metrics = compare_values(api_resp, hand_ann)

    total_person_TP += len(person_metrics[0])
    total_person_FP += len(person_metrics[1])
    total_person_FN += len(person_metrics[2])

    total_spouse_TP += len(spouse_metrics[0])
    total_spouse_FP += len(spouse_metrics[1])
    total_spouse_FN += len(spouse_metrics[2])
    
    # Add handling for person_hobbies_metrics and spouse_hobbies_metrics
    # For example:
    total_person_hobbies_TP += len(person_hobbies_metrics[0])
    total_person_hobbies_FP += len(person_hobbies_metrics[1])
    total_person_hobbies_FN += len(person_hobbies_metrics[2])

    total_spouse_hobbies_TP += len(spouse_hobbies_metrics[0])
    total_spouse_hobbies_FP += len(spouse_hobbies_metrics[1])
    total_spouse_hobbies_FN += len(spouse_hobbies_metrics[2])

    results.append({
    "person_metrics": {
        "index": hand_ann['index'],
        "person_name": hand_ann['primary_person_name'],
        "true_positives": list(person_metrics[0]),
        "false_positives": list(person_metrics[1]),
        "false_negatives": list(person_metrics[2]),
        "precision": person_metrics[3],
        "recall": person_metrics[4],
        "f_score": person_metrics[5]
    },
    "person_hobbies_metrics": {
        "true_positives": list(person_hobbies_metrics[0]),
        "false_positives": list(person_hobbies_metrics[1]),
        "false_negatives": list(person_hobbies_metrics[2]),
        "precision": person_hobbies_metrics[3],
        "recall": person_hobbies_metrics[4],
        "f_score": person_hobbies_metrics[5]
    },
    "spouse_metrics": {
        "true_positives": list(spouse_metrics[0]),
        "false_positives": list(spouse_metrics[1]),
        "false_negatives": list(spouse_metrics[2]),
        "precision": spouse_metrics[3],
        "recall": spouse_metrics[4],
        "f_score": spouse_metrics[5]
    },
    "spouse_hobbies_metrics": {
        "true_positives": list(spouse_hobbies_metrics[0]),
        "false_positives": list(spouse_hobbies_metrics[1]),
        "false_negatives": list(spouse_hobbies_metrics[2]),
        "precision": spouse_hobbies_metrics[3],
        "recall": spouse_hobbies_metrics[4],
        "f_score": spouse_hobbies_metrics[5]
    }
})

summary = {

}
for key in summary:
    summary[key]['precision'] = round(summary[key]['precision'], 3)
    summary[key]['recall'] = round(summary[key]['recall'], 3)
    summary[key]['f_score'] = round(summary[key]['f_score'], 3)


# Final JSON output to include both the detailed results and the summary
output_data = {
    "summary": summary,
    "results": results
}

# Storing in JSON format
output_json = json.dumps(output_data, indent=4, ensure_ascii=False)

# To store the results in a file:
with open("precision_recall_results.json", "w", encoding="utf-8") as file:
    file.write(output_json)


In [275]:
#remove words like "none", "N/A" etc.. 
def clean_json_data(file_path, items_to_remove):
    # Convert all items in the list to lowercase for comparison
    items_to_remove = [item.lower() for item in items_to_remove]
    
    # Load the json data
    with open(file_path, 'r') as f:
        data = json.load(f)

    removed_count = 0

    # Iterate through the "results" list and clean unwanted items
    for result in data.get("results", []):
        # For each metric type (e.g., person_metrics, spouse_metrics)...
        for metric_key, metric_values in result.items():
            # For each list value (e.g., true_positives, false_positives)...
            for list_key, list_values in metric_values.items():
                if isinstance(list_values, list):
                    # Filter out unwanted items and empty strings or strings with only spaces
                    cleaned_list = [value for value in list_values if value.lower() not in items_to_remove and value.strip() != '']
                    
                    # Update count of removed items
                    removed_count += len(list_values) - len(cleaned_list)

                    # Update the list in the JSON data
                    metric_values[list_key] = cleaned_list

    # Save the cleaned data back to the file
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)

    return f"Removed {removed_count} items. Cleaned data saved back to the file."

# Example usage
file_path = 'precision_recall_results.json'
items_to_remove = ["none", "N/A", "-", " ", "  "]
print(clean_json_data(file_path, items_to_remove))



Removed 429 items. Cleaned data saved back to the file.


In [276]:
#micro f score
#summary for all categories



def calculate_metrics(TP, FP, FN):
    try:
        precision = round((TP / (TP + FP)) if TP + FP != 0 else 0, 2)
    except ZeroDivisionError:
        precision = 0.0
        
    try:
        recall = round((TP / (TP + FN)) if TP + FN != 0 else 0, 2)
    except ZeroDivisionError:
        recall = 0.0
        
    try:
        f_score = round((2 * precision * recall) / (precision + recall) if precision + recall != 0 else 0, 2)
    except ZeroDivisionError:
        f_score = 0.0
    
    return precision, recall, f_score

# Load the JSON data
with open('precision_recall_results.json', 'r') as file:
    data = json.load(file)

# Initialize metrics counters
metrics_counter = {
    "person": {"TP": 0, "FP": 0, "FN": 0},
    "spouse": {"TP": 0, "FP": 0, "FN": 0},
    "social_orgs": {"TP": 0, "FP": 0, "FN": 0},
    "hobbies": {"TP": 0, "FP": 0, "FN": 0},
    "person_social_orgs": {"TP": 0, "FP": 0, "FN": 0},
    "person_hobbies": {"TP": 0, "FP": 0, "FN": 0},
    "spouse_social_orgs": {"TP": 0, "FP": 0, "FN": 0},
    "spouse_hobbies": {"TP": 0, "FP": 0, "FN": 0},
    "overall": {"TP": 0, "FP": 0, "FN": 0},
    "overall_no_emptylabel": {"TP": 0, "FP": 0, "FN": 0}
}

# Iterate through the "results" section
# Helper function to update the metrics
def update_metrics(category_key, value):
    metrics_counter[category_key]["TP"] += len(value["true_positives"])
    metrics_counter[category_key]["FP"] += len(value["false_positives"])
    metrics_counter[category_key]["FN"] += len(value["false_negatives"])

    # Update the "overall_no_emptylabel" category
    if category_key == "overall":
        metrics_counter["overall_no_emptylabel"]["TP"] += len([tp for tp in value["true_positives"] if tp != "emptylabel"])
        metrics_counter["overall_no_emptylabel"]["FP"] += len([fp for fp in value["false_positives"] if fp != "emptylabel"])
        metrics_counter["overall_no_emptylabel"]["FN"] += len([fn for fn in value["false_negatives"] if fn != "emptylabel"])


# Iterate through the "results" section
for result in data['results']:
    for key, value in result.items():
        if "person_metrics" in key:
            update_metrics("person_social_orgs", value)
            update_metrics("person", value)
            update_metrics("social_orgs", value)

        if "person_hobbies_metrics" in key:
            update_metrics("person_hobbies", value)
            update_metrics("person", value)
            update_metrics("hobbies", value)

        if "spouse_metrics" in key:
            update_metrics("spouse_social_orgs", value)
            update_metrics("spouse", value)
            update_metrics("social_orgs", value)

        if "spouse_hobbies_metrics" in key:
            update_metrics("spouse_hobbies", value)
            update_metrics("spouse", value)
            update_metrics("hobbies", value)

        # Update overall metrics
        update_metrics("overall", value)

# Update summary metrics in data
for category, metrics in metrics_counter.items():
    precision, recall, f_score = calculate_metrics(metrics["TP"], metrics["FP"], metrics["FN"])
    data["summary"][category] = {
        "total_true_positives": metrics["TP"],
        "total_false_positives": metrics["FP"],
        "total_false_negatives": metrics["FN"],
        "precision": precision,
        "recall": recall,
        "f_score": f_score
    }

# Save the updated data back to the JSON file
with open('precision_recall_results.json', 'w') as file:
    json.dump(data, file, indent=4, ensure_ascii=False)

In [277]:
#macro f score
# summary for all categories


# Load the JSON data
with open('precision_recall_results.json', 'r') as file:
    data = json.load(file)

averages = []

for result in data["results"]:
    precision_sum = 0.0
    recall_sum = 0.0
    f_score_sum = 0.0
    
    entities = ["person_metrics", "person_hobbies_metrics", "spouse_metrics", "spouse_hobbies_metrics"]
    num_entities = len(entities)
    
    for entity in entities:
        precision_sum += result[entity]["precision"]
        recall_sum += result[entity]["recall"]
        f_score_sum += result[entity]["f_score"]
    
    avg_precision = precision_sum / num_entities
    avg_recall = recall_sum / num_entities
    avg_f_score = f_score_sum / num_entities
    
    averages.append({
        "average_precision": avg_precision,
        "average_recall": avg_recall,
        "average_f_score": avg_f_score
    })

# Now `averages` contains the average precision, recall, and F-score
# for each person in the original "results" array.

total_precision = 0.0
total_recall = 0.0
total_f_score = 0.0
num_instances = len(averages)

# Summing up all the average scores
for avg in averages:
    total_precision += avg['average_precision']
    total_recall += avg['average_recall']
    total_f_score += avg['average_f_score']

# Calculating the macro-average for all instances
macro_avg_precision = total_precision / num_instances
macro_avg_recall = total_recall / num_instances
macro_avg_f_score = total_f_score / num_instances

micro_avg_precision = round((metrics_counter["overall"]["TP"] / (metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FP"])) if metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FP"] != 0 else 0, 2)
micro_avg_recall = round((metrics_counter["overall"]["TP"] / (metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FN"])) if metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FN"] != 0 else 0, 2)
micro_avg_f_score = round((2 * micro_avg_precision * micro_avg_recall) / (micro_avg_precision + micro_avg_recall) if micro_avg_precision + micro_avg_recall != 0 else 0, 2)

data["summary"]["micro_average"] = {
    "total_true_positives": metrics_counter["overall"]["TP"],
    "total_false_positives": metrics_counter["overall"]["FP"],
    "total_false_negatives": metrics_counter["overall"]["FN"],
    "precision": micro_avg_precision,
    "recall": micro_avg_recall,
    "f_score": micro_avg_f_score
}

# Add macro averages to the summary
data["summary"]["macro_average"] = {
    "total_true_positives": None,  # These values are not defined in macro-averaging
    "total_false_positives": None,  # These values are not defined in macro-averaging
    "total_false_negatives": None,  # These values are not defined in macro-averaging
    "precision": macro_avg_precision,
    "recall": macro_avg_recall,
    "f_score": macro_avg_f_score
}

# Save the updated data back to the JSON file
with open('precision_recall_results.json', 'w') as file:
    json.dump(data, file, indent=4, ensure_ascii=False)

# Print Macro and Micro averages
print(f"Macro-average Precision: {macro_avg_precision:.2f}")
print(f"Macro-average Recall: {macro_avg_recall:.2f}")
print(f"Macro-average F-score: {macro_avg_f_score:.2f}")
print("--")
print(f"Micro-average Precision: {data['summary']['micro_average']['precision']:.2f}")
print(f"Micro-average Recall: {data['summary']['micro_average']['recall']:.2f}")
print(f"Micro-average F-score: {data['summary']['micro_average']['f_score']:.2f}")
print("----------")


Macro-average Precision: 0.26
Macro-average Recall: 0.25
Macro-average F-score: 0.25
--
Micro-average Precision: 0.76
Micro-average Recall: 0.53
Micro-average F-score: 0.62
----------


In [278]:
## Evaluation for true negative # EMPTY LABEL for updated results
## Basicly adding emptylabel in true positive if there is no false positive or false negative

# Load the JSON data
with open('precision_recall_results.json', 'r') as file:
    data = json.load(file)

empty_label_assigned_count = 0  # Counter for empty labels assigned

# Iterate through the "results" section
for result in data['results']:
    for metric_key, metric_value in result.items():
        
        # Check if all entities are empty
        if not any(metric_value['true_positives']) and \
           not any(metric_value['false_positives']) and \
           not any(metric_value['false_negatives']):
            metric_value['true_positives'] = ["emptylabel"]
            metric_value['f_score'] = 1.0
            metric_value['precision'] = 1.0
            metric_value['recall'] = 1.0
            empty_label_assigned_count += 1  # Increment the counter
        else:
            # Calculate precision, recall, and F-score
            try:
                metric_value['precision'] = len(metric_value['true_positives']) / \
                                            (len(metric_value['true_positives']) + len(metric_value['false_positives']))
            except ZeroDivisionError:
                metric_value['precision'] = 0.0
                
            try:
                metric_value['recall'] = len(metric_value['true_positives']) / \
                                         (len(metric_value['true_positives']) + len(metric_value['false_negatives']))
            except ZeroDivisionError:
                metric_value['recall'] = 0.0
                
            try:
                metric_value['f_score'] = 2 * (metric_value['precision'] * metric_value['recall']) / \
                                          (metric_value['precision'] + metric_value['recall'])
            except ZeroDivisionError:
                metric_value['f_score'] = 0.0

# Save the updated data back to the JSON file
with open('precision_recall_updated_results.json', 'w') as file:
    json.dump(data, file, indent=4, ensure_ascii=False)

    # Print how many times the "empty label" was assigned
print(f"Assigned 'empty label' {empty_label_assigned_count} times.")


Assigned 'empty label' 411 times.


In [279]:
#remove words like "none", "N/A" etc.. 
def clean_json_data(file_path, items_to_remove):
    # Convert all items in the list to lowercase for comparison
    items_to_remove = [item.lower() for item in items_to_remove]
    
    # Load the json data
    with open(file_path, 'r') as f:
        data = json.load(f)

    removed_count = 0

    # Iterate through the "results" list and clean unwanted items
    for result in data.get("results", []):
        # For each metric type (e.g., person_metrics, spouse_metrics)...
        for metric_key, metric_values in result.items():
            # For each list value (e.g., true_positives, false_positives)...
            for list_key, list_values in metric_values.items():
                if isinstance(list_values, list):
                    # Filter out unwanted items and empty strings or strings with only spaces
                    cleaned_list = [value for value in list_values if value.lower() not in items_to_remove and value.strip() != '']
                    
                    # Update count of removed items
                    removed_count += len(list_values) - len(cleaned_list)

                    # Update the list in the JSON data
                    metric_values[list_key] = cleaned_list

    # Save the cleaned data back to the file
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)

    return f"Removed {removed_count} items. Cleaned data saved back to the file."

# Example usage
file_path = 'precision_recall_updated_results.json'
items_to_remove = ["none", "N/A", "-", " ", "  "]
print(clean_json_data(file_path, items_to_remove))



Removed 0 items. Cleaned data saved back to the file.


In [280]:
#micro f score # EMPTY LABEL
#summary for all categories



def calculate_metrics(TP, FP, FN):
    try:
        precision = round((TP / (TP + FP)) if TP + FP != 0 else 0, 2)
    except ZeroDivisionError:
        precision = 0.0
        
    try:
        recall = round((TP / (TP + FN)) if TP + FN != 0 else 0, 2)
    except ZeroDivisionError:
        recall = 0.0
        
    try:
        f_score = round((2 * precision * recall) / (precision + recall) if precision + recall != 0 else 0, 2)
    except ZeroDivisionError:
        f_score = 0.0
    
    return precision, recall, f_score

# Load the JSON data
with open('precision_recall_updated_results.json', 'r') as file:
    data = json.load(file)

# Initialize metrics counters
metrics_counter = {
    "person": {"TP": 0, "FP": 0, "FN": 0},
    "spouse": {"TP": 0, "FP": 0, "FN": 0},
    "social_orgs": {"TP": 0, "FP": 0, "FN": 0},
    "hobbies": {"TP": 0, "FP": 0, "FN": 0},
    "person_social_orgs": {"TP": 0, "FP": 0, "FN": 0},
    "person_hobbies": {"TP": 0, "FP": 0, "FN": 0},
    "spouse_social_orgs": {"TP": 0, "FP": 0, "FN": 0},
    "spouse_hobbies": {"TP": 0, "FP": 0, "FN": 0},
    "overall": {"TP": 0, "FP": 0, "FN": 0},
}

# Iterate through the "results" section
# Helper function to update the metrics
def update_metrics(category_key, value):
    metrics_counter[category_key]["TP"] += len(value["true_positives"])
    metrics_counter[category_key]["FP"] += len(value["false_positives"])
    metrics_counter[category_key]["FN"] += len(value["false_negatives"])


# Iterate through the "results" section
for result in data['results']:
    for key, value in result.items():
        if "person_metrics" in key:
            update_metrics("person_social_orgs", value)
            update_metrics("person", value)
            update_metrics("social_orgs", value)

        if "person_hobbies_metrics" in key:
            update_metrics("person_hobbies", value)
            update_metrics("person", value)
            update_metrics("hobbies", value)

        if "spouse_metrics" in key:
            update_metrics("spouse_social_orgs", value)
            update_metrics("spouse", value)
            update_metrics("social_orgs", value)

        if "spouse_hobbies_metrics" in key:
            update_metrics("spouse_hobbies", value)
            update_metrics("spouse", value)
            update_metrics("hobbies", value)

        # Update overall metrics
        update_metrics("overall", value)

# Update summary metrics in data
for category, metrics in metrics_counter.items():
    precision, recall, f_score = calculate_metrics(metrics["TP"], metrics["FP"], metrics["FN"])
    data["summary"][category] = {
        "total_true_positives": metrics["TP"],
        "total_false_positives": metrics["FP"],
        "total_false_negatives": metrics["FN"],
        "precision": precision,
        "recall": recall,
        "f_score": f_score
    }

# Save the updated data back to the JSON file
with open('precision_recall_updated_results.json', 'w') as file:
    json.dump(data, file, indent=4, ensure_ascii=False)

In [281]:
#macro f score # EMPTY LABEL
# summary for all categories


# Load the JSON data
with open('precision_recall_updated_results.json', 'r') as file:
    data = json.load(file)

averages = []

for result in data["results"]:
    precision_sum = 0.0
    recall_sum = 0.0
    f_score_sum = 0.0
    
    entities = ["person_metrics", "person_hobbies_metrics", "spouse_metrics", "spouse_hobbies_metrics"]
    num_entities = len(entities)
    
    for entity in entities:
        precision_sum += result[entity]["precision"]
        recall_sum += result[entity]["recall"]
        f_score_sum += result[entity]["f_score"]
    
    avg_precision = precision_sum / num_entities
    avg_recall = recall_sum / num_entities
    avg_f_score = f_score_sum / num_entities
    
    averages.append({
        "average_precision": avg_precision,
        "average_recall": avg_recall,
        "average_f_score": avg_f_score
    })

# Now `averages` contains the average precision, recall, and F-score
# for each person in the original "results" array.

total_precision = 0.0
total_recall = 0.0
total_f_score = 0.0
num_instances = len(averages)

# Summing up all the average scores
for avg in averages:
    total_precision += avg['average_precision']
    total_recall += avg['average_recall']
    total_f_score += avg['average_f_score']

# Calculating the macro-average for all instances
macro_avg_precision = total_precision / num_instances
macro_avg_recall = total_recall / num_instances
macro_avg_f_score = total_f_score / num_instances

micro_avg_precision = round((metrics_counter["overall"]["TP"] / (metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FP"])) if metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FP"] != 0 else 0, 2)
micro_avg_recall = round((metrics_counter["overall"]["TP"] / (metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FN"])) if metrics_counter["overall"]["TP"] + metrics_counter["overall"]["FN"] != 0 else 0, 2)
micro_avg_f_score = round((2 * micro_avg_precision * micro_avg_recall) / (micro_avg_precision + micro_avg_recall) if micro_avg_precision + micro_avg_recall != 0 else 0, 2)

data["summary"]["micro_average"] = {
    "total_true_positives": metrics_counter["overall"]["TP"],
    "total_false_positives": metrics_counter["overall"]["FP"],
    "total_false_negatives": metrics_counter["overall"]["FN"],
    "precision": micro_avg_precision,
    "recall": micro_avg_recall,
    "f_score": micro_avg_f_score
}

# Add macro averages to the summary
data["summary"]["macro_average"] = {
    "total_true_positives": None,  # These values are not defined in macro-averaging
    "total_false_positives": None,  # These values are not defined in macro-averaging
    "total_false_negatives": None,  # These values are not defined in macro-averaging
    "precision": macro_avg_precision,
    "recall": macro_avg_recall,
    "f_score": macro_avg_f_score
}

# Save the updated data back to the JSON file
with open('precision_recall_updated_results.json', 'w') as file:
    json.dump(data, file, indent=4, ensure_ascii=False)

# Print Macro and Micro averages
print(f"Macro-average Precision: {macro_avg_precision:.2f}")
print(f"Macro-average Recall: {macro_avg_recall:.2f}")
print(f"Macro-average F-score: {macro_avg_f_score:.2f}")
print("--")
print(f"Micro-average Precision: {data['summary']['micro_average']['precision']:.2f}")
print(f"Micro-average Recall: {data['summary']['micro_average']['recall']:.2f}")
print(f"Micro-average F-score: {data['summary']['micro_average']['f_score']:.2f}")
print("----------")
for i, avg in enumerate(averages):
   print(f"Person {i+1} - Average Precision: {avg['average_precision']:.2f}, Average Recall: {avg['average_recall']:.2f}, Average F-score: {avg['average_f_score']:.2f}")


Macro-average Precision: 0.77
Macro-average Recall: 0.76
Macro-average F-score: 0.76
--
Micro-average Precision: 0.87
Micro-average Recall: 0.70
Micro-average F-score: 0.78
----------
Person 1 - Average Precision: 1.00, Average Recall: 1.00, Average F-score: 1.00
Person 2 - Average Precision: 1.00, Average Recall: 1.00, Average F-score: 1.00
Person 3 - Average Precision: 0.00, Average Recall: 0.00, Average F-score: 0.00
Person 4 - Average Precision: 0.00, Average Recall: 0.00, Average F-score: 0.00
Person 5 - Average Precision: 1.00, Average Recall: 0.88, Average F-score: 0.92
Person 6 - Average Precision: 1.00, Average Recall: 1.00, Average F-score: 1.00
Person 7 - Average Precision: 0.25, Average Recall: 0.25, Average F-score: 0.25
Person 8 - Average Precision: 0.75, Average Recall: 0.62, Average F-score: 0.67
Person 9 - Average Precision: 1.00, Average Recall: 1.00, Average F-score: 1.00
Person 10 - Average Precision: 1.00, Average Recall: 1.00, Average F-score: 1.00
Person 11 - Ave

In [107]:
#Print false_positives and false_negatives

import json

def extract_false_data(file_path, output_path):
    # Load the json data
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    # Dictionaries to collect data
    false_positives_dict = {}
    false_negatives_dict = {}

    for result in data.get("results", []):
        index = result.get("person_metrics", {}).get("index", None)  # Extracting index
        if not index:
            continue
        
        false_positives_dict[index] = []
        false_negatives_dict[index] = []
        
        for metric_key, metric_values in result.items():
            fp_values = metric_values.get("false_positives", [])
            fn_values = metric_values.get("false_negatives", [])
            false_positives_dict[index].extend(fp_values)
            false_negatives_dict[index].extend(fn_values)

    # Write the results to the output file
    with open(output_path, 'w') as output_file:
        
        # Write False Negatives
        output_file.write("False Negatives:\n")
        output_file.write("----------------------------------------\n")
        for index, fn_list in false_negatives_dict.items():
            for fn in fn_list:
                output_file.write(fn + '\n')
        output_file.write("\n")
        
        # Write False Positives
        output_file.write("False Positives:\n")
        output_file.write("----------------------------------------\n")
        for index, fp_list in false_positives_dict.items():
            for fp in fp_list:
                output_file.write(fp + '\n')
        output_file.write("\n")
        
        # Write by Index
        output_file.write("Data by Index:\n")
        output_file.write("----------------------------------------\n")
        for index in false_positives_dict.keys():
            output_file.write(f"Index: {index}\n")
            output_file.write("False Positives: " + ", ".join(false_positives_dict[index]) + "\n")
            output_file.write("False Negatives: " + ", ".join(false_negatives_dict[index]) + "\n")
            output_file.write("----------------------------------------\n")

# Example usage
file_path = 'precision_recall_updated_results.json'
output_path = 'output_false_data.txt'
extract_false_data(file_path, output_path)
