# In [2]:
import json
from collections import defaultdict


def load_json(json_file_path):
    """Load JSON data from a file.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document, as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON interchange text is UTF-8. Passing the encoding explicitly keeps
    # the result independent of the platform's locale encoding, which would
    # otherwise corrupt or reject non-ASCII content on some systems.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def preprocess_data(issue_data):
    """Run the text fields of every issue record through ``clean_text``.

    For each entry of ``issue_data["Sources"]`` the ``"RepoLanguage"`` and
    ``"State"`` values are replaced, in place, by the result of
    ``clean_text``; a missing key is looked up with ``dict.get`` and so is
    passed on as ``None``.

    Returns:
        The same ``issue_data`` object that was passed in.
    """
    text_fields = ("RepoLanguage", "State")
    for record in issue_data["Sources"]:
        for field in text_fields:
            record[field] = clean_text(record.get(field))
    return issue_data

def clean_text(text):
    r"""Decode backslash escape sequences embedded in ``text``.

    Literal escapes in the value (for example the two characters ``\n`` or
    the six characters ``\u00e9``) are replaced by the characters they
    denote. Every other character, including non-ASCII text, is returned
    unchanged.

    Args:
        text: String to clean, or ``None``.

    Returns:
        The decoded string, or ``None`` when ``text`` is ``None``.

    Raises:
        UnicodeDecodeError: If ``text`` contains a malformed escape, such as
            a trailing lone backslash or a truncated ``\x``/``\u`` sequence.
    """
    if text is None:
        return None
    # The ``unicode-escape`` codec reads non-escape bytes as Latin-1, so
    # feeding it UTF-8 bytes corrupts every non-ASCII character (U+00E9 would
    # come back as the two characters U+00C3 U+00A9). Encoding as Latin-1
    # with ``backslashreplace`` keeps Latin-1 characters as single bytes and
    # rewrites everything else as \u / \U escapes; the decoder maps both
    # forms back to the original character.
    return text.encode('latin-1', 'backslashreplace').decode('unicode-escape')
def separate_issues_by_state(issue_data, state):
    """Return the issues whose ``"State"`` value equals ``state``.

    The records of ``issue_data["Sources"]`` are scanned in order; matching
    records are returned in a new list (the same dict objects, not copies).
    """
    sources = issue_data["Sources"]
    return [record for record in sources if record["State"] == state]

def calculate_total_issues_by_language(issue_data):
    """Count the issues recorded for each repository language.

    Every record in ``issue_data["Sources"]`` contributes one to the tally
    of its ``"RepoLanguage"``; records whose language is ``None`` are
    skipped.

    Returns:
        A ``defaultdict(int)`` mapping language to its issue count.
    """
    counts = defaultdict(int)
    languages = (record["RepoLanguage"] for record in issue_data["Sources"])
    for lang in languages:
        if lang is None:
            continue
        counts[lang] += 1
    return counts

def calculate_precision_for_closed_issues(closed_issues):
    """Count the closed issues belonging to each language.

    Args:
        closed_issues: Iterable of issue records; each must have a
            ``"RepoLanguage"`` key. Records whose language is ``None`` are
            skipped.

    Returns:
        A plain dict mapping language to the number of records seen for it.
    """
    closed_counts = {}
    for record in closed_issues:
        lang = record["RepoLanguage"]
        if lang is None:
            continue
        if lang in closed_counts:
            closed_counts[lang] += 1
        else:
            closed_counts[lang] = 1
    return closed_counts

def add_entries_for_open_issues(open_issues, precision_results):
    """Give every language seen in ``open_issues`` an entry in the results.

    ``precision_results`` is updated in place: a language that has no entry
    yet is added with the value ``0``; existing entries are left untouched.
    Records whose ``"RepoLanguage"`` is ``None`` are skipped. Nothing is
    returned.
    """
    for record in open_issues:
        lang = record["RepoLanguage"]
        if lang is None:
            continue
        # setdefault only inserts when the key is absent.
        precision_results.setdefault(lang, 0)

def calculate_precision(precision_results, total_issues):
    """Convert per-language closed counts into closed/total ratios.

    Args:
        precision_results: Dict mapping language to its closed-issue count.
            It is overwritten in place with the resulting ratios.
        total_issues: Mapping from language to its total issue count; it is
            indexed with every language in ``precision_results``.

    Returns:
        The same ``precision_results`` dict, now holding
        ``closed / total`` for each language, or ``0.0`` where the total is
        not positive.
    """
    # Snapshot the keys; only the values are rewritten below.
    for language in list(precision_results):
        closed_count = precision_results[language]
        total_count = total_issues[language]
        if total_count > 0:
            ratio = closed_count / total_count
        else:
            ratio = 0.0
        precision_results[language] = ratio
    return precision_results

# Snapshot file holding the issue records to analyse (relative path).
json_file_path = 'Snapshots/20230831_061759_issue_sharings.json'
issue_data = load_json(json_file_path)

# Clean the "RepoLanguage" and "State" fields of every record. The cleaning
# happens in place, so preprocessed_data is the same object as issue_data.
preprocessed_data = preprocess_data(issue_data)

# Split the records by their "State" value.
open_issues = separate_issues_by_state(preprocessed_data, 'OPEN')
closed_issues = separate_issues_by_state(preprocessed_data, 'CLOSED')

# Count all issues per language, whatever their state (the denominator of
# the ratio computed below).
total_issues = calculate_total_issues_by_language(preprocessed_data)

# Count closed issues per language (the numerator of the ratio below).
precision_results = calculate_precision_for_closed_issues(closed_issues)

# Languages that have open issues but no closed ones get an explicit 0 so
# they still appear in the results.
add_entries_for_open_issues(open_issues, precision_results)

# Turn the closed counts into closed/total ratios per language; this ratio
# is what the script calls "precision".
precision_results = calculate_precision(precision_results, total_issues)
