# Implementing Data Restrictions

In [25]:
import json
import os

### Create a Function to Read JSON Files

In [8]:
def load_data_from_json(filepath):
    """Read the JSON file at ``filepath`` and return its parsed content.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The Python object produced by parsing the file (a dict when the
        file's top-level JSON value is an object).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # The context manager closes the handle deterministically; this function
    # is called once per match file, so a leaked handle per call adds up.
    # JSON text is UTF-8, so decode explicitly instead of relying on the
    # platform's default encoding.
    with open(filepath, "r", encoding="utf-8") as json_file:
        return json.load(json_file)

### Create Functions to Enforce Restrictions

In [21]:
def mens_only_matches(match_dict):
    """Tell whether a match was played in a men's competition.

    Returns True when ``match_dict['info']['gender']`` equals ``'male'`` and
    False for any other value (e.g. women's competitions). A missing
    ``'info'`` or ``'gender'`` key raises ``KeyError``.
    """
    match_info = match_dict['info']
    gender = match_info['gender']
    return gender == 'male'

def has_result(match_dict):
    """Tell whether a match has a recorded winner.

    Returns True when ``match_dict['info']['outcome']['winner']`` exists and
    False when any key along that path is missing.
    """
    try:
        # Only the existence of the key path matters; the value is not used.
        match_dict['info']['outcome']['winner']
    except KeyError:
        return False
    else:
        return True

In [31]:
# Split every raw match file into eligible / ineligible by filename.
eligible_matches = []
ineligible_matches = []

internationals_directory = "../../data/raw/international/"
domestics_directory = "../../data/raw/domestic/"

# Running count of JSON files seen, used for the sanity check after the loop.
total_json_files = 0

for directory in [internationals_directory, domestics_directory]:

    # List each directory once and reuse the result for both the loop and
    # the total count.
    json_filenames = [x for x in os.listdir(directory) if x.endswith("json")]
    total_json_files += len(json_filenames)

    for filename in json_filenames:
        match_dict = load_data_from_json(os.path.join(directory, filename))
        # Use a plain boolean test rather than assert / except AssertionError:
        # assert statements are removed when Python runs with -O, which would
        # silently classify every match as eligible.
        # `and` short-circuits, so has_result is only evaluated for men's matches.
        if mens_only_matches(match_dict) and has_result(match_dict):
            eligible_matches.append(filename)
        else:
            ineligible_matches.append(filename)

# Every JSON file must land in exactly one of the two lists.
assert len(eligible_matches) + len(ineligible_matches) == total_json_files, \
    "eligible + ineligible counts do not add up to the number of JSON files"

print("Number of eligible matches:", len(eligible_matches))
print("Number of ineligible matches:", len(ineligible_matches))

Number of eligible matches: 3472
Number of ineligible matches: 904
