# Implementing Data Restrictions

In [1]:
import json
import os
import shutil

### Create a Function to Read JSON Files

In [2]:
def load_data_from_json(filepath):
    """Read the JSON file at `filepath` and return its decoded content.

    Parameters
    ----------
    filepath : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    The Python object produced by decoding the file (presumably a dict for
    the match files used in this notebook -- confirm against the data).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # The context manager closes the handle even if decoding fails; the
    # original left the file open until garbage collection.
    # UTF-8 is the standard JSON encoding, so do not depend on the locale default.
    with open(filepath, "r", encoding="utf-8") as json_file:
        return json.load(json_file)

### Create Functions to Enforce Restrictions

In [3]:
def mens_only_matches(match_dict):
    """Tell whether a match was played in a men's competition.

    Reads match_dict['info']['gender'] and returns True when it equals
    'male', False for any other value. A missing 'info' or 'gender' key
    raises KeyError.
    """
    match_gender = match_dict['info']['gender']
    return match_gender == 'male'

def has_result(match_dict):
    """Tell whether a match has a recorded winner.

    Returns True when match_dict['info']['outcome']['winner'] can be looked
    up, and False when any key along that path is missing (KeyError).
    """
    try:
        # Only the lookup matters; the value itself is not needed, so it is
        # not bound to a local name.
        match_dict['info']['outcome']['winner']
    except KeyError:
        return False
    return True

### Enforce Eligibility Restrictions

In [4]:
internationals_directory = "../../data/raw/international/"
domestics_directory = "../../data/raw/domestic/"


def _list_match_files(directory):
    """Return the entry names in `directory` that end with "json", sorted for a stable order."""
    return sorted(name for name in os.listdir(directory) if name.endswith("json"))


def _is_eligible(match_dict):
    """Return True when the match passes both restrictions: men's match and has a winner."""
    # Explicit boolean test instead of assert/except AssertionError: assert
    # statements are removed when Python runs with -O, which would silently
    # classify every match as eligible.
    return mens_only_matches(match_dict) and has_result(match_dict)


def _partition_matches(directory):
    """Split the JSON files in `directory` into (eligible, ineligible) lists of file names."""
    eligible, ineligible = [], []
    for filename in _list_match_files(directory):
        match_dict = load_data_from_json(os.path.join(directory, filename))
        if _is_eligible(match_dict):
            eligible.append(filename)
        else:
            ineligible.append(filename)
    return eligible, ineligible


eligible_internationals, ineligible_internationals = _partition_matches(internationals_directory)
eligible_domestics, ineligible_domestics = _partition_matches(domestics_directory)

# Sanity check: every JSON file ended up in exactly one of the two lists.
assert len(eligible_domestics) + len(ineligible_domestics) == len(_list_match_files(domestics_directory))
assert len(eligible_internationals) + len(ineligible_internationals) == len(_list_match_files(internationals_directory))

print("Number of eligible matches:", len(eligible_domestics) + len(eligible_internationals))
print("Number of ineligible matches:", len(ineligible_domestics) + len(ineligible_internationals))

Number of eligible matches: 3472
Number of ineligible matches: 904


### Copy Eligible Match Files to New Directory

In [7]:
# Destination directories mirror the raw ones, with "raw" swapped for "step_02".
new_int_dir = internationals_directory.replace("raw", "step_02")
new_dom_dir = domestics_directory.replace("raw", "step_02")

# Create both destinations up front; existing directories are left untouched.
for target_dir in (new_int_dir, new_dom_dir):
    os.makedirs(target_dir, exist_ok=True)

# (source directory, destination directory, file names to copy)
copy_plan = (
    (internationals_directory, new_int_dir, eligible_internationals),
    (domestics_directory, new_dom_dir, eligible_domestics),
)

for source_dir, target_dir, match_files in copy_plan:
    for match_file in match_files:
        shutil.copyfile(source_dir + match_file, target_dir + match_file)