In [None]:
import json # To read JSON files
import os # To access files
from collections import Counter # Used to count the most frequently occurring words in a file

In [None]:
# Terms (single words and multi-word phrases) whose occurrences are counted
# in the content of every processed file.
WORDS_TO_COUNT = [
    "cyber",
    "security",
    "cyber security",
    "cybersecurity",
    "ddos",
    "hacker",
    "cybercrime",
    "digital",
    "protection",
    "cloud",
    "surveillance systems",
    "privacy",
    "internet",
    "internet of things",
    "digitalisation",
    "big data",
    "digital technologies",
    "digital age",
    "ict",
    "cyber attack",
    "egovernment",
    "data protection",
    "personal data",
    "information security",
    "financial",
]

# Common filler words that are left out when determining the most frequently
# occurring words in a file.
EXCLUDE_LIST = [
    "and", "the", "of", "to", "for", "in", "a",
    "on", "that", "be", "as", "or", "not",
]

def _load_document(path):
    """Return the JSON object stored at *path*, or None if it is unusable.

    A file is unusable when it cannot be opened or decoded as UTF-8, does not
    contain valid JSON, or its top-level value is not an object holding a
    string under the 'content' key.
    """
    try:
        with open(path, 'r', encoding='utf-8') as handle:
            document = json.load(handle)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return None

    if not isinstance(document, dict):
        return None
    if not isinstance(document.get('content'), str):
        return None
    return document


# Number of files that were analysed and written back successfully.
counter = 0

base_dir = os.path.abspath(os.curdir) + '/files/'

# Built once so that the per-word exclusion test is a constant-time lookup.
excluded_words = set(EXCLUDE_LIST)

print("Starting process")

# Loop through all files in the base_dir
for filename in os.listdir(base_dir):
    path = os.path.join(base_dir, filename)

    # Skip directories
    if os.path.isdir(path):
        continue

    # Files that cannot be parsed, or that lack a textual 'content' field,
    # are reported and skipped instead of aborting the whole run.
    file_data = _load_document(path)
    if file_data is None:
        print("Formatting '{}' went wrong!".format(filename))
        continue

    # The search terms and the exclusion list are lower-case, so the content
    # is lower-cased once here; otherwise capitalised occurrences such as
    # 'Cyber' or 'The' would be missed by the comparisons below.
    content = file_data['content'].lower()

    # Calculate the words that occur the most (split on whitespace only)
    most_occur = Counter(
        word for word in content.split() if word not in excluded_words
    ).most_common(10)

    # Add the new information to the file_data.
    # NOTE: str.count counts substrings, so a term is also counted when it
    # appears inside a longer word (e.g. 'ict' inside 'restrict').
    file_data.update({
        'word_counts': {
            word: content.count(word.lower()) for word in WORDS_TO_COUNT
        },
        'most_common_words': dict(most_occur),
    })

    # Serialise before opening the file for writing, so a serialisation
    # failure cannot leave a truncated file behind.
    serialised = json.dumps(file_data)
    with open(path, 'w', encoding='utf-8') as handle:
        handle.write(serialised)

    counter += 1

print("Done! Processed files: " + str(counter))
