# Assignment 1.
You have a database from a study that needs some cleaning up. Your task is to write a Python script that will:

* load the database from a file `database.json`
* verify the age of participants. The inclusion criteria to the study assume that participants are between 18 and 70 years of age. If the user is too young or too old, he/she should be removed from the database
* verify completeness of data. Every participant should have `scores` from three measurements. In case there is not enough data the participant should be removed from the database 
* verify the code of a participant. The code should follow the pattern: 2 uppercase letters - 8 lowercase alphanumeric characters, e.g. AB-ab012k3y (use RegEx!). If the code does not match the pattern, a new random code should be generated for this participant
* save the verified and cleaned database to a new JSON file
* create a dictionary of grouped participants based on their favourite color; use `groupby` from `itertools`; the values of the dictionary should include only participants' codes. Do not save it to a file. Just display its content.



In [229]:
#Setting up the environment

import json
import datetime
import re
import exrex
from itertools import groupby

# Location of the raw study database (JSON) that the loading cell opens
path = "database.json"
# Today's date, captured once when this cell runs; age_verification uses it as the reference point for ages
date = datetime.date.today()

## Loading the data

In [230]:
# Read and parse the raw database. The context manager closes the file handle
# even if parsing fails; JSON is read explicitly as UTF-8 rather than the platform default.
with open(path, "r", encoding="utf-8") as file:
    data = json.load(file)

data

[{'code': 'BP-2t1e9j5b',
  'gender': 'Polygender',
  'date_birth': '1989-02-01',
  'profession': 'Environmental Tech',
  'fav_color': 'Red',
  'scores': [1.54, 3.49, 4.9]},
 {'code': 'GJ-9y9h9w8l',
  'gender': 'Female',
  'date_birth': '1959-05-02',
  'profession': 'Quality Control Specialist',
  'fav_color': 'Blue',
  'scores': [3.21, 0.28, 0.92]},
 {'code': 'HF-4y5k6a8a',
  'gender': 'Male',
  'date_birth': '1961-01-22',
  'profession': 'Quality Control Specialist',
  'fav_color': 'Red',
  'scores': [3.96, 0.67, 2.11]},
 {'code': 'AP-1d6u3j6b',
  'gender': 'Male',
  'date_birth': '1996-09-27',
  'profession': 'Environmental Tech',
  'fav_color': 'White',
  'scores': [3.41, 4.05]},
 {'code': 'WU-1e3d7w7j',
  'gender': 'Female',
  'date_birth': '2002-10-28',
  'profession': 'Marketing Manager',
  'fav_color': 'Blue',
  'scores': [3.02, 4.54, 4.77]},
 {'code': 'DM-6g3w0e4z',
  'gender': 'Male',
  'date_birth': '1974-05-11',
  'profession': 'Staff Scientist',
  'fav_color': 'Blue',
  'sc

## Verifying the age

In [231]:
def age_verification(data, today=None):

    """
    Verify that every participant is at least 18 and at most 70 years old. Entries outside that range are
    reported on stdout and removed from the database.
    :param data: database (list of dictionaries); every entry is read for 'code' and 'date_birth' ("YYYY-MM-DD")
    :param today: reference date (datetime.date) for the age calculation; when omitted, the module-level `date` is used
    :return: None, modifies existing database in place
    """

    reference = date if today is None else today

    kept = []

    for elt in data:

        born = datetime.datetime.strptime(elt['date_birth'], "%Y-%m-%d").date()

        # Completed calendar years: subtract one while this year's birthday is still ahead.
        # Dividing a day count by 365 ignores leap days and rounds people up shortly before a birthday.
        birthday_pending = (reference.month, reference.day) < (born.month, born.day)

        age_years = reference.year - born.year - birthday_pending

        if age_years < 18 or age_years > 70:

            print(f" User with the code: {elt['code']} has been removed from the database due to not fullfilling age restricions (participant's age: {age_years})")

            continue

        kept.append(elt)

    # Slice assignment keeps the caller's list object. Deleting entries during iteration
    # made the loop skip the participant that followed each removed one.
    data[:] = kept

age_verification(data)

 User with the code: HI-5o9v8s6x has been removed from the database due to not fullfilling age restricions (participant's age: 72)
 User with the code: KN-5x6g9p3v has been removed from the database due to not fullfilling age restricions (participant's age: 16)
 User with the code: RI-5w8c1m9u has been removed from the database due to not fullfilling age restricions (participant's age: 72)


## Verifying the scores

In [232]:
def score_verification(data):

    """
    Verify that every participant has exactly 3 scores. Entries with any other number of scores (including a
    missing or empty 'scores' field) are reported on stdout and removed from the database.
    :param data: database (list of dictionaries); every entry is read for 'code' and 'scores'
    :return: None, modifies existing database in place
    """

    kept = []

    for elt in data:

        # A missing or null 'scores' field counts as zero measurements instead of raising
        scores = elt.get('scores') or []

        if len(scores) != 3:

            print(f" User with the code: {elt['code']} has been removed from the database due to inncorrect number of scores (number of participant's scores: {len(scores)})")

            continue

        kept.append(elt)

    # Slice assignment keeps the caller's list object. Deleting entries during iteration
    # made the loop skip the participant that followed each removed one.
    data[:] = kept

score_verification(data)

 User with the code: AP-1d6u3j6b has been removed from the database due to inncorrect number of scores (number of participant's scores: 2)
 User with the code: ZM-4y2x2u7k has been removed from the database due to inncorrect number of scores (number of participant's scores: 2)


## Verifying the code

In [233]:
def code_verification(data):

    """
    Verify that every participant is assigned an appropriate code: 2 upper letters - 8 alphanumeric characters (lowercase).
    If not (including a missing or non-string code), a new code that is not yet present in the database is generated
    with exrex and assigned to the participant; the replacement is reported on stdout.
    :param data: database (list of dictionaries)
    :return: None, modifies existing database in place
    """

    # One pattern serves both validation and generation, so the two cannot drift apart
    code_pattern = re.compile(r'[A-Z]{2}-[a-z0-9]{8}')

    for elt in data:

        code = elt.get('code')

        if isinstance(code, str) and code_pattern.fullmatch(code):

            continue

        new_code = exrex.getone(code_pattern.pattern)

        # Regenerate until the code does not collide with any code already in the database
        while not code_not_used(data, new_code):

            new_code = exrex.getone(code_pattern.pattern)

        print(f" User with the code: {code} has incorrect code. New code has been assigned to the participant: {new_code}")

        # Update the record in hand directly; looking it up again via data.index() is an
        # extra equality-based scan of the whole list
        elt['code'] = new_code


def code_not_used(data, code):

    """
    Tell whether a candidate code is still free, i.e. no entry of the database carries it
    :param data: database (list of dictionaries, each with a 'code' entry)
    :param code: candidate code to look for
    :return: False as soon as one participant with this code is found, True when there is none
    """

    return not any(participant['code'] == code for participant in data)

code_verification(data)

 User with the code: za2u3m2f8g has incorrect code. New code has been assigned to the participant: JU-exs9ol70
 User with the code:  has incorrect code. New code has been assigned to the participant: DV-u1b9bnr3
 User with the code: CB-22 has incorrect code. New code has been assigned to the participant: WD-udon3qos
 User with the code: 99245 has incorrect code. New code has been assigned to the participant: WB-wfeedqzr


## Saving cleaned database

In [234]:
def save(file_name, data):

    """
    Write the database as JSON (2-space indentation) to "<file_name>.json"
    :param file_name: target file name, given without the ".json" extension
    :param data: database (list of dictionaries)
    :return: None; the target file is created or overwritten
    """

    target = file_name + ".json"

    with open(target, "w") as handle:
        json.dump(data, handle, indent=2)

save("database_verified", data)

data

[{'code': 'BP-2t1e9j5b',
  'gender': 'Polygender',
  'date_birth': '1989-02-01',
  'profession': 'Environmental Tech',
  'fav_color': 'Red',
  'scores': [1.54, 3.49, 4.9]},
 {'code': 'GJ-9y9h9w8l',
  'gender': 'Female',
  'date_birth': '1959-05-02',
  'profession': 'Quality Control Specialist',
  'fav_color': 'Blue',
  'scores': [3.21, 0.28, 0.92]},
 {'code': 'HF-4y5k6a8a',
  'gender': 'Male',
  'date_birth': '1961-01-22',
  'profession': 'Quality Control Specialist',
  'fav_color': 'Red',
  'scores': [3.96, 0.67, 2.11]},
 {'code': 'WU-1e3d7w7j',
  'gender': 'Female',
  'date_birth': '2002-10-28',
  'profession': 'Marketing Manager',
  'fav_color': 'Blue',
  'scores': [3.02, 4.54, 4.77]},
 {'code': 'DM-6g3w0e4z',
  'gender': 'Male',
  'date_birth': '1974-05-11',
  'profession': 'Staff Scientist',
  'fav_color': 'Blue',
  'scores': [3.31, 1.32, 2.72]},
 {'code': 'DT-9i2x6z1p',
  'gender': 'Female',
  'date_birth': '1985-12-25',
  'profession': 'Quality Engineer',
  'fav_color': 'White',

## Grouped by favourite colour

In [235]:
def group_by_colour(data):

    """
    Collect participants' codes per favourite colour
    :param data: our database (list of dictionaries with 'fav_color' and 'code' entries)
    :return: dictionary mapping each favourite colour to the list of codes of the participants who chose it
    """

    def colour_of(participant):
        return participant['fav_color']

    # groupby only merges neighbouring items, so the entries are sorted by the same key first
    ordered = sorted(data, key=colour_of)

    return {
        colour: [member['code'] for member in members]
        for colour, members in groupby(ordered, key=colour_of)
    }

group_by_colour(data)

{'Blue': ['GJ-9y9h9w8l',
  'WU-1e3d7w7j',
  'DM-6g3w0e4z',
  'NF-3w7c5n3x',
  'SA-8a6q0h7e',
  'AF-1p3e1y0p',
  'GT-3p1k9u3x',
  'HD-8t6b6n1w',
  'WD-udon3qos',
  'ZS-8i1u3e4w',
  'WB-wfeedqzr'],
 'Red': ['BP-2t1e9j5b',
  'HF-4y5k6a8a',
  'GE-1l4f5j7h',
  'DV-u1b9bnr3',
  'HS-3q1i9o1g',
  'OY-7t4q0g0v',
  'MO-2z6z8w1e'],
 'White': ['DT-9i2x6z1p',
  'JU-exs9ol70',
  'BU-4x8n2b1t',
  'TR-9z3v5h8a',
  'WY-1u0g7e3t',
  'LB-7u8k3w1k',
  'LW-8k3p8a9n']}