In [None]:
#Project 4: Brooke Comstock

import os
import json
import unittest

#Initialize global variables that can be called into function later:
# Running totals across every processed file: calculate_statistics() adds each
# file's counts to these and main() prints them once all files are done.
total_bad_username = total_bad_password = total_bad_email = 0
total_bad_date_created = total_bad_last_login = 0
total_bad_account_balance = total_bad_account_number = 0
total_potential_duplicates = total_bank_accounts = 0

#Access the files needed:
def get_file_paths(directory):
    """Collect the paths of all ``.txt`` files found beneath *directory*.

    The tree is traversed with ``os.walk``, so files inside nested
    sub-directories are included as well.

    Args:
        directory: Root folder to search.

    Returns:
        A list of paths, each formed by joining the containing folder with
        the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(".txt")
    ]

#Make sure the accessed files are usable/not empty:
def check_empty(file_path):
    """Return True when the file at *file_path* is zero bytes long, else False."""
    return os.path.getsize(file_path) == 0

#Decrypting the files using ASCII's:
def decrypt_file(file_path, key):
    """Read *file_path* and undo a shift of *key* on every character.

    Each character's code point is lowered by *key* and wrapped into the
    range 0-127 (modulo 128) before being converted back to a character.

    Args:
        file_path: Path of the encrypted text file.
        key: Integer shift to subtract from every character code.

    Returns:
        The decrypted contents as a single string.
    """
    # newline='' switches off universal-newline translation. A shifted file
    # can contain '\r' as ordinary ciphertext; letting open() rewrite it to
    # '\n' would make that character decrypt to the wrong value.
    with open(file_path, 'r', newline='') as file:
        encrypted_contents = file.read()
    return ''.join(chr((ord(char) - key) % 128) for char in encrypted_contents)

#Check that the decryption worked, and that the data is readable:
def check_decryption(decrypted_string):
    """Report whether *decrypted_string* looks like successfully decrypted data.

    The text is considered readable when it contains the field name
    "Password" or "Username".  Returns True or False.
    """
    return any(marker in decrypted_string for marker in ("Password", "Username"))

#Function to re-encrypt text and write it to a file as shifted ASCII characters:
def reencrypt_file(file_path, key, text):
    """Encrypt *text* with a shift of *key* and write the result to *file_path*.

    Every character's code point is raised by *key* and wrapped into the
    range 0-127 (modulo 128).  This is the inverse of subtracting *key*
    modulo 128, so shifting the written characters back down by the same
    key recovers *text* (for text made of 7-bit ASCII characters).

    Args:
        file_path: Destination file; it is created or overwritten.
        key: Integer shift to add to every character code.
        text: Plain text to encrypt.
    """
    # Encryption must add the key; subtracting it (as decryption does) would
    # produce a file that the same key cannot decrypt.
    encrypted_string = ''.join(chr((ord(char) + key) % 128) for char in text)
    # newline='' keeps '\n' ciphertext characters from being expanded to
    # '\r\n' on platforms that translate line endings on write.
    with open(file_path, 'w', newline='') as file:
        file.write(encrypted_string)

#Parse the data so we can properly create statistics about each category:
def parse_decrypted_data(decrypted_string):
    """Deserialize the decrypted JSON text and return the resulting Python object.

    Any exception raised by ``json.loads`` for malformed input propagates to
    the caller.
    """
    return json.loads(decrypted_string)

#Make sure the parsing was successful:
def validate_data_format(parsed_data):
    """Return True only if *parsed_data* is a list whose items are all dicts.

    An empty list passes; anything that is not a list is rejected.
    """
    if not isinstance(parsed_data, list):
        return False
    for entry in parsed_data:
        if not isinstance(entry, dict):
            return False
    return True

#Identify and store duplicate accounts in a list to create statistic later:
def _shares_identifier(account, other):
    """Return True if both accounts hold the same email or account number."""
    for field in ("Email", "Account Number"):
        value = account.get(field)
        # A missing (or null) field is not evidence of a duplicate: without
        # this guard two accounts that merely both lack an email would
        # compare None == None and be reported as duplicates.
        if value is not None and value == other.get(field):
            return True
    return False


def find_duplicate_accounts(accounts):
    """Return the accounts that repeat an earlier account's email or account number.

    Accounts are examined in order.  Each one is compared against the
    accounts already accepted as unique; if it shares a non-missing "Email"
    or "Account Number" value with any of them it is reported as a
    duplicate, otherwise it joins the unique set.  Duplicates themselves are
    never used as a comparison baseline.

    Args:
        accounts: Iterable of account dicts.

    Returns:
        A list of the duplicate account dicts, in input order.
    """
    unique_accounts = []
    duplicate_accounts = []
    for account in accounts:
        if any(_shares_identifier(account, kept) for kept in unique_accounts):
            duplicate_accounts.append(account)
        else:
            unique_accounts.append(account)
    return duplicate_accounts

#Ensure that passwords are valid: at least 8 characters long, with at least 1 uppercase letter, 1 lowercase letter, 1 digit and 1 special character:
def check_password(account):
    """Return True if the account's password meets every strength rule.

    A valid password is a string of at least 8 characters containing at
    least one uppercase letter, one lowercase letter, one digit and one of
    the special characters ``@#$%^&*()_-+=<>,.?/:;{}[]|``.

    Args:
        account: Account dict; must contain the key "Password".

    Returns:
        True for a valid password, False otherwise (including when the
        stored value is not a string, e.g. a JSON null or number).

    Raises:
        KeyError: If the account has no "Password" key.
    """
    password = account["Password"]
    # Non-string values cannot be measured or scanned character by
    # character; count them as invalid instead of raising TypeError.
    if not isinstance(password, str):
        return False
    special_characters = r'@#$%^&*()_-+=<>,.?/:;{}[]|'
    return (
        len(password) >= 8
        and any(char.isupper() for char in password)
        and any(char.islower() for char in password)
        and any(char.isdigit() for char in password)
        and any(char in special_characters for char in password)
    )

#Check that the account number holds a valid value as well.
def check_account_number(account):
    """Return True if the account number is a non-empty string of digits.

    Args:
        account: Account dict; must contain the key "Account Number".

    Returns:
        True when the stored value is a string for which ``str.isdigit()``
        holds, False otherwise.  Values that are not strings (e.g. a JSON
        null or number) are counted as invalid rather than raising
        AttributeError.

    Raises:
        KeyError: If the account has no "Account Number" key.
    """
    account_number = account["Account Number"]
    return isinstance(account_number, str) and account_number.isdigit()

#Identify if usernames are valid/present in the data:
def check_username(account):
    """Return True unless the username consists solely of digits.

    Args:
        account: Account dict; must contain the key "Username".

    Returns:
        False when the stored value is an all-digit string or is not a
        string at all (e.g. a JSON null or number); True for every other
        string.  Note that an empty string is accepted, because
        ``"".isdigit()`` is False.

    Raises:
        KeyError: If the account has no "Username" key.
    """
    username = account["Username"]
    # Non-string values used to raise AttributeError on .isdigit(); treat
    # them as invalid so one malformed record cannot abort the analysis.
    if not isinstance(username, str):
        return False
    return not username.isdigit()

#Check that account balance is valid by making sure it is a float value:
def check_account_balance(account):
    """Return True if the account balance can be converted to a float.

    Args:
        account: Account dict; must contain the key "Account Balance".

    Returns:
        True when ``float()`` accepts the stored value, False otherwise.

    Raises:
        KeyError: If the account has no "Account Balance" key.
    """
    try:
        float(account["Account Balance"])
    except (TypeError, ValueError):
        # ValueError: a string that is not numeric.  TypeError: a value
        # float() cannot take at all, such as None or a list.
        return False
    return True

#Check that there is a valid email present using the "@" character that should be present:
def check_email(account):
    """Return True if the account's email is a string containing "@".

    Args:
        account: Account dict; must contain the key "Email".

    Returns:
        True when the stored value is a string with an "@" in it, False
        otherwise.  Values that are not strings (e.g. a JSON null) are
        counted as invalid rather than raising TypeError.

    Raises:
        KeyError: If the account has no "Email" key.
    """
    email = account["Email"]
    return isinstance(email, str) and "@" in email

#Statistics generating function:
def calculate_statistics(parsed_data):
    """Count data-quality problems in one file's accounts, print and accumulate them.

    For every account the function checks each expected field.  A field
    counts as bad when its key is absent or, for fields that have a
    validator (username, password, email, account number, account balance),
    when that validator rejects the value.  "Date Created" and "Last Login"
    are only checked for presence.

    The per-file counts are printed and then added to the module-level
    ``total_*`` variables.  The number of potential duplicates is added to
    ``total_potential_duplicates`` but is not printed here.

    Args:
        parsed_data: List of account dicts belonging to a single file.
    """
    # The module-level totals are rebound with += below, so they must be
    # declared global.
    global total_bad_username
    global total_bad_password
    global total_bad_email
    global total_bad_date_created
    global total_bad_last_login
    global total_bad_account_balance
    global total_bad_account_number
    global total_potential_duplicates
    global total_bank_accounts

    # Per-file counters
    bad_username = 0
    bad_password = 0
    bad_email = 0
    bad_date_created = 0
    bad_last_login = 0
    bad_account_balance = 0
    bad_account_number = 0
    potential_duplicates = len(find_duplicate_accounts(parsed_data))
    bank_accounts = len(parsed_data)

    for account in parsed_data:
        # The key test comes first so a validator is never called on an
        # account that lacks the field it indexes.
        if "Username" not in account or not check_username(account):
            bad_username += 1
        if "Password" not in account or not check_password(account):
            bad_password += 1
        if "Email" not in account or not check_email(account):
            bad_email += 1
        if "Date Created" not in account:
            bad_date_created += 1
        if "Account Number" not in account or not check_account_number(account):
            bad_account_number += 1
        if "Account Balance" not in account or not check_account_balance(account):
            bad_account_balance += 1
        if "Last Login" not in account:
            bad_last_login += 1

    # Fold this file's counts into the running totals.
    total_bad_username += bad_username
    # Previously bad_username was added here, corrupting the password total.
    total_bad_password += bad_password
    total_bad_email += bad_email
    total_bad_date_created += bad_date_created
    total_bad_last_login += bad_last_login
    total_bad_account_balance += bad_account_balance
    total_bad_account_number += bad_account_number
    total_potential_duplicates += potential_duplicates
    total_bank_accounts += bank_accounts

    #Print the results for the individual file (not global results yet):
    print(f"Bank accounts: {bank_accounts}")
    print(f"Missing/illigitimate usernames: {bad_username}")
    print(f"Missing/weak passwords: {bad_password}")
    print(f"Missing/bad emails: {bad_email}")
    print(f"Missing creation date: {bad_date_created}")
    print(f"No last login: {bad_last_login}")
    print(f"Missing/bad account balances: {bad_account_balance}")
    print(f"Missing/bad account numbers: {bad_account_number}\n")

#Main function that will call the above functions for their appropriate tasks:
def main(data_directory=r'C:\Users\Brook\OneDrive\Desktop\Fall2023\CSC101\Project 4\Project 4 Data'):
    """Decrypt, analyse and re-encrypt every account file under *data_directory*.

    For each non-empty ``.txt`` file the function tries every shift key from
    0 to 127 until the decrypted text both looks readable and parses as a
    list of account dicts.  It then prints that file's statistics and writes
    the accounts, re-encrypted with key 47, to ``<n>reencrypted.txt`` in
    *data_directory* (n counts up from 1).  Finally it prints the totals
    accumulated over all files.

    Args:
        data_directory: Folder that holds the encrypted files and receives
            the re-encrypted output.  Defaults to the original project
            location.
    """
    reencrypted_count = 0

    for file_path in get_file_paths(data_directory):
        # Output of an earlier run lives in the same folder and also ends
        # in .txt; skip it so accounts are not analysed and counted twice.
        if file_path.endswith("reencrypted.txt"):
            continue
        # Empty files hold no accounts, so there is nothing to decrypt.
        if check_empty(file_path):
            continue

        for key in range(128):
            decrypted_content = decrypt_file(file_path, key)
            if not check_decryption(decrypted_content):
                continue
            try:
                parsed_data = parse_decrypted_data(decrypted_content)
            except ValueError:
                # json.JSONDecodeError is a ValueError: the text contained a
                # marker word but is not valid JSON, so try the next key.
                continue
            if not validate_data_format(parsed_data):
                continue

            directory, file = os.path.split(file_path)
            bank = os.path.basename(directory)
            print(f"The accounts in file {file} in the {bank} have the following characteristics:\n")
            calculate_statistics(parsed_data)

            # Serialise as JSON (str() would emit a Python repr that cannot
            # be parsed again after decryption).
            text = json.dumps(parsed_data)
            # A running counter replaces the fixed list of 22 numbers, which
            # raised IndexError once more than 22 files were processed.
            reencrypted_count += 1
            new_file_path = os.path.join(data_directory, f"{reencrypted_count}reencrypted.txt")
            reencrypt_file(new_file_path, 47, text)
            # The key has been found; the remaining keys need not be tried.
            break

    #Report the totals that calculate_statistics accumulated in the module-level variables:
    print("In all files:\n")
    print(f"Bank accounts: {total_bank_accounts}")
    print(f"Missing/bad usernames: {total_bad_username}")
    print(f"Missing/illegitimate passwords: {total_bad_password}")
    print(f"Missing/invalid account numbers: {total_bad_account_number}")
    print(f"Missing/bad account balances: {total_bad_account_balance}")
    print(f"Missing/bad emails: {total_bad_email}")
    print(f"Missing/bad dates of creation: {total_bad_date_created}")
    print(f"Missing last logins: {total_bad_last_login}")



#Run the main function
if __name__ == "__main__":
    # Analyse the data files only when this file is executed as a script,
    # not when it is imported as a module.
    main()

#Test each return function using the unittest package:
class TestFunctions(unittest.TestCase):
    """Unit tests for the helper functions defined in this file.

    Every method name starts with ``test_`` because unittest only discovers
    and runs methods carrying that prefix.
    """

    # Deliberately flawed account shared by the validation tests: the
    # password has no digit, the email has no "@" and the account number is
    # not numeric, while the username and balance are acceptable.
    FAKE_ACCOUNT = {
        "Username": "Brooke Comstock",
        "Password": "+!aOfX~Z|",
        "Email": "CSC101",
        "Date Created": "2023-03-06 08:10:30",
        "Last Login": "2021-10-12 08:50:01",
        "Account Balance": "82569.67",
        "Account Number": "N/A",
    }

    #Test the file path function and make sure a specific file can be located:
    def test_get_file_paths(self):
        # A temporary tree keeps the test independent of any one machine's
        # folder layout.
        import tempfile
        with tempfile.TemporaryDirectory() as directory:
            bank_folder = os.path.join(directory, "East Bank")
            os.mkdir(bank_folder)
            wanted = os.path.join(bank_folder, "333_encrypted.txt")
            ignored = os.path.join(bank_folder, "notes.csv")
            for path in (wanted, ignored):
                with open(path, "w") as file:
                    file.write("data")
            filepaths = get_file_paths(directory)
            self.assertIn(wanted, filepaths)
            self.assertNotIn(ignored, filepaths)

    #Make sure empty file detection is functional using an empty file:
    def test_check_empty(self):
        import tempfile
        with tempfile.TemporaryDirectory() as directory:
            empty_file = os.path.join(directory, "empty.txt")
            with open(empty_file, "w"):
                pass
            full_file = os.path.join(directory, "full.txt")
            with open(full_file, "w") as file:
                file.write("data")
            self.assertTrue(check_empty(empty_file))
            self.assertFalse(check_empty(full_file))

    #Make sure data parsing is functional:
    def test_parse_decrypted_data(self):
        decrypted_data = '{"Username": "user1", "Password": "pass1"}'
        parsed_data = parse_decrypted_data(decrypted_data)
        self.assertEqual(parsed_data, {"Username": "user1", "Password": "pass1"})

    #Test duplicate account detection using two identical fake accounts:
    def test_find_duplicate_accounts(self):
        fakeaccountlist = [dict(self.FAKE_ACCOUNT), dict(self.FAKE_ACCOUNT)]
        self.assertEqual(len(find_duplicate_accounts(fakeaccountlist)), 1)

    #Test the username detection function using a fake account:
    def test_check_username(self):
        self.assertTrue(check_username(self.FAKE_ACCOUNT))

    #Test the check email function using a fake account:
    def test_check_email(self):
        self.assertFalse(check_email(self.FAKE_ACCOUNT))

    #Check the password detecting function using a fake account:
    def test_check_password(self):
        self.assertFalse(check_password(self.FAKE_ACCOUNT))

    #Check for a valid account number:
    def test_check_account_number(self):
        self.assertFalse(check_account_number(self.FAKE_ACCOUNT))

    #Check for a valid account balance:
    def test_check_account_balance(self):
        self.assertTrue(check_account_balance(self.FAKE_ACCOUNT))


#Run the testing
if __name__ == '__main__':
    # Run the TestFunctions test case defined above when this file is
    # executed as a script.
    unittest.main()

