<a href="https://colab.research.google.com/github/andreac941/Pruebas_Software_Aseguramiento_Calidad_A01034993/blob/main/computeStatistics_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import time
import re
import sys

def contains_valid_data(line):
    """Return True when every whitespace-separated token of *line* is a float.

    Each token is tested with ``float()``; the first token that raises
    ``ValueError`` makes the whole line invalid. A line with no tokens
    (empty or whitespace only) is reported as valid because there is
    nothing in it to reject.
    """
    def _parses_as_float(token):
        # float() is the single source of truth for what counts as numeric.
        try:
            float(token)
        except ValueError:
            return False
        return True

    return all(_parses_as_float(token) for token in line.split())

def extract_numbers_from_string(s):
    """Return every numeric substring of *s* as a list of strings.

    Recognised forms are an optional leading minus sign, followed by either
    digits with an optional fractional part (``12``, ``12.``, ``12.5``) or a
    leading-dot decimal (``.5``), followed by an optional exponent
    (``e5``, ``E-3``).

    Leading-dot decimals and exponents are matched as a whole so that tokens
    accepted by ``float()`` such as ``1e5`` or ``.5`` are not split into
    unrelated numbers (``1`` and ``5``) or truncated (``5``).
    """
    return re.findall(r'-?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?', s)

def convert_to_float(num_str, line_num):
    """Convert *num_str* to a float, reporting failures instead of raising.

    Args:
        num_str: Text to convert.
        line_num: Line number used only in the error message.

    Returns:
        The parsed float, or None when ``float()`` raises ``ValueError``
        (in which case an error message is printed).
    """
    result = None
    try:
        result = float(num_str)
    except ValueError as e:
        print(f"Error in line {line_num}: {e}. Skipping this part of the line: {num_str}")
    return result

def read_file(file_content):
    """Parse the text of a file into a flat list of floats.

    The text is split on ``'\\n'`` and lines are numbered from 1. A line that
    fails ``contains_valid_data`` is reported and skipped entirely; otherwise
    every number found by ``extract_numbers_from_string`` is converted with
    ``convert_to_float`` and kept unless the conversion returned None.

    Returns:
        The list of parsed floats. It is an empty list (never None) when
        nothing could be parsed, in which case a notice is printed.
    """
    numbers = []
    for line_num, line in enumerate(file_content.split('\n'), start=1):
        # Guard clause: reject the whole line as soon as it is not purely numeric.
        if not contains_valid_data(line):
            print(f"Error in line {line_num}: Contains non-numeric or invalid data: {line}. Skipping this line")
            continue

        converted = (
            convert_to_float(num_str, line_num)
            for num_str in extract_numbers_from_string(line)
        )
        numbers.extend(value for value in converted if value is not None)

    if len(numbers) == 0:
        print("No valid numeric data found in the file.")

    return numbers

def calculate_mode(data):
    """Return the most frequent value in *data*, or None when *data* is empty.

    When several values share the highest count, the one that was seen first
    in *data* is returned.
    """
    frequency_dict = {}
    for num in data:
        frequency_dict[num] = frequency_dict.get(num, 0) + 1

    # max() raises ValueError on an empty collection, so handle "no data"
    # explicitly instead of relying on a fallback that could never be reached.
    if not frequency_dict:
        return None

    # max() returns the first maximal key in iteration (insertion) order,
    # which keeps the first-seen value on ties.
    return max(frequency_dict, key=frequency_dict.get)

def calculate_statistics(data):
    """Compute descriptive statistics for a sequence of numbers.

    Args:
        data: Sequence of numbers, or None.

    Returns:
        None when *data* is None or empty; otherwise the tuple
        ``(mean, median, mode, variance, std_deviation, elapsed_time)`` where
        ``elapsed_time`` is the number of seconds spent computing the values.

    Notes:
        The variance divides by ``n - 1`` while the standard deviation
        divides by ``n``, exactly as before.
        NOTE(review): confirm that mixing the sample variance with the
        population standard deviation is intentional.
        For a single value the ``n - 1`` variance is undefined; 0.0 is
        returned instead of raising ZeroDivisionError.
    """
    # An empty list used to slip past the None check and divide by zero.
    if data is None or len(data) == 0:
        return None

    # perf_counter is a monotonic clock intended for measuring intervals.
    start_time = time.perf_counter()

    count = len(data)
    ordered = sorted(data)  # sort once and reuse for the median
    middle = count // 2

    mean_value = sum(data) / count
    if count % 2 != 0:
        median_value = ordered[middle]
    else:
        median_value = (ordered[middle - 1] + ordered[middle]) / 2
    mode_value = calculate_mode(data)

    # The sum of squared deviations is shared by variance and std deviation.
    squared_deviations = sum((x - mean_value) ** 2 for x in data)
    variance_value = squared_deviations / (count - 1) if count > 1 else 0.0
    std_deviation_value = (squared_deviations / count) ** 0.5

    elapsed_time = time.perf_counter() - start_time

    return mean_value, median_value, mode_value, variance_value, std_deviation_value, elapsed_time

def print_results(mean, median, mode, variance, std_deviation, elapsed_time):
    """Print each statistic on its own labelled line to standard output."""
    report = "\n".join([
        f"Mean: {mean}",
        f"Median: {median}",
        f"Mode: {mode}",
        f"Variance: {variance}",
        f"Standard Deviation: {std_deviation}",
        f"Time Elapsed: {elapsed_time} seconds",
    ])
    # A single print emits the same six newline-terminated lines.
    print(report)

def write_results_to_file(file_path, mean, median, mode, variance, std_deviation, elapsed_time):
    """Write the statistics to *file_path*, one labelled line per value.

    The file is opened in ``'w'`` mode, so any existing content is replaced.
    """
    with open(file_path, 'w') as result_file:
        report_lines = (
            f"Mean: {mean}\n",
            f"Median: {median}\n",
            f"Mode: {mode}\n",
            f"Variance: {variance}\n",
            f"Standard Deviation: {std_deviation}\n",
            f"Time Elapsed: {elapsed_time} seconds\n",
        )
        result_file.writelines(report_lines)

# Script entry point: read the file named on the command line, compute its
# statistics, print them and save them to StatisticsResults.txt.
if __name__ == "__main__":
    # Exactly one argument (the input file path) is expected.
    if len(sys.argv) != 2:
        print("Usage: python computeStatistics.py TC1.txt")
        sys.exit(1)

    input_file_path = sys.argv[1]

    # Report an unreadable input file as a message instead of a traceback.
    try:
        with open(input_file_path, "r") as file:
            file_content = file.read()
    except (OSError, UnicodeDecodeError) as error:
        print(f"Error: Could not read {input_file_path}: {error}")
        sys.exit(1)

    data = read_file(file_content)

    # read_file returns an empty list (not None) when nothing was parsed, so
    # test for emptiness; calculate_statistics may also report "no result"
    # by returning None.
    results = calculate_statistics(data) if data else None

    if results is None:
        print("Error: No valid numeric data found in the file.")
        sys.exit(1)

    mean, median, mode, variance, std_deviation, elapsed_time = results

    print_results(mean, median, mode, variance, std_deviation, elapsed_time)

    # Write results to a file
    output_file_path = "StatisticsResults.txt"
    write_results_to_file(output_file_path, mean, median, mode, variance, std_deviation, elapsed_time)

    print(f"Results written to {output_file_path}")
