## Data Analysis

In [17]:
#Imports
import pandas as pd
import os
import matplotlib as plot

In [35]:
"""
Reads in a given CSV file, then filters rows based on the given time range. 

Parameters:
csv_file (str): The path to the CSV file.
start_time (int): The start time to filter the data.
end_time (int): The end time to filter the data.
"""
def read_csv(csv, start_time, end_time):
    #Load csv files 
    df = pd.read_csv(csv)

    #Drop unnecessary rows and columns 
    drop_columns = ['time', 'events']
    df.drop(index=0, inplace=True)

    #Convert all columns to numeric (if they aren't already)
    df = df.apply(pd.to_numeric, errors='coerce')

    #Filter rows based on the time range for the scanning period
    scan_df = df[(df['time'] >= start_time) & (df['time'] <= end_time)]
    scan_df = scan_df.drop(columns=drop_columns)

    return scan_df

"""
Takes the modified dataframe of the original CSV file and calculates statistical data. 
Returns a dataframe which composites all of the new data. 

Parameters: 
df (DataFrame (pandas)): The modified dataframe. 
"""
def calc_stat(df):
    #Calculate the mean, median, and standard deviation
    avg_df = df.mean(numeric_only=True)
    med_df = df.median(numeric_only=True)
    sd_df = df.std(numeric_only=True)

    #Combine the results into a DataFrame (renamed to pi_table)
    pi_table = pd.DataFrame({
        'Mean': avg_df,
        'Median': med_df,
        'Standard Deviation': sd_df
    })

    return pi_table

"""
Takes the modified dataframe of the original CSV file and calculates statistical data. 
Returns a dataframe which composites all of the new data. 

Parameters: 
df (DataFrame (pandas)): The modified dataframe. 
"""
def print_to_file(df, output):
    #Format the text output for readability
    output_file = output
    with open(output_file, 'w') as file:
        #Write the header with spacing
        file.write(f"{'Metric':<25}{'Mean':<20}{'Median':<20}{'Standard Deviation':<20}\n")
        file.write("-" * 85 + "\n")  # Add a separator line

        #Write each row of data with formatted spacing
        for metric, row in df.iterrows():
            file.write(f"{metric:<25}{row['Mean']:<20.2e}{row['Median']:<20.2e}{row['Standard Deviation']:<20.2e}\n")

    print(f"Data successfully saved to {output_file}")

In [42]:
# Input CSV to analyse and the text file that will receive the statistics
windows_csv_0 = 'power_profiler_scan/windows/mr_windows_0.csv'
output = 'data_analysis/windows_trial_0.txt'

# Read in the csv, keeping only the rows whose 'time' value lies between 46 and 194 (inclusive)
csv = read_csv(windows_csv_0, 46, 194)

# Calculate the mean, median, and standard deviation per column,
# then print the first rows of the resulting table as a preview
df = calc_stat(csv)
print("Example Output:")
print(df.head())
print("...\n")

# Write the full statistics table to the output file path given above
print_to_file(df, output)

Example Output:
                  Mean     Median  Standard Deviation
wearable  6.402940e+06  6408000.0        1.069324e+05
soc       1.260885e+06  1243000.0        1.648849e+05
cvip      2.110346e+06  2066000.0        4.293415e+05
cpu       1.263579e+06  1199000.0        3.569126e+05
gpu       1.575549e+06   105000.0        2.251262e+06
...

Data successfully saved to data_analysis/windows_trial_0.txt
