# In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Exploratory / univariate analysis of PerformanceTable.csv.
#
# For every column not listed in `non_numeric`, the script coerces the column
# to numeric, filters out rows outside the 1.5 * IQR fences of that column,
# appends summary statistics to analysis_output.txt and saves a histogram and
# a boxplot as PNG files in the current working directory.

# Load the data into a DataFrame
df = pd.read_csv('PerformanceTable.csv')

# Define the list of variables to analyze
variables = df.columns.tolist()

# Columns excluded from the numeric analysis; everything else is analyzed
non_numeric = ['cBlock', 'cCycle', 'cPlot', 'cEntry', 'cTreatment', 'cRep']
numeric_variables = [var for var in variables if var not in non_numeric]

# The context manager guarantees the report is flushed and closed even when a
# statistic or a plotting call raises part-way through the loop. An explicit
# encoding keeps the report independent of the platform default.
with open('analysis_output.txt', 'w', encoding='utf-8') as output_file:
    # Loop through each numeric variable and perform exploratory and
    # univariate analysis
    for var in numeric_variables:
        # Convert the column to numeric data type (if not already numeric);
        # values that cannot be parsed become NaN
        df[var] = pd.to_numeric(df[var], errors='coerce')

        # Calculate the IQR and define the outlier fences
        Q1 = df[var].quantile(0.25)
        Q3 = df[var].quantile(0.75)
        IQR = Q3 - Q1
        threshold_low = Q1 - 1.5 * IQR
        threshold_high = Q3 + 1.5 * IQR

        # Keep only the rows inside the fences. Comparisons against NaN are
        # False, so rows with a missing value in this column are dropped too.
        within_fences = (df[var] >= threshold_low) & (df[var] <= threshold_high)
        df_no_outliers = df[within_fences]
        values = df_no_outliers[var]

        # Write the exploratory summary for this variable
        report_lines = [
            f"Exploratory Analysis for {var}:\n",
            "-------------------------------\n",
            f"Number of observations: {df_no_outliers.shape[0]}\n",
            f"Mean: {values.mean()}\n",
            f"Standard Deviation: {values.std()}\n",
            f"Minimum: {values.min()}\n",
            f"25th Percentile: {values.quantile(0.25)}\n",
            f"Median: {values.median()}\n",
            f"75th Percentile: {values.quantile(0.75)}\n",
            f"Maximum: {values.max()}\n\n",
        ]
        output_file.write("".join(report_lines))

        # Plot a histogram of the variable
        sns.histplot(data=df_no_outliers, x=var, kde=True)
        plt.title('Histogram of {}'.format(var))
        plt.xlabel(var)
        plt.ylabel('Frequency')
        plt.savefig(f'histogram_{var}.png')
        plt.close()

        # Plot a boxplot of the variable
        sns.boxplot(data=df_no_outliers, x=var)
        plt.title('Boxplot of {}'.format(var))
        plt.xlabel(var)
        plt.savefig(f'boxplot_{var}.png')
        plt.close()

# Write the DataFrame, with the analyzed columns coerced to numeric, to a new
# CSV file. NOTE: the outlier filtering above is applied per variable and only
# affects the report and the plots; no rows are removed from this file.
df.to_csv('cleaned_data_PerformanceTable.csv', index=False)
