In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

In [None]:
# Load the raw results file that every later cell reads from
df = pd.read_csv('2024-divc-raws.csv')

# Reset the name attribute on both axes (row index and column index)
df.index.name = df.columns.name = None

In [None]:
# Build the pandas summary table once; its rows include mean/std/min/max
desc_stats = df.describe()

# Rows of the summary table that are reported for each numeric column
stat_rows = ['mean', 'std', 'min', 'max']

# Pull the four statistics for a column in one lookup, then print them
for column in df.select_dtypes(include=[np.number]).columns:
    mean_val, std_val, min_val, max_val = desc_stats.loc[stat_rows, column]
    print(f'{column}: \n    Mean = {mean_val:.2f} \n    Stdev = {std_val:.2f} \n    Min = {min_val:.2f} \n    Max = {max_val:.2f}')

In [None]:
# Global text styling picked up by every figure created after this point.
# NOTE(review): presumably the 'Avenir' font must be installed locally for
# this to take effect - confirm on the target machine.
plt.rcParams['font.family'] = 'Avenir'  # Change to desired font family
plt.rcParams['font.size'] = 12          # Change to desired font size

def remove_outliers(data, lower_pct=10, upper_pct=90, whisker=1.5):
    """Drop values that lie far outside a central percentile band.

    The band runs from the ``lower_pct`` percentile to the ``upper_pct``
    percentile of ``data``. A value is kept when it is no more than
    ``whisker`` band-widths below the lower percentile and no more than
    ``whisker`` band-widths above the upper percentile.

    With the defaults the band is the 10th-90th percentile range, not the
    25th-75th interquartile range, so the fence is wider than the
    conventional 1.5 * IQR rule.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values supporting boolean-mask indexing. Missing values
        should be dropped beforehand: ``np.percentile`` propagates NaN, which
        makes every comparison below False.
    lower_pct, upper_pct : float, optional
        Percentiles (0-100) delimiting the central band.
    whisker : float, optional
        Multiple of the band width added on each side of the band.

    Returns
    -------
    Same type as ``data``
        Only the values inside the fence. Empty input is returned unchanged.

    Raises
    ------
    ValueError
        If the percentiles do not satisfy
        ``0 <= lower_pct <= upper_pct <= 100``.
    """
    if not 0 <= lower_pct <= upper_pct <= 100:
        raise ValueError(
            'percentiles must satisfy 0 <= lower_pct <= upper_pct <= 100'
        )

    # np.percentile cannot produce usable bounds for an empty input
    # (depending on the NumPy version it raises or returns NaN), so there is
    # nothing to filter in that case.
    if len(data) == 0:
        return data

    low, high = np.percentile(data, [lower_pct, upper_pct])
    spread = high - low
    lower_bound = low - whisker * spread
    upper_bound = high + whisker * spread
    return data[(data >= lower_bound) & (data <= upper_bound)]

# Plot one histogram per numeric column, after trimming extreme values
for column in df.select_dtypes(include=[np.number]).columns:
    data = df[column].dropna()
    if data.empty:
        # A numeric column with no non-missing values has nothing to plot,
        # and passing an empty Series on to remove_outliers can abort the
        # whole loop (np.percentile on empty input raises or yields NaN
        # depending on the NumPy version).
        print(f'Skipping {column}: no non-missing values')
        continue
    data_no_outliers = remove_outliers(data)

    # Explicit figure/axes so every call targets this column's plot
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(data_no_outliers, kde=False, ax=ax)  # seaborn's default binning
    sns.despine(ax=ax)
    ax.set_title(f'Histogram of {column}')
    ax.set_xlabel(column)
    ax.set_ylabel('Count')
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))  # Ensure frequency axis has whole numbers
    plt.show()
    plt.close(fig)  # release the figure so the loop does not accumulate open figures

## Optional filters for build events with extremely skewed histograms

In [None]:
# "Bungee Drop": keep non-missing scores of at most 100, then trim outliers
column = 'Bungee Drop'
data = df[column].dropna().loc[lambda scores: scores <= 100]
data_no_outliers = remove_outliers(data)

# Draw the histogram of the filtered scores on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data_no_outliers, kde=False, ax=ax)
sns.despine()
ax.set_title(f'Histogram of {column} (≤ 100)')
ax.set_xlabel(column)
ax.set_ylabel('Count')
ax.yaxis.set_major_locator(MaxNLocator(integer=True))  # integer ticks on the count axis
plt.show()

In [None]:
# "Electric Vehicle": keep non-missing scores of at most 100, then trim outliers
column = 'Electric Vehicle'
data = df[column].dropna().loc[lambda scores: scores <= 100]
data_no_outliers = remove_outliers(data)

# Draw the histogram of the filtered scores on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data_no_outliers, kde=False, ax=ax)
sns.despine()
ax.set_title(f'Histogram of {column} (≤ 100)')
ax.set_xlabel(column)
ax.set_ylabel('Count')
ax.yaxis.set_major_locator(MaxNLocator(integer=True))  # integer ticks on the count axis
plt.show()

In [None]:
# "Robot Tour": keep non-missing scores of at most 1000, then trim outliers
column = 'Robot Tour'
data = df[column].dropna().loc[lambda scores: scores <= 1000]
data_no_outliers = remove_outliers(data)

# Draw the histogram of the filtered scores with a fixed count of 10 bins
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data_no_outliers, kde=False, bins = 10, ax=ax)
sns.despine()
ax.set_title(f'Histogram of {column} (≤ 1000)')
ax.set_xlabel(column)
ax.set_ylabel('Count')
ax.yaxis.set_major_locator(MaxNLocator(integer=True))  # integer ticks on the count axis
plt.show()