In [1]:
import numpy as np
import pandas as pd
from scipy.spatial import distance_matrix
from scipy.stats import pearsonr
from IPython.display import display, Math, clear_output
import ipywidgets as widgets
from ipywidgets import interactive, HBox, VBox

In [2]:
# Function to calculate mean and display LaTeX formatted output
def calculate_and_display_mean_latex(data):
    """Render the arithmetic mean of every column of ``data`` as a LaTeX formula.

    For each column one ``Math`` object is displayed showing the list of
    values, the element count, the sum, the mean with three decimals and the
    mean rounded to three decimals.

    Args:
        data: DataFrame whose columns are averaged one by one.

    Returns:
        None. The result is only displayed.
    """
    for column in data.columns:
        series = data[column]
        values = series.tolist()
        count = len(series)       # number of observations in the column
        total = series.sum()      # numerator of the mean
        mean = total / count
        rounded_mean = round(mean, 3)

        # Assemble the formula left to right; pieces are joined by single spaces.
        pieces = [
            rf"\text{{Rata-rata dari {column}: }}",
            rf"\frac{{\sum {values}}}{{{count}}}",
            "=",
            rf"\frac{{{total}}}{{{count}}}",
            "=",
            f"{mean:.3f}",
            r"\rightarrow",
            rf"\textbf{{{rounded_mean}}}",
        ]
        display(Math(" ".join(pieces)))
        
# Function to calculate and display variance and covariance in LaTeX formatted output
def calculate_and_display_var_cov_latex(data):
    """Render sample variances, pairwise covariances and the covariance matrix.

    The covariance matrix is computed with ``ddof=1`` and rounded to three
    decimals; the final value of every displayed formula is read from that
    rounded matrix, while the numerator shown in each fraction is recomputed
    from the deviations about the column means.

    Args:
        data: DataFrame whose columns are the variables.

    Returns:
        None. Everything is displayed as ``Math`` objects.
    """
    columns = list(data.columns)
    means = data.mean()
    cov = data.cov(ddof=1).round(3)
    dof = len(data) - 1  # the n - 1 denominator shown in every fraction

    # Variances: one formula per column (diagonal of the covariance matrix).
    for name in columns:
        centered = data[name] - means[name]
        squared_sum = sum(centered ** 2)
        var_value = cov.loc[name, name]
        display(Math(
            rf"\text{{Varians dari {name}: }} \sigma_{{{name}}}^2 = \frac{{1}}{{n-1}}\sum (x_{{{name},i}} - \bar{{x}}_{{{name}}})^2 = \frac{{{squared_sum:.3f}}}{{{dof}}} = {var_value}"
        ))

    # Covariances: each unordered pair once, in column order.
    for position, first in enumerate(columns):
        for second in columns[position + 1:]:
            product_sum = sum((data[first] - means[first]) * (data[second] - means[second]))
            cov_value = cov.loc[first, second]
            display(Math(
                rf"\text{{Kovarians antara {first} dan {second}: }} \sigma_{{{first}{second}}} = \frac{{1}}{{n-1}}\sum (x_{{{first},i}} - \bar{{x}}_{{{first}}})(x_{{{second},i}} - \bar{{x}}_{{{second}}}) = \frac{{{product_sum:.3f}}}{{{dof}}} = {cov_value}"
            ))

    # Full matrix: cells joined by '&', rows joined by the LaTeX row separator.
    rows = [
        ' & '.join(str(cov.loc[row_name, col_name]) for col_name in columns)
        for row_name in columns
    ]
    matrix_latex = '\\begin{bmatrix}' + ' \\\\ '.join(rows) + '\\end{bmatrix}'
    display(Math(f"\\text{{Matriks Kovarians:}} {matrix_latex}"))

# Function to calculate and display correlation in LaTeX formatted output
def calculate_and_display_correlation_latex(data):
    """Render the pairwise Pearson correlations and the correlation matrix.

    For every pair of columns (including each column with itself) one
    ``Math`` formula is displayed showing the covariance divided by the
    product of the two sample standard deviations, followed by the
    correlation taken from ``data.corr()`` rounded to three decimals. The
    full rounded correlation matrix is displayed last.

    Args:
        data: DataFrame whose columns are the variables.

    Returns:
        None. Everything is displayed as ``Math`` objects.
    """
    std_dev = data.std(ddof=1)
    # The covariance matrix does not depend on the pair being rendered, so it
    # is computed once here instead of once per pair inside the loops.
    covariance_matrix = data.cov()
    correlation_matrix = data.corr().round(3)

    # Each unordered pair once; the diagonal is included on purpose.
    for i, col1 in enumerate(data.columns):
        for col2 in data.columns[i:]:
            correlation = correlation_matrix.loc[col1, col2]
            covariance = covariance_matrix.loc[col1, col2]
            latex_str_correlation = rf"\text{{Korelasi antara {col1} dan {col2}: }} \rho_{{{col1},{col2}}} = \frac{{\sigma_{{{col1}{col2}}}}}{{\sigma_{{{col1}}} \sigma_{{{col2}}}}} = \frac{{{covariance:.3f}}}{{({std_dev[col1]:.3f})({std_dev[col2]:.3f})}} = {correlation}"
            display(Math(latex_str_correlation))

    # Full matrix: cells joined by '&', rows joined by the LaTeX row separator.
    rows = [
        ' & '.join(str(correlation_matrix.loc[col1, col2]) for col2 in data.columns)
        for col1 in data.columns
    ]
    matrix_latex = '\\begin{bmatrix}' + ' \\\\ '.join(rows) + '\\end{bmatrix}'
    display(Math(f"\\text{{Matriks Korelasi:}} {matrix_latex}"))

# Function to calculate and display statistical distance to the origin in LaTeX formatted output
def calculate_and_display_statistical_distance_latex(data, variance_x1, variance_x2):
    """Display and return each point's statistical distance to the origin.

    The distance of a row is ``sqrt(x1**2 / variance_x1 + x2**2 / variance_x2)``.
    One ``Math`` formula is displayed per row.

    Args:
        data: DataFrame that must contain the columns ``'x1'`` and ``'x2'``.
        variance_x1: Variance used to scale the ``x1`` coordinate.
        variance_x2: Variance used to scale the ``x2`` coordinate.

    Returns:
        list: The distances rounded to three decimals, in row order.
    """
    distances = []
    # The row label is not needed, only the two coordinates of each point.
    for _, row in data.iterrows():
        x1, x2 = row['x1'], row['x2']
        distance = np.sqrt((x1**2 / variance_x1) + (x2**2 / variance_x2))
        distances.append(round(distance, 3))
        latex_str_distance = rf"\text{{Jarak statistik titik ({x1}, {x2}) ke asal (0,0):}} \sqrt{{\left(\frac{{{x1}}}{{\sqrt{{{variance_x1:.3f}}}}}\right)^2 + \left(\frac{{{x2}}}{{\sqrt{{{variance_x2:.3f}}}}}\right)^2}} = {distance:.3f}"
        display(Math(latex_str_distance))
    return distances

# Function to calculate and display statistical distance to the mean using LaTeX formatted output
def calculate_and_display_distance_to_mean_latex(data, mean_vector, variance_x1, variance_x2):
    """Display and return each point's statistical distance to the mean.

    The distance of a row is
    ``sqrt((x1 - mean_x1)**2 / variance_x1 + (x2 - mean_x2)**2 / variance_x2)``.
    One ``Math`` formula is displayed per row.

    Args:
        data: DataFrame that must contain the columns ``'x1'`` and ``'x2'``.
        mean_vector: Mapping-like object indexed by ``'x1'`` and ``'x2'``
            holding the two means.
        variance_x1: Variance used to scale the ``x1`` deviation.
        variance_x2: Variance used to scale the ``x2`` deviation.

    Returns:
        list: The distances rounded to three decimals, in row order.
    """
    results = []
    for _, point in data.iterrows():
        x1 = point['x1']
        x2 = point['x2']
        mean_x1 = mean_vector['x1']
        mean_x2 = mean_vector['x2']

        # Squared deviation of each coordinate, scaled by its variance.
        term_x1 = (x1 - mean_x1)**2 / variance_x1
        term_x2 = (x2 - mean_x2)**2 / variance_x2
        distance = np.sqrt(term_x1 + term_x2)
        results.append(round(distance, 3))

        display(Math(
            rf"\text{{Jarak statistik titik ({x1}, {x2}) ke rata-rata ({mean_x1:.3f}, {mean_x2:.3f}):}} \sqrt{{\left(\frac{{{x1} - {mean_x1:.3f}}}{{\sqrt{{{variance_x1:.3f}}}}}\right)^2 + \left(\frac{{{x2} - {mean_x2:.3f}}}{{\sqrt{{{variance_x2:.3f}}}}}\right)^2}} = {distance:.3f}"
        ))
    return results


In [3]:
# Main function to process data based on widget input
def process_data(x1_values, x2_values):
    """Parse the two comma-separated inputs and print/render every statistic.

    The inputs are split on commas, blank entries are ignored and the rest is
    converted to floats. On a parse error, on a length mismatch, or when fewer
    than two observations are given, an error line is printed and the function
    returns early. Otherwise the data, means, variances/covariances,
    correlations and the statistical distances to the origin and to the mean
    are shown, in that order, by the sibling ``calculate_and_display_*``
    helpers.

    The input widgets are not displayed here; the code that wires this
    function to the widgets is expected to display them, so they appear once.

    Args:
        x1_values: Comma-separated numbers for the ``x1`` variable.
        x2_values: Comma-separated numbers for the ``x2`` variable.

    Returns:
        None. All results are printed or displayed.
    """
    # Clear previous outputs
    clear_output(wait=True)

    # Convert comma-separated string values to lists of floats
    try:
        x1_list = [float(val.strip()) for val in x1_values.split(',') if val.strip()]
        x2_list = [float(val.strip()) for val in x2_values.split(',') if val.strip()]
        if len(x1_list) != len(x2_list):
            raise ValueError("Panjang x1 dan x2 harus sama.")
    except ValueError as e:
        print(f"Error: {e}")
        return

    # The sample variance (ddof=1) needs at least two observations, and the
    # mean divides by the number of observations, so stop before computing.
    if len(x1_list) < 2:
        print("Error: Minimal dua pasang nilai diperlukan.")
        return

    # Create the DataFrame
    data = pd.DataFrame({'x1': x1_list, 'x2': x2_list})

    # Display the data
    print("## Data Terbaru:")
    print(data)

    # Section for Mean
    print("\n## Rata-rata")
    calculate_and_display_mean_latex(data)

    # Section for Variance and Covariance
    print("\n## Varians dan Kovarians")
    calculate_and_display_var_cov_latex(data)

    # Section for Correlation
    print("\n## Korelasi")
    calculate_and_display_correlation_latex(data)

    # Calculate variances for x1 and x2
    variance_x1 = data['x1'].var(ddof=1)  # Sample variance for x1
    variance_x2 = data['x2'].var(ddof=1)  # Sample variance for x2

    # Display variances
    print(f"\nVarians x1: {variance_x1:.3f}, Varians x2: {variance_x2:.3f}")

    # Both distances divide by the variances; a constant column has zero
    # variance, which would make them undefined.
    if variance_x1 == 0 or variance_x2 == 0:
        print("\nJarak statistik tidak terdefinisi karena varians bernilai nol.")
        return

    # Section for Statistical Distances to Origin
    print("\n## Jarak Statistik ke Asal")
    calculate_and_display_statistical_distance_latex(data, variance_x1, variance_x2)

    # Section for Statistical Distances to Mean
    mean_vector = data.mean()
    print("\n## Jarak Statistik ke Rata-rata")
    calculate_and_display_distance_to_mean_latex(data, mean_vector, variance_x1, variance_x2)


In [4]:
# Both input boxes share the same look; only the variable label and the
# pre-filled values differ, so they are built by one small factory.
def _make_values_textarea(label, default_values):
    """Return a 50%-wide Textarea for variable ``label`` pre-filled with ``default_values``."""
    return widgets.Textarea(
        value=default_values,
        placeholder=f'Masukkan nilai {label}, pisahkan dengan koma',
        description=f'{label}:',
        layout=widgets.Layout(width='50%'),
        style={'description_width': 'initial'}
    )


x1_input = _make_values_textarea('x1', '3, 4, 2, 6, 8, 2, 5')
x2_input = _make_values_textarea('x2', '5, 5.5, 4, 7, 10, 5, 7.5')

# Show the two input boxes stacked vertically
display(VBox([x1_input, x2_input]))

# Re-run process_data whenever either text area changes, capturing its output
out = widgets.interactive_output(
    process_data,
    {'x1_values': x1_input, 'x2_values': x2_input},
)

# Show the captured output below the inputs
display(out)

VBox(children=(Textarea(value='3, 4, 2, 6, 8, 2, 5', description='x1:', layout=Layout(width='50%'), placeholde…

Output()