In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import HTML
from io import BytesIO
import base64

# Build a reproducible synthetic dataset (fixed seed), then keep the ProductA rows.
np.random.seed(0)
n_rows = 3000
product_names = [f'Product{letter}' for letter in 'ABCDEFG']
setting_names = [f'Setting{idx}' for idx in range(1, 9)]

# NOTE: every np.random call advances the global RNG, so the draws below must
# stay in this exact order (cat1, cat2, cat3, x, y, val1..val5) to reproduce
# the same values for seed 0.
data = {
    'cat1': np.random.choice(product_names, n_rows),
    'cat2': np.random.choice(setting_names, n_rows),
    **{col: np.random.randint(1, 11, n_rows) for col in ('cat3', 'x', 'y')},
    'datetime': pd.date_range(start='2021-01-01', periods=n_rows, freq='D'),
    **{f'val{k}': np.random.random(n_rows) for k in range(1, 6)},
}
df = pd.DataFrame(data)

# Subset used for the report: rows whose cat1 is 'ProductA'.
filtered_df = df.loc[df['cat1'].eq('ProductA')]

# Render a box plot and encode it as base64
def create_boxplot_base64(df, val_column):
    """Render a box plot of ``val_column`` grouped by ``cat3`` as base64 PNG text.

    Args:
        df: DataFrame containing a ``cat3`` column and the ``val_column`` column.
        val_column: Name of the column plotted on the y axis.

    Returns:
        The PNG image bytes, base64-encoded and decoded to ``str``.
    """
    # Work on an explicit figure/axes instead of pyplot's implicit "current
    # figure", so that exactly this figure is saved and closed.
    fig, ax = plt.subplots(figsize=(8, 4))  # box plot size
    try:
        sns.boxplot(x='cat3', y=val_column, data=df, ax=ax)
        ax.set_title(f'{val_column} by cat3')
        buf = BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving raised.
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode()

# Compute per-group statistics and format them as HTML
def calculate_and_format_stats_html(df, val_columns):
    """Build an HTML fragment with per-``cat3`` statistics for each value column.

    For every column in ``val_columns`` the ``describe()`` statistics are
    computed per ``cat3`` group, extended with a ``<0.5 count`` column (number
    of values strictly below 0.5 in the group) and rendered as an HTML table
    under an ``<h3>`` heading.

    Args:
        df: DataFrame containing a ``cat3`` column and every column named in
            ``val_columns``.
        val_columns: Iterable of column names to summarise.

    Returns:
        A string starting with a ``<style>`` block followed by one heading and
        one table per value column.
    """
    parts = [
        "<style>table {border-collapse: collapse; width: 80%;} th, td {border: 1px solid #ddd; text-align: left; padding: 8px;} th {background-color: #f2f2f2;}</style>"
    ]
    for val_column in val_columns:
        stats_df = df.groupby('cat3')[val_column].describe()
        below_counts = df[df[val_column] < 0.5].groupby('cat3')[val_column].count()
        # Align on the group index rather than by position: a group with no
        # value below 0.5 is absent from ``below_counts`` and must show 0
        # instead of causing a length mismatch or shifting the other counts.
        stats_df['<0.5 count'] = below_counts.reindex(stats_df.index, fill_value=0).astype(int)
        stats_html = stats_df.reset_index().to_html(index=False)
        parts.append(f"<h3>{val_column} Statistics</h3>" + stats_html)
    return "".join(parts)

# Build the full HTML content (saved to a file by the script below)
def create_full_html_content(df, val_columns=None):
    """Assemble the report body: one box plot image plus stats table per column.

    Args:
        df: DataFrame passed through to ``create_boxplot_base64`` and
            ``calculate_and_format_stats_html``.
        val_columns: Optional iterable of value-column names to report on.
            Defaults to ``val1`` .. ``val5`` when omitted.

    Returns:
        A single HTML string with, for each value column, an inline base64
        ``<img>`` followed by that column's statistics HTML.
    """
    if val_columns is None:
        val_columns = [f'val{i}' for i in range(1, 6)]

    sections = []
    for val_column in val_columns:
        boxplot_img_data = create_boxplot_base64(df, val_column)
        sections.append(
            f'<img src="data:image/png;base64,{boxplot_img_data}" style="width:80%; display: block; margin-left: auto; margin-right: auto;">'
        )
        sections.append(calculate_and_format_stats_html(df, [val_column]))
    return "".join(sections)

# Render the report for the filtered data and write it to disk.
html_content = create_full_html_content(filtered_df)
html_file_path = './boxplots_and_stats.html'
# Explicit UTF-8 so the written file does not depend on the platform's
# default text encoding.
with open(html_file_path, 'w', encoding='utf-8') as file:
    file.write(html_content)

print(f"HTML 파일이 저장된 위치: {html_file_path}")

HTML 파일이 저장된 위치: ./boxplots_and_stats.html
