In [1]:
from IPython.display import display, HTML
# Notebook-wide CSS overrides (container width, editor/output fonts, prompt
# width, TOC padding, DataFrame table font size). Kept in a named constant so
# the markup can be read separately from the call that injects it.
_NOTEBOOK_STYLE = """
<style>
div.container{width:95% !important;}
div.cell.code_cell.rendered{width:100%;}
div.CodeMirror {font-family:Consolas; font-size:15pt;}
div.output {font-size:15pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:15pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:12pt;padding:5px;}
table.dataframe{font-size:15px;}
</style>
"""
display(HTML(_NOTEBOOK_STYLE))

In [3]:
import pandas as pd

def explore_data(file_path, output_path=r'C:\Users\Admin\data\summary_statistics.txt'):
    """Write yearly, per-nationality and per-purpose visitor totals to a text file.

    The report has three sections, each introduced by a ``--- title ---`` line
    and separated by a blank line:

    1. total 'Headcount' per calendar year (derived from 'YearMonth'),
    2. the 10 largest 'Headcount' totals grouped by '국적' (nationality),
    3. total 'Headcount' grouped by '목적' (purpose).

    Args:
        file_path: Path to the preprocessed CSV, read with the 'utf-8-sig'
            codec. The columns 'YearMonth', 'Headcount', '국적' and '목적'
            are used.
        output_path: Destination of the text report (written as UTF-8).
            Defaults to the location that used to be hard-coded, so existing
            single-argument calls keep writing to the same file.

    Returns:
        None. The only result is the file written to ``output_path``.
    """
    import os  # local import so the function does not rely on another cell

    df = pd.read_csv(file_path, encoding='utf-8-sig')

    # Derive the year as a standalone key instead of adding columns to the
    # loaded frame; naming it 'Year' keeps that label in the rendered index.
    # NOTE(review): assumes 'YearMonth' is stored in a textual form that
    # pd.to_datetime can parse (e.g. '2020-01') -- TODO confirm against the
    # preprocessing step.
    year = pd.to_datetime(df['YearMonth']).dt.year.rename('Year')

    yearly_visitors = df.groupby(year)['Headcount'].sum()
    top_10_nationalities = df.groupby('국적')['Headcount'].sum().nlargest(10)
    purpose_visitors = df.groupby('목적')['Headcount'].sum()

    # NOTE(review): visualize_data filters out a '전 체' label before ranking
    # nationalities/purposes; no such filter is applied here. Verify whether
    # the summary should exclude it as well.
    sections = [
        ("Total Visitors by Year", yearly_visitors),
        ("Top 10 Nationalities", top_10_nationalities),
        ("Visitors by Purpose", purpose_visitors),
    ]
    # Every section gets a heading (previously the yearly totals had none).
    report = "\n\n".join(
        f"--- {title} ---\n{stats.to_string()}" for title, stats in sections
    )

    # Create the target folder when a directory component is present and missing.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(report)

# Entry point: build the summary report from the preprocessed CSV when this
# cell/script is run directly rather than imported.
if __name__ == '__main__':
    explore_data(file_path=r'C:\Users\Admin\data\preprocessed_entrants.csv')

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def _monthly_totals_by(df, category):
    """Return long-form monthly 'Headcount' sums per `category`, with month/category pairs that have no rows filled as 0."""
    wide = (
        df.groupby(['YearMonth', category])['Headcount']
        .sum()
        .unstack()
        .fillna(0)
        .reset_index()
    )
    return wide.melt(id_vars='YearMonth', var_name=category, value_name='Headcount')


def _save_trend_plot(data, title, save_path, hue=None):
    """Draw a 12x6 line chart of 'Headcount' over 'YearMonth' (optionally split by `hue`) and save it to `save_path`."""
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        sns.lineplot(data=data, x='YearMonth', y='Headcount', hue=hue, ax=ax)
        ax.set_title(title)
        ax.set_xlabel('연월')
        ax.set_ylabel('입국자 수')
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


def visualize_data(file_path, output_dir=r'C:\Users\Admin\data'):
    """Save three monthly visitor-trend line charts as PNG files.

    Files written into ``output_dir``:

    * ``monthly_total_visitors.png`` -- total 'Headcount' per 'YearMonth'.
    * ``top_5_nationalities_trend.png`` -- monthly 'Headcount' for the five
      '국적' (nationality) values with the largest overall totals.
    * ``purpose_trend.png`` -- monthly 'Headcount' per '목적' (purpose).

    Args:
        file_path: Path to the preprocessed CSV, read with the 'utf-8-sig'
            codec. The columns 'YearMonth', 'Headcount', '국적' and '목적'
            are used.
        output_dir: Folder that receives the PNG files; created if missing.
            Defaults to the folder that used to be hard-coded, so existing
            single-argument calls behave as before.

    Returns:
        None. A completion message naming ``output_dir`` is printed.

    Side effects:
        Sets ``plt.rcParams['font.family']`` to 'Malgun Gothic' and
        ``plt.rcParams['axes.unicode_minus']`` to False for the whole session.
    """
    import os  # local import so the function does not rely on another cell

    df = pd.read_csv(file_path, encoding='utf-8-sig')
    df['YearMonth'] = pd.to_datetime(df['YearMonth'])

    # Korean-capable font for the chart labels (Windows font name); disabling
    # unicode_minus keeps minus signs renderable with that font.
    plt.rcParams['font.family'] = 'Malgun Gothic'
    plt.rcParams['axes.unicode_minus'] = False

    os.makedirs(output_dir, exist_ok=True)

    # 1. Total monthly visitors.
    monthly_total = df.groupby('YearMonth')['Headcount'].sum().reset_index()
    _save_trend_plot(
        monthly_total,
        '월별 총 해외 여행객 유입량 추이',
        os.path.join(output_dir, 'monthly_total_visitors.png'),
    )

    # 2. Top 5 nationalities, ranked after dropping rows labelled '전 체'.
    # NOTE(review): '전 체' (with a space) is presumably the aggregate "all"
    # label produced by preprocessing; the earlier comment spelled it '전체'.
    # Confirm the exact spelling present in the data.
    top_5_nationalities = (
        df[df['국적'] != '전 체'].groupby('국적')['Headcount'].sum().nlargest(5).index
    )
    df_top_5 = df[df['국적'].isin(top_5_nationalities)]
    _save_trend_plot(
        _monthly_totals_by(df_top_5, '국적'),
        '주요 5개 국적별 월별 해외 여행객 유입량 추이',
        os.path.join(output_dir, 'top_5_nationalities_trend.png'),
        hue='국적',
    )

    # 3. Visitors by purpose, excluding '전 체' and empty-string purposes.
    purpose_df = df[~df['목적'].isin(['전 체', ''])]
    _save_trend_plot(
        _monthly_totals_by(purpose_df, '목적'),
        '목적별 월별 해외 여행객 유입량 추이',
        os.path.join(output_dir, 'purpose_trend.png'),
        hue='목적',
    )

    print(f"Visualization complete. Plots saved to {output_dir}")

# Entry point: render and save the trend charts from the preprocessed CSV when
# this cell/script is run directly rather than imported.
if __name__ == '__main__':
    visualize_data(file_path=r'C:\Users\Admin\data\preprocessed_entrants.csv')

Visualization complete. Plots saved to C:\Users\Admin\data
