## 지역 추출

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist

# Read the source CSV once; every region extraction below works from this frame
csv_path = '/content/drive/MyDrive/여의도_불꽃축제_데이터/성별,연령,국적 통합 데이터/2024-10-05.csv'
df_original = pd.read_csv(csv_path)

# Region label -> the two reference coordinates of that region
# (values in the 37.5 / 126.9 range; presumably latitude and longitude)
_region_centres = {
    '1번지역': (37.529714378403206, 126.93099129426616),
    '2번지역': (37.52657336296044, 126.93384449219988),
    '3번지역': (37.5234376670226, 126.93782909786222),
    '4번지역': (37.52255245794694, 126.94123076525138),
}

# Flatten to (first coordinate, second coordinate, label) tuples, in the order listed above
reference_points = [
    (first, second, label) for label, (first, second) in _region_centres.items()
]

# Function to process each region
def filter_and_save(df, reference_point, region_name, n_closest=25):
    """Save the rows of ``df`` that arrive at the coordinates nearest a reference point.

    Every distinct departure (``DPR_*``) and arrival (``ARV_*``) coordinate
    found in the complete, de-duplicated rows of ``df`` is ranked by its own
    Euclidean distance to ``reference_point``.  The ``n_closest`` nearest
    coordinates are kept, and every row of ``df`` whose *arrival* coordinate
    is one of them is written to ``"<region_name>.csv"`` (relative to the
    current working directory) without the index.

    Args:
        df: DataFrame containing the columns ``DPR_X_AXIS_WGS``,
            ``DPR_Y_AXIS_WGS``, ``ARV_X_AXIS_WGS`` and ``ARV_Y_AXIS_WGS``.
            The frame is read only; no columns are added to it.
        reference_point: Sequence whose first two items are compared with the
            ``*_Y_AXIS_WGS`` and ``*_X_AXIS_WGS`` columns, in that order
            (presumably latitude, longitude).  Any further items are ignored.
        region_name: Stem of the output file name.
        n_closest: Number of nearest distinct coordinates to keep.

    Returns:
        The filtered DataFrame that was written, carrying the same columns
        as ``df``.
    """
    coordinate_columns = ['DPR_X_AXIS_WGS', 'DPR_Y_AXIS_WGS', 'ARV_X_AXIS_WGS', 'ARV_Y_AXIS_WGS']

    # Only complete, distinct trips take part in the ranking; the filter at
    # the end still runs over every row of df.
    trips = df.dropna(subset=coordinate_columns).drop_duplicates(subset=coordinate_columns)

    # Stack departure and arrival points into one (n, 2) array ordered (Y, X).
    # Each point is then measured by its own distance, so a far-away departure
    # point can no longer borrow the small distance of its row's arrival point.
    points = np.vstack([
        trips[['DPR_Y_AXIS_WGS', 'DPR_X_AXIS_WGS']].to_numpy(dtype=float),
        trips[['ARV_Y_AXIS_WGS', 'ARV_X_AXIS_WGS']].to_numpy(dtype=float),
    ])
    reference_coordinates = np.array([reference_point[:2]], dtype=float)

    # NOTE: this is a plain Euclidean distance in the columns' own units, not
    # a geodesic distance.
    if len(points):
        distances = cdist(points, reference_coordinates, metric='euclidean').ravel()
    else:
        # Nothing to rank (empty input or every row incomplete).
        distances = np.empty(0)

    ranked = pd.DataFrame({
        'Coordinates': [tuple(pair) for pair in points.tolist()],
        'Distance': distances,
    })

    # A stable sort keeps the selection deterministic when distances tie at the cutoff.
    nearest = (
        ranked.drop_duplicates(subset='Coordinates')
        .sort_values(by='Distance', kind='mergesort')
        .head(n_closest)
    )
    closest_coordinates_set = set(nearest['Coordinates'])

    # Match on the arrival coordinate only.  Rows with a missing arrival
    # coordinate yield a NaN pair, which is never in the set.
    arrival_pairs = df[['ARV_Y_AXIS_WGS', 'ARV_X_AXIS_WGS']].to_numpy(dtype=float).tolist()
    arrival_points = pd.Series(
        [tuple(pair) for pair in arrival_pairs],
        index=df.index,
        dtype=object,
    )
    # A positional mask avoids index alignment, so duplicate index labels are harmless.
    filtered_df = df[arrival_points.isin(closest_coordinates_set).to_numpy()]

    # Save the filtered DataFrame to a new CSV file
    output_filename = f"{region_name}.csv"
    filtered_df.to_csv(output_filename, index=False)
    print(f"Filtered data for {region_name} saved to '{output_filename}'")

    return filtered_df

# Run the extraction once per reference point; the last item of each tuple
# is the region label and becomes the output file's name.
for ref in reference_points:
    region_label = ref[-1]
    filter_and_save(df_original.copy(), ref, region_label)
