## 클러스터링용 시간 통합

In [None]:
import pandas as pd

# Input: path-decomposition CSV for region 4 (replace with the actual file path).
input_file = "/content/drive/MyDrive/여의도_불꽃축제_데이터/지역 데이터/경로분해/2023/경로분해_4.csv"
# Output: where the aggregated table is written.
output_file = "/content/drive/MyDrive/여의도_불꽃축제_데이터/지역 데이터/클러스터링/4번지역_경로분해_시간통합.csv"

# Load the raw rows.
df = pd.read_csv(input_file)

# An edge is identified by its node coordinates plus its next-node coordinates.
edge_columns = ['노드X좌표', '노드Y좌표', '다음노드X좌표', '다음노드Y좌표']

# Collapse all rows that share the same edge into one row whose 유동인구 is the
# sum over those rows; the edge columns come back as ordinary columns.
df_grouped = df.groupby(edge_columns)['유동인구'].sum().reset_index()

# Save without the index; 'utf-8-sig' writes a BOM in front of the data.
df_grouped.to_csv(output_file, index=False, encoding='utf-8-sig')

print(f"새로운 CSV 파일이 생성되었습니다: {output_file}")


## K-Means

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load the table to cluster (adjust the path to the actual input file).
df = pd.read_csv('/content/output.csv')

# Features: node position together with the 유동인구 value.
X = df[['노드X좌표', '노드Y좌표', '유동인구']]

# Standardize each feature (zero mean, unit variance) before clustering so the
# three columns are on a comparable scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K-Means with 6 clusters; random_state is fixed for reproducibility.
kmeans = KMeans(n_clusters=6, random_state=42)
kmeans.fit(X_scaled)

# Attach the cluster label of every row to the original dataframe.
df['Cluster'] = kmeans.labels_

# Scatter the nodes by position, coloured by cluster label. Axis labels and a
# title are set so this figure reads the same way as the DBSCAN figures in
# this notebook.
plt.figure(figsize=(10, 6))
plt.scatter(df['노드X좌표'], df['노드Y좌표'], c=df['Cluster'], cmap='viridis', marker='o')
plt.colorbar(label='Cluster')
plt.xlabel('노드X좌표')
plt.ylabel('노드Y좌표')
plt.title('K-Means Clustering')
plt.show()


## DBSCAN

### 1번 지역

In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

# Region 1 path-decomposition data (adjust the path as needed).
df = pd.read_csv('/content/drive/MyDrive/여의도_불꽃축제_데이터/지역 데이터/경로분해/2023/경로분해_1.csv')

# Work on a random one-sixth of the rows; random_state keeps the draw reproducible.
df_sampled = df.sample(frac=1/6, random_state=42)

# Features: node position together with the 유동인구 value.
X = df_sampled[['노드X좌표', '노드Y좌표', '유동인구']]

# Standardize the features before the distance-based clustering.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Run DBSCAN (eps=0.7, min_samples=4) and store each sampled row's label.
dbscan = DBSCAN(eps=0.7, min_samples=4)
df_sampled['Cluster'] = dbscan.fit(X_scaled).labels_

# One scatter series per label, in order of first appearance.
plt.figure(figsize=(10, 6))

clusters = df_sampled['Cluster'].unique()
for cluster in clusters:
    # Select this label's rows once and reuse them for both axes.
    members = df_sampled[df_sampled['Cluster'] == cluster]
    if cluster == -1:
        # DBSCAN labels noise points -1; draw them as red crosses.
        style = dict(c='red', label='Noise', marker='x', alpha=0.5)
    else:
        style = dict(label=f'Cluster {cluster}', alpha=0.6)
    plt.scatter(members['노드X좌표'], members['노드Y좌표'], **style)

plt.legend()
plt.xlabel('노드X좌표')
plt.ylabel('노드Y좌표')
plt.title('DBSCAN Clustering (Adjusted Parameters)')
plt.show()


### 2번 지역

In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

# Region 2 path-decomposition data (adjust the path as needed).
df = pd.read_csv('/content/drive/MyDrive/여의도_불꽃축제_데이터/지역 데이터/경로분해/2023/경로분해_2.csv')

# Work on a random one-sixth of the rows; random_state keeps the draw reproducible.
df_sampled = df.sample(frac=1/6, random_state=42)

# Features: node position together with the 유동인구 value.
X = df_sampled[['노드X좌표', '노드Y좌표', '유동인구']]

# Standardize the features before the distance-based clustering.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Run DBSCAN (eps=0.7, min_samples=4) and store each sampled row's label.
dbscan = DBSCAN(eps=0.7, min_samples=4)
df_sampled['Cluster'] = dbscan.fit(X_scaled).labels_

# One scatter series per label, in order of first appearance.
plt.figure(figsize=(10, 6))

clusters = df_sampled['Cluster'].unique()
for cluster in clusters:
    # Select this label's rows once and reuse them for both axes.
    members = df_sampled[df_sampled['Cluster'] == cluster]
    if cluster == -1:
        # DBSCAN labels noise points -1; draw them as red crosses.
        style = dict(c='red', label='Noise', marker='x', alpha=0.5)
    else:
        style = dict(label=f'Cluster {cluster}', alpha=0.6)
    plt.scatter(members['노드X좌표'], members['노드Y좌표'], **style)

plt.legend()
plt.xlabel('노드X좌표')
plt.ylabel('노드Y좌표')
plt.title('DBSCAN Clustering (Adjusted Parameters)')
plt.show()


### 3번 지역

In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

# Region 3 path-decomposition data (adjust the path as needed).
df = pd.read_csv('/content/drive/MyDrive/여의도_불꽃축제_데이터/지역 데이터/경로분해/2023/경로분해_3.csv')

# Work on a random one-sixth of the rows; random_state keeps the draw reproducible.
df_sampled = df.sample(frac=1/6, random_state=42)

# Features: node position together with the 유동인구 value.
X = df_sampled[['노드X좌표', '노드Y좌표', '유동인구']]

# Standardize the features before the distance-based clustering.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Run DBSCAN (eps=0.7, min_samples=4) and store each sampled row's label.
dbscan = DBSCAN(eps=0.7, min_samples=4)
df_sampled['Cluster'] = dbscan.fit(X_scaled).labels_

# One scatter series per label, in order of first appearance.
plt.figure(figsize=(10, 6))

clusters = df_sampled['Cluster'].unique()
for cluster in clusters:
    # Select this label's rows once and reuse them for both axes.
    members = df_sampled[df_sampled['Cluster'] == cluster]
    if cluster == -1:
        # DBSCAN labels noise points -1; draw them as red crosses.
        style = dict(c='red', label='Noise', marker='x', alpha=0.5)
    else:
        style = dict(label=f'Cluster {cluster}', alpha=0.6)
    plt.scatter(members['노드X좌표'], members['노드Y좌표'], **style)

plt.legend()
plt.xlabel('노드X좌표')
plt.ylabel('노드Y좌표')
plt.title('DBSCAN Clustering (Adjusted Parameters)')
plt.show()


### 4번 지역

In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

# Region 4 path-decomposition data (adjust the path as needed).
df = pd.read_csv('/content/drive/MyDrive/여의도_불꽃축제_데이터/지역 데이터/경로분해/2023/경로분해_4.csv')

# Work on a random one-sixth of the rows; random_state keeps the draw reproducible.
df_sampled = df.sample(frac=1/6, random_state=42)

# Features: node position together with the 유동인구 value.
X = df_sampled[['노드X좌표', '노드Y좌표', '유동인구']]

# Standardize the features before the distance-based clustering.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Run DBSCAN (eps=0.7, min_samples=4) and store each sampled row's label.
dbscan = DBSCAN(eps=0.7, min_samples=4)
df_sampled['Cluster'] = dbscan.fit(X_scaled).labels_

# One scatter series per label, in order of first appearance.
plt.figure(figsize=(10, 6))

clusters = df_sampled['Cluster'].unique()
for cluster in clusters:
    # Select this label's rows once and reuse them for both axes.
    members = df_sampled[df_sampled['Cluster'] == cluster]
    if cluster == -1:
        # DBSCAN labels noise points -1; draw them as red crosses.
        style = dict(c='red', label='Noise', marker='x', alpha=0.5)
    else:
        style = dict(label=f'Cluster {cluster}', alpha=0.6)
    plt.scatter(members['노드X좌표'], members['노드Y좌표'], **style)

plt.legend()
plt.xlabel('노드X좌표')
plt.ylabel('노드Y좌표')
plt.title('DBSCAN Clustering (Adjusted Parameters)')
plt.show()


### 메인경로 추출

In [None]:
import pandas as pd

# Paths of the two input CSV files.
file_path_1 = '/content/filtered_clusters_33_34_35_36.csv'  # file 1: rows of the selected clusters
file_path_2 = '/content/drive/MyDrive/여의도_불꽃축제_데이터/지역 데이터/경로분해/2023/경로분해_1.csv'  # file 2: full path-decomposition table

# Read both files.
df1 = pd.read_csv(file_path_1)
df2 = pd.read_csv(file_path_2)


def _coord_keys(frame, x_col, y_col):
    """Return an "x,y" string key per row built from two coordinate columns.

    The same builder is used for both files so that keys are formatted
    identically on each side of the membership test.
    """
    return frame[x_col].astype(str) + ',' + frame[y_col].astype(str)


# Every coordinate that occurs in file 1, either as a node or as a next node.
unique_coordinates = set(
    _coord_keys(df1, '노드X좌표', '노드Y좌표')
).union(
    _coord_keys(df1, '다음노드X좌표', '다음노드Y좌표')
)

# Keep the rows of file 2 whose node OR next node is one of those coordinates.
# A boolean mask is used instead of iterrows(): iterrows() upcasts each row to
# a common dtype (which changes how values print and therefore whether the
# string keys match), and rebuilding the frame from row Series loses the
# original column dtypes. The mask also keeps the header when nothing matches.
node_on_route = _coord_keys(df2, '노드X좌표', '노드Y좌표').isin(unique_coordinates)
next_node_on_route = _coord_keys(df2, '다음노드X좌표', '다음노드Y좌표').isin(unique_coordinates)
filtered_df = df2[node_on_route | next_node_on_route]

# Show the result.
print("Filtered Data:")
print(filtered_df)

# Save the filtered rows.
# NOTE(review): the output is named '4번.csv' while file 2 is 경로분해_1.csv —
# confirm the intended region number before reusing this cell.
filtered_output_path = '4번.csv'
filtered_df.to_csv(filtered_output_path, index=False)
print(f"Filtered data saved to {filtered_output_path}")


## 시각화

In [None]:
import pandas as pd
import folium

# Path of the CSV file to draw (put the actual file path here).
file_path = '/content/drive/MyDrive/여의도_불꽃축제_데이터/메인경로/2번.csv'

# Read the route rows.
df = pd.read_csv(file_path)

# Centre the map on the first row's node; folium expects (Y, X) order here,
# matching how the segments below are built.
map_center = df[['노드Y좌표', '노드X좌표']].iloc[0].tolist()
m = folium.Map(location=map_center, zoom_start=16)

# Draw one red segment per row, from the node to its next node.
segment_columns = zip(
    df['노드Y좌표'], df['노드X좌표'], df['다음노드Y좌표'], df['다음노드X좌표']
)
for start_y, start_x, end_y, end_x in segment_columns:
    start = (start_y, start_x)
    end = (end_y, end_x)
    folium.PolyLine([start, end], color='red', weight=5, opacity=0.7).add_to(m)

# Leave the map as the cell's last expression so the notebook renders it.
m
