In [1]:
import os

# Project root the notebook runs from. Set the BUS_PROJECT_PATH environment
# variable to point at a checkout in a different location; the original
# hard-coded path remains the default so existing setups keep working.
project_path = os.environ.get("BUS_PROJECT_PATH", "C:/workspace/Bus Project")
# Presumably needed so that `import bus.analyzer` and relative data paths
# resolve from the project root -- confirm against the project layout.
os.chdir(project_path)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import folium
import datetime
import random

import bus.analyzer as anz

In [3]:
# Load the cluster table through the project's bus.analyzer helper (imported as `anz`).
cluster_df = anz.load_cluster_df()

In [9]:
# Show the loaded cluster table as this cell's output.
cluster_df

Unnamed: 0,cluster_id,cluster_group,cluster_target,cluster_longitude,cluster_latitude,tour_geton_usage,regident_geton_usage,tour_getoff_usage,regident_getoff_usage,total_usage
0,0,제주시,0,126.525346,33.499519,35643.0,854183.0,33975.0,527495.0,1451296.0
1,1,제주시,1,126.481693,33.562378,16648.0,798094.0,15597.0,572968.0,1403307.0
2,2,제주시,2,126.502212,33.489384,13625.0,769078.0,12011.0,474573.0,1269287.0
3,3,제주시,3,126.526664,33.508671,4886.0,295701.0,5493.0,223543.0,529623.0
4,4,제주시,4,126.536271,33.515032,6526.0,280593.0,3899.0,151690.0,442708.0
...,...,...,...,...,...,...,...,...,...,...
964,964,서귀포시,407,126.243800,33.276700,1.0,36.0,3.0,60.0,100.0
965,965,서귀포시,408,126.255100,33.293110,1.0,40.0,2.0,18.0,61.0
966,966,서귀포시,409,126.859420,33.394990,0.0,7.0,1.0,17.0,25.0
967,967,서귀포시,410,126.845280,33.374930,0.0,2.0,0.0,6.0,8.0


In [50]:
class ClusterManager:
    """Query helper around a cluster table.

    The wrapped DataFrame must provide the columns ``cluster_id``,
    ``cluster_longitude`` and ``cluster_latitude``.  The manager answers
    "stay area" queries: given two cluster ids, which clusters lie on or near
    the straight path between them, i.e. whose summed distance to both
    endpoints is at most 1% longer than the direct endpoint distance.

    Queries never modify the DataFrame object passed in by the caller.
    """

    def __init__(self, cluster_df):
        """Store ``cluster_df`` as the table all queries run against."""
        self.cluster_df = cluster_df

    def set_cluster_df(self, cluster_df):
        """Replace the table all queries run against."""
        self.cluster_df = cluster_df

    @staticmethod
    def _scale_degrees(value):
        """Apply the notebook's degree-to-distance scale factor to ``value``."""
        # NOTE(review): 6500000/360 looks like an approximate degrees->metres
        # conversion, but metres per degree would normally be 2*pi*R/360.
        # Both sides of the stay-area comparison use this same factor, so the
        # filter is unaffected; confirm before treating `dist` as metres.
        return value * 6500000 / 360

    def extract_cluster_by_id(self, cluster_id):
        """Return the rows of the managed table whose ``cluster_id`` matches.

        Reads ``self.cluster_df`` (not a notebook-level global), so the result
        always reflects the table this manager was given.
        """
        return self.cluster_df[self.cluster_df["cluster_id"] == cluster_id]

    def get_location_from_cluster(self, cluster):
        """Return ``[longitude, latitude]`` of the first row of ``cluster``.

        Raises:
            IndexError: if ``cluster`` has no rows (e.g. an unknown id).
        """
        if len(cluster) == 0:
            raise IndexError("cluster selection is empty; no location to read")
        return cluster[["cluster_longitude", "cluster_latitude"]].values[0]

    def _detour_distance(self, id1, id2, longitude="cluster_longitude", latitude="cluster_latitude"):
        """Return, per row, the scaled distance to ``id1`` plus the distance to ``id2``.

        The result is an integer Series named ``dist`` that shares the index
        of the managed table; fractional parts are truncated.
        """
        x1, y1 = self.get_location_from_cluster(self.extract_cluster_by_id(id1))
        x2, y2 = self.get_location_from_cluster(self.extract_cluster_by_id(id2))
        lon = self.cluster_df[longitude]
        lat = self.cluster_df[latitude]
        to_first = ((lon - x1) ** 2 + (lat - y1) ** 2) ** (1 / 2)
        to_second = ((lon - x2) ** 2 + (lat - y2) ** 2) ** (1 / 2)
        scaled = self._scale_degrees(to_first + to_second)
        # Explicit 64-bit width so the result does not depend on the platform's
        # default integer size.
        return scaled.astype("int64").rename("dist")

    def set_dist_clolums_from_two_clustes(self, id1, id2, longitude = "cluster_longitude", latitude = "cluster_latitude"):
        """Add (or refresh) a ``dist`` column on the managed table.

        ``dist`` is the truncated, scaled sum of each row's distance to
        cluster ``id1`` and to cluster ``id2``.  ``longitude`` / ``latitude``
        name the coordinate columns read from the table.

        A new DataFrame is bound to ``self.cluster_df``; the frame previously
        held (and any outside reference to it) is left untouched, and no
        intermediate helper columns are kept.  Row order is unchanged.
        """
        distances = self._detour_distance(id1, id2, longitude, latitude)
        self.cluster_df = self.cluster_df.assign(dist=distances)

    def get_dist(self, id1, id2):
        """Return the scaled straight-line distance between two clusters."""
        x1, y1 = self.get_location_from_cluster(self.extract_cluster_by_id(id1))
        x2, y2 = self.get_location_from_cluster(self.extract_cluster_by_id(id2))
        return self._scale_degrees(((x1 - x2) ** 2 + (y1 - y2) ** 2) ** (1 / 2))

    def get_column_filter(self):
        """Return the current column index of the managed table."""
        return self.cluster_df.columns

    def filter_column(self, column_filter):
        """Restrict the managed table to the columns in ``column_filter``."""
        self.cluster_df = self.cluster_df[column_filter]

    def get_stay_area_flag_list(self, id1, id2):
        """Return a boolean Series flagging clusters in the stay area of the pair.

        A cluster is flagged when its summed distance to both endpoints is at
        most 1% longer than the direct distance between the endpoints.  The
        managed table itself is not modified.
        """
        threshold = self.get_dist(id1, id2) * 1.01
        return self._detour_distance(id1, id2) <= threshold

    def get_stay_area_df(self, id1, id2):
        """Return the rows of the managed table that lie in the stay area."""
        flags = self.get_stay_area_flag_list(id1, id2)
        return self.cluster_df[flags]

    def get_cluster_map(self, df=None):
        """Return a folium map with one blue marker per row of ``df``.

        ``df`` defaults to the managed table, so both ``get_cluster_map()``
        and ``get_cluster_map(some_df)`` work.
        """
        if df is None:
            df = self.cluster_df
        center = [df["cluster_latitude"].mean(), df["cluster_longitude"].mean()]
        cluster_map = folium.Map(location=center, zoom_start=10)

        # folium expects [latitude, longitude] order.
        for lat, lon in zip(df["cluster_latitude"], df["cluster_longitude"]):
            folium.CircleMarker([lat, lon], color = 'blue', weight = 5, radius=1).add_to(cluster_map)

        return cluster_map

    def get_stay_area_map(self, id1, id2):
        """Return a map of the stay area with both endpoints highlighted in red."""
        stay_area_df = self.get_stay_area_df(id1, id2)
        location1 = self.get_location_from_cluster(self.extract_cluster_by_id(id1))
        location2 = self.get_location_from_cluster(self.extract_cluster_by_id(id2))
        cluster_map = self.get_cluster_map(stay_area_df)
        # Locations are stored as [longitude, latitude]; swap for folium.
        for lon, lat in (location1, location2):
            folium.CircleMarker([lat, lon], color = 'red', weight = 10, radius=3).add_to(cluster_map)
        return cluster_map

    def get_set_stay_infor(self, stay_df=None):
        """Build and return the stay-area flag table for every cluster pair.

        The result has one row per cluster (same index as the managed table)
        and one boolean column per pair, named ``"<start>/<end>"``, where
        ``end`` runs over all ids up to and including ``start`` in table
        order.  ``stay_df`` is accepted for backward compatibility and is not
        used.

        The number of columns grows quadratically with the number of
        clusters, so this is expensive on large tables.
        """
        cluster_ids = self.cluster_df["cluster_id"].tolist()

        # Collect columns first and build the frame once, instead of inserting
        # columns one by one into a slice of the managed table.
        columns = {}
        for idx, start in enumerate(tqdm(cluster_ids)):
            for end in cluster_ids[:idx + 1]:
                columns[str(start) + "/" + str(end)] = self.get_stay_area_flag_list(start, end)

        return pd.DataFrame(columns, index=self.cluster_df.index)

# Build a manager over the loaded cluster table and render the stay-area map
# for one example pair of cluster ids. The remaining commented-out calls are
# alternative experiments kept for reference.
cm = ClusterManager(cluster_df)
# cm.get_stay_area_df(13, 10)
id1 = 15
id2 = 20
# Last expression of the cell: the returned map is rendered as the cell output.
cm.get_stay_area_map(id1, id2)
# cm.get_stay_area_flag_list(1, 100)
# NOTE(review): ClusterManager defines no `get_stay_area_flag_table`; the
# table-building method is named `get_set_stay_infor`.
# cm.get_stay_area_flag_table()
# cm.get_cluster_map()

In [15]:
# Number of rows in the cluster table.
len(cluster_df)

969

In [39]:
def draw_cluster(df, id1, id2):
    """Plot every row of ``df`` as a small blue circle on a folium map.

    The map is centred on the mean latitude/longitude of ``df``.  ``id1`` and
    ``id2`` are accepted but not used by the drawing logic.
    """
    mean_position = [df[column].mean() for column in ("cluster_latitude", "cluster_longitude")]
    cluster_map = folium.Map(location=mean_position, zoom_start=10)

    for label in df.index:
        lat = df.loc[label, "cluster_latitude"]
        lon = df.loc[label, "cluster_longitude"]
        marker = folium.CircleMarker([lat, lon], color='blue', weight=3, radius=1)
        marker.add_to(cluster_map)

    return cluster_map

# Draw all clusters on one map.
# NOTE(review): the name `map` shadows Python's built-in `map` for the rest of
# the kernel session, and the two id arguments are not used by draw_cluster.
map = draw_cluster(cluster_df, 10, 20)