In [1]:
import os

# Project root that the notebook runs from. Later cells import the local
# `bus` package, so the working directory has to be the project folder.
# The location can be overridden per machine with the BUS_PROJECT_PATH
# environment variable; the original hard-coded path remains the default.
project_path = os.environ.get("BUS_PROJECT_PATH", "C:/workspace/Bus Project")
os.chdir(project_path)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import folium
import datetime
import random

import bus.analyzer as anz

In [3]:
# Load the cluster table through the project's analyzer module (imported as `anz`).
cluster_df = anz.load_cluster_df()

In [4]:
# Show the loaded cluster table as this cell's output.
cluster_df

Unnamed: 0,cluster_id,cluster_group,cluster_target,cluster_longitude,cluster_latitude,tour_geton_usage,regident_geton_usage,tour_getoff_usage,regident_getoff_usage,total_usage
0,0.0,제주시,0.0,126.493047,33.506241,28678.0,382778.0,27997.0,209550.0,649003.0
1,1.0,제주시,1.0,126.514680,33.499755,12349.0,472516.0,7793.0,250057.0,742715.0
2,2.0,제주시,2.0,126.524287,33.511325,3180.0,205014.0,4019.0,161129.0,373342.0
3,3.0,제주시,3.0,126.527888,33.513440,6112.0,176127.0,3558.0,94720.0,280517.0
4,4.0,제주시,4.0,126.532737,33.495390,446.0,133885.0,441.0,86516.0,221288.0
...,...,...,...,...,...,...,...,...,...,...
1649,1650.0,서귀포시,705.0,126.607060,33.290130,0.0,2.0,0.0,7.0,9.0
1650,1651.0,서귀포시,706.0,126.882770,33.442380,0.0,3.0,0.0,6.0,9.0
1651,1652.0,서귀포시,707.0,126.845280,33.374930,0.0,2.0,0.0,6.0,8.0
1652,1653.0,서귀포시,708.0,126.759700,33.357750,0.0,2.0,0.0,6.0,8.0


In [5]:
class ClusterManager:
    """Distance and map helpers over a table of clusters.

    The managed frame must provide the columns ``cluster_id``,
    ``cluster_longitude`` and ``cluster_latitude``.  Distances are plain
    Euclidean distances computed on the raw longitude/latitude values and
    then scaled by ``6500000/360``.

    NOTE(review): the scale factor presumably converts degrees to metres;
    the constant is kept exactly as it was -- confirm it is the intended one.
    """

    # A cluster belongs to the "stay area" of two clusters when the sum of its
    # distances to both is at most this factor times their direct distance.
    STAY_AREA_TOLERANCE = 1.005

    def __init__(self, cluster_df):
        """Store the cluster table that every other method operates on."""
        self.cluster_df = cluster_df

    def set_cluster_df(self, cluster_df):
        """Replace the managed cluster table."""
        self.cluster_df = cluster_df

    def extract_cluster_by_id(self, cluster_id):
        """Return the rows of the managed table whose ``cluster_id`` matches."""
        # Read the instance's frame, not a notebook-global of the same name.
        frame = self.cluster_df
        return frame[frame["cluster_id"] == cluster_id]

    def get_location_from_cluster(self, cluster):
        """Return ``[longitude, latitude]`` of the first row of ``cluster``.

        Raises:
            IndexError: if ``cluster`` has no rows (e.g. an unknown id).
        """
        coords = cluster[["cluster_longitude", "cluster_latitude"]].values
        if len(coords) == 0:
            raise IndexError("cluster selection is empty; cannot read its location")
        return coords[0]

    @staticmethod
    def _scale(value):
        """Apply the distance scale factor to a scalar or a Series."""
        return value * 6500000 / 360

    def _location_by_id(self, cluster_id):
        """Look up ``[longitude, latitude]`` for a cluster id."""
        return self.get_location_from_cluster(self.extract_cluster_by_id(cluster_id))

    def _dist_series(self, id1, id2, longitude="cluster_longitude", latitude="cluster_latitude"):
        """Return, per row, the integer-truncated scaled sum of distances to both clusters.

        The managed frame is only read, never modified.
        """
        x1, y1 = self._location_by_id(id1)
        x2, y2 = self._location_by_id(id2)
        lon = self.cluster_df[longitude]
        lat = self.cluster_df[latitude]
        to_first = ((lon - x1) ** 2 + (lat - y1) ** 2) ** (1 / 2)
        to_second = ((lon - x2) ** 2 + (lat - y2) ** 2) ** (1 / 2)
        # astype truncates toward zero, matching int() on each value.
        return self._scale(to_first + to_second).astype("int64").rename("dist")

    def set_dist_clolums_from_two_clustes(self, id1, id2, longitude = "cluster_longitude", latitude = "cluster_latitude"):
        """Add (or overwrite) a ``dist`` column on the managed table.

        ``dist`` is the scaled sum of each row's distances to clusters ``id1``
        and ``id2``.  The result is stored as a new frame on ``self.cluster_df``;
        the frame object that was passed in is left untouched and no
        intermediate helper columns are kept.  Row order is unchanged.

        Args:
            id1, id2: ``cluster_id`` values of the two reference clusters.
            longitude, latitude: names of the coordinate columns to measure on.
        """
        self.cluster_df = self.cluster_df.assign(
            dist=self._dist_series(id1, id2, longitude, latitude)
        )

    def get_dist(self, id1, id2):
        """Return the scaled straight-line distance between two clusters."""
        x1, y1 = self._location_by_id(id1)
        x2, y2 = self._location_by_id(id2)
        return self._scale(((x1 - x2) ** 2 + (y1 - y2) ** 2) ** (1 / 2))

    def get_column_filter(self):
        """Return the current column labels of the managed table."""
        return self.cluster_df.columns

    def filter_column(self, column_filter):
        """Restrict the managed table to the given columns."""
        self.cluster_df = self.cluster_df[column_filter]

    def get_stay_area_flag_list(self, id1, id2):
        """Return a boolean Series marking the clusters in the stay area.

        A row is flagged when its summed distance to ``id1`` and ``id2`` is at
        most ``STAY_AREA_TOLERANCE`` times the direct distance between them,
        i.e. it lies inside a thin ellipse (in lon/lat space) whose foci are
        the two clusters.  The managed table is not modified.
        """
        threshold = self.get_dist(id1, id2) * self.STAY_AREA_TOLERANCE
        return self._dist_series(id1, id2) <= threshold

    def get_stay_area_df(self, id1, id2):
        """Return the rows of the managed table that lie in the stay area."""
        return self.cluster_df[self.get_stay_area_flag_list(id1, id2)]

    def get_cluster_map(self, df=None):
        """Draw every row of ``df`` as a small blue marker on a folium map.

        Args:
            df: frame with ``cluster_latitude`` / ``cluster_longitude``
                columns; defaults to the managed table when omitted.

        Returns:
            The folium map, centred on the mean coordinates of ``df``.
        """
        if df is None:
            df = self.cluster_df
        center = [df["cluster_latitude"].mean(), df["cluster_longitude"].mean()]
        cluster_map = folium.Map(location=center, zoom_start=10)

        # Iterate positionally so repeated index labels cannot turn a
        # coordinate into a Series.
        for lat, lon in zip(df["cluster_latitude"], df["cluster_longitude"]):
            folium.CircleMarker([lat, lon], color = 'blue', weight = 5, radius=1).add_to(cluster_map)

        return cluster_map

    def get_stay_area_map(self, id1, id2):
        """Map the stay area of two clusters, highlighting both in red."""
        stay_area_df = self.get_stay_area_df(id1, id2)
        location1 = self._location_by_id(id1)
        location2 = self._location_by_id(id2)
        stay_map = self.get_cluster_map(stay_area_df)
        # Locations are stored as [longitude, latitude]; folium wants [lat, lon].
        folium.CircleMarker([location1[1], location1[0]], color = 'red', weight = 10, radius=3).add_to(stay_map)
        folium.CircleMarker([location2[1], location2[0]], color = 'red', weight = 10, radius=3).add_to(stay_map)
        return stay_map

    def get_set_stay_infor(self, stay_df=None):
        """Build and return the stay-area flag table for all cluster pairs.

        For every pair ``(start, end)`` where ``end`` is at or before ``start``
        in the table order, the result has a boolean column named
        ``"<start>/<end>"`` holding ``get_stay_area_flag_list(start, end)``.

        Args:
            stay_df: unused; kept so existing calls keep working.

        Returns:
            DataFrame indexed like the managed table, one column per pair.

        Note:
            The number of columns grows quadratically with the number of
            clusters, so this is expensive on large tables.
        """
        cluster_ids = self.cluster_df["cluster_id"]

        # Collect the columns first and build the frame once, instead of
        # inserting columns one by one into a slice of the managed table.
        flag_columns = {}
        for idx, start in enumerate(tqdm(cluster_ids)):
            for end in cluster_ids.iloc[:idx+1]:
                flag_columns[str(start)+"/"+str(end)] = self.get_stay_area_flag_list(start, end)

        return pd.DataFrame(flag_columns, index=self.cluster_df.index)

# Build a manager over the loaded cluster table and render the stay area
# between two example clusters; the returned map is this cell's output.
cm = ClusterManager(cluster_df)
# cm.get_stay_area_df(13, 10)
id1 = 200
id2 = 800
cm.get_stay_area_map(id1, id2)
# Other calls tried during exploration (left disabled):
# cm.get_stay_area_flag_list(1, 100)
# cm.get_stay_area_flag_table()  # NOTE(review): ClusterManager defines no method with this name in this cell
# cm.get_cluster_map()

In [6]:
# Number of rows in the cluster table.
len(cluster_df)

1654

In [7]:
def draw_cluster(df, id1=None, id2=None):
    """Draw every row of ``df`` as a small blue marker on a folium map.

    Args:
        df: frame with ``cluster_latitude`` and ``cluster_longitude`` columns.
        id1, id2: currently unused; accepted so existing calls keep working.

    Returns:
        The folium map, centred on the mean coordinates of ``df``.
    """
    center = [df["cluster_latitude"].mean(), df["cluster_longitude"].mean()]
    cluster_map = folium.Map(location=center, zoom_start=10)

    # Walk the coordinate columns positionally: a label-based df.loc lookup
    # returns a Series instead of a scalar when index labels repeat.
    for lat, lon in zip(df["cluster_latitude"], df["cluster_longitude"]):
        folium.CircleMarker([lat, lon], color = 'blue', weight = 3, radius=1).add_to(cluster_map)

    return cluster_map

# NOTE(review): assigning to `map` shadows Python's built-in `map` for the
# rest of the session; the 10 and 20 arguments are not used by draw_cluster.
map = draw_cluster(cluster_df, 10, 20)