# 준비

In [None]:
import pandas as pd
import numpy as np
from plotnine import *
import seaborn as sns
# 지도 시각화를 위해
import folium
import matplotlib.pyplot as plt

In [None]:
# !sudo apt-get install -y fonts-nanum
# !sudo fc-cache -fv
# !rm ~/.cache/matplotlib -rf

In [None]:
# Google client libraries used by this notebook (Sheets helpers and BigQuery).
import gspread
from google.auth import default
from google.cloud import bigquery
from gspread_dataframe import get_as_dataframe, set_with_dataframe
from oauth2client.client import GoogleCredentials

# Take the credentials returned by google.auth.default() and authorize the
# gspread client with them.
creds, _ = default()
gc = gspread.authorize(creds)

In [None]:
import folium
from folium import plugins
from folium.plugins import HeatMap

In [None]:
# # 다른 노트북 작성할 때도 이 셀만 떼서 사용 가능하다.
# import platform

# # 웬만하면 해주는 것이 좋다.
# from matplotlib import font_manager, rc
# plt.rcParams['axes.unicode_minus']= False

# if platform.system() == 'Darwin': # 맥os 사용자의 경우에
#     plt.style.use('seaborn-darkgrid')
#     rc('font', family = 'AppleGothic')

# elif platform.system() == 'Windows':# 윈도우 사용자의 경우에
#     path = 'c:/Windows/Fonts/malgun.ttf'
#     font_name = font_manager.FontProperties(fname=path).get_name()
#     plt.style.use('seaborn-darkgrid') # https://python-graph-gallery.com/199-matplotlib-style-sheets/
#     rc('font', family=font_name)

In [None]:
# BigQuery script producing one row per Jeju car zone with state = 1
# (base_geo), with the following left-joined onto it by zone id:
#   * profit / operation aggregates for the last 28 days, for 2023 and for 2022
#     (p_28, p_2023, p_2022),
#   * average daily unique zone-click counts for the same three windows (zclick),
#   * counts of reservation-creation locations within 300 m / 500 m / 1000 m of
#     the zone and beyond 1000 m (base_loc).
# The yearly windows are closed on both ends so the 2022 and 2023 aggregates
# only contain rows dated inside their own calendar year.
# The airport zone ids (105, 9890) match the ones used by set_region().
query_base = '''
CREATE TEMP FUNCTION set_region(zone_id any TYPE) AS (
  CASE WHEN zone_id IN (737, 12689, 17046, 110, 2819, 2820, 8743, 8744, 10978, 11456, 13744, 13881, 14264, 15322, 16619, 16620, 17020, 17212, 17270, 17378, 10652, 2574, 5828, 8113, 14217, 17053, 17054, 17184, 17145, 12236, 12362, 12583, 14697, 12929, 12930, 17996, 18202, 18203, 10652, 12689, 12929, 12930, 18623, 18857,18867, 3558) THEN '공항주변'
       WHEN zone_id IN (10654, 2230, 16618, 17154, 14451, 12703, 18269, 12703, 10654, 18422, 18423, 18728, 18730, 1967) THEN '구서귀포'
       WHEN zone_id IN (651, 737, 2629, 2341, 2629, 4874, 12661, 11453, 10490, 14430, 14518, 14520, 15304, 16621, 16898, 17465, 114, 14167, 17208, 651, 18204, 18205, 4874, 11453, 2341, 18467, 18426, 4502, 6338) THEN '구제주'
       WHEN zone_id IN (879, 14420, 7906, 12762, 13657, 14943, 17196, 17463, 13657, 14943, 879, 18424, 7941) THEN '신서귀포'
       WHEN zone_id IN (3967, 3969, 10892, 11165, 12538, 12959, 12858, 12857, 13400, 7288, 11587, 13969, 10391, 12521, 12812, 14757, 16265, 4504, 8539, 8548, 8741, 11884, 13581, 13597, 14361, 14554, 15397, 16363, 16365, 16997, 17185, 13818, 17836, 18151, 17777, 18206, 3969, 7288, 12538, 13597, 13818, 13969, 11587, 12959, 14757, 10892, 3967, 1564, 16363, 16365, 15397, 18462) THEN '외곽'
       WHEN zone_id IN (17209) THEN '공항앞'
       WHEN zone_id IN (5894, 12636, 16126, 246, 13606, 16321, 18261, 5894) THEN '중문'
       WHEN zone_id IN (18471, 18472, 18473) THEN '실증사업'
       WHEN zone_id IN (105, 9890) THEN '제주공항' ELSE cast(zone_id as string) END
); ## 마지막 업데이트 2023.12.06 신규존 추가

WITH base_geo AS (
  SELECT
    extract(year FROM created_at) as cyear,
    zid, zname,
    region1, region2, region3,
    lng, lat,
    ST_GEOGPOINT(lng, lat) as geo_st,


  FROM
  (
    SELECT
      z.id as zid,
      z.zone_name as zname,
      z.region1, z.region2, z.region3,
      z.lng, z.lat, created_at,
    FROM `tianjin_replica.carzone_info` z
    WHERE z.region1 = '제주특별자치도'
    AND state = 1
  )

),

geo2 AS (
  SELECT
    zid, zname,
    region1, region2, region3,
    lng, lat,
    geo_st,
    -- ST_CLUSTERDBSCAN(geo_st, 5000, 3) OVER() as geo_group
  FROM base_geo
),

base_p AS (
  SELECT
    extract(year from date) as year,
    extract(month from date) as m,
    extract(isoweek from date) as w,
    date,

    CASE WHEN p.zone_id IN (105, 9890) THEN 'air'
         WHEN p.zone_id IN (17209) THEN 'air_infront'
         WHEN p.zone_id IN (18471, 18472, 18473) THEN 'test' ELSE 'common' END as part,

    set_region(p.zone_id) as part2,

    p.zone_id as zid,
    p.zone_name as zname,

    count(car_id) as cnt,
    sum(opr_day) as opr_day,
    sum(nuse) as use,
    sum(nuse_passport+nuse_socarpass) as use_p,
    sum(utime) as dur,
    sum(revenue) as revenue,
    sum(_rev_rent) as rent,
    sum(profit) as profit,
    sum(cost_variable) as vcost,
    sum(cost_fixed) as fcost,
    sum(nuse_round) as use_round,
    sum(nuse_oneway) as use_oneway,
    sum(nuse_d2d_round) as use_d2d_round,
    sum(nuse_d2d_oneway) as use_d2d_oneway,
    sum(nuse_z2d_oneway) as use_z_oneway,
    sum(cost_parking_zone) as parking_fee,
    sum(cost_transport_mobility) as transport_fee

  FROM    `socar_biz_profit.profit_socar_car_daily` p
  left join `tianjin_replica.carzone_info` z on p.zone_id=z.id
  WHERE TRUE AND car_sharing_type IN ('socar', 'zplus')
             AND date BETWEEN '2022-01-01' AND current_date('Asia/Seoul')
             AND p.region1 = '제주특별자치도'
             AND p.zone_id not in(122,2184,12072,12073,10736,10738,11947,11480,13228,13787,13858,14494,14528,14541,14542)
  GROUP BY year, m, w, date, part, part2, zid, zname
),

p_28 AS (
  SELECT
    part, part2, zid, zname,
    safe_divide(sum(dur), (sum(opr_day)*24)) as op_rate28,
    safe_divide(sum(dur), sum(use)) as dur_use28,
    safe_divide(sum(revenue), sum(opr_day)) as revenue_car28,
    safe_divide(sum(profit), sum(opr_day)) as profit_car28,
    avg(cnt) as cnt,
    avg(opr_day) as opr_day,
    sum(use) as use,
    sum(use_p) as use_p,
    sum(dur) as dur,
    sum(revenue) as revenue,
    sum(rent) as rent,
    sum(profit) as profit,
    sum(vcost) as vcost,
    sum(fcost) as fcost,
    sum(use_round) as use_round,
    sum(use_oneway) as use_oneway,
    sum(use_d2d_round) as use_d2d_round,
    sum(use_d2d_oneway) as use_d2d_oneway,
    sum(use_z_oneway) as use_z_oneway,
    sum(parking_fee) as parking_fee,
    sum(transport_fee) as transport_fee
  FROM base_p
  WHERE date >= current_date('Asia/Seoul') -28
  GROUP BY part, part2, zid, zname
),

p_2023 AS (
  SELECT
    part, part2, zid, zname,
    safe_divide(sum(dur), (sum(opr_day)*24)) as op_rate2023,
    safe_divide(sum(dur), sum(use)) as dur_use2023,
    safe_divide(sum(revenue), sum(opr_day)) as revenue_car2023,
    safe_divide(sum(profit), sum(opr_day)) as profit_car2023,
    avg(cnt) as cnt,
    avg(opr_day) as opr_day,
    sum(use) as use,
    sum(use_p) as use_p,
    sum(dur) as dur,
    sum(revenue) as revenue,
    sum(rent) as rent,
    sum(profit) as profit,
    sum(vcost) as vcost,
    sum(fcost) as fcost,
    sum(use_round) as use_round,
    sum(use_oneway) as use_oneway,
    sum(use_d2d_round) as use_d2d_round,
    sum(use_d2d_oneway) as use_d2d_oneway,
    sum(use_z_oneway) as use_z_oneway,
    sum(parking_fee) as parking_fee,
    sum(transport_fee) as transport_fee
  FROM base_p
  WHERE date BETWEEN '2023-01-01' AND '2023-12-31'
  GROUP BY part, part2, zid, zname
),

p_2022 AS (
  SELECT
    part, part2, zid, zname,
    safe_divide(sum(dur), (sum(opr_day)*24)) as op_rate2022,
    safe_divide(sum(dur), sum(use)) as dur_use2022,
    safe_divide(sum(revenue), sum(opr_day)) as revenue_car2022,
    safe_divide(sum(profit), sum(opr_day)) as profit_car2022,
    avg(cnt) as cnt,
    avg(opr_day) as opr_day,
    sum(use) as use,
    sum(use_p) as use_p,
    sum(dur) as dur,
    sum(revenue) as revenue,
    sum(rent) as rent,
    sum(profit) as profit,
    sum(vcost) as vcost,
    sum(fcost) as fcost,
    sum(use_round) as use_round,
    sum(use_oneway) as use_oneway,
    sum(use_d2d_round) as use_d2d_round,
    sum(use_d2d_oneway) as use_d2d_oneway,
    sum(use_z_oneway) as use_z_oneway,
    sum(parking_fee) as parking_fee,
    sum(transport_fee) as transport_fee
  FROM base_p
  WHERE date BETWEEN '2022-01-01' AND '2022-12-31'
  GROUP BY part, part2, zid, zname
),

base_p_union AS (
  SELECT
    g.*,
    p28.profit as profit28, p28.revenue as revenue28, p28.op_rate28, p28.dur_use28, p28.revenue_car28, p28.profit_car28, p28.opr_day as opr_28, p28.parking_fee as parking_fee28, p28.transport_fee as transport_fee28,
    p23.profit as profit23, p23.revenue as revenue23, p23.op_rate2023, p23.dur_use2023, p23.revenue_car2023, p23.profit_car2023, p23.opr_day as opr_23, p23.parking_fee as parking_fee23, p23.transport_fee as transport_fee23,
    p22.profit as profit22, p22.revenue as revenue22, p22.op_rate2022, p22.dur_use2022, p22.revenue_car2022, p22.profit_car2022, p22.opr_day as opr_22, p22.parking_fee as parking_fee22, p22.transport_fee as transport_fee22,



  FROM base_geo g LEFT JOIN p_28 p28 ON g.zid = p28.zid
                  LEFT JOIN p_2023 p23 ON g.zid = p23.zid
                  LEFT JOIN p_2022 p22 ON g.zid = p22.zid
),

base_zClick AS (
	select
			sdate,
      zid,
			zname,
			count(distinct(mid)) as unique_click_cnt,
			count(distinct(r.id)) as rev_cnt,
			safe_divide(count(distinct(r.id)),count(distinct(mid))) as rev_Rate,
			extract(isoweek from sdate) as w

	from(
		select
			date(a.event_timestamp,"Asia/Seoul") as date, #존을클릭한날
      date(a.start_at, "Asia/Seoul") as sdate,#존클릭예약시작조회날
			a.member_id as mid,#존클릭예약자
			c.id as class_id, #존클릭조회차량클래스id
			date(a.start_at,"Asia/Seoul") as start_at,#예약시작일
			date(a.end_at,"Asia/Seoul") as end_at, #예약종료일
			a.zone_id as zid,        #존id
			z.zone_name as zname,
		from socar_server_2.get_car_classes a , tianjin_replica.carzone_info z, unnest(carClasses) c
		where true=true and a.zone_id = z.id
                    AND z.id NOT IN(13858, 17651, 17784)
                    and date(a.event_timestamp,"Asia/Seoul") < current_Date("Asia/Seoul")
                    and date(a.start_at,"Asia/Seoul") >= '2019-01-01'
                    and z.region1 = '제주특별자치도'
				-- 조회시점에 따라 위 날짜 조건을 조정해야함
		) click #존클릭자수테이블

	join tianjin_replica.member_info m on m.id = click.mid and m.imaginary in (0) #멤버정보(쏘친,쏘팸)
	left join tianjin_replica.reservation_info r on r.member_id = click.mid #예약정보테이블
                                                and r.zone_id = click.zid
                                                and date(r.start_at, "Asia/Seoul") = click.sdate #예약시작과존클릭시작을leftjoin
                                                and r.state in (1,2,3) #예약,운행,완료
                                                and r.way in ('round') #왕복예약

	group by sdate, zname, zid
	order by sdate, zname, zid
),

zclick AS (
  SELECT
    zid, zname,
    avg(CASE WHEN sdate >= current_date('Asia/Seoul')-28 THEN unique_click_cnt END) as avg_unique_click_cnt28,
    avg(CASE WHEN sdate BETWEEN '2023-01-01' AND '2023-12-31' THEN unique_click_cnt END) as avg_unique_click_cnt2023,
    avg(CASE WHEN sdate BETWEEN '2022-01-01' AND '2022-12-31' THEN unique_click_cnt END) as avg_unique_click_cnt2022,
  FROM base_zClick
  GROUP BY zid, zname
),

click_union AS (
  SELECT
    p.*,
    zc.avg_unique_click_cnt28, zc.avg_unique_click_cnt2023, zc.avg_unique_click_cnt2022
  FROM base_p_union p LEFT JOIN zclick zc ON p.zid = zc.zid
),

base_loc AS (
  WITH tmp AS (
  SELECT
    TDDATE,
    callerLog.memberId AS member_id,
    DATETIME(TIMESTAMP_TRUNC(timeMs, SECOND), "Asia/Seoul") AS created_at_kst,
    locationAction.location.lng AS lng,
    locationAction.location.lat AS lat,
  FROM
    `socar_server_2.SAVE_LOCATION_ACTION_LOG`
  WHERE
    locationAction.viewAction="RESERVED_CAR_RENTAL"
    AND fullAccuracyLocationLog.isFullAccuracyLocation IS TRUE
    AND TIMESTAMP_TRUNC(timeMs, DAY) BETWEEN TIMESTAMP("2022-01-01") AND TIMESTAMP("2022-12-31")
  ),

  rv AS (
    SELECT
      id AS reservation_id,
      member_id,
      DATETIME(created_at, "Asia/Seoul") AS created_at_kst,
      zone_id
    FROM `tianjin_replica.reservation_info`
    WHERE
      member_imaginary IN (0, 9)
      AND state IN (1,2,3)
      AND DATE(created_at, 'Asia/Seoul') BETWEEN DATE('2022-01-01') AND DATE('2022-12-31')
  ),

  loc2022 AS (
    SELECT
      rv.reservation_id as rid,
      rv.member_id  as mid,
      cast(rv.created_at_kst as datetime) as created_at,
      rv.zone_id,
      tmp.lat,
      tmp.lng,
      z.lat as zone_lat,
      z.lng as zone_lng
    FROM rv
    LEFT JOIN tmp
    USING (member_id, created_at_kst)
    LEFT JOIN `tianjin_replica.carzone_info` z ON rv.zone_id = z.id
    WHERE tmp.lat is not null
    AND z.region1 = '제주특별자치도'
    AND z.state = 1
  ),

  loc2023 AS (
    SELECT
      date as rdate,
      reservation_id as rid,
      member_id as mid,
      zone_id,
      zone_lat, zone_lng,
      reservation_created_lng as lng,
      reservation_created_lat as lat,
      cast(reservation_created_at as datetime) as created_at,
    FROM `socar_data_queries_zone_stat_viz_mart.zsv_obt` a
    LEFT JOIN `tianjin_replica.carzone_info` z ON a.zone_id = z.id
    WHERE extract(year FROM reservation_created_at) = 2023
    AND reservation_created_lng is not null
    AND z.region1 = '제주특별자치도'
    AND z.state = 1
  ),

  base_total AS (
    SELECT * FROM loc2022
    UNION ALL
    SELECT rid, mid, created_at, zone_id, lat, lng, zone_lat, zone_lng FROM loc2023
    ORDER BY created_at desc
  ),

  d300 AS (
    SELECT
      zone_id as zid,
      z.zone_name as zname,
      count(CASE WHEN date(base_total.created_at) >= current_date('Asia/Seoul') -28 THEN mid ENd) as member_cnt28,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2023-01-01' AND '2023-12-31' THEN mid ENd) as member_cnt23,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2022-01-01' AND '2022-12-31' THEN mid ENd) as member_cnt22,
    FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
    WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) <= 300
    GROUP BY zid, zname
  ),

  d500 AS (
    SELECT
      zone_id as zid,
      z.zone_name as zname,
      count(CASE WHEN date(base_total.created_at) >= current_date('Asia/Seoul') -28 THEN mid ENd) as member_cnt28,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2023-01-01' AND '2023-12-31' THEN mid ENd) as member_cnt23,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2022-01-01' AND '2022-12-31' THEN mid ENd) as member_cnt22,
    FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
    WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) <= 500
    GROUP BY zid, zname
  ),

  d1000 AS (
    SELECT
      zone_id as zid,
      z.zone_name as zname,
      count(CASE WHEN date(base_total.created_at) >= current_date('Asia/Seoul') -28 THEN mid ENd) as member_cnt28,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2023-01-01' AND '2023-12-31' THEN mid ENd) as member_cnt23,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2022-01-01' AND '2022-12-31' THEN mid ENd) as member_cnt22,
    FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
    WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) <= 1000
    GROUP BY zid, zname
  ),

  d1000o AS (
    SELECT
      zone_id as zid,
      z.zone_name as zname,
      count(CASE WHEN date(base_total.created_at) >= current_date('Asia/Seoul') -28 THEN mid ENd) as member_cnt28,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2023-01-01' AND '2023-12-31' THEN mid ENd) as member_cnt23,
      count(CASE WHEN date(base_total.created_at) BETWEEN '2022-01-01' AND '2022-12-31' THEN mid ENd) as member_cnt22,
    FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
    WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) > 1000
    GROUP BY zid, zname
  )

  SELECT
    d3.zid, d3.zname,
    d3.member_cnt28 as member300_28, d3.member_cnt23 as member300_23, d3.member_cnt22 as member300_22,
    d5.member_cnt28 as member500_28, d5.member_cnt23 as member500_23, d5.member_cnt22 as member500_22,
    d10.member_cnt28 as member1000_28, d10.member_cnt23 as member1000_23, d10.member_cnt22 as member1000_22,
    d100.member_cnt28 as member1000o_28, d100.member_cnt23 as member1000o_23, d100.member_cnt22 as member1000o_22,

  FROM d300 d3 LEFT JOIN d500 d5 USING (zid, zname)
              LEFT JOIN d1000 d10 USING (zid, zname)
              LEFT JOIN d1000o d100 USING (zid, zname)
)

SELECT
  cu.*,
  bc.* EXCEPT (zid, zname)
FROM click_union cu
LEFT JOIN base_loc bc ON cu.zid = bc.zid
ORDER BY zid
'''

In [None]:
# Execute the zone-level query in the "socar-data" project and load the result
# into a DataFrame; the bare name below displays it in the notebook.
base = pd.read_gbq(query_base, project_id="socar-data")

base

Unnamed: 0,cyear,zid,zname,region1,region2,region3,lng,lat,geo_st,profit28,...,member300_23_1,member500_28,member500_23,member500_23_1,member1000_28,member1000_23,member1000_23_1,member1000o_28,member1000o_23,member1000o_23_1
0,2012,105,제주공항,제주특별자치도,제주시,도두2동,126.491417,33.494148,POINT(126.491417 33.494148),7.100751e+07,...,240,493,3674,304,676,5325,499,21824,101920,4111
1,2012,110,제주시외버스터미널,제주특별자치도,제주시,오라1동,126.514511,33.499702,POINT(126.514511 33.499702),7.944569e+04,...,5,29,147,7,38,204,11,42,254,61
2,2012,114,CGV제주 옆,제주특별자치도,제주시,이도2동,126.528336,33.500179,POINT(126.528336 33.500179),-7.552226e+04,...,20,39,416,30,53,560,45,46,503,87
3,2013,246,켄싱턴리조트 제주중문,제주특별자치도,서귀포시,색달동,126.412109,33.248318,POINT(126.412109 33.248318),-7.521326e+04,...,0,11,82,0,13,101,0,32,226,0
4,2015,1967,신신호텔 제주오션,제주특별자치도,서귀포시,서귀동,126.562408,33.245693,POINT(126.562408 33.245693),-1.232819e+06,...,0,17,17,0,22,22,0,26,26,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,2023,18623,용문로터리,제주특별자치도,제주시,용담이동,126.509826,33.508849,POINT(126.509826 33.508849),1.186649e+04,...,0,4,8,0,8,17,0,17,36,0
84,2023,18728,동문로터리,제주특별자치도,서귀포시,서귀동,126.568434,33.248415,POINT(126.568434 33.248415),-1.010736e+05,...,0,15,38,0,23,51,0,18,25,0
85,2023,18730,동홍동 대림아파트 부근,제주특별자치도,서귀포시,동홍동,126.572735,33.255422,POINT(126.572735 33.255422),5.244419e+03,...,0,11,19,0,16,27,0,13,27,0
86,2023,18857,관덕정 앞,제주특별자치도,제주시,삼도이동,126.522300,33.512750,POINT(126.5223 33.51275),2.603849e+04,...,0,21,23,0,28,30,0,40,44,0


In [None]:
# BigQuery query returning, per Jeju zone and car model (plus a "전체" row that
# counts all models together), the average daily row count of cars for the last
# 28 days, for 2023 and for 2022.
# The source scan runs up to the current date (Asia/Seoul) so the 28-day window
# still has data after the end of 2023; the yearly averages are bounded by
# their own CASE conditions in car_calc1.
query_car = """
WITH base_car AS (
    SELECT
        date,
        part,
        zid, zname,
        car_model,
        count(car_id) as cnt
    FROM (
        select
            o.date,
            case when o.zone_id in (105,9890) then 'air'
                when o.zone_id in (17209) then 'air_infront' else 'common' end as part,
            o.zone_id as zid,
            o.zone_name as zname,
            o.car_id,
            cl.car_model,

        from `socar-data.socar_biz.operation_per_car_daily_v2` o LEFT JOIN `tianjin_replica.car_info` c ON o.car_id = c.id
                                                                LEFT JOIN `tianjin_replica.car_class` cl ON c.class_id = cl.id
        where o.region1 in ("제주특별자치도") AND date BETWEEN '2022-01-01' AND current_date('Asia/Seoul')
            and o.sharing_type in ('socar','zplus')
            and o.zone_id not in (122,2184)
    )
    GROUP BY date, part, zid, zname, car_model
),

base_car2 AS (
    SELECT
        date,
        part,
        zid, zname,
        "전체" as car_model,
        count(car_id) as cnt
    FROM (
        select
            o.date,
            case when o.zone_id in (105,9890) then 'air'
                when o.zone_id in (17209) then 'air_infront' else 'common' end as part,
            o.zone_id as zid,
            o.zone_name as zname,
            o.car_id,

        from `socar-data.socar_biz.operation_per_car_daily_v2` o LEFT JOIN `tianjin_replica.car_info` c ON o.car_id = c.id
                                                                LEFT JOIN `tianjin_replica.car_class` cl ON c.class_id = cl.id
        where o.region1 in ("제주특별자치도") AND date BETWEEN '2022-01-01' AND current_date('Asia/Seoul')
            and o.sharing_type in ('socar','zplus')
            and o.zone_id not in (122,2184)
    )
    GROUP BY date, part, zid, zname, car_model
),

base_union AS (
    SELECT * FROM base_car
    UNION ALL
    SELECT * FROM base_car2
),

car_calc1 AS (
    SELECT
        zid, zname, car_model,
        avg(CASE WHEN date >= current_Date('Asia/Seoul') -28 THEN cnt END) as cnt28,
        avg(CASE WHEN date BETWEEN '2023-01-01' AND '2023-12-31' THEN cnt END) as cnt23,
        avg(CASE WHEN date BETWEEN '2022-01-01' AND '2022-12-31' THEN cnt END) as cnt22,
    FROM base_union
    GROUP BY zid, zname, car_model
)

SELECT *
FROM car_calc1
ORDER BY zid, car_model
"""

In [None]:
# Execute the per-zone / per-car-model query in the "socar-data" project and
# load the result into a DataFrame; the bare name below displays it.
base_car = pd.read_gbq(query_car, project_id="socar-data")

base_car

Unnamed: 0,zid,zname,car_model,cnt28,cnt23,cnt22
0,105,제주공항,EV,77.05,75.071233,50.616438
1,105,제주공항,RV,24.00,18.169863,8.000000
2,105,제주공항,경형,34.55,41.208219,72.063014
3,105,제주공항,소형SUV,66.00,59.391781,37.046575
4,105,제주공항,수입,6.00,6.000000,
...,...,...,...,...,...,...
446,18730,동홍동 대림아파트 부근,전체,1.00,1.000000,
447,18857,관덕정 앞,소형SUV,2.00,2.000000,
448,18857,관덕정 앞,전체,2.00,2.000000,
449,18867,제주 썬호텔(호텔투숙객 Only),전체,1.00,1.000000,


In [None]:
# Print the column list, non-null counts and dtypes of the full query result.
base.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 48 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   cyear                     88 non-null     Int64  
 1   zid                       88 non-null     Int64  
 2   zname                     88 non-null     object 
 3   region1                   88 non-null     object 
 4   region2                   88 non-null     object 
 5   region3                   88 non-null     object 
 6   lng                       88 non-null     float64
 7   lat                       88 non-null     float64
 8   geo_st                    88 non-null     object 
 9   revenue28                 80 non-null     float64
 10  op_rate28                 80 non-null     float64
 11  dur_use28                 80 non-null     float64
 12  revenue_car28             80 non-null     float64
 13  profit_car28              80 non-null     float64
 14  opr_28      

In [None]:
# Zone ids left out of the "common" analysis set. 105 / 9890 are the airport
# zones and 17209 the airport-front zone according to set_region() in
# query_base; the reason for excluding the remaining ids is not recorded here.
excluded_zids = [105, 9890, 17209, 4502, 7941, 12689, 13858, 18422, 18424, 18426]

# .copy() makes the filtered frames independent of `base` / `base_car`, so the
# columns added to them later do not trigger pandas' SettingWithCopyWarning.
base_common = base[~base['zid'].isin(excluded_zids)].copy()
base_car_common = base_car[~base_car['zid'].isin(excluded_zids)].copy()

In [None]:
# Print the column list, non-null counts and dtypes after the zone exclusion.
base_common.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 79 entries, 1 to 87
Data columns (total 48 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   cyear                     79 non-null     Int64  
 1   zid                       79 non-null     Int64  
 2   zname                     79 non-null     object 
 3   region1                   79 non-null     object 
 4   region2                   79 non-null     object 
 5   region3                   79 non-null     object 
 6   lng                       79 non-null     float64
 7   lat                       79 non-null     float64
 8   geo_st                    79 non-null     object 
 9   revenue28                 78 non-null     float64
 10  op_rate28                 78 non-null     float64
 11  dur_use28                 78 non-null     float64
 12  revenue_car28             78 non-null     float64
 13  profit_car28              78 non-null     float64
 14  opr_28      

In [None]:
# Count missing values per column and show them as a one-column frame named 'null'.
base_common.isnull().sum().to_frame('null')

Unnamed: 0,null
cyear,0
zid,0
zname,0
region1,0
region2,0
region3,0
lng,0
lat,0
geo_st,0
revenue28,1


# 시각화 준비

In [None]:
# Short working names for the visualisation steps. These are plain aliases
# (no copy is made), so changes made through them also show up on
# base_common / base_car_common.
df, df_car = base_common, base_car_common

# 시각화(test)

In [None]:
## 1. 최근 28일의 평균 지표값 생성

In [None]:

# Mean of each recent-28-day metric across all zones in df.
op_28_mean = df['op_rate28'].mean()
revenue_car28_mean = df['revenue_car28'].mean()
# Use the 28-day per-car profit here (not the 2022 figure) so the value matches
# its name and the other 28-day means.
profit_car28_mean = df['profit_car28'].mean()
# The click column is named avg_unique_click_cnt28 by the zclick CTE in query_base.
click_28 = df['avg_unique_click_cnt28'].mean()


In [None]:
## 2. 최근 28일의 평균 지표 대비 높다면 1, 아니면 0

In [None]:
# Flag each zone with 1 when its recent-28-day metric is above the mean of that
# same metric across all zones in df, otherwise 0 (missing values compare as
# False and therefore get 0).
# NOTE: 'revenue28' and 'profit28' replace the query's raw 28-day totals of the
# same name with these 0/1 flags.
flag_sources = {
    'op28': 'op_rate28',
    'revenue28': 'revenue_car28',
    'profit28': 'profit_car28',  # 28-day per-car profit, not the 2022 figure
    'click28': 'avg_unique_click_cnt28',  # column name produced by query_base
}
for flag_col, metric_col in flag_sources.items():
    df[flag_col] = np.where(df[metric_col] > df[metric_col].mean(), 1, 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [None]:
# Separate frame for the 28-day view: zone identity, coordinates and the four
# 28-day flag columns.
df_28 = df.loc[
    :,
    ['zid', 'zname', 'lng', 'lat', 'op28', 'revenue28', 'profit28', 'click28'],
]

In [None]:
# Print the column list, non-null counts and dtypes of the 28-day flag frame.
df_28.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 82 entries, 1 to 91
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   zid        82 non-null     Int64  
 1   zname      82 non-null     object 
 2   lng        82 non-null     float64
 3   lat        82 non-null     float64
 4   op28       82 non-null     int64  
 5   revenue28  82 non-null     int64  
 6   profit28   82 non-null     int64  
 7   click28    82 non-null     int64  
dtypes: Int64(1), float64(2), int64(4), object(1)
memory usage: 5.8+ KB


In [None]:
!pip install dash

Collecting dash
  Downloading dash-2.14.2-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m49.9 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting ansi2html (from dash)
  Downloading ansi2html-1.9.1-py3-none-any.whl (17 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, retrying, ansi2html, dash
Successfully installed ansi2html-1.9.1 dash-2.14.2 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 retrying-1.3.4


In [None]:
## dash lib
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px

## dash app 만들기
app = dash.Dash(__name__)

## layout 정의
app.layout = html.Div([
    dcc.Graph(
        id='scatter-plot',
        figure={
            'data': [
                {'x':base_common['op_rate28'], 'y':base_common['revenue_car28'], 'type': 'scatter', 'mode':'markers', 'name': 'revenue_car'},
                {'x':base_common['op_rate28'], 'y':base_common['profit_car28'], 'type': 'scatter', 'mode':'markers', 'name': 'profit_car'},
                {'x':base_common['op_rate28'], 'y':base_common['unique_click_cnt28'], 'type': 'scatter', 'mode':'markers', 'name': 'clicks'}
            ],
            'layout': {'title': 'Scatter Plot of op28 vs revenue, profit, click'}
        }
    )
])

## 실행

if __name__ == '__main__':
  app.run_server(devug=True)

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`


<IPython.core.display.Javascript object>

In [None]:
## 대체로 가동률이 높다면 손익과 클릭도 높지만, 그렇지 않은 아웃라이어도 존재한다

In [None]:
# Notebook shell escape: make sure plotly is installed in the runtime.
!pip install plotly



In [None]:
# Print df's column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 82 entries, 1 to 91
Data columns (total 58 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   zid                       82 non-null     Int64  
 1   zname                     82 non-null     object 
 2   region1                   82 non-null     object 
 3   region2                   82 non-null     object 
 4   region3                   82 non-null     object 
 5   lng                       82 non-null     float64
 6   lat                       82 non-null     float64
 7   geo_st                    82 non-null     object 
 8   op_rate28                 82 non-null     float64
 9   dur_use28                 81 non-null     float64
 10  revenue_car28             82 non-null     float64
 11  profit_car28              82 non-null     float64
 12  op_rate2023               82 non-null     float64
 13  dur_use2023               82 non-null     float64
 14  revenue_car2

In [None]:

# Column means over all rows of `df` for the "28" metrics.
op_28_mean = df['op_rate28'].mean()
revenue_car28_mean = df['revenue_car28'].mean()
# Fixed: this used to average 'profit_car2022', contradicting the variable
# name and the other three statements in this cell.
profit_car28_mean = df['profit_car28'].mean()
click_28 = df['unique_click_cnt28'].mean()

In [None]:
# Keep only rows that are strictly above the column mean for profit_car,
# op_rate and revenue_car in every period (28, 2023, 2022).
filter1_cols = [
    f'{metric}{period}'
    for period in ('28', '2023', '2022')
    for metric in ('profit_car', 'op_rate', 'revenue_car')
]
df_filter1 = df.loc[
    lambda d: d[filter1_cols].gt(d[filter1_cols].mean()).all(axis=1)
]

# 연도별 데이터

In [None]:
# BigQuery script that produces one row per (part, part2, zone, year, ISO week):
#   * base_p   - sums of the daily per-car profit table per zone and date
#   * base_loc - per zone/year/week counts of reservations whose creation
#                location lies within 300 m / 500 m / 1000 m of the zone, or
#                farther than 1000 m (2022 from the location log, 2023 from zsv_obt)
# The final SELECT left-joins base_loc onto base_p and derives op_rate,
# dur_use, revenue_car and profit_car with SAFE_DIVIDE.
#
# Fixes relative to the previous version:
#   * base_loc selected `d3,year` (the whole d3 row as a STRUCT plus `year`);
#     it now selects `d3.year` like the neighbouring columns.
#   * the `part` CASE listed zone 990 for 'air', while set_region() lists the
#     airport zones as (105, 9890); the id is now 9890 in both places.
query_2023 = """
CREATE TEMP FUNCTION set_region(zone_id any TYPE) AS (
  CASE WHEN zone_id IN (737, 12689, 17046, 110, 2819, 2820, 8743, 8744, 10978, 11456, 13744, 13881, 14264, 15322, 16619, 16620, 17020, 17212, 17270, 17378, 10652, 2574, 5828, 8113, 14217, 17053, 17054, 17184, 17145, 12236, 12362, 12583, 14697, 12929, 12930, 17996, 18202, 18203, 10652, 12689, 12929, 12930, 18623, 18857,18867, 3558) THEN '공항주변'
       WHEN zone_id IN (10654, 2230, 16618, 17154, 14451, 12703, 18269, 12703, 10654, 18422, 18423, 18728, 18730, 1967) THEN '구서귀포'
       WHEN zone_id IN (651, 737, 2629, 2341, 2629, 4874, 12661, 11453, 10490, 14430, 14518, 14520, 15304, 16621, 16898, 17465, 114, 14167, 17208, 651, 18204, 18205, 4874, 11453, 2341, 18467, 18426, 4502, 6338) THEN '구제주'
       WHEN zone_id IN (879, 14420, 7906, 12762, 13657, 14943, 17196, 17463, 13657, 14943, 879, 18424, 7941) THEN '신서귀포'
       WHEN zone_id IN (3967, 3969, 10892, 11165, 12538, 12959, 12858, 12857, 13400, 7288, 11587, 13969, 10391, 12521, 12812, 14757, 16265, 4504, 8539, 8548, 8741, 11884, 13581, 13597, 14361, 14554, 15397, 16363, 16365, 16997, 17185, 13818, 17836, 18151, 17777, 18206, 3969, 7288, 12538, 13597, 13818, 13969, 11587, 12959, 14757, 10892, 3967, 1564, 16363, 16365, 15397, 18462) THEN '외곽'
       WHEN zone_id IN (17209) THEN '공항앞'
       WHEN zone_id IN (5894, 12636, 16126, 246, 13606, 16321, 18261, 5894) THEN '중문'
       WHEN zone_id IN (18471, 18472, 18473) THEN '실증사업'
       WHEN zone_id IN (105, 9890) THEN '제주공항' ELSE cast(zone_id as string) END
); ## 마지막 업데이트 2023.12.06 신규존 추가

WITH base_geo AS (
  SELECT
    extract(year FROM created_at) as cyear,
    zid, zname,
    region1, region2, region3,
    lng, lat,
    ST_GEOGPOINT(lng, lat) as geo_st,


  FROM
  (
    SELECT
      z.id as zid,
      z.zone_name as zname,
      z.region1, z.region2, z.region3,
      z.lng, z.lat, created_at,
    FROM `tianjin_replica.carzone_info` z
    WHERE z.region1 = '제주특별자치도'
    AND state = 1
  )

),

geo2 AS (
  SELECT
    zid, zname,
    region1, region2, region3,
    lng, lat,
    geo_st,
    -- ST_CLUSTERDBSCAN(geo_st, 5000, 3) OVER() as geo_group
  FROM base_geo
),

base_p AS (
  SELECT
    extract(year from date) as year,
    extract(month from date) as m,
    extract(isoweek from date) as w,
    date,

    CASE WHEN p.zone_id IN (105, 9890) THEN 'air'
         WHEN p.zone_id IN (17209) THEN 'air_infront'
         WHEN p.zone_id IN (18471, 18472, 18473) THEN 'test' ELSE 'common' END as part,

    set_region(p.zone_id) as part2,

    p.zone_id as zid,
    p.zone_name as zname,

    count(car_id) as cnt,
    sum(opr_day) as opr_day,
    sum(nuse) as use,
    sum(nuse_passport+nuse_socarpass) as use_p,
    sum(utime) as dur,
    sum(revenue) as revenue,
    sum(_rev_rent) as rent,
    sum(profit) as profit,
    sum(cost_variable) as vcost,
    sum(cost_fixed) as fcost,
    sum(nuse_round) as use_round,
    sum(nuse_oneway) as use_oneway,
    sum(nuse_d2d_round) as use_d2d_round,
    sum(nuse_d2d_oneway) as use_d2d_oneway,
    sum(nuse_z2d_oneway) as use_z_oneway,
    sum(cost_parking_zone) as parking_fee,
    sum(cost_transport_mobility) as transport_fee

  FROM    `socar_biz_profit.profit_socar_car_daily` p
  left join `tianjin_replica.carzone_info` z on p.zone_id=z.id
  WHERE TRUE AND car_sharing_type IN ('socar', 'zplus')
             AND date >= '2022-01-01'
             AND p.region1 = '제주특별자치도'
             AND zone_id not in(122,2184,12072,12073,10736,10738,11947,11480,13228,13787,13858,14494,14528,14541,14542)
  GROUP BY year, m, w, date, part, part2, zid, zname
),

base_loc AS (
  WITH tmp AS (
    SELECT
      TDDATE,
      callerLog.memberId AS member_id,
      DATETIME(TIMESTAMP_TRUNC(timeMs, SECOND), "Asia/Seoul") AS created_at_kst,
      locationAction.location.lng AS lng,
      locationAction.location.lat AS lat,
    FROM
      `socar_server_2.SAVE_LOCATION_ACTION_LOG`
    WHERE
      locationAction.viewAction="RESERVED_CAR_RENTAL"
      AND fullAccuracyLocationLog.isFullAccuracyLocation IS TRUE
      AND TIMESTAMP_TRUNC(timeMs, DAY) BETWEEN TIMESTAMP("2022-01-01") AND TIMESTAMP("2022-12-31")
    ),

    rv AS (
      SELECT
        id AS reservation_id,
        member_id,
        DATETIME(created_at, "Asia/Seoul") AS created_at_kst,
        zone_id
      FROM `tianjin_replica.reservation_info`
      WHERE
        member_imaginary IN (0, 9)
        AND state IN (1,2,3)
        AND DATE(created_at, 'Asia/Seoul') BETWEEN DATE('2022-01-01') AND DATE('2022-12-31')
    ),

    loc2022 AS (
      SELECT
        rv.reservation_id as rid,
        rv.member_id  as mid,
        date(rv.created_at_kst) as cdate,
        extract(isoweek FROM rv.created_at_kst) as week,
        extract(year FROM rv.created_at_kst) as year,
        rv.zone_id,
        tmp.lat,
        tmp.lng,
        z.lat as zone_lat,
        z.lng as zone_lng
      FROM rv
      LEFT JOIN tmp
      USING (member_id, created_at_kst)
      LEFT JOIN `tianjin_replica.carzone_info` z ON rv.zone_id = z.id
      WHERE tmp.lat is not null
      AND z.region1 = '제주특별자치도'
      AND z.state = 1
    ),

    loc2023 AS (
      SELECT
        date as rdate,
        reservation_id as rid,
        member_id as mid,
        zone_id,
        zone_lat, zone_lng,
        reservation_created_lng as lng,
        reservation_created_lat as lat,
        date(reservation_created_at, 'Asia/Seoul') as cdate,
        extract(isoweek FROM reservation_created_at) week,
        extract(year FROM reservation_created_at) year,
      FROM `socar_data_queries_zone_stat_viz_mart.zsv_obt` a
      LEFT JOIN `tianjin_replica.carzone_info` z ON a.zone_id = z.id
      WHERE extract(year FROM reservation_created_at) = 2023
      AND reservation_created_lng is not null
      AND z.region1 = '제주특별자치도'
      AND z.state = 1
    ),

    base_total AS (
      SELECT * FROM loc2022
      UNION ALL
      SELECT rid, mid, cdate, week, year, zone_id, lat, lng, zone_lat, zone_lng FROM loc2023
      ORDER BY cdate desc
    ),

    d300 AS (
      SELECT
        zone_id as zid,
        z.zone_name as zname,
        year, week,
        count(mid) as member_cnt
      FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
      WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) <= 300
      GROUP BY zid, zname, year, week
    ),

    d500 AS (
      SELECT
        zone_id as zid,
        z.zone_name as zname,
        year, week,
        count(mid) as member_cnt
      FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
      WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) <= 500
      GROUP BY zid, zname , year, week),

    d1000 AS (
      SELECT
        zone_id as zid,
        z.zone_name as zname,
        year, week,
        count(mid) as member_cnt
      FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
      WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) <= 1000
      GROUP BY zid, zname , year, week
    ),

    d1000o AS (
      SELECT
        zone_id as zid,
        z.zone_name as zname,
        year, week,
        count(mid) as member_cnt
      FROM base_total LEFT JOIN `tianjin_replica.carzone_info` z ON base_total.zone_id = z.id
      WHERE ST_DISTANCE(ST_GeogPoint(zone_lng, zone_lat), ST_Geogpoint(base_total.lng, base_total.lat)) > 1000
      GROUP BY zid, zname, year, week
  )

    SELECT
      d3.year,
      d3.week,
      d3.zid, d3.zname,
      d3.member_cnt as member_300,
      d5.member_cnt as member_500,
      d10.member_cnt as member_1000,
      d100.member_cnt as member_1000o
    FROM d300 d3
    LEFT JOIN d500 d5 USING (zid, zname, year, week)
    LEFT JOIN d1000 d10 USING (zid, zname, year, week)
    LEFT JOIN d1000o d100 USING (zid, zname, year, week)
  )

SELECT
  part, part2, base_p.zid, base_p.zname,
  base_p.year, w,
  round(safe_divide(sum(dur), (sum(opr_day)*24)), 2) as op_rate,
  round(safe_divide(sum(dur), sum(use)), 0) as dur_use,
  round(safe_divide(sum(revenue), sum(opr_day)), 0) as revenue_car,
  round(safe_divide(sum(profit), sum(opr_day)), 0) as profit_car,
  round(avg(opr_day), 0) as opr_day,
  sum(use) as use,
  round(sum(dur), 0) as dur,
  round(sum(revenue), 0) as revenue,
  round(sum(profit),0 ) as profit,
  sum(use_round) as use_round,
  sum(use_d2d_round) as use_d2d_round,
  sum(use_d2d_oneway) as use_d2d_oneway,
  round(sum(parking_fee), 0) as parking_fee,
  round(sum(transport_fee), 0) as transport_fee,
  sum(member_300) as member_300,
  sum(member_500) as member_500,
  sum(member_1000) as member_1000,
  sum(member_1000o) as member_1000o
FROM base_p
LEFT JOIN base_loc m ON base_p.year = m.year AND base_p.w = m.week AND base_p.zid = m.zid
WHERE date >= '2022-01-01'
GROUP BY part, part2, zid, zname, year, w
"""

In [None]:
# Run query_2023 against the "socar-data" project and load the result as a DataFrame.
base2023 = pd.read_gbq(query_2023, project_id="socar-data")

# Display the result in the notebook.
base2023

Unnamed: 0,part,part2,zid,zname,year,w,op_rate,dur_use,revenue_car,profit_car,...,profit,use_round,use_d2d_round,use_d2d_oneway,parking_fee,transport_fee,member_300,member_500,member_1000,member_1000o
0,common,외곽,16363,O-PEACE 사계점(운영종료),2022,46,0.15,7.0,31863.0,-4333.0,...,-17330.0,2,0,0,0.0,2220.0,,,,
1,common,공항주변,17145,연동 삼성홈타운(운영종료),2023,29,0.68,38.0,62907.0,21998.0,...,153986.0,3,0,0,0.0,4332.0,,,,
2,common,공항주변,18202,뉴월드마트 신제주점 옆,2023,37,0.52,12.0,62532.0,8891.0,...,62239.0,7,0,0,49434.0,2647.0,14,14,14,
3,common,공항주변,18203,메종글래드 제주 옆,2023,29,0.29,4.0,51027.0,8492.0,...,25476.0,5,0,0,17085.0,1803.0,6,6,6,
4,common,외곽,17185,(운영종료)스테이상상,2023,22,0.74,25.0,64683.0,12120.0,...,84837.0,5,0,0,16250.0,3413.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6444,common,중문,16126,천제연폭포 앞,2023,44,0.42,9.0,58316.0,7254.0,...,101556.0,16,0,0,27210.0,6131.0,42,49,77,35
6445,common,중문,16126,천제연폭포 앞,2022,50,0.43,9.0,54393.0,2995.0,...,20963.0,8,0,0,12194.0,6081.0,7,7,7,
6446,common,중문,16126,천제연폭포 앞,2023,15,0.52,15.0,57524.0,12509.0,...,87561.0,6,0,0,13732.0,3269.0,7,7,21,14
6447,common,중문,16126,천제연폭포 앞,2023,8,0.38,8.0,56291.0,12949.0,...,90643.0,8,0,0,13375.0,2584.0,,,,


# 시각화

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Coerce member300_23 to numeric (unparseable values become NaN) and replace
# NaN with 0. The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `df['member300_23']`: that chained in-place
# call raises SettingWithCopyWarning and does not update `df` when pandas
# Copy-on-Write is active.
df['member300_23'] = pd.to_numeric(df['member300_23'], errors='coerce').fillna(0)

# Convert the member300_23 column to a plain list.
size_values = df['member300_23'].tolist()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
## Zone distribution by zone revenue and operation rate (one colour per zone name)
fig = px.scatter(
    data_frame=df,
    x='revenue_car2023',
    y='op_rate2023',
    color='zname',
    title='Revenue vs Operation Rate by Zone',
    labels={'revenue_car2023': 'Revenue  Car', 'op_rate2023': 'Operation Rate'},
)

fig.show()

In [None]:
# GP columns: profit divided by revenue, for the 23, 22 and 28 column sets.
df['GP23'] = df['profit23'].div(df['revenue23'])
df['GP22'] = df['profit22'].div(df['revenue22'])
df['GP28'] = df['profit28'].div(df['revenue28'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Round profit_car2023 to whole numbers and store it as an integer column.
df['profit_car2023'] = (
    df['profit_car2023']
    .round(decimals=0)
    .astype(int)
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
## Group whose 2023 metrics (profit per car, operation rate, revenue per car) are all above the mean
filter1_cols = ['profit_car2023', 'op_rate2023', 'revenue_car2023']
df_filter1 = df.loc[
    lambda d: d[filter1_cols].gt(d[filter1_cols].mean()).all(axis=1)
]
# Rows of df_car whose car_model is '전체'.
df_car_filter = df_car.loc[df_car['car_model'].eq('전체')]
df_merge = df_filter1.merge(df_car_filter, on=['zname', 'zid'], how='left')

In [None]:
## Zone distribution by revenue per car and profit per car (marker size: op_rate2023)
fig = px.scatter(
    data_frame=df_merge,
    x='revenue_car2023',
    y='profit_car2023',
    color='zname',
    size='op_rate2023',
    title='대당 매출, 대당 손익에 따른 분포도(크기 : 가동률)',
    labels={'revenue_car2023': 'Revenue  Car', 'profit_car2023': 'Profit Car'},
)

fig.show()

In [None]:
## Visualize filter1: GP23 per zone
# Fixed: `labels` used to rename 'op_rate2023', a column this chart does not
# plot, so the GP23 axis kept its raw column name.
fig = px.bar(df_merge, x="zname", y='GP23',
             title='23년 대당 매출, 대당 손익, 가동률이 모두 평균 이상인 존과 GP',
             labels={'zname' : '실적 높은 존', 'GP23': '23년 GP'} )

fig.show()

In [None]:
## Visualize filter1: profit_car2023 per zone
# Fixed: the title ended with "평균 가동률" (average operation rate) although
# the bars show profit_car2023; it now names the plotted metric.
fig = px.bar(df_merge, x="zname", y='profit_car2023',
             title='23년 대당 매출, 대당 손익, 가동률이 모두 평균 이상인 존과 대당 손익',
             labels={'zname' : '실적 높은 존', 'profit_car2023': '23년 대당 손익'} )

fig.show()

In [None]:
## Visualize filter1: GP23 per zone
import plotly.express as px

# Fixed: the title ended with "평균 가동률" (average operation rate) although
# the bars show GP23; it now names the plotted metric.
fig = px.bar(df_merge, x="zname", y='GP23',
             title='23년 대당 매출, 대당 손익, 가동률이 모두 평균 이상인 존과 GP',
             labels={'zname' : '실적 높은 존', 'GP23': '23년 GP'} )

fig.show()

In [None]:
## Group whose 2023 profit per car OR revenue per car is above the mean
df_filter1 = df.loc[
    lambda d: d['profit_car2023'].gt(d['profit_car2023'].mean())
    | d['revenue_car2023'].gt(d['revenue_car2023'].mean())
]
# Rows of df_car whose car_model is '전체'.
df_car_filter = df_car.loc[df_car['car_model'].eq('전체')]
df_merge = df_filter1.merge(df_car_filter, on=['zname', 'zid'], how='left')

In [None]:
## Metric distribution of zones whose 2023 profit per car or 2023 revenue is above average
fig = px.scatter(
    data_frame=df_merge,
    x='revenue_car2023',
    y='profit_car2023',
    color='zname',
    size='opr_23',
    title='대당 매출, 대당 손익에 따른 분포도(크기 : 운영대수)',
    labels={'revenue_car2023': 'Revenue  Car', 'profit_car2023': 'Profit Car'},
)

fig.show()

In [None]:
## Visualize filter1: profit_car2023 per zone
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='profit_car2023',
    title='23년 대당 매출 혹은 23년 대당 손익이 평균 이상인 존',
    labels={'zname': '실적 높은 존', 'profit_car2023': '23년 대당 손익'},
)

fig.show()

In [None]:
## Visualize filter1: revenue_car2023 per zone
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='revenue_car2023',
    title='23년 대당 매출 혹은 23년 대당 손익이 평균 이상인 존',
    labels={'zname': '실적 높은 존', 'revenue_car2023': '23년 대당 매출'},
)

fig.show()

In [None]:
## Zones whose 2022 revenue per car was below the mean but whose 2023 value is above the mean
df_filter = df.loc[
    lambda d: d['revenue_car2022'].lt(d['revenue_car2022'].mean())
    & d['revenue_car2023'].gt(d['revenue_car2023'].mean())
]
# Drop rows whose cyear is 2023.
df_filter = df_filter.loc[df_filter['cyear'].ne(2023)]
df_merge = df_filter.merge(df_car_filter, on=['zname', 'zid'], how='left')

In [None]:
## Distribution of zones whose revenue per car improved from 2022 to 2023
fig = px.scatter(
    data_frame=df_merge,
    x='revenue_car2023',
    y='profit_car2023',
    color='zname',
    size='opr_23',
    title='대당 매출, 대당 손익에 따른 분포도(크기 : 운영대수)',
    labels={'revenue_car2023': 'Revenue  Car', 'profit_car2023': 'Profit Car'},
)

fig.show()

In [None]:
## Bar chart: revenue_car2023 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='revenue_car2023',
    title='22년 대당 매출은 평균 미만이지만, 23년 대당 매출은 평균 이상인 존',
    labels={'zname': '실적 높은 존', 'revenue_car2023': '23년 대당 매출'},
)

fig.show()

In [None]:
## Bar chart: profit_car2023 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='profit_car2023',
    title='22년 대당 매출은 평균 미만이지만, 23년 대당 매출은 평균 이상인 존',
    labels={'zname': '실적 높은 존', 'profit_car2023': '23년 대당 손익'},
)

fig.show()

In [None]:
## Bar chart: op_rate2023 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='op_rate2023',
    title='22년 대당 매출은 평균 미만이지만, 23년 대당 매출은 평균 이상인 존',
    labels={'zname': '실적 높은 존', 'op_rate2023': '23년 가동률'},
)

fig.show()

In [None]:
## Bar chart: GP23 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='GP23',
    title='22년 대당 매출은 평균 미만이지만, 23년 대당 매출은 평균 이상인 존',
    labels={'zname': '실적 높은 존', 'GP23': '23년 GP'},
)

fig.show()

In [None]:
## Zones whose 2022 revenue per car was above the mean but whose 2023 value is below the mean
df_filter = df.loc[
    lambda d: d['revenue_car2022'].gt(d['revenue_car2022'].mean())
    & d['revenue_car2023'].lt(d['revenue_car2023'].mean())
]
# Drop rows whose cyear is 2023.
df_filter = df_filter.loc[df_filter['cyear'].ne(2023)]
df_merge = df_filter.merge(df_car_filter, on=['zname', 'zid'], how='left')

In [None]:
## Zones whose 2023 revenue per car fell versus 2022
fig = px.scatter(
    data_frame=df_merge,
    x='revenue_car2023',
    y='profit_car2023',
    color='zname',
    size='opr_23',
    title='대당 매출, 대당 손익에 따른 분포도(크기 : 운영대수)',
    labels={'revenue_car2023': 'Revenue  Car', 'profit_car2023': 'Profit Car'},
)

fig.show()

In [None]:
# Display the mean revenue_car2023 of the rows currently in df_merge.
df_merge['revenue_car2023'].mean()

53042.8062372603

In [None]:
## Bar chart: revenue_car2023 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='revenue_car2023',
    title='22년 대당 매출은 높았지만, 23년은 대당 매출이 평균 이하인 존',
    labels={'zname': '실적 높은 존', 'revenue_car2023': '23년 대당 매출'},
)

fig.show()

In [None]:
## Bar chart: revenue_car2022 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='revenue_car2022',
    title='22년 대당 매출은 높았지만, 23년은 대당 매출이 평균 이하인 존',
    labels={'zname': '실적 높은 존', 'revenue_car2022': '22년 대당 매출'},
)

fig.show()

In [None]:
## Bar chart: profit_car2023 per zone for the current df_merge
# NOTE(review): the title describes a profit-based filter, but when cells run
# top to bottom df_merge still holds the revenue-based selection at this
# point (the profit filter is built in a later cell) - confirm intent.
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='profit_car2023',
    title='22년 손익은 높았지만, 23년은 손익이 평균 이하인 존',
    labels={'profit_car2023': '23년 손익'},
)

fig.show()

In [None]:
## Socar zones whose profit per car was above the mean in 2022 but is below the mean in 2023
df_filter = df.loc[
    lambda d: d['profit_car2022'].gt(d['profit_car2022'].mean())
    & d['profit_car2023'].lt(d['profit_car2023'].mean())
]
# Drop rows whose cyear is 2023.
df_filter = df_filter.loc[df_filter['cyear'].ne(2023)]
df_merge = df_filter.merge(df_car_filter, on=['zname', 'zid'], how='left')

In [None]:
## Scatter of revenue per car vs. profit per car for the current df_merge (marker size: opr_23)
fig = px.scatter(
    data_frame=df_merge,
    x='revenue_car2023',
    y='profit_car2023',
    color='zname',
    size='opr_23',
    title='대당 매출, 대당 손익에 따른 분포도(크기 : 운영대수)',
    labels={'revenue_car2023': 'Revenue  Car', 'profit_car2023': 'Profit Car'},
)

fig.show()

In [None]:
## Bar chart: profit_car2022 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='profit_car2022',
    title='22년 손익은 높았지만, 23년은 손익이 평균 이하인 존',
    labels={'profit_car2022': '22년 손익'},
)

fig.show()

In [None]:
## Bar chart: GP23 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='GP23',
    title='22년 손익은 높았지만, 23년은 손익이 평균 이하인 존',
    labels={'GP23': '23년 GP'},
)

fig.show()

In [None]:
## Bar chart: GP22 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='GP22',
    title='22년 손익은 높았지만, 23년은 손익이 평균 이하인 존',
    labels={'GP22': '22년 GP'},
)

fig.show()

In [None]:
## Zones with above-mean 2023 profit per car whose recent (28) profit per car is below the mean
df_filter = df.loc[
    lambda d: d['profit_car28'].lt(d['profit_car28'].mean())
    & d['profit_car2023'].gt(d['profit_car2023'].mean())
]
df_merge = df_filter.merge(df_car_filter, on=['zname', 'zid'], how='left')

In [None]:
## Zones with strong 2023 results but a recent decline
fig = px.scatter(
    data_frame=df_merge,
    x='revenue_car2023',
    y='profit_car2023',
    color='zname',
    size='opr_23',
    title='대당 매출, 대당 손익에 따른 분포도(크기 : 운영대수)',
    labels={'revenue_car2023': 'Revenue  Car', 'profit_car2023': 'Profit Car'},
)

fig.show()

In [None]:
## Bar chart: profit_car28 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='profit_car28',
    title='23년 실적 평균 이상이나 최근 실적 하락한 존',
    labels={'profit_car28': '최근 28일 대당 손익'},
)

fig.show()

In [None]:
## Bar chart: profit_car2023 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='profit_car2023',
    title='23년 실적 평균 이상이나 최근 실적 하락한 존',
    labels={'profit_car2023': '23년 대당 손익'},
)

fig.show()

In [None]:
## Bar chart: GP28 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='GP28',
    title='23년 실적 평균 이상이나 최근 실적 하락한 존',
    labels={'GP28': '최근 28일 GP'},
)

fig.show()

In [None]:
## Bar chart: GP23 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='GP23',
    title='23년 실적 평균 이상이나 최근 실적 하락한 존',
    labels={'GP23': '23년 GP'},
)

fig.show()

In [None]:
## Revenue and operation rate of zones with many nearby users:
## keep rows whose member300_23 is above the column mean
df_filter = df.loc[lambda d: d['member300_23'].gt(d['member300_23'].mean())]
df_merge = df_filter.merge(df_car_filter, on=['zname', 'zid'], how='left')

In [None]:
## Revenue and profit of zones with many nearby users (within 300 m, 2023)
fig = px.scatter(
    data_frame=df_merge,
    x='revenue_car2023',
    y='profit_car2023',
    color='zname',
    size='opr_23',
    title='대당 매출, 대당 손익에 따른 분포도(크기 : 운영대수)',
    labels={'revenue_car2023': 'Revenue  Car', 'profit_car2023': 'Profit Car'},
)

fig.show()

In [None]:
# Display the mean of member300_23 over df (the threshold used by the member300 filter).
df['member300_23'].mean()

155.41772151898735

In [None]:
## Bar chart: member300_23 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='member300_23',
    title='23년 300m 이내 접속자가 평균 이상인 존 ',
    labels={'member300_23': '23년 300m내 접속자'},
)

fig.show()

In [None]:
## Bar chart: revenue_car2023 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='revenue_car2023',
    title='23년 300m 이내 접속자가 평균 이상인 존 ',
    labels={'revenue_car2023': '23년 대당 매출'},
)

fig.show()

In [None]:
# Display the mean GP23 over all rows of df.
df['GP23'].mean()

0.163291088602319

In [None]:
## Bar chart: GP23 per zone for the current df_merge
import plotly.express as px

fig = px.bar(
    data_frame=df_merge,
    x='zname',
    y='GP23',
    title='23년 300m 이내 접속자가 평균 이상인 존 ',
    labels={'GP23': '23년 GP'},
)

fig.show()

In [None]:
## Of the zones in df_merge, keep those whose GP23 is at least the mean GP23 of df, then plot GP23
df_filter2 = df_merge.loc[df_merge['GP23'].ge(df['GP23'].mean())]
fig = px.bar(
    data_frame=df_filter2,
    x='zname',
    y='GP23',
    title='23년 300m 이내 접속자가 평균 이상인 존 중에 평균 GP 보다 높은 존 ',
    labels={'GP23': '23년 GP'},
)

fig.show()

In [None]:
# Display the mean op_rate2023 over all rows of df.
df['op_rate2023'].mean()

0.46975481839446637

In [None]:
## Bar chart: op_rate2023 per zone for df_filter2
fig = px.bar(
    data_frame=df_filter2,
    x='zname',
    y='op_rate2023',
    title='23년 300m 이내 접속자가 평균 이상인 존 중에 평균 GP 보다 높은 존 ',
    labels={'op_rate2023': '23년 가동률'},
)

fig.show()

In [None]:
## Zones with many nearby users: keep rows whose member500_23 is above the column mean
df_filter = df[(df['member500_23'] > df['member500_23'].mean())]
df_merge = pd.merge(df_filter, df_car_filter, on=['zname', 'zid'], how='left')

# Fixed: the title said "300m" although the filter and the plotted column
# are both member500_23.
fig = px.bar(df_merge, x="zname", y='member500_23',
             title='23년 500m 이내 접속자가 평균 이상인 존 ',
             labels={'member500_23': '23년 500m내 접속자'} )

fig.show()

In [None]:
# Display the mean GP23 over all rows of df.
df['GP23'].mean()

0.163291088602319

In [None]:
## GP23 of the zones with many nearby users (df_filter as left by the preceding filter cell)
df_merge = pd.merge(df_filter, df_car_filter, on=['zname', 'zid'], how='left')

# Fixed: the title said "300m", but when cells run top to bottom df_filter
# is the member500_23 selection at this point.
fig = px.bar(df_merge, x="zname", y='GP23',
             title='23년 500m 이내 접속자가 평균 이상인 존 ',
             labels={'GP23': '23년 GP'} )

fig.show()