In [52]:
import requests
import pandas as pd

def get_apt_prizwinner_age_stat(page, perPage, stat_de_lt, stat_de_lte, stat_de_gt, stat_de_gte, api_key):
    base_url = "https://api.odcloud.kr/api"
    api_url = "/ApplyhomeStatSvc/v1/getAPTPrzwnerAgeStat"
    
    params = {
        "page": page,
        "perPage": perPage,
        "cond[STAT_DE::LT]": stat_de_lt,
        "cond[STAT_DE::LTE]": stat_de_lte,
        "cond[STAT_DE::GT]": stat_de_gt,
        "cond[STAT_DE::GTE]": stat_de_gte,
        "serviceKey": api_key
    }

    response = requests.get(base_url + api_url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        return data
    elif response.status_code == 401:
        print("Error: Unauthorized. Check your API key.")
        return None
    elif response.status_code == 500:
        print("Error: Internal Server Error.")
        return None
    else:
        print(f"Error: {response.status_code}")
        return None

def get_prizwinner_age_data(start_year, end_year, api_key):
    years = range(start_year, end_year + 1)
    months = range(1, 13)
    df = pd.DataFrame()

    for year in years:
        for month in months:
            stat_de = f"{year}{month:02d}"
            data = get_apt_prizwinner_age_stat(1, 100, stat_de, stat_de, "000000", "000000", api_key)
            
            if data:
                temp_df = pd.DataFrame(data['data'])
                temp_df['STAT_DE'] = stat_de
                df = pd.concat([df, temp_df], ignore_index=True)
            
            print(f"Processed: {stat_de}")
    
    return df

# API 키 설정
api_key = "vk/AjAkbf0K4e9bDC7RWG+2uj9hSsRVSVOe4WtENZY1dLBUec1AyEgn9AnEPksMUKQ/vDw+lLuRgusRy5OOLfA=="

# 2022년부터 2024년까지의 데이터 가져오기
start_year = 2022
end_year = 2024
df_prizwinner_age = get_prizwinner_age_data(start_year, end_year, api_key)

# 결과 출력
df_prizwinner_age.head()

Processed: 202201
Processed: 202202
Processed: 202203
Processed: 202204
Processed: 202205
Processed: 202206
Processed: 202207
Processed: 202208
Processed: 202209
Processed: 202210
Processed: 202211
Processed: 202212
Processed: 202301
Processed: 202302
Processed: 202303
Processed: 202304
Processed: 202305
Processed: 202306
Processed: 202307
Processed: 202308
Processed: 202309
Processed: 202310
Processed: 202311
Processed: 202312
Processed: 202401
Processed: 202402
Processed: 202403
Processed: 202404
Processed: 202405
Processed: 202406
Processed: 202407
Processed: 202408
Processed: 202409
Processed: 202410
Processed: 202411
Processed: 202412


Unnamed: 0,AGE_30,AGE_40,AGE_50,AGE_60,STAT_DE
0,9242,5178,2652,1218,202201
1,10888,5373,2519,1483,202201
2,14513,6842,3347,1815,202201
3,5579,3120,1454,737,202201
4,7211,3708,2040,1156,202201


In [53]:
df_prizwinner_age.tail()

Unnamed: 0,AGE_30,AGE_40,AGE_50,AGE_60,STAT_DE
3595,7508,3597,1719,970,202412
3596,6114,2718,1299,712,202412
3597,7664,3110,1477,757,202412
3598,6542,3040,1402,671,202412
3599,4324,2518,1439,645,202412


In [4]:
# 연령별 청약 당첨자가 많은 수

# Calculating the sum for each age group
age_sums = df_prizwinner_age[['AGE_30', 'AGE_40', 'AGE_50', 'AGE_60']].sum()

# Sorting the sums to find the highest probabilities
sorted_age_sums = age_sums.sort_values(ascending=False)

sorted_age_sums

AGE_30    32028984
AGE_40    15758487
AGE_50     7749939
AGE_60     4074415
dtype: int64

In [10]:
import requests
import pandas as pd
from datetime import datetime, timedelta

def get_apt_lttot_pblanc_score(page, perPage, house_manage_no, pblanc_no, reside_secd, api_key):
    base_url = "https://api.odcloud.kr/api"
    api_url = "/ApplyhomeInfoCmpetRtSvc/v1/getAptLttotPblancScore"
    
    params = {
        "page": page,
        "perPage": perPage,
        "returnType": "JSON",
        "cond[HOUSE_MANAGE_NO::EQ]": house_manage_no,
        "cond[PBLANC_NO::EQ]": pblanc_no,
        "cond[RESIDE_SECD::EQ]": reside_secd,
        "serviceKey": api_key
    }

    response = requests.get(base_url + api_url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print(f"Error: {response.status_code}")
        return None

def get_lttot_pblanc_score_data(start_date, end_date, api_key):
    page = 1
    perPage = 1000  # 한 번에 가져올 데이터 수
    df = pd.DataFrame()

    current_date = start_date
    while current_date <= end_date:
        house_manage_no = current_date.strftime("%Y00%m%d")
        pblanc_no = current_date.strftime("%Y00%m%d")

        for reside_secd in ["01", "02", "03"]:
            data = get_apt_lttot_pblanc_score(page, perPage, house_manage_no, pblanc_no, reside_secd, api_key)

            if data:
                temp_df = pd.DataFrame(data['data'])
                temp_df['PBLANC_DE'] = current_date.strftime("%Y-%m-%d")
                temp_df['RESIDE_SECD'] = reside_secd
                df = pd.concat([df, temp_df], ignore_index=True)

            print(f"Processed: {current_date.strftime('%Y-%m-%d')}, {reside_secd}")

        current_date += timedelta(days=1)

    return df

# API 키 설정
api_key = "vk/AjAkbf0K4e9bDC7RWG+2uj9hSsRVSVOe4WtENZY1dLBUec1AyEgn9AnEPksMUKQ/vDw+lLuRgusRy5OOLfA=="

# 시작일과 종료일 설정
start_date = datetime(2022, 3, 1).date()
end_date = datetime.now().date()

# 데이터 가져오기
df_lttot_pblanc_score = get_lttot_pblanc_score_data(start_date, end_date, api_key)

# 출력 옵션 설정
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.colheader_justify', 'center')
pd.set_option('display.precision', 3)

# 결과 출력
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df_lttot_pblanc_score)

Processed: 2022-03-01, 01
Processed: 2022-03-01, 02
Processed: 2022-03-01, 03
Processed: 2022-03-02, 01
Processed: 2022-03-02, 02
Processed: 2022-03-02, 03
Processed: 2022-03-03, 01
Processed: 2022-03-03, 02
Processed: 2022-03-03, 03
Processed: 2022-03-04, 01
Processed: 2022-03-04, 02
Processed: 2022-03-04, 03
Processed: 2022-03-05, 01
Processed: 2022-03-05, 02
Processed: 2022-03-05, 03
Processed: 2022-03-06, 01
Processed: 2022-03-06, 02
Processed: 2022-03-06, 03
Processed: 2022-03-07, 01
Processed: 2022-03-07, 02
Processed: 2022-03-07, 03
Processed: 2022-03-08, 01
Processed: 2022-03-08, 02
Processed: 2022-03-08, 03
Processed: 2022-03-09, 01
Processed: 2022-03-09, 02
Processed: 2022-03-09, 03
Processed: 2022-03-10, 01
Processed: 2022-03-10, 02
Processed: 2022-03-10, 03
Processed: 2022-03-11, 01
Processed: 2022-03-11, 02
Processed: 2022-03-11, 03
Processed: 2022-03-12, 01
Processed: 2022-03-12, 02
Processed: 2022-03-12, 03
Processed: 2022-03-13, 01
Processed: 2022-03-13, 02
Processed: 2

Processed: 2022-06-14, 03
Processed: 2022-06-15, 01
Processed: 2022-06-15, 02
Processed: 2022-06-15, 03
Processed: 2022-06-16, 01
Processed: 2022-06-16, 02
Processed: 2022-06-16, 03
Processed: 2022-06-17, 01
Processed: 2022-06-17, 02
Processed: 2022-06-17, 03
Processed: 2022-06-18, 01
Processed: 2022-06-18, 02
Processed: 2022-06-18, 03
Processed: 2022-06-19, 01
Processed: 2022-06-19, 02
Processed: 2022-06-19, 03
Processed: 2022-06-20, 01
Processed: 2022-06-20, 02
Processed: 2022-06-20, 03
Processed: 2022-06-21, 01
Processed: 2022-06-21, 02
Processed: 2022-06-21, 03
Processed: 2022-06-22, 01
Processed: 2022-06-22, 02
Processed: 2022-06-22, 03
Processed: 2022-06-23, 01
Processed: 2022-06-23, 02
Processed: 2022-06-23, 03
Processed: 2022-06-24, 01
Processed: 2022-06-24, 02
Processed: 2022-06-24, 03
Processed: 2022-06-25, 01
Processed: 2022-06-25, 02
Processed: 2022-06-25, 03
Processed: 2022-06-26, 01
Processed: 2022-06-26, 02
Processed: 2022-06-26, 03
Processed: 2022-06-27, 01
Processed: 2

Processed: 2022-09-28, 03
Processed: 2022-09-29, 01
Processed: 2022-09-29, 02
Processed: 2022-09-29, 03
Processed: 2022-09-30, 01
Processed: 2022-09-30, 02
Processed: 2022-09-30, 03
Processed: 2022-10-01, 01
Processed: 2022-10-01, 02
Processed: 2022-10-01, 03
Processed: 2022-10-02, 01
Processed: 2022-10-02, 02
Processed: 2022-10-02, 03
Processed: 2022-10-03, 01
Processed: 2022-10-03, 02
Processed: 2022-10-03, 03
Processed: 2022-10-04, 01
Processed: 2022-10-04, 02
Processed: 2022-10-04, 03
Processed: 2022-10-05, 01
Processed: 2022-10-05, 02
Processed: 2022-10-05, 03
Processed: 2022-10-06, 01
Processed: 2022-10-06, 02
Processed: 2022-10-06, 03
Processed: 2022-10-07, 01
Processed: 2022-10-07, 02
Processed: 2022-10-07, 03
Processed: 2022-10-08, 01
Processed: 2022-10-08, 02
Processed: 2022-10-08, 03
Processed: 2022-10-09, 01
Processed: 2022-10-09, 02
Processed: 2022-10-09, 03
Processed: 2022-10-10, 01
Processed: 2022-10-10, 02
Processed: 2022-10-10, 03
Processed: 2022-10-11, 01
Processed: 2

Processed: 2023-01-12, 03
Processed: 2023-01-13, 01
Processed: 2023-01-13, 02
Processed: 2023-01-13, 03
Processed: 2023-01-14, 01
Processed: 2023-01-14, 02
Processed: 2023-01-14, 03
Processed: 2023-01-15, 01
Processed: 2023-01-15, 02
Processed: 2023-01-15, 03
Processed: 2023-01-16, 01
Processed: 2023-01-16, 02
Processed: 2023-01-16, 03
Processed: 2023-01-17, 01
Processed: 2023-01-17, 02
Processed: 2023-01-17, 03
Processed: 2023-01-18, 01
Processed: 2023-01-18, 02
Processed: 2023-01-18, 03
Processed: 2023-01-19, 01
Processed: 2023-01-19, 02
Processed: 2023-01-19, 03
Processed: 2023-01-20, 01
Processed: 2023-01-20, 02
Processed: 2023-01-20, 03
Processed: 2023-01-21, 01
Processed: 2023-01-21, 02
Processed: 2023-01-21, 03
Processed: 2023-01-22, 01
Processed: 2023-01-22, 02
Processed: 2023-01-22, 03
Processed: 2023-01-23, 01
Processed: 2023-01-23, 02
Processed: 2023-01-23, 03
Processed: 2023-01-24, 01
Processed: 2023-01-24, 02
Processed: 2023-01-24, 03
Processed: 2023-01-25, 01
Processed: 2

Processed: 2023-04-28, 01
Processed: 2023-04-28, 02
Processed: 2023-04-28, 03
Processed: 2023-04-29, 01
Processed: 2023-04-29, 02
Processed: 2023-04-29, 03
Processed: 2023-04-30, 01
Processed: 2023-04-30, 02
Processed: 2023-04-30, 03
Processed: 2023-05-01, 01
Processed: 2023-05-01, 02
Processed: 2023-05-01, 03
Processed: 2023-05-02, 01
Processed: 2023-05-02, 02
Processed: 2023-05-02, 03
Processed: 2023-05-03, 01
Processed: 2023-05-03, 02
Processed: 2023-05-03, 03
Processed: 2023-05-04, 01
Processed: 2023-05-04, 02
Processed: 2023-05-04, 03
Processed: 2023-05-05, 01
Processed: 2023-05-05, 02
Processed: 2023-05-05, 03
Processed: 2023-05-06, 01
Processed: 2023-05-06, 02
Processed: 2023-05-06, 03
Processed: 2023-05-07, 01
Processed: 2023-05-07, 02
Processed: 2023-05-07, 03
Processed: 2023-05-08, 01
Processed: 2023-05-08, 02
Processed: 2023-05-08, 03
Processed: 2023-05-09, 01
Processed: 2023-05-09, 02
Processed: 2023-05-09, 03
Processed: 2023-05-10, 01
Processed: 2023-05-10, 02
Processed: 2

Processed: 2023-08-12, 01
Processed: 2023-08-12, 02
Processed: 2023-08-12, 03
Processed: 2023-08-13, 01
Processed: 2023-08-13, 02
Processed: 2023-08-13, 03
Processed: 2023-08-14, 01
Processed: 2023-08-14, 02
Processed: 2023-08-14, 03
Processed: 2023-08-15, 01
Processed: 2023-08-15, 02
Processed: 2023-08-15, 03
Processed: 2023-08-16, 01
Processed: 2023-08-16, 02
Processed: 2023-08-16, 03
Processed: 2023-08-17, 01
Processed: 2023-08-17, 02
Processed: 2023-08-17, 03
Processed: 2023-08-18, 01
Processed: 2023-08-18, 02
Processed: 2023-08-18, 03
Processed: 2023-08-19, 01
Processed: 2023-08-19, 02
Processed: 2023-08-19, 03
Processed: 2023-08-20, 01
Processed: 2023-08-20, 02
Processed: 2023-08-20, 03
Processed: 2023-08-21, 01
Processed: 2023-08-21, 02
Processed: 2023-08-21, 03
Processed: 2023-08-22, 01
Processed: 2023-08-22, 02
Processed: 2023-08-22, 03
Processed: 2023-08-23, 01
Processed: 2023-08-23, 02
Processed: 2023-08-23, 03
Processed: 2023-08-24, 01
Processed: 2023-08-24, 02
Processed: 2

Processed: 2023-11-25, 02
Processed: 2023-11-25, 03
Processed: 2023-11-26, 01
Processed: 2023-11-26, 02
Processed: 2023-11-26, 03
Processed: 2023-11-27, 01
Processed: 2023-11-27, 02
Processed: 2023-11-27, 03
Processed: 2023-11-28, 01
Processed: 2023-11-28, 02
Processed: 2023-11-28, 03
Processed: 2023-11-29, 01
Processed: 2023-11-29, 02
Processed: 2023-11-29, 03
Processed: 2023-11-30, 01
Processed: 2023-11-30, 02
Processed: 2023-11-30, 03
Processed: 2023-12-01, 01
Processed: 2023-12-01, 02
Processed: 2023-12-01, 03
Processed: 2023-12-02, 01
Processed: 2023-12-02, 02
Processed: 2023-12-02, 03
Processed: 2023-12-03, 01
Processed: 2023-12-03, 02
Processed: 2023-12-03, 03
Processed: 2023-12-04, 01
Processed: 2023-12-04, 02
Processed: 2023-12-04, 03
Processed: 2023-12-05, 01
Processed: 2023-12-05, 02
Processed: 2023-12-05, 03
Processed: 2023-12-06, 01
Processed: 2023-12-06, 02
Processed: 2023-12-06, 03
Processed: 2023-12-07, 01
Processed: 2023-12-07, 02
Processed: 2023-12-07, 03
Processed: 2

Processed: 2024-03-10, 01
Processed: 2024-03-10, 02
Processed: 2024-03-10, 03
Processed: 2024-03-11, 01
Processed: 2024-03-11, 02
Processed: 2024-03-11, 03
Processed: 2024-03-12, 01
Processed: 2024-03-12, 02
Processed: 2024-03-12, 03
Processed: 2024-03-13, 01
Processed: 2024-03-13, 02
Processed: 2024-03-13, 03
Processed: 2024-03-14, 01
Processed: 2024-03-14, 02
Processed: 2024-03-14, 03
Processed: 2024-03-15, 01
Processed: 2024-03-15, 02
Processed: 2024-03-15, 03
Processed: 2024-03-16, 01
Processed: 2024-03-16, 02
Processed: 2024-03-16, 03
Processed: 2024-03-17, 01
Processed: 2024-03-17, 02
Processed: 2024-03-17, 03
Processed: 2024-03-18, 01
Processed: 2024-03-18, 02
Processed: 2024-03-18, 03
Processed: 2024-03-19, 01
Processed: 2024-03-19, 02
Processed: 2024-03-19, 03
Processed: 2024-03-20, 01
Processed: 2024-03-20, 02
Processed: 2024-03-20, 03
Processed: 2024-03-21, 01
Processed: 2024-03-21, 02
Processed: 2024-03-21, 03
Processed: 2024-03-22, 01
Processed: 2024-03-22, 02
Processed: 2

In [12]:
import requests
import pandas as pd
from datetime import datetime, timedelta

def get_apt_lttot_pblanc_score(page, perPage, house_manage_no, pblanc_no, reside_secd, api_key):
    base_url = "https://api.odcloud.kr/api"
    api_url = "/ApplyhomeInfoCmpetRtSvc/v1/getAPTLttotPblancCmpet"
    
    params = {
        "page": page,
        "perPage": perPage,
        "returnType": "JSON",
        "cond[HOUSE_MANAGE_NO::EQ]": house_manage_no,
        "cond[PBLANC_NO::EQ]": pblanc_no,
        "cond[RESIDE_SECD::EQ]": reside_secd,
        "serviceKey": api_key
    }

    response = requests.get(base_url + api_url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print(f"Error: {response.status_code}")
        return None

def get_lttot_pblanc_score_data(start_date, end_date, api_key):
    page = 1
    perPage = 1000  # 한 번에 가져올 데이터 수
    df = pd.DataFrame()

    current_date = start_date
    while current_date <= end_date:
        house_manage_no = current_date.strftime("%Y00%m%d")
        pblanc_no = current_date.strftime("%Y00%m%d")

        for reside_secd in ["01", "02", "03"]:
            data = get_apt_lttot_pblanc_score(page, perPage, house_manage_no, pblanc_no, reside_secd, api_key)

            if data:
                temp_df = pd.DataFrame(data['data'])
                temp_df['PBLANC_DE'] = current_date.strftime("%Y-%m-%d")
                temp_df['RESIDE_SECD'] = reside_secd
                df = pd.concat([df, temp_df], ignore_index=True)

            print(f"Processed: {current_date.strftime('%Y-%m-%d')}, {reside_secd}")

        current_date += timedelta(days=1)

    return df

# API 키 설정
api_key = "vk/AjAkbf0K4e9bDC7RWG+2uj9hSsRVSVOe4WtENZY1dLBUec1AyEgn9AnEPksMUKQ/vDw+lLuRgusRy5OOLfA=="

# 시작일과 종료일 설정
start_date = datetime(2022, 3, 1).date()
end_date = datetime.now().date()

# 데이터 가져오기
df_lttot_competition_score = get_lttot_pblanc_score_data(start_date, end_date, api_key)

# 출력 옵션 설정
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.colheader_justify', 'center')
pd.set_option('display.precision', 3)

# 결과 출력
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df_lttot_pblanc_score)

Processed: 2022-03-01, 01
Processed: 2022-03-01, 02
Processed: 2022-03-01, 03
Processed: 2022-03-02, 01
Processed: 2022-03-02, 02
Processed: 2022-03-02, 03
Processed: 2022-03-03, 01
Processed: 2022-03-03, 02
Processed: 2022-03-03, 03
Processed: 2022-03-04, 01
Processed: 2022-03-04, 02
Processed: 2022-03-04, 03
Processed: 2022-03-05, 01
Processed: 2022-03-05, 02
Processed: 2022-03-05, 03
Processed: 2022-03-06, 01
Processed: 2022-03-06, 02
Processed: 2022-03-06, 03
Processed: 2022-03-07, 01
Processed: 2022-03-07, 02
Processed: 2022-03-07, 03
Processed: 2022-03-08, 01
Processed: 2022-03-08, 02
Processed: 2022-03-08, 03
Processed: 2022-03-09, 01
Processed: 2022-03-09, 02
Processed: 2022-03-09, 03
Processed: 2022-03-10, 01
Processed: 2022-03-10, 02
Processed: 2022-03-10, 03
Processed: 2022-03-11, 01
Processed: 2022-03-11, 02
Processed: 2022-03-11, 03
Processed: 2022-03-12, 01
Processed: 2022-03-12, 02
Processed: 2022-03-12, 03
Processed: 2022-03-13, 01
Processed: 2022-03-13, 02
Processed: 2

Processed: 2022-06-14, 03
Processed: 2022-06-15, 01
Processed: 2022-06-15, 02
Processed: 2022-06-15, 03
Processed: 2022-06-16, 01
Processed: 2022-06-16, 02
Processed: 2022-06-16, 03
Processed: 2022-06-17, 01
Processed: 2022-06-17, 02
Processed: 2022-06-17, 03
Processed: 2022-06-18, 01
Processed: 2022-06-18, 02
Processed: 2022-06-18, 03
Processed: 2022-06-19, 01
Processed: 2022-06-19, 02
Processed: 2022-06-19, 03
Processed: 2022-06-20, 01
Processed: 2022-06-20, 02
Processed: 2022-06-20, 03
Processed: 2022-06-21, 01
Processed: 2022-06-21, 02
Processed: 2022-06-21, 03
Processed: 2022-06-22, 01
Processed: 2022-06-22, 02
Processed: 2022-06-22, 03
Processed: 2022-06-23, 01
Processed: 2022-06-23, 02
Processed: 2022-06-23, 03
Processed: 2022-06-24, 01
Processed: 2022-06-24, 02
Processed: 2022-06-24, 03
Processed: 2022-06-25, 01
Processed: 2022-06-25, 02
Processed: 2022-06-25, 03
Processed: 2022-06-26, 01
Processed: 2022-06-26, 02
Processed: 2022-06-26, 03
Processed: 2022-06-27, 01
Processed: 2

Processed: 2022-09-28, 01
Processed: 2022-09-28, 02
Processed: 2022-09-28, 03
Processed: 2022-09-29, 01
Processed: 2022-09-29, 02
Processed: 2022-09-29, 03
Processed: 2022-09-30, 01
Processed: 2022-09-30, 02
Processed: 2022-09-30, 03
Processed: 2022-10-01, 01
Processed: 2022-10-01, 02
Processed: 2022-10-01, 03
Processed: 2022-10-02, 01
Processed: 2022-10-02, 02
Processed: 2022-10-02, 03
Processed: 2022-10-03, 01
Processed: 2022-10-03, 02
Processed: 2022-10-03, 03
Processed: 2022-10-04, 01
Processed: 2022-10-04, 02
Processed: 2022-10-04, 03
Processed: 2022-10-05, 01
Processed: 2022-10-05, 02
Processed: 2022-10-05, 03
Processed: 2022-10-06, 01
Processed: 2022-10-06, 02
Processed: 2022-10-06, 03
Processed: 2022-10-07, 01
Processed: 2022-10-07, 02
Processed: 2022-10-07, 03
Processed: 2022-10-08, 01
Processed: 2022-10-08, 02
Processed: 2022-10-08, 03
Processed: 2022-10-09, 01
Processed: 2022-10-09, 02
Processed: 2022-10-09, 03
Processed: 2022-10-10, 01
Processed: 2022-10-10, 02
Processed: 2

Processed: 2023-01-11, 02
Processed: 2023-01-11, 03
Processed: 2023-01-12, 01
Processed: 2023-01-12, 02
Processed: 2023-01-12, 03
Processed: 2023-01-13, 01
Processed: 2023-01-13, 02
Processed: 2023-01-13, 03
Processed: 2023-01-14, 01
Processed: 2023-01-14, 02
Processed: 2023-01-14, 03
Processed: 2023-01-15, 01
Processed: 2023-01-15, 02
Processed: 2023-01-15, 03
Processed: 2023-01-16, 01
Processed: 2023-01-16, 02
Processed: 2023-01-16, 03
Processed: 2023-01-17, 01
Processed: 2023-01-17, 02
Processed: 2023-01-17, 03
Processed: 2023-01-18, 01
Processed: 2023-01-18, 02
Processed: 2023-01-18, 03
Processed: 2023-01-19, 01
Processed: 2023-01-19, 02
Processed: 2023-01-19, 03
Processed: 2023-01-20, 01
Processed: 2023-01-20, 02
Processed: 2023-01-20, 03
Processed: 2023-01-21, 01
Processed: 2023-01-21, 02
Processed: 2023-01-21, 03
Processed: 2023-01-22, 01
Processed: 2023-01-22, 02
Processed: 2023-01-22, 03
Processed: 2023-01-23, 01
Processed: 2023-01-23, 02
Processed: 2023-01-23, 03
Processed: 2

Processed: 2023-04-27, 01
Processed: 2023-04-27, 02
Processed: 2023-04-27, 03
Processed: 2023-04-28, 01
Processed: 2023-04-28, 02
Processed: 2023-04-28, 03
Processed: 2023-04-29, 01
Processed: 2023-04-29, 02
Processed: 2023-04-29, 03
Processed: 2023-04-30, 01
Processed: 2023-04-30, 02
Processed: 2023-04-30, 03
Processed: 2023-05-01, 01
Processed: 2023-05-01, 02
Processed: 2023-05-01, 03
Processed: 2023-05-02, 01
Processed: 2023-05-02, 02
Processed: 2023-05-02, 03
Processed: 2023-05-03, 01
Processed: 2023-05-03, 02
Processed: 2023-05-03, 03
Processed: 2023-05-04, 01
Processed: 2023-05-04, 02
Processed: 2023-05-04, 03
Processed: 2023-05-05, 01
Processed: 2023-05-05, 02
Processed: 2023-05-05, 03
Processed: 2023-05-06, 01
Processed: 2023-05-06, 02
Processed: 2023-05-06, 03
Processed: 2023-05-07, 01
Processed: 2023-05-07, 02
Processed: 2023-05-07, 03
Processed: 2023-05-08, 01
Processed: 2023-05-08, 02
Processed: 2023-05-08, 03
Processed: 2023-05-09, 01
Processed: 2023-05-09, 02
Processed: 2

Processed: 2023-08-10, 03
Processed: 2023-08-11, 01
Processed: 2023-08-11, 02
Processed: 2023-08-11, 03
Processed: 2023-08-12, 01
Processed: 2023-08-12, 02
Processed: 2023-08-12, 03
Processed: 2023-08-13, 01
Processed: 2023-08-13, 02
Processed: 2023-08-13, 03
Processed: 2023-08-14, 01
Processed: 2023-08-14, 02
Processed: 2023-08-14, 03
Processed: 2023-08-15, 01
Processed: 2023-08-15, 02
Processed: 2023-08-15, 03
Processed: 2023-08-16, 01
Processed: 2023-08-16, 02
Processed: 2023-08-16, 03
Processed: 2023-08-17, 01
Processed: 2023-08-17, 02
Processed: 2023-08-17, 03
Processed: 2023-08-18, 01
Processed: 2023-08-18, 02
Processed: 2023-08-18, 03
Processed: 2023-08-19, 01
Processed: 2023-08-19, 02
Processed: 2023-08-19, 03
Processed: 2023-08-20, 01
Processed: 2023-08-20, 02
Processed: 2023-08-20, 03
Processed: 2023-08-21, 01
Processed: 2023-08-21, 02
Processed: 2023-08-21, 03
Processed: 2023-08-22, 01
Processed: 2023-08-22, 02
Processed: 2023-08-22, 03
Processed: 2023-08-23, 01
Processed: 2

Processed: 2023-11-24, 01
Processed: 2023-11-24, 02
Processed: 2023-11-24, 03
Processed: 2023-11-25, 01
Processed: 2023-11-25, 02
Processed: 2023-11-25, 03
Processed: 2023-11-26, 01
Processed: 2023-11-26, 02
Processed: 2023-11-26, 03
Processed: 2023-11-27, 01
Processed: 2023-11-27, 02
Processed: 2023-11-27, 03
Processed: 2023-11-28, 01
Processed: 2023-11-28, 02
Processed: 2023-11-28, 03
Processed: 2023-11-29, 01
Processed: 2023-11-29, 02
Processed: 2023-11-29, 03
Processed: 2023-11-30, 01
Processed: 2023-11-30, 02
Processed: 2023-11-30, 03
Processed: 2023-12-01, 01
Processed: 2023-12-01, 02
Processed: 2023-12-01, 03
Processed: 2023-12-02, 01
Processed: 2023-12-02, 02
Processed: 2023-12-02, 03
Processed: 2023-12-03, 01
Processed: 2023-12-03, 02
Processed: 2023-12-03, 03
Processed: 2023-12-04, 01
Processed: 2023-12-04, 02
Processed: 2023-12-04, 03
Processed: 2023-12-05, 01
Processed: 2023-12-05, 02
Processed: 2023-12-05, 03
Processed: 2023-12-06, 01
Processed: 2023-12-06, 02
Processed: 2

Processed: 2024-03-08, 02
Processed: 2024-03-08, 03
Processed: 2024-03-09, 01
Processed: 2024-03-09, 02
Processed: 2024-03-09, 03
Processed: 2024-03-10, 01
Processed: 2024-03-10, 02
Processed: 2024-03-10, 03
Processed: 2024-03-11, 01
Processed: 2024-03-11, 02
Processed: 2024-03-11, 03
Processed: 2024-03-12, 01
Processed: 2024-03-12, 02
Processed: 2024-03-12, 03
Processed: 2024-03-13, 01
Processed: 2024-03-13, 02
Processed: 2024-03-13, 03
Processed: 2024-03-14, 01
Processed: 2024-03-14, 02
Processed: 2024-03-14, 03
Processed: 2024-03-15, 01
Processed: 2024-03-15, 02
Processed: 2024-03-15, 03
Processed: 2024-03-16, 01
Processed: 2024-03-16, 02
Processed: 2024-03-16, 03
Processed: 2024-03-17, 01
Processed: 2024-03-17, 02
Processed: 2024-03-17, 03
Processed: 2024-03-18, 01
Processed: 2024-03-18, 02
Processed: 2024-03-18, 03
Processed: 2024-03-19, 01
Processed: 2024-03-19, 02
Processed: 2024-03-19, 03
Processed: 2024-03-20, 01
Processed: 2024-03-20, 02
Processed: 2024-03-20, 03
Processed: 2

In [13]:
df_prizwinner_age 

Unnamed: 0,AGE_30,AGE_40,AGE_50,AGE_60,STAT_DE
0,9242,5178,2652,1218,202201
1,10888,5373,2519,1483,202201
2,14513,6842,3347,1815,202201
3,5579,3120,1454,737,202201
4,7211,3708,2040,1156,202201
5,11454,5557,2647,1338,202201
6,12782,6024,2945,1453,202201
7,8339,3862,1711,945,202201
8,9222,4331,2067,1221,202201
9,9623,4999,2526,1258,202201


In [None]:
# Converting score columns to numeric, handling non-numeric values
df_lttot_pblanc_score['AVRG_SCORE'] = pd.to_numeric(df_lttot_pblanc_score['AVRG_SCORE'], errors='coerce')
df_lttot_pblanc_score['LWET_SCORE'] = pd.to_numeric(df_lttot_pblanc_score['LWET_SCORE'], errors='coerce')
df_lttot_pblanc_score['TOP_SCORE'] = pd.to_numeric(df_lttot_pblanc_score['TOP_SCORE'], errors='coerce')

# Adding a common key to both dataframes for merging
df_prizwinner_age['PBLANC_DE'] = '202201'

# Merging the age data with score data
df_combined_final = pd.merge(df_lttot_pblanc_score, df_prizwinner_age, how='left', left_on='PBLANC_DE', right_on='STAT_DE')

# Dropping the STAT_DE column as it is redundant after the merge
df_combined_final.drop(columns=['STAT_DE'], inplace=True)

import ace_tools as tools; tools.display_dataframe_to_user(name="Combined Data for Prediction Model", dataframe=df_combined_final)

df_combined_final.head()

In [58]:
# 선택하여 열 이름 변경하기
df_prizwinner_age = df_prizwinner_age.rename(columns={'STAT_DE':'PBLANC_DE'})

In [59]:
df_prizwinner_age.tail()

Unnamed: 0,AGE_30,AGE_40,AGE_50,AGE_60,PBLANC_DE
3595,7508,3597,1719,970,202412
3596,6114,2718,1299,712,202412
3597,7664,3110,1477,757,202412
3598,6542,3040,1402,671,202412
3599,4324,2518,1439,645,202412


In [39]:
df_lttot_pblanc_score.head()

Unnamed: 0,AVRG_SCORE,HOUSE_MANAGE_NO,HOUSE_TY,LWET_SCORE,MODEL_NO,PBLANC_NO,RESIDE_SECD,RESIDE_SENM,TOP_SCORE,PBLANC_DE
0,66.0,2022000301,084.4666A,60.0,1,2022000301,1,해당지역,73.0,2022-03-01
1,62.6,2022000301,084.2149B,60.0,2,2022000301,1,해당지역,69.0,2022-03-01
2,,2022000301,122.0903,,3,2022000301,1,해당지역,,2022-03-01
3,,2022000301,140.4553,,4,2022000301,1,해당지역,,2022-03-01
4,0.0,2022000301,084.4666A,0.0,1,2022000301,2,기타지역,0.0,2022-03-01


In [61]:
df_lttot_pblanc_score['PBLANC_DE'] = pd.to_datetime(df_lttot_pblanc_score['PBLANC_DE']).dt.strftime('%Y%m')

In [63]:
df_lttot_pblanc_score.head()

Unnamed: 0,AVRG_SCORE,HOUSE_MANAGE_NO,HOUSE_TY,LWET_SCORE,MODEL_NO,PBLANC_NO,RESIDE_SECD,RESIDE_SENM,TOP_SCORE,PBLANC_DE
0,66.0,2022000301,084.4666A,60.0,1,2022000301,1,해당지역,73.0,202203
1,62.6,2022000301,084.2149B,60.0,2,2022000301,1,해당지역,69.0,202203
2,,2022000301,122.0903,,3,2022000301,1,해당지역,,202203
3,,2022000301,140.4553,,4,2022000301,1,해당지역,,202203
4,0.0,2022000301,084.4666A,0.0,1,2022000301,2,기타지역,0.0,202203


In [81]:
# 'PBLANC_DE' 컬럼의 데이터 타입을 문자열로 통일
df_prizwinner_age['PBLANC_DE'] = df_prizwinner_age['PBLANC_DE'].astype(str)
df_lttot_pblanc_score['PBLANC_DE'] = df_lttot_pblanc_score['PBLANC_DE'].astype(str)

# pd.concat() 함수를 사용하여 두 데이터프레임을 연결
merged_df = pd.merge(df_prizwinner_age, df_lttot_pblanc_score, on='PBLANC_DE', how='inner')

merged_df.count()

AGE_30             194100
AGE_40             194100
AGE_50             194100
AGE_60             194100
PBLANC_DE          194100
AVRG_SCORE          64900
HOUSE_MANAGE_NO    194100
HOUSE_TY           194100
LWET_SCORE          64900
MODEL_NO           194100
PBLANC_NO          194100
RESIDE_SECD        194100
RESIDE_SENM        194100
TOP_SCORE           64900
dtype: int64

In [99]:
# pd.concat() 함수를 사용하여 두 데이터프레임을 연결
merged_df = pd.merge(df_prizwinner_age, df_lttot_pblanc_score, on='PBLANC_DE', how='inner')

merged_df.count()

AGE_30             194100
AGE_40             194100
AGE_50             194100
AGE_60             194100
PBLANC_DE          194100
AVRG_SCORE          64900
HOUSE_MANAGE_NO    194100
HOUSE_TY           194100
LWET_SCORE          64900
MODEL_NO           194100
PBLANC_NO          194100
RESIDE_SECD        194100
RESIDE_SENM        194100
TOP_SCORE           64900
dtype: int64

In [101]:
merged_df = merged_df.loc[merged_df['AVRG_SCORE'].notnull()]
merged_df.tail()

Unnamed: 0,AGE_30,AGE_40,AGE_50,AGE_60,PBLANC_DE,AVRG_SCORE,HOUSE_MANAGE_NO,HOUSE_TY,LWET_SCORE,MODEL_NO,PBLANC_NO,RESIDE_SECD,RESIDE_SENM,TOP_SCORE
194051,4324,2518,1439,645,202401,0.0,2024000124,084.7705T,0.0,6,2024000124,2,기타지역,0.0
194084,4324,2518,1439,645,202401,61.28,2024000129,084.9437A,54.0,1,2024000129,1,해당지역,79.0
194085,4324,2518,1439,645,202401,56.4,2024000129,084.9841B,44.0,2,2024000129,1,해당지역,73.0
194092,4324,2518,1439,645,202401,0.0,2024000129,084.9437A,0.0,1,2024000129,2,기타지역,0.0
194093,4324,2518,1439,645,202401,0.0,2024000129,084.9841B,0.0,2,2024000129,2,기타지역,0.0


In [85]:
df_lttot_competition_score['PBLANC_DE'] = pd.to_datetime(df_lttot_competition_score['PBLANC_DE']).dt.strftime('%Y%m')


Unnamed: 0,CMPET_RATE,HOUSE_MANAGE_NO,HOUSE_TY,MODEL_NO,PBLANC_NO,REQ_CNT,RESIDE_SECD,RESIDE_SENM,SUBSCRPT_RANK_CODE,SUPLY_HSHLDCO,PBLANC_DE
0,125.50,2022000301,084.4666A,1,2022000301,1506,1,해당지역,1.0,12.0,202203
1,-,2022000301,084.4666A,1,2022000301,0,1,해당지역,2.0,12.0,202203
2,40.42,2022000301,084.2149B,2,2022000301,485,1,해당지역,1.0,12.0,202203
3,-,2022000301,084.2149B,2,2022000301,0,1,해당지역,2.0,12.0,202203
4,28.18,2022000301,122.0903,3,2022000301,479,1,해당지역,1.0,17.0,202203


In [102]:
df_lttot_competition_score = df_lttot_competition_score.loc[~(df_lttot_competition_score['CMPET_RATE'].str.contains('△') | df_lttot_competition_score['CMPET_RATE'].isin(['-']))]

In [103]:
df_lttot_competition_score.tail()

Unnamed: 0,CMPET_RATE,HOUSE_MANAGE_NO,HOUSE_TY,MODEL_NO,PBLANC_NO,REQ_CNT,RESIDE_SECD,RESIDE_SENM,SUBSCRPT_RANK_CODE,SUPLY_HSHLDCO,PBLANC_DE
3854,1.45,2024000129,102.9716,3,2024000129,230,1,해당지역,1.0,159.0,202401
3856,1.66,2024000129,121.9554,4,2024000129,274,1,해당지역,1.0,165.0,202401
3859,8.33,2024000129,139.9830A,5,2024000129,50,1,해당지역,2.0,121.0,202401
3862,1.8,2024000129,139.9600C,7,2024000129,223,1,해당지역,1.0,124.0,202401
3864,2.0,2024000129,166.9819,8,2024000129,18,1,해당지역,1.0,9.0,202401


In [104]:
# 'PBLANC_DE' 컬럼의 데이터 타입을 문자열로 통일
df_lttot_competition_score['PBLANC_DE'] = df_lttot_competition_score['PBLANC_DE'].astype(str)

# pd.concat() 함수를 사용하여 두 데이터프레임을 연결
complete_df = pd.merge(merged_df, df_lttot_competition_score, on='PBLANC_DE', how='outer')

In [106]:
complete_df.tail()

Unnamed: 0,AGE_30,AGE_40,AGE_50,AGE_60,PBLANC_DE,AVRG_SCORE,HOUSE_MANAGE_NO_x,HOUSE_TY_x,LWET_SCORE,MODEL_NO_x,PBLANC_NO_x,RESIDE_SECD_x,RESIDE_SENM_x,TOP_SCORE,CMPET_RATE,HOUSE_MANAGE_NO_y,HOUSE_TY_y,MODEL_NO_y,PBLANC_NO_y,REQ_CNT,RESIDE_SECD_y,RESIDE_SENM_y,SUBSCRPT_RANK_CODE,SUPLY_HSHLDCO
3372395,4324,2518,1439,645,202401,0.0,2024000129,084.9841B,0.0,2,2024000129,2,기타지역,0.0,1.45,2024000129,102.9716,3,2024000129,230,1,해당지역,1.0,159.0
3372396,4324,2518,1439,645,202401,0.0,2024000129,084.9841B,0.0,2,2024000129,2,기타지역,0.0,1.66,2024000129,121.9554,4,2024000129,274,1,해당지역,1.0,165.0
3372397,4324,2518,1439,645,202401,0.0,2024000129,084.9841B,0.0,2,2024000129,2,기타지역,0.0,8.33,2024000129,139.9830A,5,2024000129,50,1,해당지역,2.0,121.0
3372398,4324,2518,1439,645,202401,0.0,2024000129,084.9841B,0.0,2,2024000129,2,기타지역,0.0,1.8,2024000129,139.9600C,7,2024000129,223,1,해당지역,1.0,124.0
3372399,4324,2518,1439,645,202401,0.0,2024000129,084.9841B,0.0,2,2024000129,2,기타지역,0.0,2.0,2024000129,166.9819,8,2024000129,18,1,해당지역,1.0,9.0


In [107]:
complete_df = complete_df.loc[complete_df['HOUSE_TY_x'].eq(complete_df['HOUSE_TY_y'])]

In [111]:
complete_df.head()

Unnamed: 0,AGE_30,AGE_40,AGE_50,AGE_60,PBLANC_DE,AVRG_SCORE,HOUSE_MANAGE_NO_x,HOUSE_TY_x,LWET_SCORE,MODEL_NO_x,PBLANC_NO_x,RESIDE_SECD_x,RESIDE_SENM_x,TOP_SCORE,CMPET_RATE,HOUSE_MANAGE_NO_y,HOUSE_TY_y,MODEL_NO_y,PBLANC_NO_y,REQ_CNT,RESIDE_SECD_y,RESIDE_SENM_y,SUBSCRPT_RANK_CODE,SUPLY_HSHLDCO
0,9242,5178,2652,1218,202203,66.0,2022000301,084.4666A,60.0,1,2022000301,1,해당지역,73.0,125.5,2022000301,084.4666A,1,2022000301,1506,1,해당지역,1.0,12.0
51,9242,5178,2652,1218,202203,62.6,2022000301,084.2149B,60.0,2,2022000301,1,해당지역,69.0,40.42,2022000301,084.2149B,2,2022000301,485,1,해당지역,1.0,12.0
100,9242,5178,2652,1218,202203,0.0,2022000301,084.4666A,0.0,1,2022000301,2,기타지역,0.0,125.5,2022000301,084.4666A,1,2022000301,1506,1,해당지역,1.0,12.0
151,9242,5178,2652,1218,202203,0.0,2022000301,084.2149B,0.0,2,2022000301,2,기타지역,0.0,40.42,2022000301,084.2149B,2,2022000301,485,1,해당지역,1.0,12.0
204,9242,5178,2652,1218,202203,45.83,2022000304,071.6421B,39.0,1,2022000304,1,해당지역,52.0,7.88,2022000304,071.6421B,1,2022000304,63,1,해당지역,1.0,8.0


In [113]:
model_df = complete_df[['PBLANC_DE', 'AGE_30', 'AGE_40', 'AGE_50', 'AGE_60', 'AVRG_SCORE', 'HOUSE_MANAGE_NO_x', 'HOUSE_TY_x', 'LWET_SCORE', 'RESIDE_SENM_x', 'TOP_SCORE', 'CMPET_RATE']]

In [115]:
model_df.head()

Unnamed: 0,PBLANC_DE,AGE_30,AGE_40,AGE_50,AGE_60,AVRG_SCORE,HOUSE_MANAGE_NO_x,HOUSE_TY_x,LWET_SCORE,RESIDE_SENM_x,TOP_SCORE,CMPET_RATE
0,202203,9242,5178,2652,1218,66.0,2022000301,084.4666A,60.0,해당지역,73.0,125.5
51,202203,9242,5178,2652,1218,62.6,2022000301,084.2149B,60.0,해당지역,69.0,40.42
100,202203,9242,5178,2652,1218,0.0,2022000301,084.4666A,0.0,기타지역,0.0,125.5
151,202203,9242,5178,2652,1218,0.0,2022000301,084.2149B,0.0,기타지역,0.0,40.42
204,202203,9242,5178,2652,1218,45.83,2022000304,071.6421B,39.0,해당지역,52.0,7.88


In [116]:
zero_count = model_df['AVRG_SCORE'].value_counts().get(0.0, 0)
print(f"AVRG_SCORE 컬럼에서 0.0인 값의 개수: {zero_count}개")

AVRG_SCORE 컬럼에서 0.0인 값의 개수: 31300개


In [119]:
model_df = model_df.loc[(model_df['AVRG_SCORE'] != 0.0) | (model_df['CMPET_RATE'] != 0)]

In [122]:
model_df = model_df.loc[(model_df['AVRG_SCORE'] != 0.00) | (model_df['CMPET_RATE'] != 0)]

In [123]:
model_df.head()

Unnamed: 0,PBLANC_DE,AGE_30,AGE_40,AGE_50,AGE_60,AVRG_SCORE,HOUSE_MANAGE_NO_x,HOUSE_TY_x,LWET_SCORE,RESIDE_SENM_x,TOP_SCORE,CMPET_RATE
0,202203,9242,5178,2652,1218,66.0,2022000301,084.4666A,60.0,해당지역,73.0,125.5
51,202203,9242,5178,2652,1218,62.6,2022000301,084.2149B,60.0,해당지역,69.0,40.42
100,202203,9242,5178,2652,1218,0.0,2022000301,084.4666A,0.0,기타지역,0.0,125.5
151,202203,9242,5178,2652,1218,0.0,2022000301,084.2149B,0.0,기타지역,0.0,40.42
204,202203,9242,5178,2652,1218,45.83,2022000304,071.6421B,39.0,해당지역,52.0,7.88


In [124]:
model_df['AVRG_SCORE'] = model_df['AVRG_SCORE'].astype(float)
model_df['LWET_SCORE'] = model_df['LWET_SCORE'].astype(float)
model_df['TOP_SCORE'] = model_df['TOP_SCORE'].astype(float)
model_df['CMPET_RATE'] = model_df['CMPET_RATE'].astype(float)

In [126]:
model_df.head()

Unnamed: 0,PBLANC_DE,AGE_30,AGE_40,AGE_50,AGE_60,AVRG_SCORE,HOUSE_MANAGE_NO_x,HOUSE_TY_x,LWET_SCORE,RESIDE_SENM_x,TOP_SCORE,CMPET_RATE
0,202203,9242,5178,2652,1218,66.0,2022000301,084.4666A,60.0,해당지역,73.0,125.5
51,202203,9242,5178,2652,1218,62.6,2022000301,084.2149B,60.0,해당지역,69.0,40.42
100,202203,9242,5178,2652,1218,0.0,2022000301,084.4666A,0.0,기타지역,0.0,125.5
151,202203,9242,5178,2652,1218,0.0,2022000301,084.2149B,0.0,기타지역,0.0,40.42
204,202203,9242,5178,2652,1218,45.83,2022000304,071.6421B,39.0,해당지역,52.0,7.88


In [129]:
df = model_df.loc[model_df['AVRG_SCORE'] > 0]

In [132]:
df.head()

Unnamed: 0,PBLANC_DE,AGE_30,AGE_40,AGE_50,AGE_60,AVRG_SCORE,HOUSE_MANAGE_NO_x,HOUSE_TY_x,LWET_SCORE,RESIDE_SENM_x,TOP_SCORE,CMPET_RATE
0,202203,9242,5178,2652,1218,66.0,2022000301,084.4666A,60.0,해당지역,73.0,125.5
51,202203,9242,5178,2652,1218,62.6,2022000301,084.2149B,60.0,해당지역,69.0,40.42
204,202203,9242,5178,2652,1218,45.83,2022000304,071.6421B,39.0,해당지역,52.0,7.88
255,202203,9242,5178,2652,1218,49.36,2022000304,074.9423A,43.0,해당지역,60.0,12.0
306,202203,9242,5178,2652,1218,39.0,2022000304,074.9945C,31.0,해당지역,52.0,5.0


In [161]:
# 학습완료 모델 : rf_model

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 특성과 타겟 변수 분리
X = df[['AGE_30', 'AGE_40', 'AGE_50', 'AGE_60', 'AVRG_SCORE', 'LWET_SCORE', 'TOP_SCORE']]  # 예시로 일부 특성만 사용
y = df['CMPET_RATE']

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 랜덤 포레스트 모델 학습
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# 모델 평가
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

# 사용자 입력 받기
adr_do = input('거주 지역(도/광역시 등): ')
recruit_date = input('모집 공고일 또는 청약 접수일: ')
age = int(input('나이: '))
score = float(input('가점 계산 점수: '))

# 사용자 입력을 기반으로 예측
user_data = pd.DataFrame({
    'AGE_30': [age >= 30 and age < 40],
    'AGE_40': [age >= 40 and age < 50],
    'AGE_50': [age >= 50 and age < 60],
    'AGE_60': [age >= 60],
    'AVRG_SCORE': [score],
    'LWET_SCORE': [score * 0.9],
    'TOP_SCORE': [score * 1.1]
})

win_prob = rf_model.predict(user_data)[0]

# 당첨 확률에 따른 분류
if win_prob < 30:
    win_rate = '낮음'
elif win_prob < 60:
    win_rate = '보통'
else:
    win_rate = '높음'

print(f'당첨 확률: {win_prob:.2f}%')
print(f'당첨 가능성: {win_rate}')

Mean Squared Error: 1246.76
R-squared: 0.71
거주 지역(도/광역시 등): 서울특별시
모집 공고일 또는 청약 접수일: 20220302
나이: 30
가점 계산 점수: 50
당첨 확률: 26.07%
당첨 가능성: 낮음


In [162]:
import pickle

# 모델 학습 코드 생략
# 모델을 학습한 후 저장할 때, protocol=2를 사용하여 저장합니다.
with open('rf_model.pkl', 'wb') as file:
    pickle.dump(model, file, protocol=2)


In [158]:
import pandas as pd
import random

# 랜덤한 사용자 데이터 생성 함수
def generate_user_data():
    adr_do = random.choice(['서울특별시', '경기도', '인천광역시', '부산광역시', '대구광역시'])
    recruit_date = f'2023-{random.randint(1, 12):02d}-{random.randint(1, 28):02d}'
    age = random.randint(20, 65)
    score = random.uniform(0, 100)
    return adr_do, recruit_date, age, score

# 10개의 사용자 데이터 생성
user_data_list = [generate_user_data() for _ in range(10)]

# 사용자 데이터를 기반으로 예측
for i, user_data in enumerate(user_data_list, start=1):
    adr_do, recruit_date, age, score = user_data
    
    user_df = pd.DataFrame({
        'AGE_30': [age >= 30 and age < 40],
        'AGE_40': [age >= 40 and age < 50],
        'AGE_50': [age >= 50 and age < 60],
        'AGE_60': [age >= 60],
        'AVRG_SCORE': [score],
        'LWET_SCORE': [score * 0.9],
        'TOP_SCORE': [score * 1.1]
    })
    
    win_prob = rf_model.predict(user_df)[0]
    
    if win_prob < 30:
        win_rate = '낮음'
    elif win_prob < 60:
        win_rate = '보통'
    else:
        win_rate = '높음'
    
    print(f'사용자 {i}')
    print(f'거주 지역: {adr_do}')
    print(f'모집 공고일 또는 청약 접수일: {recruit_date}')
    print(f'나이: {age}')
    print(f'가점 계산 점수: {score:.2f}')
    print(f'당첨 확률: {win_prob:.2f}%')
    print(f'당첨 가능성: {win_rate}')
    print('---')

사용자 1
거주 지역: 경기도
모집 공고일 또는 청약 접수일: 2023-09-01
나이: 22
가점 계산 점수: 10.97
당첨 확률: 1.00%
당첨 가능성: 낮음
---
사용자 2
거주 지역: 부산광역시
모집 공고일 또는 청약 접수일: 2023-07-28
나이: 22
가점 계산 점수: 46.10
당첨 확률: 8.61%
당첨 가능성: 낮음
---
사용자 3
거주 지역: 서울특별시
모집 공고일 또는 청약 접수일: 2023-04-04
나이: 53
가점 계산 점수: 37.71
당첨 확률: 3.74%
당첨 가능성: 낮음
---
사용자 4
거주 지역: 서울특별시
모집 공고일 또는 청약 접수일: 2023-03-10
나이: 28
가점 계산 점수: 69.67
당첨 확률: 140.61%
당첨 가능성: 높음
---
사용자 5
거주 지역: 대구광역시
모집 공고일 또는 청약 접수일: 2023-11-19
나이: 48
가점 계산 점수: 96.05
당첨 확률: 524.64%
당첨 가능성: 높음
---
사용자 6
거주 지역: 인천광역시
모집 공고일 또는 청약 접수일: 2023-04-07
나이: 30
가점 계산 점수: 49.38
당첨 확률: 20.21%
당첨 가능성: 낮음
---
사용자 7
거주 지역: 서울특별시
모집 공고일 또는 청약 접수일: 2023-12-09
나이: 23
가점 계산 점수: 99.68
당첨 확률: 524.64%
당첨 가능성: 높음
---
사용자 8
거주 지역: 서울특별시
모집 공고일 또는 청약 접수일: 2023-04-26
나이: 30
가점 계산 점수: 80.83
당첨 확률: 524.64%
당첨 가능성: 높음
---
사용자 9
거주 지역: 인천광역시
모집 공고일 또는 청약 접수일: 2023-05-11
나이: 45
가점 계산 점수: 66.13
당첨 확률: 64.85%
당첨 가능성: 높음
---
사용자 10
거주 지역: 서울특별시
모집 공고일 또는 청약 접수일: 2023-12-23
나이: 61
가점 계산 점수: 31.17
당첨 확률: 2.71%
당첨 가능성: 낮음
---
