In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Korean font setup for matplotlib.
# pip install koreanize_matplotlib
plt.rc('font', family='Malgun Gothic')
# Do not use the Unicode minus glyph for negative tick labels.
plt.rc('axes', unicode_minus=False)
# Data root prepended to every read_csv path in this notebook.
# NOTE(review): machine-specific absolute path — adjust per environment.
# root = 'C:/workspace/python/project/data/'
root = 'G:/workspace/python/python_project/data/'

# Google Drive: https://drive.google.com/drive/folders/1zIzm1o8-3uxcWSU2DoWpB8aV0Oxdfz_P?usp=sharing

In [None]:
# Housing-status data frames; both CSV exports are read as cp949.
abode_family_df = pd.read_csv(
    root + '주거실태_Data/거처의_종류_및_거처__가구__가구원__시군구_20241120174650.csv',
    encoding='cp949',
)
abode_house_category = pd.read_csv(
    root + '주거실태_Data/주택의_종류별_주택__읍면동_연도_끝자리_0__5___시군구_그_외_연도__20241120174542.csv',
    encoding='cp949',
)

In [None]:
# Column dtypes and non-null counts of the two housing-status tables.
abode_family_df.info()
print('\n' + '+=' * 20 + '\n')  # blank line, separator rule, blank line
abode_house_category.info()

In [None]:
# Economically-active-population data frame.
# NOTE(review): no encoding is passed here, unlike the cp949 reads of the
# housing-status files — confirm this CSV really is in pandas' default encoding.
economy_activity_df = pd.read_csv(
    root + '주거실태_Data/경제활동인구.csv',
)

In [None]:
# Column dtypes and non-null counts of the economically-active-population table.
# DataFrame.info() prints its report and returns None, so it is called directly:
# wrapping it in display() only appended a stray "None" to the cell output.
economy_activity_df.info()

In [None]:
# Population data frames.
# NOTE(review): only the first file is read as cp949; the Seoul file relies on
# pandas' default encoding — confirm that matches the file.
population_df = pd.read_csv(
    root + '인구수_Data/2021-2023_인구__가구_및_주택_–_읍면동_연도_끝자리_0__5___시군구_그_외_연도__20241121124805.csv',
    encoding='cp949',
)
seoul_pop_df = pd.read_csv(
    root + '인구수_Data/서울시_인구수.csv',
)

In [None]:
# Column dtypes and non-null counts of the two population tables.
# DataFrame.info() prints its report and returns None, so it is called directly:
# wrapping it in display() only appended a stray "None" after each report.
population_df.info()
print()
print('+=' * 20)
print()
seoul_pop_df.info()

In [None]:
# Consolidated schema overview of every table loaded so far.
# DataFrame.info() prints its report and returns None, so no call is wrapped in
# display(); doing so only appended a stray "None" after each report.

# Housing-status tables
abode_family_df.info()
print()
print('+=' * 20)
print()
abode_house_category.info()
# Economically-active-population table
economy_activity_df.info()
# Population tables
population_df.info()
print()
print('+=' * 20)
print()
seoul_pop_df.info()

In [None]:
# Summary statistics of the raw housing-status table.
abode_family_df.describe()

In [None]:
# Raw column labels; the following cell searches them for the year strings.
abode_family_df.columns

In [None]:
# One boolean column mask per year: a column is selected when its label contains
# the year string. The first two columns are force-selected in every mask
# (presumably the key columns shared by all years — verify against the CSV).
year_2021 = abode_family_df.columns.str.contains('2021')
year_2021[[0, 1]] = True

year_2022 = abode_family_df.columns.str.contains('2022')
year_2022[[0, 1]] = True

year_2023 = abode_family_df.columns.str.contains('2023')
year_2023[[0, 1]] = True

In [None]:
# Split the wide table into one frame per year with the column masks, then use
# the values of row 0 as that frame's column labels.
abode_family_2021_df = abode_family_df.loc[:, year_2021]
columns_2021 = abode_family_2021_df.loc[0].values
abode_family_2021_df.columns = columns_2021

abode_family_2022_df = abode_family_df.loc[:, year_2022]
columns_2022 = abode_family_2022_df.loc[0].values
abode_family_2022_df.columns = columns_2022

abode_family_2023_df = abode_family_df.loc[:, year_2023]
columns_2023 = abode_family_2023_df.loc[0].values
abode_family_2023_df.columns = columns_2023

# Preview the first three rows of each frame (row 0 is still present here).
display(
    abode_family_2021_df.head(3),
    abode_family_2022_df.head(3),
    abode_family_2023_df.head(3),
)

In [None]:
'''
0 행 삭제 클래스
'''
class ManagedDataFrame:
    """Named wrapper around a DataFrame that drops the row labelled 0 at most once.

    Attributes:
        name: Label used in the status messages and in ``repr``.
        df: The wrapped frame; rebound to a new frame when row 0 is dropped.
        row_deleted: True once ``delete_row_once`` has dropped row 0.
    """

    def __init__(self, name, df):
        self.name, self.df = name, df
        self.row_deleted = False  # nothing has been dropped yet

    def delete_row_once(self):
        """Drop the row with index label 0 unless it was already dropped or is absent.

        Prints a status message in either case and returns None.
        """
        if self.row_deleted or 0 not in self.df.index:
            print(f"{self.name}: 삭제 작업이 이미 완료되었거나 0행이 없습니다.")
            return

        self.df = self.df.drop(0)
        self.row_deleted = True
        print(f"{self.name}: 0행이 삭제되었습니다.")

    def __repr__(self):
        """Return the name, the deletion flag and the wrapped frame as text."""
        return f"ManagedDataFrame(name={self.name}, row_deleted={self.row_deleted}, df=\n{self.df}\n)"

In [None]:
# Wrap each yearly frame so that its label row (index label 0) is dropped at most once.
# The isinstance guards make the cell safe to re-run: without them a second run
# would wrap a ManagedDataFrame inside another one, and delete_row_once() would
# then fail with AttributeError because the wrapper has no `.index`.
if not isinstance(abode_family_2021_df, ManagedDataFrame):
    abode_family_2021_df = ManagedDataFrame('abode_family_2021', abode_family_2021_df)
if not isinstance(abode_family_2022_df, ManagedDataFrame):
    abode_family_2022_df = ManagedDataFrame('abode_family_2022', abode_family_2022_df)
if not isinstance(abode_family_2023_df, ManagedDataFrame):
    abode_family_2023_df = ManagedDataFrame('abode_family_2023', abode_family_2023_df)

abode_family_2021_df.delete_row_once()
abode_family_2022_df.delete_row_once()
abode_family_2023_df.delete_row_once()

In [None]:
# Unwrap the managed objects back into plain DataFrames.
# The isinstance guards keep the cell re-runnable: once a name already holds a
# plain DataFrame, reading `.df` from it again would normally raise AttributeError.
if isinstance(abode_family_2021_df, ManagedDataFrame):
    abode_family_2021_df = abode_family_2021_df.df
if isinstance(abode_family_2022_df, ManagedDataFrame):
    abode_family_2022_df = abode_family_2022_df.df
if isinstance(abode_family_2023_df, ManagedDataFrame):
    abode_family_2023_df = abode_family_2023_df.df

In [None]:
# Column labels of the 2021 frame after row 0 was promoted to the header.
abode_family_2021_df.columns

In [None]:
# Inspect the 2021 frame after its label row (index 0) was dropped.
abode_family_2021_df

In [None]:
# Shorten the administrative-district column label to '구' in every yearly frame.
abode_family_2021_df = abode_family_2021_df.rename({'행정구역별(시군구)': '구'}, axis='columns')
abode_family_2022_df = abode_family_2022_df.rename({'행정구역별(시군구)': '구'}, axis='columns')
abode_family_2023_df = abode_family_2023_df.rename({'행정구역별(시군구)': '구'}, axis='columns')

In [None]:
# Inspect the 2021 frame after the district column was renamed to '구'.
abode_family_2021_df

In [None]:
# Distinct values of the '구' (district) column in the 2021 frame, re-indexed from 0.
unique_gu_df = (
    abode_family_2021_df[['구']]
    .drop_duplicates()
    .reset_index(drop=True)
)
print(unique_gu_df)

print('\n' + '+=' * 20 + '\n')  # blank line, separator rule, blank line

# Distinct values of the '거처의 종류' (dwelling type) column, re-indexed from 0.
unique_type_df = (
    abode_family_2021_df[['거처의 종류']]
    .drop_duplicates()
    .reset_index(drop=True)
)
print(unique_type_df)

In [None]:
# Rows whose dwelling type is the grand total ("계"), one re-indexed frame per year.
abode_family_2021_df_sum = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("계")]
    .reset_index(drop=True)
)
abode_family_2022_df_sum = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("계")]
    .reset_index(drop=True)
)
abode_family_2023_df_sum = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("계")]
    .reset_index(drop=True)
)

In [None]:
# First three rows of the 2021 grand-total frame.
display(abode_family_2021_df_sum.head(3))

In [None]:
# Rows whose dwelling type is the housing total ("주택_계"), one re-indexed frame per year.
abode_family_2021_df_house_sum = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택_계")]
    .reset_index(drop=True)
)
abode_family_2022_df_house_sum = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택_계")]
    .reset_index(drop=True)
)
abode_family_2023_df_house_sum = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택_계")]
    .reset_index(drop=True)
)

In [None]:
# First five rows of the 2022 housing-total frame.
display(abode_family_2022_df_house_sum.head(5))

In [None]:
# Rows for detached houses ("주택_단독주택"), one re-indexed frame per year.
abode_family_2021_df_singlehouse = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택_단독주택")]
    .reset_index(drop=True)
)
abode_family_2022_df_singlehouse = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택_단독주택")]
    .reset_index(drop=True)
)
abode_family_2023_df_singlehouse = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택_단독주택")]
    .reset_index(drop=True)
)

In [None]:
# First five rows of the 2023 detached-house frame.
display(abode_family_2023_df_singlehouse.head(5))

In [None]:
# Rows for apartments ("주택_아파트"), one re-indexed frame per year.
abode_family_2021_df_apt = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택_아파트")]
    .reset_index(drop=True)
)
abode_family_2022_df_apt = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택_아파트")]
    .reset_index(drop=True)
)
abode_family_2023_df_apt = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택_아파트")]
    .reset_index(drop=True)
)

In [None]:
# First three rows of the 2021 apartment frame.
abode_family_2021_df_apt.head(3)

In [None]:
# Rows for row houses ("주택_연립주택"), one re-indexed frame per year.
abode_family_2021_df_coalitionhouse = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택_연립주택")]
    .reset_index(drop=True)
)
abode_family_2022_df_coalitionhouse = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택_연립주택")]
    .reset_index(drop=True)
)
abode_family_2023_df_coalitionhouse = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택_연립주택")]
    .reset_index(drop=True)
)

In [None]:
# First three rows of the 2021 row-house frame.
abode_family_2021_df_coalitionhouse.head(3)

In [None]:
# Rows for multi-household houses ("주택_다세대주택"), one re-indexed frame per year.
abode_family_2021_df_multihouse = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택_다세대주택")]
    .reset_index(drop=True)
)
abode_family_2022_df_multihouse = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택_다세대주택")]
    .reset_index(drop=True)
)
abode_family_2023_df_multihouse = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택_다세대주택")]
    .reset_index(drop=True)
)

In [None]:
# Inspect the 2021 multi-household-house frame.
abode_family_2021_df_multihouse

In [None]:
# Rows for housing inside non-residential buildings ("주택_비주거용 건물 내 주택"),
# one re-indexed frame per year.
abode_family_2021_df_nonresident = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택_비주거용 건물 내 주택")]
    .reset_index(drop=True)
)
abode_family_2022_df_nonresident = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택_비주거용 건물 내 주택")]
    .reset_index(drop=True)
)
abode_family_2023_df_nonresident = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택_비주거용 건물 내 주택")]
    .reset_index(drop=True)
)

In [None]:
# First three rows of the 2021 frame for housing inside non-residential buildings.
abode_family_2021_df_nonresident.head(3)

In [None]:
# Rows for the total of non-housing dwellings ("주택 이외의 거처_계"),
# one re-indexed frame per year.
abode_family_2021_df_etchouse = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택 이외의 거처_계")]
    .reset_index(drop=True)
)
abode_family_2022_df_etchouse = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택 이외의 거처_계")]
    .reset_index(drop=True)
)
abode_family_2023_df_etchouse = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택 이외의 거처_계")]
    .reset_index(drop=True)
)

In [None]:
# First three rows of the 2021 non-housing-dwellings total frame.
abode_family_2021_df_etchouse.head(3)

In [None]:
# Rows for officetels ("주택 이외의 거처_오피스텔"), one re-indexed frame per year.
abode_family_2021_df_officetel = (
    abode_family_2021_df[abode_family_2021_df["거처의 종류"].eq("주택 이외의 거처_오피스텔")]
    .reset_index(drop=True)
)
abode_family_2022_df_officetel = (
    abode_family_2022_df[abode_family_2022_df["거처의 종류"].eq("주택 이외의 거처_오피스텔")]
    .reset_index(drop=True)
)
abode_family_2023_df_officetel = (
    abode_family_2023_df[abode_family_2023_df["거처의 종류"].eq("주택 이외의 거처_오피스텔")]
    .reset_index(drop=True)
)

In [None]:
# First three rows of the 2021 officetel frame.
abode_family_2021_df_officetel.head(3)

In [None]:
# Catalog of the 2021 frames derived from
# 주거실태_Data/거처의_종류_및_거처__가구__가구원__시군구.
# Jupyter renders only the final expression of a cell; the earlier bare names
# are evaluated and discarded, so this cell mainly serves as a name reference.
# 2021 all rows
abode_family_2021_df
# 2021 grand total ("계")
abode_family_2021_df_sum
# 2021 housing total ("주택_계")
abode_family_2021_df_house_sum
# 2021 detached houses ("주택_단독주택")
abode_family_2021_df_singlehouse
# 2021 apartments ("주택_아파트")
abode_family_2021_df_apt
# 2021 row houses ("주택_연립주택")
abode_family_2021_df_coalitionhouse
# 2021 multi-household houses ("주택_다세대주택")
abode_family_2021_df_multihouse
# 2021 housing inside non-residential buildings ("주택_비주거용 건물 내 주택")
abode_family_2021_df_nonresident
# 2021 non-housing dwellings total ("주택 이외의 거처_계")
abode_family_2021_df_etchouse
# 2021 officetels ("주택 이외의 거처_오피스텔")
abode_family_2021_df_officetel

In [None]:
# Catalog of the 2022 frames derived from
# 주거실태_Data/거처의_종류_및_거처__가구__가구원__시군구.
# Jupyter renders only the final expression of a cell; the earlier bare names
# are evaluated and discarded, so this cell mainly serves as a name reference.
# 2022 all rows
abode_family_2022_df
# 2022 grand total ("계")
abode_family_2022_df_sum
# 2022 housing total ("주택_계")
abode_family_2022_df_house_sum
# 2022 detached houses ("주택_단독주택")
abode_family_2022_df_singlehouse
# 2022 apartments ("주택_아파트")
abode_family_2022_df_apt
# 2022 row houses ("주택_연립주택")
abode_family_2022_df_coalitionhouse
# 2022 multi-household houses ("주택_다세대주택")
abode_family_2022_df_multihouse
# 2022 housing inside non-residential buildings ("주택_비주거용 건물 내 주택")
abode_family_2022_df_nonresident
# 2022 non-housing dwellings total ("주택 이외의 거처_계")
abode_family_2022_df_etchouse
# 2022 officetels ("주택 이외의 거처_오피스텔")
abode_family_2022_df_officetel

In [None]:
# Catalog of the 2023 frames derived from
# 주거실태_Data/거처의_종류_및_거처__가구__가구원__시군구.
# Jupyter renders only the final expression of a cell; the earlier bare names
# are evaluated and discarded, so this cell mainly serves as a name reference.
# 2023 all rows
abode_family_2023_df
# 2023 grand total ("계")
abode_family_2023_df_sum
# 2023 housing total ("주택_계")
abode_family_2023_df_house_sum
# 2023 detached houses ("주택_단독주택")
abode_family_2023_df_singlehouse
# 2023 apartments ("주택_아파트")
abode_family_2023_df_apt
# 2023 row houses ("주택_연립주택")
abode_family_2023_df_coalitionhouse
# 2023 multi-household houses ("주택_다세대주택")
abode_family_2023_df_multihouse
# 2023 housing inside non-residential buildings ("주택_비주거용 건물 내 주택")
abode_family_2023_df_nonresident
# 2023 non-housing dwellings total ("주택 이외의 거처_계")
abode_family_2023_df_etchouse
# 2023 officetels ("주택 이외의 거처_오피스텔")
abode_family_2023_df_officetel

In [None]:
# First ten rows of the raw housing-by-type table.
abode_house_category.head(10)

In [None]:
# One boolean column mask per year for the housing-by-type table: a column is
# selected when its label contains the year string. The first two columns are
# force-selected in every mask.
category_2021 = abode_house_category.columns.str.contains('2021')
category_2021[[0, 1]] = True

category_2022 = abode_house_category.columns.str.contains('2022')
category_2022[[0, 1]] = True

category_2023 = abode_house_category.columns.str.contains('2023')
category_2023[[0, 1]] = True

In [None]:
# Split the housing-by-type table into one frame per year with the column masks,
# then use the values of row 0 as that frame's column labels.
abode_house_category_2021_df = abode_house_category.loc[:, category_2021]
house_col_2021 = abode_house_category_2021_df.loc[0].values
abode_house_category_2021_df.columns = house_col_2021

abode_house_category_2022_df = abode_house_category.loc[:, category_2022]
house_col_2022 = abode_house_category_2022_df.loc[0].values
abode_house_category_2022_df.columns = house_col_2022

abode_house_category_2023_df = abode_house_category.loc[:, category_2023]
house_col_2023 = abode_house_category_2023_df.loc[0].values
abode_house_category_2023_df.columns = house_col_2023

# Preview the first three rows of each frame (row 0 is still present here).
display(
    abode_house_category_2021_df.head(3),
    abode_house_category_2022_df.head(3),
    abode_house_category_2023_df.head(3),
)

In [None]:
# Wrap each yearly frame so that its label row (index label 0) is dropped at most once.
# The isinstance guards make the cell safe to re-run: without them a second run
# would wrap a ManagedDataFrame inside another one, and delete_row_once() would
# then fail with AttributeError because the wrapper has no `.index`.
if not isinstance(abode_house_category_2021_df, ManagedDataFrame):
    abode_house_category_2021_df = ManagedDataFrame('abode_house_category_2021', abode_house_category_2021_df)
if not isinstance(abode_house_category_2022_df, ManagedDataFrame):
    abode_house_category_2022_df = ManagedDataFrame('abode_house_category_2022', abode_house_category_2022_df)
if not isinstance(abode_house_category_2023_df, ManagedDataFrame):
    abode_house_category_2023_df = ManagedDataFrame('abode_house_category_2023', abode_house_category_2023_df)

abode_house_category_2021_df.delete_row_once()
abode_house_category_2022_df.delete_row_once()
abode_house_category_2023_df.delete_row_once()

In [None]:
# Unwrap the managed objects back into plain DataFrames.
# The isinstance guards keep the cell re-runnable: once a name already holds a
# plain DataFrame, reading `.df` from it again would normally raise AttributeError.
if isinstance(abode_house_category_2021_df, ManagedDataFrame):
    abode_house_category_2021_df = abode_house_category_2021_df.df
if isinstance(abode_house_category_2022_df, ManagedDataFrame):
    abode_house_category_2022_df = abode_house_category_2022_df.df
if isinstance(abode_house_category_2023_df, ManagedDataFrame):
    abode_house_category_2023_df = abode_house_category_2023_df.df

In [None]:
# Column labels of the 2021 housing-by-type frame after row 0 was promoted to the header.
abode_house_category_2021_df.columns

In [None]:
# Inspect the 2021 housing-by-type frame after its label row (index 0) was dropped.
abode_house_category_2021_df

In [None]:
# Shorten the administrative-district column label to '구' in every yearly frame.
abode_house_category_2021_df = abode_house_category_2021_df.rename({'행정구역별(읍면동)': '구'}, axis='columns')
abode_house_category_2022_df = abode_house_category_2022_df.rename({'행정구역별(읍면동)': '구'}, axis='columns')
abode_house_category_2023_df = abode_house_category_2023_df.rename({'행정구역별(읍면동)': '구'}, axis='columns')

In [None]:
# Disabled helper, kept for reference only (never executed).
# It was previously parked inside a bare string literal, which Jupyter echoed as
# the cell's output; plain comments keep the cell silent.
# NOTE(review): the design is flawed — the single function-level `executed` flag
# is shared by every call, so only the first call would drop columns and every
# later call (e.g. for another year's frame) would return its input unchanged.
#
# def drop_columns_once(df, column_names):
#     if not hasattr(drop_columns_once, "executed"):
#         drop_columns_once.executed = False
#
#     if not drop_columns_once.executed:
#         # drop the columns
#         result = df.drop(columns=column_names)
#         drop_columns_once.executed = True
#         return result
#     else:
#         print("This function can only be executed once.")
#         return df

In [None]:
# Drop the three detached-house sub-type columns (general, multi-family,
# business-combined) from every yearly frame.
# errors='ignore' makes the cell idempotent: each frame is rebound to its own
# name, so a second run would otherwise raise KeyError on the missing columns.
abode_house_category_2021_df = abode_house_category_2021_df.drop(
    columns=["단독주택-일반", "단독주택-다가구", "단독주택-영업겸용"],
    errors='ignore',
)
abode_house_category_2022_df = abode_house_category_2022_df.drop(
    columns=["단독주택-일반", "단독주택-다가구", "단독주택-영업겸용"],
    errors='ignore',
)
abode_house_category_2023_df = abode_house_category_2023_df.drop(
    columns=["단독주택-일반", "단독주택-다가구", "단독주택-영업겸용"],
    errors='ignore',
)

In [None]:
# Relabel the detached-house total column ('단독주택-계') as plain '단독주택'.
abode_house_category_2021_df = abode_house_category_2021_df.rename({'단독주택-계': '단독주택'}, axis='columns')
abode_house_category_2022_df = abode_house_category_2022_df.rename({'단독주택-계': '단독주택'}, axis='columns')
abode_house_category_2023_df = abode_house_category_2023_df.rename({'단독주택-계': '단독주택'}, axis='columns')

In [None]:
# Render the 2021 housing-by-type frame after the column drop and rename.
display(abode_house_category_2021_df)

In [None]:
# Render the 2022 housing-by-type frame after the column drop and rename.
display(abode_house_category_2022_df)

In [None]:
# Render the 2023 housing-by-type frame after the column drop and rename.
display(abode_house_category_2023_df)