<a href="https://colab.research.google.com/github/garden-bell/computational_physics/blob/main/Computational_Physics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import math
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping


# Raise the maximum number of columns pandas will display (200 here)
pd.set_option('display.max_columns', 200)

In [None]:
# Read each indicator table from CSV (the files carry no header row) and turn
# it directly into a nested Python list so the tables can later be combined
# cell by cell.
divorce_rate = pd.read_csv('data/divorce_rate.csv', header=None).values.tolist()
higher_education = pd.read_csv('data/higher_education.csv', header=None).values.tolist()
housing_prices = pd.read_csv('data/housing_prices.csv', header=None).values.tolist()
inflation_rate = pd.read_csv('data/inflation_rate.csv', header=None).values.tolist()
women_employment = pd.read_csv('data/women_employment.csv', header=None).values.tolist()
birth_rate = pd.read_csv('data/birth_rate.csv', header=None).values.tolist()

In [None]:
# Flatten the six indicator tables into one list of samples: cell (i, j) of
# every table contributes one row of
# [divorce, education, housing, inflation, employment, birth_rate].
# Assumes all six tables have the same shape as divorce_rate — TODO confirm.
data = []
for i in range(len(divorce_rate)):
    for j in range(len(divorce_rate[i])):
        data.append([
            divorce_rate[i][j],
            higher_education[i][j],
            housing_prices[i][j],
            inflation_rate[i][j],
            women_employment[i][j],
            birth_rate[i][j],
        ])

# Fill every NaN with the next non-NaN value found further down the same
# column. Because rows are visited top to bottom, a run of NaNs all receive
# the value that follows the run.
n_rows = len(data)
for i in range(n_rows):
    for j in range(len(data[i])):
        if math.isnan(data[i][j]):
            via = 1
            # The bounds check must come BEFORE indexing; otherwise a NaN with
            # no later value in its column indexes past the last row and
            # raises IndexError.
            while i + via < n_rows and math.isnan(data[i + via][j]):
                via += 1
            if i + via < n_rows:
                data[i][j] = data[i + via][j]
            # else: no later value exists, so the cell is left as NaN.
            # NOTE(review): such rows reach the scaler/model unchanged —
            # decide whether they should be dropped before training.

df = pd.DataFrame(
    data,
    columns=['divorce_rate', 'higher_education', 'housing_prices', 'inflation_rate', 'women_employment', 'birth_rate'],
)

In [None]:
# Separate the five predictor columns from the regression target.
X = df[[
    'divorce_rate',
    'higher_education',
    'housing_prices',
    'inflation_rate',
    'women_employment',
]]
y = df['birth_rate']

# Hold out 10% of the samples as a test set (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Standardize features: statistics are learned from the training split only
# and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Build the regression network: three ReLU hidden layers (64 -> 32 -> 16),
# each followed by dropout, and a single linear output unit.
model = Sequential([
    # Input layer + first hidden layer
    Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'),
    Dropout(0.3),
    # Second hidden layer
    Dense(32, activation='relu'),
    Dropout(0.2),
    # Third hidden layer
    Dense(16, activation='relu'),
    Dropout(0.2),
    # Output layer
    Dense(1),
])

In [None]:
# Compile the model: Adam optimizer with mean-squared-error loss.
model.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=0.0005),
)

# Stop training once the validation loss has not improved for 10 epochs.
early_stopping = EarlyStopping(patience=10, monitor='val_loss')

# Train, keeping 20% of the training split aside for validation.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    batch_size=10,
    epochs=100,
    callbacks=[early_stopping],
)

***First***, we make a prediction using 2013–2022 Korea data from KOSIS.

In [None]:
# Load the Korea indicator tables (CSV files without a header row) and convert
# each one to a nested list so they can be combined cell by cell.
divorce_rate_korea = pd.read_csv('data/divorce_rate_korea.csv', header=None).values.tolist()
higher_education_korea = pd.read_csv('data/higher_education_korea.csv', header=None).values.tolist()
# Path fixed: the original 'datahousing_prices_korea.csv' was missing the '/'
# after the 'data' directory and could not be found.
housing_prices_korea = pd.read_csv('data/housing_prices_korea.csv', header=None).values.tolist()
inflation_rate_korea = pd.read_csv('data/inflation_rate_korea.csv', header=None).values.tolist()
women_employment_korea = pd.read_csv('data/women_employment_korea.csv', header=None).values.tolist()

# One row per table cell: [divorce, education, housing, inflation, employment].
# Assumes all five tables have the same shape as divorce_rate_korea — TODO confirm.
data = [
    [
        divorce_rate_korea[i][j],
        higher_education_korea[i][j],
        housing_prices_korea[i][j],
        inflation_rate_korea[i][j],
        women_employment_korea[i][j],
    ]
    for i in range(len(divorce_rate_korea))
    for j in range(len(divorce_rate_korea[i]))
]

# Note: this rebinds `df`; from here on it holds the Korea features,
# not the training frame.
df = pd.DataFrame(
    data,
    columns=['divorce_rate', 'higher_education', 'housing_prices', 'inflation_rate', 'women_employment'],
)

In [None]:
# Scale the Korea features with the scaler fitted on the training split.
korea_data_scaled = scaler.transform(
    df[['divorce_rate', 'higher_education', 'housing_prices', 'inflation_rate', 'women_employment']]
)

# Run the trained model on the scaled features and show the raw output.
birth_rate_korea_pred = model.predict(korea_data_scaled)
print(birth_rate_korea_pred)

# Print one line per year, pairing 2013..2022 with the predictions in order.
for year, pred in zip(range(2013, 2023), birth_rate_korea_pred):
    print(f'{year}년 예측된 한국의 출산율: {pred[0]}')

In [None]:
# Draw predicted and observed birth rates side by side with identical axes
# so the two panels can be compared directly.
fig, (ax_pred, ax_real) = plt.subplots(1, 2, figsize=(20, 5))

# Left panel: model predictions for 2013-2022.
ax_pred.plot(range(2013, 2023), birth_rate_korea_pred, marker='o', linestyle='-', color='b', label='Birth Rate Prediction')
ax_pred.set_title('Predicted Birth Rate in Korea')
ax_pred.set_xlabel('Year')
ax_pred.set_ylabel('Birth Rate')
ax_pred.set_ylim(0.7, 2.5)
ax_pred.set_xticks(range(2013, 2023))
ax_pred.legend()

# Observed values for the same ten years, hard-coded here.
birth_rate_korea_real = [1.19, 1.21, 1.24, 1.17, 1.05, 0.98, 0.92, 0.84, 0.81, 0.78]

# Right panel: observed values. The series label previously read
# 'Birth Rate Prediction' (copy-paste slip); it now names the real data.
ax_real.plot(range(2013, 2023), birth_rate_korea_real, marker='o', linestyle='-', color='b', label='Real Birth Rate')
ax_real.set_title('Real Birth Rate in Korea')
ax_real.set_xlabel('Year')
ax_real.set_ylabel('Birth Rate')
ax_real.set_ylim(0.7, 2.5)
ax_real.set_xticks(range(2013, 2023))
ax_real.legend()

plt.show()

***Second,*** we repeat the same procedure with Poland data, obtained from OECD stats & data.

---


*Poland data is not used in training.*

In [None]:
# Load the Poland indicator tables (CSV files without a header row) and convert
# each one to a nested list so they can be combined cell by cell.
divorce_rate_poland = pd.read_csv('data/divorce_rate_poland.csv', header=None).values.tolist()
higher_education_poland = pd.read_csv('data/higher_education_poland.csv', header=None).values.tolist()
housing_prices_poland = pd.read_csv('data/housing_prices_poland.csv', header=None).values.tolist()
inflation_rate_poland = pd.read_csv('data/inflation_rate_poland.csv', header=None).values.tolist()
women_employment_poland = pd.read_csv('data/women_employment_poland.csv', header=None).values.tolist()

# One row per table cell: [divorce, education, housing, inflation, employment].
data = [
    [
        divorce_rate_poland[i][j],
        higher_education_poland[i][j],
        housing_prices_poland[i][j],
        inflation_rate_poland[i][j],
        women_employment_poland[i][j],
    ]
    for i in range(len(divorce_rate_poland))
    for j in range(len(divorce_rate_poland[i]))
]

# Note: this rebinds `df` to the Poland features.
df = pd.DataFrame(data)
df.columns = ['divorce_rate', 'higher_education', 'housing_prices', 'inflation_rate', 'women_employment']

# Scale the Poland features with the scaler fitted on the training split.
poland_data_scaled = scaler.transform(
    df[['divorce_rate', 'higher_education', 'housing_prices', 'inflation_rate', 'women_employment']]
)

# Run the trained model on the scaled features and show the raw output.
birth_rate_poland_pred = model.predict(poland_data_scaled)
print(birth_rate_poland_pred)

# Print one line per year, pairing 2013..2022 with the predictions in order.
for year, pred in zip(range(2013, 2023), birth_rate_poland_pred):
    print(f'{year}년 예측된 폴란드의 출산율: {pred[0]}')

# Draw predicted and observed birth rates side by side with identical axes
# so the two panels can be compared directly.
fig, (ax_pred, ax_real) = plt.subplots(1, 2, figsize=(20, 5))

# Left panel: model predictions for 2013-2022.
ax_pred.plot(range(2013, 2023), birth_rate_poland_pred, marker='o', linestyle='-', color='b', label='Birth Rate Prediction')
ax_pred.set_title('Predicted Birth Rate in Poland')
ax_pred.set_xlabel('Year')
ax_pred.set_ylabel('Birth Rate')
ax_pred.set_ylim(0.7, 2.5)
ax_pred.set_xticks(range(2013, 2023))
ax_pred.legend()

# Observed values for the same ten years, hard-coded here.
birth_rate_poland_real = [1.26, 1.29, 1.29, 1.36, 1.45, 1.44, 1.42, 1.39, 1.33, 1.26]

# Right panel: observed values. The series label previously read
# 'Birth Rate Prediction' (copy-paste slip); it now names the real data.
ax_real.plot(range(2013, 2023), birth_rate_poland_real, marker='o', linestyle='-', color='b', label='Real Birth Rate')
ax_real.set_title('Real Birth Rate in Poland')
ax_real.set_xlabel('Year')
ax_real.set_ylabel('Birth Rate')
ax_real.set_ylim(0.7, 2.5)
ax_real.set_xticks(range(2013, 2023))
ax_real.legend()

plt.show()


***Third,*** to cross-check the model on another unseen country, we validate it with Spain data, also obtained from OECD stats & data.

---

*Spain data is not used in training.*

In [None]:
# Load the Spain indicator tables (CSV files without a header row) and convert
# each one to a nested list so they can be combined cell by cell.
divorce_rate_spain = pd.read_csv('data/divorce_rate_spain.csv', header=None).values.tolist()
higher_education_spain = pd.read_csv('data/higher_education_spain.csv', header=None).values.tolist()
housing_prices_spain = pd.read_csv('data/housing_prices_spain.csv', header=None).values.tolist()
inflation_rate_spain = pd.read_csv('data/inflation_rate_spain.csv', header=None).values.tolist()
women_employment_spain = pd.read_csv('data/women_employment_spain.csv', header=None).values.tolist()

# One row per table cell: [divorce, education, housing, inflation, employment].
data = [
    [
        divorce_rate_spain[i][j],
        higher_education_spain[i][j],
        housing_prices_spain[i][j],
        inflation_rate_spain[i][j],
        women_employment_spain[i][j],
    ]
    for i in range(len(divorce_rate_spain))
    for j in range(len(divorce_rate_spain[i]))
]

# Note: this rebinds `df` to the Spain features.
df = pd.DataFrame(data)
df.columns = ['divorce_rate', 'higher_education', 'housing_prices', 'inflation_rate', 'women_employment']

# Scale the Spain features with the scaler fitted on the training split.
spain_data_scaled = scaler.transform(
    df[['divorce_rate', 'higher_education', 'housing_prices', 'inflation_rate', 'women_employment']]
)

# Run the trained model on the scaled features and show the raw output.
birth_rate_spain_pred = model.predict(spain_data_scaled)
print(birth_rate_spain_pred)

# Print one line per year, pairing 2013..2022 with the predictions in order.
for year, pred in zip(range(2013, 2023), birth_rate_spain_pred):
    print(f'{year}년 예측된 스페인의 출산율: {pred[0]}')

# Draw predicted and observed birth rates side by side with identical axes
# so the two panels can be compared directly.
fig, (ax_pred, ax_real) = plt.subplots(1, 2, figsize=(20, 5))

# Left panel: model predictions for 2013-2022.
ax_pred.plot(range(2013, 2023), birth_rate_spain_pred, marker='o', linestyle='-', color='b', label='Birth Rate Prediction')
ax_pred.set_title('Predicted Birth Rate in Spain')
ax_pred.set_xlabel('Year')
ax_pred.set_ylabel('Birth Rate')
ax_pred.set_ylim(0.7, 2.5)
ax_pred.set_xticks(range(2013, 2023))
ax_pred.legend()

# Observed values for the same ten years, hard-coded here.
# NOTE(review): confirm these figures against the OECD source — they look
# high for Spain's fertility rate.
birth_rate_spain_real = [1.93, 1.89, 1.85, 1.82, 1.78, 1.75, 1.70, 1.63, 1.72, 1.80]

# Right panel: observed values. The series label previously read
# 'Birth Rate Prediction' (copy-paste slip); it now names the real data.
ax_real.plot(range(2013, 2023), birth_rate_spain_real, marker='o', linestyle='-', color='b', label='Real Birth Rate')
ax_real.set_title('Real Birth Rate in Spain')
ax_real.set_xlabel('Year')
ax_real.set_ylabel('Birth Rate')
ax_real.set_ylim(0.7, 2.5)
ax_real.set_xticks(range(2013, 2023))
ax_real.legend()

plt.show()
