In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor

# Build two small example datasets; each has its own feature columns
# and a shared 'target' column.
data_A = {
    "year": [2000, 2001, 2002, 2003, 2004],
    "value_A": [10, 12, 15, 13, 14],
    "target": [100, 110, 130, 115, 120],
}
data_B = {
    "age": [20, 21, 22, 23, 24],
    "value_B": [5, 6, 7, 6, 8],
    "target": [100, 105, 120, 110, 125],
}

# Wrap the raw dictionaries in DataFrames (one column per key).
dataset_A, dataset_B = pd.DataFrame(data_A), pd.DataFrame(data_B)

# Train a linear regression on dataset A, holding out 20% of the rows for testing.
X_A, y_A = dataset_A[['year', 'value_A']], dataset_A['target']
X_A_train, X_A_test, y_A_train, y_A_test = train_test_split(
    X_A, y_A, test_size=0.2, random_state=0
)
model_A = LinearRegression().fit(X_A_train, y_A_train)

# Train a second, independent linear regression on dataset B with the same split settings.
X_B, y_B = dataset_B[['age', 'value_B']], dataset_B['target']
X_B_train, X_B_test, y_B_train, y_B_test = train_test_split(
    X_B, y_B, test_size=0.2, random_state=0
)
model_B = LinearRegression().fit(X_B_train, y_B_train)

# Predict on each model's own held-out rows.
predictions_A = model_A.predict(X_A_test)
predictions_B = model_B.predict(X_B_test)

# Combine the results by averaging element-wise.
# Both arrays are truncated to their common length: slicing only one side
# would raise a broadcasting error whenever the other side is the longer one.
n_common = min(len(predictions_A), len(predictions_B))
combined_predictions = (predictions_A[:n_common] + predictions_B[:n_common]) / 2

print("Model A Predictions:", predictions_A)
print("Model B Predictions:", predictions_B)
print("Combined Predictions:", combined_predictions)


Model A Predictions: [125.]
Model B Predictions: [114.56521739]
Combined Predictions: [119.7826087]


In [73]:
import pandas as pd
import numpy as np

# Raw example data: four dictionaries with the same columns, each containing
# NaN gaps (including a missing 'year' in the last row).
data1 = {
    "year": [2000, 2001, 2002, np.nan],
    "value_A": [10, 12, np.nan, 13],
    "value_B": [np.nan, 6, 7, 6],
    "target": [100, 110, 130, np.nan],
}

data2 = {
    "year": [2000, 2000, 2001, 2001, 2002, np.nan],
    "value_A": [10, 12, np.nan, 13, 14, 12],
    "value_B": [np.nan, 6, 7, 7, 6, 8],
    "target": [100, 115, np.nan, 110, 130, np.nan],
}

data3 = {
    "year": [2000, 2000, 2000, 2001, 2001, 2001, np.nan],
    "value_A": [10, 12, np.nan, 13, 12, np.nan, 13],
    "value_B": [np.nan, 6, 7, 6, 8, 6, 7],
    "target": [100, 110, 130, 115, np.nan, 110, 130],
}

data4 = {
    "year": [2000, 2001, 2002, 2000, 2001, np.nan],
    "value_A": [10, 12, np.nan, 13, np.nan, 13],
    "value_B": [np.nan, 6, 7, 6, 8, 7],
    "target": [100, 130, 115, np.nan, 115, np.nan],
}

# Helper that imputes missing values in the 'year' column.
def fill_missing_year(df):
    """Fill missing entries of ``df['year']`` from neighbouring rows, in place.

    Rules, applied row by row from top to bottom:

    * if the previous row has a year, the gap becomes ``previous + 1``
      (this holds regardless of the value of the following row);
    * otherwise, if the next row has a year, the gap becomes ``next - 1``.

    A second, bottom-to-top pass fills gaps left at the start of the frame
    when several leading rows are missing, by counting back from the first
    known year. A column that contains no year at all is left unchanged.

    Rows are addressed by position, so the frame's index labels do not matter.

    Args:
        df: DataFrame containing a ``'year'`` column. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    n_rows = len(df)
    if n_rows == 0:
        return df

    year_col = df.columns.get_loc('year')

    def _year_at(pos):
        """Return the year at row position ``pos``; None if out of range or missing."""
        if pos < 0 or pos >= n_rows:
            return None
        value = df.iloc[pos, year_col]
        return None if pd.isna(value) else value

    # Forward pass: prefer the previous neighbour, fall back to the next one.
    for i in range(n_rows):
        if _year_at(i) is not None:
            continue
        prev_year = _year_at(i - 1)
        next_year = _year_at(i + 1)
        if prev_year is not None:
            df.iloc[i, year_col] = prev_year + 1
        elif next_year is not None:
            df.iloc[i, year_col] = next_year - 1

    # Backward pass: only a run of leading gaps can still be empty here
    # (their next neighbour was itself missing during the forward pass).
    for i in range(n_rows - 2, -1, -1):
        if _year_at(i) is None:
            next_year = _year_at(i + 1)
            if next_year is not None:
                df.iloc[i, year_col] = next_year - 1

    return df

# Build one DataFrame per raw dictionary.
df1, df2, df3, df4 = (pd.DataFrame(raw) for raw in (data1, data2, data3, data4))

# Impute the missing years (each frame is updated and rebound to the same name).
df1, df2, df3, df4 = map(fill_missing_year, (df1, df2, df3, df4))

# Show the last row of each frame, which is where the missing year was.
print(f"Data1 after filling missing year:\n{df1.tail(1)}\n")
print(f"Data2 after filling missing year:\n{df2.tail(1)}\n")
print(f"Data3 after filling missing year:\n{df3.tail(1)}\n")
print(f"Data4 after filling missing year:\n{df4.tail(1)}\n")


Data1 after filling missing year:
     year  value_A  value_B  target
3  2003.0     13.0      6.0     NaN

Data2 after filling missing year:
     year  value_A  value_B  target
5  2003.0     12.0      8.0     NaN

Data3 after filling missing year:
     year  value_A  value_B  target
6  2002.0     13.0      7.0   130.0

Data4 after filling missing year:
     year  value_A  value_B  target
5  2002.0     13.0      7.0     NaN



In [12]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# Single-column frames whose last 'year' entry is missing.
data1 = pd.DataFrame({"year": [2000, 2001, 2002, np.nan]})
data2 = pd.DataFrame({"year": [2000, 2000, 2001, 2001, 2002, np.nan]})
data3 = pd.DataFrame({"year": [2000, 2000, 2000, 2001, 2001, 2001, np.nan]})
data4 = pd.DataFrame({"year": [2000, 2001, 2000, 2001, np.nan]})

def create_lstm_model():
    """Build and compile a small LSTM regression network.

    The network is a 50-unit LSTM layer (ReLU activation, input shape
    ``(1, 1)``) followed by a single-unit Dense layer, compiled with the
    Adam optimizer and mean-squared-error loss.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential([
        LSTM(50, activation='relu', input_shape=(1, 1)),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network

def fill_missing_values_with_lstm(data):
    """Fill missing ``'year'`` values with predictions from a small LSTM.

    The model is trained on the rows whose year is known, using the row
    position as the only input and the min-max scaled year as the target.
    It then predicts the year for the positions where it is missing, and the
    predictions are mapped back to the original scale and written into the
    frame.

    Args:
        data: DataFrame with a ``'year'`` column. It is modified in place.

    Returns:
        The same DataFrame object. It is returned unchanged when no year is
        missing (nothing to predict) or when every year is missing (nothing
        to train on).
    """
    is_missing = data['year'].isna().to_numpy()
    if not is_missing.any() or is_missing.all():
        return data

    # Scale only the 'year' column: the model has a single output, and the
    # predictions are inverse-transformed as a single column below.
    scaler = MinMaxScaler()
    year_scaled = scaler.fit_transform(data[['year']])

    # Row positions (not index labels) are the model inputs, shaped
    # (samples, timesteps=1, features=1) to match the LSTM input shape.
    positions = np.arange(len(data))

    # Train on the rows where the year is known.
    x_train = positions[~is_missing].reshape(-1, 1, 1)
    y_train = year_scaled[~is_missing]

    model = create_lstm_model()
    model.fit(x_train, y_train, epochs=100, verbose=0)

    # Predict the year at the positions where it is missing.
    x_missing = positions[is_missing].reshape(-1, 1, 1)
    y_missing_pred = model.predict(x_missing)

    # Undo the scaling and write the values back; the (k, 1) prediction
    # array is flattened so it lines up with the selected column cells.
    y_missing_pred_rescaled = scaler.inverse_transform(y_missing_pred)
    data.loc[is_missing, 'year'] = y_missing_pred_rescaled.ravel()

    return data

# Run the LSTM-based imputation on every frame, in order.
data1_filled, data2_filled, data3_filled, data4_filled = (
    fill_missing_values_with_lstm(frame) for frame in (data1, data2, data3, data4)
)

# Print the imputed frames.
print('Data1:', data1_filled)
print('Data2:', data2_filled)
print('Data3:', data3_filled)
print('Data4:', data4_filled)


ImportError: Traceback (most recent call last):
  File "C:\Users\Admin\AppData\Roaming\Python\Python312\site-packages\tensorflow\python\pywrap_tensorflow.py", line 70, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: DLL 초기화 루틴을 실행할 수 없습니다.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.