### BUILD NEIGHBORS

In [21]:
N_NEIGHBORS_MAX = 65  # number of neighbor indices stored for every row


class Neighbors:
    """Rolling-window nearest-neighbor index over the rows of ``pivot``.

    For every row ``t`` the ``N_NEIGHBORS_MAX`` nearest rows are searched
    among the ``window_size`` rows that immediately precede it.  The result
    is stored in ``self.neighbors`` with shape
    ``(len(pivot), N_NEIGHBORS_MAX)`` and holds *global* row positions of
    ``pivot``, so it can be used directly to index arrays that are aligned
    with ``pivot``.

    The first ``window_size`` rows do not have a full history window.  They
    are filled with a random permutation of ``1..N_NEIGHBORS_MAX`` and are
    placeholders only: features derived from those rows must be discarded
    by the caller.

    Args:
        name: Label embedded in the generated feature column names.
        pivot: One row per sample; the columns are the coordinates used
            for the distance computation.
        p: Power parameter forwarded to ``NearestNeighbors``.
        metric: Distance metric forwarded to ``NearestNeighbors``.
        metric_params: Extra metric arguments forwarded to
            ``NearestNeighbors``.
        exclude_self: When true, ``make_nn_feature`` skips the first
            (closest) neighbor of every row.
        window_size: Number of preceding rows searched for neighbors.

    Raises:
        ValueError: If a rolling search is required but ``window_size`` is
            smaller than ``N_NEIGHBORS_MAX``.
    """

    def __init__(self,
                 name: str,
                 pivot: pd.DataFrame,
                 p: float,
                 metric: str = 'minkowski',
                 metric_params: object = None,
                 exclude_self: bool = True,
                 window_size: int = 10000,
                 ):
        self.name = name
        self.exclude_self = exclude_self
        self.p = p
        self.metric = metric

        n_rows = len(pivot)
        # Preallocate the result instead of growing it with np.append, which
        # copies the whole array on every iteration.
        neighbors = np.empty((n_rows, N_NEIGHBORS_MAX), dtype=int)

        # Warm-up rows: not enough history yet, so store random placeholder
        # indices.  These rows have to be dropped downstream.
        for t in range(min(window_size, n_rows)):
            neighbors[t] = np.random.permutation(
                np.arange(1, N_NEIGHBORS_MAX + 1)
            )

        if n_rows > window_size:
            if window_size < N_NEIGHBORS_MAX:
                raise ValueError(
                    f"window_size ({window_size}) must be at least "
                    f"N_NEIGHBORS_MAX ({N_NEIGHBORS_MAX})"
                )

            nn = NearestNeighbors(
                n_neighbors=N_NEIGHBORS_MAX,
                p=p,
                metric=metric,
                metric_params=metric_params
            )
            # Fit and query on the same raw array so both calls see the same
            # kind of input.
            values = pivot.values

            for t in range(window_size, n_rows):
                window_start = t - window_size
                nn.fit(values[window_start:t])
                local_idx = nn.kneighbors(
                    values[t].reshape(1, -1), return_distance=False
                ).reshape(-1)
                # kneighbors reports positions inside the fitted window;
                # shift them so they address rows of the full pivot.
                neighbors[t] = local_idx + window_start

        self.neighbors = neighbors
        self.columns = self.index = self.feature_values = self.feature_col = None

    def rearrange_feature_values(self, df: pd.DataFrame, feature_col: str) -> None:
        """Populate ``feature_values`` and related attributes (subclass hook)."""
        raise NotImplementedError()

    def make_nn_feature(self, n=5, agg=np.mean) -> pd.DataFrame:
        """Aggregate a feature over the nearest neighbors of every row.

        Args:
            n: Exclusive upper bound of the neighbor ranks that are used.
            agg: Reduction called as ``agg(values, axis=0)``; its
                ``__name__`` becomes part of the output column name.

        Returns:
            A two-column frame: ``time_id`` and
            ``{feature_col}_nn{n}_{name}_{agg.__name__}``.

        Raises:
            AssertionError: If ``rearrange_feature_values`` was not called.
        """
        assert self.feature_values is not None, "should call rearrange_feature_values beforehand"

        # NOTE(review): the rolling window never contains the query row, so
        # rank 0 looks like a genuine neighbor rather than the row itself;
        # confirm that skipping it via exclude_self is still intended.
        start = 1 if self.exclude_self else 0

        pivot_aggs = pd.DataFrame(
            agg(self.feature_values[start:n, :, 0], axis=0),
            columns=self.columns,
            index=self.index
        )

        # Exactly two columns are expected after the reset: the former index
        # and the single aggregated feature.
        dst = pivot_aggs.reset_index()
        dst.columns = ['time_id', f'{self.feature_col}_nn{n}_{self.name}_{agg.__name__}']
        return dst
    

class TimeIdNeighbors(Neighbors):
    """Neighbors variant whose feature rows are keyed by ``time_id``."""

    def rearrange_feature_values(self, df: pd.DataFrame, feature_col: str) -> None:
        """Gather ``feature_col`` of every row's neighbors into a 3-D array.

        Missing values in ``feature_col`` are replaced by the column mean
        first.  Afterwards ``self.feature_values[k, r, 0]`` holds the feature
        value of the rank-``k`` neighbor of row ``r``.

        Args:
            df: Frame containing ``time_id`` and ``feature_col`` columns.
            feature_col: Name of the column to gather.
        """
        by_time = df[['time_id', feature_col]].set_index('time_id')
        by_time = by_time.fillna(by_time.mean())

        gathered = np.zeros((N_NEIGHBORS_MAX, by_time.shape[0], 1))

        # One fancy-index lookup for all ranks: transpose the neighbor table
        # so that the rank axis comes first, matching ``gathered``.
        rank_major = self.neighbors[:, :N_NEIGHBORS_MAX].T
        gathered[:, :, 0] += by_time.values[rank_major, 0]

        self.feature_col = feature_col
        self.feature_values = gathered
        self.index = list(by_time.index)
        self.columns = list(by_time.columns)

    def __repr__(self) -> str:
        return f"time-id NN (name={self.name}, metric={self.metric}, p={self.p})"


In [23]:
import pickle

# Name of the file the time_id_neighbors variable is written to
file_name = "time_id_neighbors.pkl"

# Serialize the time_id_neighbors variable into the pickle file
with open(file_name, 'wb') as file:
    pickle.dump(time_id_neighbors, file)

# The message below is Korean for "<file_name> file was saved successfully."
print(f"{file_name} 파일이 성공적으로 저장되었습니다.")

time_id_neighbors.pkl 파일이 성공적으로 저장되었습니다.
