# Import

In [None]:
import csv
import os
import sys

import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm

## Add configuration and utility paths

In [None]:
# Make the project-local config and util directories importable.
# Order is preserved: config first, then util.
sys.path.extend(
    [
        "/home/jovyan/core/config/",
        "/home/jovyan/core/util/",
    ]
)

In [None]:
from ALL import config 
from util import *

## Set conditions

In [None]:
# Register tqdm's pandas integration, then widen the notebook display limits.
tqdm.pandas()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 50

In [None]:
# Dataset and vectorizer selected for this run; both names are used to build
# the input and output paths below.
data_type = "AgNewsTitle"
vectorize_type = "doc2vec"

# Read data

In [None]:
# Load the preprocessed master table; the first CSV column becomes the index.
master_csv_path = f"../../Preprocessing/data/{data_type}/master.csv"
df = pd.read_csv(master_csv_path, index_col=0)

In [None]:
# Load the class labels. Each CSV row is read as one list of strings, so
# ``class_labels`` is a list of rows (a list of lists).
# ``newline=""`` hands line-ending handling to the csv module, as the csv
# documentation requires; without it, quoted fields that contain newlines
# are not parsed correctly.
with open(
    f"../../Preprocessing/data/{data_type}/class.csv", mode="r", newline=""
) as f:
    reader = csv.reader(f)
    class_labels = list(reader)

In [None]:
# Pull the settings for the selected vectorizer out of the shared config.
vectorize_config = config["vectorize"][vectorize_type]
max_vector_model_nums = vectorize_config["max_model_num"]
vector_dims = vectorize_config["dims"]

# Functions

In [None]:
def centralize_array(array):
    """Shift ``array`` so that its mean along axis 0 becomes zero.

    Args:
        array: Array-like accepted by ``np.mean``; the mean is taken along
            axis 0.

    Returns:
        ``array`` minus its axis-0 mean (broadcast across the leading axis).
    """
    axis0_mean = np.mean(array, axis=0)
    return array - axis0_mean

In [None]:
def normarize_array(array):
    """Scale every row of a 2-D array to unit Euclidean (L2) length.

    Rows whose norm is exactly zero are returned as zeros rather than NaN:
    dividing 0 by 0 would otherwise silently write NaN into the result.
    Rows with a non-zero norm are scaled exactly as before.

    The misspelled function name is kept so existing callers keep working.

    Args:
        array: 2-D array-like of shape (n_rows, n_features).

    Returns:
        A new array of the same shape with each non-zero row divided by its
        L2 norm.
    """
    array = np.asarray(array)
    row_norms = np.sqrt(np.sum(array * array, axis=1)).reshape(-1, 1)
    # Dividing a zero row by 1 leaves it at zero and avoids 0/0 -> NaN.
    safe_norms = np.where(row_norms == 0, 1.0, row_norms)
    return array / safe_norms

In [None]:
def normarize_vector(vector):
    """Scale ``vector`` to unit Euclidean (L2) length.

    The norm is taken over all elements. A vector whose norm is exactly zero
    has no direction, so it is returned as zeros instead of the NaN that
    0/0 would produce.

    The misspelled function name is kept so existing callers keep working.

    Args:
        vector: Array-like of numbers.

    Returns:
        A new array equal to ``vector`` divided by its L2 norm (or all zeros
        when the norm is zero).
    """
    vector = np.asarray(vector)
    norm = np.sqrt(np.sum(vector * vector))
    if norm == 0:
        # Divide by 1.0 so the result is still a fresh float array of zeros.
        return vector / 1.0
    return vector / norm

In [None]:
def get_average_vector(vectors):
    """Return the rescaled sum of ``vectors``.

    The vectors are summed along axis 0 and the sum is passed through
    ``normarize_vector``. No division by the number of vectors is done;
    the sum and the mean point in the same direction, so the rescaled
    result is the same.

    Args:
        vectors: Array-like whose axis 0 indexes the individual vectors.

    Returns:
        The output of ``normarize_vector`` applied to the axis-0 sum.
    """
    return normarize_vector(np.sum(vectors, axis=0))

# Centralize and normalize

In [None]:
# Source directory of the raw vectors and destination directory for the
# converted (centralized / normalized) vectors of this run.
vectors_path = f"../../Vectorize/data/{data_type}/{vectorize_type}/vector"
converted_vectors_path = f"../data/{data_type}/{vectorize_type}/vector"

for vector_model_num in range(max_vector_model_nums):
    # One progress bar per model number, advancing over the dimensions.
    for vector_dim in tqdm(vector_dims):
        source_csv = f"{vectors_path}/{vector_dim}/{vector_model_num}.csv"
        vector = np.loadtxt(source_csv, delimiter=",")

        # Normalization is applied to the already-centralized vectors.
        centralized_vector = centralize_array(vector)
        normarized_vector = normarize_array(centralized_vector)

        # NOTE(review): make_filepath comes from util; presumably it prepares
        # the parent directory and returns the path -- confirm in util.
        centralized_npy = make_filepath(
            f"{converted_vectors_path}/{vector_dim}/centralized/{vector_model_num}.npy"
        )
        np.save(centralized_npy, centralized_vector)

        normalized_npy = make_filepath(
            f"{converted_vectors_path}/{vector_dim}/normalized/{vector_model_num}.npy"
        )
        np.save(normalized_npy, normarized_vector)