# Implements

In [2]:
import numpy as np
import os
import os.path
from tqdm import tqdm

from pathlib import Path

# Parameter Settings

In [3]:
# 데이터 셋 폴더 생성 위치
current_dir = Path('dataset_2.ipynb').parent
dest = current_dir / 'DATASET_32X32'

if not os.path.exists(dest):
    os.mkdir(dest)
    print("데이터 셋 폴더 생성")

데이터 셋 폴더 생성


In [4]:
# 샘플 수 설정
num_samples = 500

In [5]:
# SNR 범위 설정
SNR = range(-5, 21, 1)

In [6]:
classes = ["CONV", "HAMMING", "PRODUCT", "POLAR", "BCH", "RM", "GOLAY"]

# Channel Coding

In [94]:
# C(2,1,5) [27,31] (10111, 11001)
def CONV(datas, m = None):
    m_num = 4
    g_func = np.array([[False,True,True,True],[True,False, False,True]])
    code_rate = 1/2
    
    if m is None:
        m = np.zeros(m_num)
        datas.extend([0 for i in range(m_num)])

    result = []
    tr_m = np.array(m)

    for data in datas:
        for j in range(tr_m[g_func[0]].shape[0]):
            if j == 0:
                result_1 = (tr_m[g_func[0]][j]+data) % 2
            else:
                result_1 = (tr_m[g_func[0]][j]+result_1) % 2

        for j in range(tr_m[g_func[1]].shape[0]):
            if j == 0:
                result_2 = (tr_m[g_func[1]][j]+data) % 2
            else:
                result_2 = (tr_m[g_func[1]][j]+result_2) % 2

        result.extend([result_1, result_2])

        tr_m = np.roll(tr_m, 1)
        tr_m[0] = data
    
    return np.array(result)

In [95]:
# Product (8, 4)
def Product(msg):
    G = np.array([
        [1, 0, 1, 0, 0 ,0, 1, 0],
        [0, 1 ,1, 0, 0, 0, 0, 1],
        [0, 0, 0, 1, 0, 1, 1, 0],
        [0, 0, 0, 1, 1, 1, 0, 1]
    ], dtype='int')

    # msg = np.random.randint(0,2, (data_size, 4))
    codeword = np.dot(msg, G) %2

    return codeword

In [96]:
# Hamming (8, 4)
def Hamming(msg):
    G = np.array([
        [1, 1, 1, 0, 0 ,0, 0, 1],
        [1, 0 ,0, 1, 1, 0, 0, 1],
        [0, 1, 0, 1, 0, 1, 0, 1],
        [1, 1, 0, 1, 0, 0, 1, 0]
    ], dtype='int')

    # msg = np.random.randint(0,2, (data_size, 4))
    codeword = np.dot(msg, G) %2

    return codeword

In [97]:
# (n, k) Polar code

def make_F(power):
    F = np.array([[1,0],[1,1]])
    for i in range(1,power):
        first = np.concatenate((F,np.zeros((2**i, 2**i))), axis=1)
        second = np.concatenate((F,F), axis=1)
        F = np.concatenate((first, second), axis =0)

    return F
def Compute_z(z, k, i = 1):
    for j in range(i):
        z[(2*i,2*j)] = 2*z[(i,j)] - (z[(i,j)])**2
        z[(2*i,2*j+1)] = (z[(i,j)])**2
    if 2*i < 2**k:
        z = Compute_z(z, k, 2*i)

    return z

def Frozen_bits(n, z, slice_index):
    bit_index = np.zeros(n)
    for i in range(n):
        bit_index[i] = z[(n, i)]

    bit_index = np.argsort(bit_index)[::-1]

    frozen_bit_index = bit_index[:slice_index]
    message_bit_index = bit_index[slice_index:]
    
    return np.sort(frozen_bit_index), np.sort(message_bit_index)

def Polar(msg):
    k = 16
    n = 2*k
    power = int(np.log2(n))

    F = make_F(power)

    z = {}
    z[(1,0)] = 0.5
    z = Compute_z(z, power)

    frozen_bit_index, message_bit_index = Frozen_bits(n, z, int(n-k))

    # msg = np.random.randint(0,2,(k))
    
    u = np.zeros((n))
    u[message_bit_index] = msg

    codeword = np.dot(u ,F) %2
    codeword_int = codeword.astype(int)
    
    return codeword_int

In [98]:
# BCH code (15, 7)
def BCH(msg):
    c_length = 15
    m_length = 7

    # BCH 15,7
    # G(x) = x^8+x^7+x^6+x^4+1
    G_X_15_7 = [1,1,1,0,1,0,0,0,1]

    BCH_15_7_G = np.zeros((7,15))

    for i in range(7):
        BCH_15_7_G[i,i:i+9] = G_X_15_7

    for i in range(7):
        for j in range(i+1,7):
            if BCH_15_7_G[i, j] == 1:
                BCH_15_7_G[i] = (BCH_15_7_G[i] + BCH_15_7_G[j])%2

    G = BCH_15_7_G # G =BCH_7_4_G
    # msg = np.random.randint(0, 2,(1, msg_length))
    codeword = np.dot(msg, G)%2
    codeword_int = codeword.astype(int)
    
    return codeword_int

In [99]:
#RM code(16, 11), r=3까지는 가능, 4부터는 안됨

def G_matrix(length, m, r):
    G = np.ones(length)
    for i in range(m):
        v = np.zeros((int(length/(2**(i+1)))))
        v = np.hstack((v, np.ones((int(length/(2**(i+1)))))))
        while v.shape[0] < length :
            v = np.hstack((v, np.zeros((int(length/(2**(i+1)))))))
            v = np.hstack((v, np.ones((int(length/(2**(i+1)))))))
        G = np.vstack((G,v))
    if r == 1:
        return G
    elif r > 1 :
        for i in range(1,m):
            for j in range(i+1,m+1):
                G = np.vstack((G,(G[i]*G[j])))
        if r == 3:
            G = np.vstack((G,(G[1]*G[2]*G[3])))
            G = np.vstack((G,(G[1]*G[3]*G[4])))
            G = np.vstack((G,(G[1]*G[2]*G[4])))
            G = np.vstack((G,(G[2]*G[3]*G[4])))
        return G
    return G

def RM(msg):
    m = 4
    r = 2
    length = 2**m

    if r == 1:
        masking_length=0
        msg_length = m+r
    elif r == 2:
        masking_length=6
        msg_length = 11
    elif r == 3:
        masking_length=10
        msg_length = 15

    G = G_matrix(length, m, r)
    # msg = np.random.randint(2,(1,msg_length))
    codeword = np.dot(msg, G) %2
    codeword_int = codeword.astype(int)
    
    return codeword_int

In [100]:
# (23, 12) Golay code
def Golay(msg):
    m_len = 12

    P = np.array([
        [1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1],
        [1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1],
        [1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1],
        [1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0],
        [0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0],
        [0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1],
        [1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0],
        [0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0],
        [0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1],
        [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0],
        [0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1],
    ], dtype='int')

    G = np.concatenate((np.eye(m_len, dtype="int"), P), axis=1)

    # msg = np.random.randint(2, size=(1, m_len))
    codeword = np.dot(msg, G) %2
    
    return codeword

# Modulation

In [40]:
class Modulation():
    def __init__(self, case):
        if case == 1:
            self.M = 1
            self.padding_len = 0

    def mod(self, data):
        if len(data)%self.M != 0:
            data = self.padding(data)

        self.data_len = int(len(data)/self.M)

        if self.M == 1:
            return self.BPSK_modulation(data)

    def demod(self, data):
        self.M = 1
        if self.M == 1:
            return self.BPSK_demodulation(data)


    def padding(self, data):
        self.padding_len = int(self.M-len(data)%self.M)
        paddinng_data = np.concatenate((data, np.zeros(self.padding_len)))
        return paddinng_data
    
    def toInt(self, data):
        power_list = [2**i for i in range(self.M)][::-1]
        int_data = np.zeros((self.data_len), dtype=int)
        for i in range(self.data_len):
            int_data[i] += int(np.dot(power_list,data[self.M*i:self.M*i+self.M]))

        return int_data
    
    def BPSK_modulation(self, data):
        bpsk_data = np.zeros((self.data_len), dtype=complex)
        for i in range(self.data_len):
            bpsk_data[i] = (-1)**data[i]

        return bpsk_data


    def BPSK_demodulation(self, data):
        demod_data = np.zeros((self.data_len))
        for i in range(self.data_len):
            if data[i] > 0:
                demod_data[i] = 0
            else:
                demod_data[i] = 1

        return demod_data


# Channel Noise

In [15]:
def AWGN(dB, code_rate, data):
    dB = 10**(dB/10)
    sigma = np.sqrt(1/(2*code_rate*dB))
    noise_i = sigma * np.random.randn(len(data))
    noise_q = sigma * np.random.randn(len(data))

    noise_data = data + noise_i + 1j * noise_q

    return noise_data

# Create Dataset

## Def

In [77]:
# classes = ["CONV", "HAMMING", "PRODUCT", "POLAR", "BCH", "RM", "GOLAY"]
def create_encoded(code_len, code_num):
    encoded = []
    code_rate = 0.5
    
    if code_num == 0: # CONV
        data_len = code_len // 2 - 4
        random_bits = np.random.randint(0, 2, size=(data_len))
        encoded = CONV(list(random_bits))
        
    elif code_num == 1: # HAMM
        data_len = 4
        num_blocks = code_len // data_len
        for i in range(num_blocks):
            random_bits = np.random.randint(0, 2, size=data_len)
            encoded_block = Hamming(random_bits) 
            encoded.extend(encoded_block)

        encoded = encoded[:code_len]
               
    elif code_num == 2: # PRODUCT
        data_len = 4
        num_blocks = code_len // data_len
        for i in range(num_blocks):
            random_bits = np.random.randint(0, 2, size=data_len)
            encoded_block = Product(random_bits) 
            encoded.extend(encoded_block)

        encoded = encoded[:code_len] 
              
    elif code_num == 3: # POLAR
        data_len = code_len // 2
        random_bits = np.random.randint(0, 2, size=(data_len))
        encoded = Polar(random_bits)
        
    elif code_num == 4: # BCH(15, 7)
        data_len = 7
        num_blocks = code_len // data_len
        for i in range(num_blocks):
            random_bits = np.random.randint(0, 2, size=data_len)
            encoded_block = BCH(random_bits) 
            encoded.extend(encoded_block)

        encoded = encoded[:code_len] 
        
    elif code_num == 5: # RM(16, 11)
        data_len = 11
        num_blocks = code_len // data_len
        for i in range(num_blocks):
            random_bits = np.random.randint(0, 2, size=data_len)
            encoded_block = RM(random_bits) 
            encoded.extend(encoded_block)

        encoded = encoded[:code_len] 
        code_rate = 11/16
        
    elif code_num == 6: # GOLAY(23, 12)
        data_len = 12
        num_blocks = code_len // data_len
        for i in range(num_blocks):
            random_bits = np.random.randint(0, 2, size=data_len)
            encoded_block = Golay(random_bits) 
            encoded.extend(encoded_block)

        encoded = encoded[:code_len] 
        code_rate = 12/23
        
    return encoded, code_rate

## Use

In [101]:
# 데이터와 레이블을 저장할 리스트 초기화
demoded_list = []

data_list = []
label_list = []

In [103]:
dataset_size = 32

bpsk = Modulation(1)

for snr in SNR:
    for i in range(num_samples):
        for code in range(len(classes)):
            demoded_list.clear()  # Clear the list for each new sample
            
            for j in range(dataset_size):
                encoded, code_rate = create_encoded(dataset_size, code) 
                
                moded = bpsk.mod(encoded)
                
                noise_data = AWGN(int(snr), code_rate, moded)
                
                demoded = bpsk.demod(noise_data)  # shape = (1, 32)
                
                demoded_list.append(demoded) 
            
            sample = np.vstack(demoded_list)  # shape = (32, 32)
            
            data_list.append(sample)
            label_list.append((classes[code], snr))

print("END")

END


In [105]:
# 데이터를 넘파이 배열로 변환
data_array = np.array(data_list)
label_array = np.array(label_list)

In [120]:
print(data_array.shape)
print(label_array.shape)
print(label_array[:3])

(91000, 32, 32)
(91000, 2)
[['CONV' '-4']
 ['CONV' '6']
 ['CONV' '17']]


In [106]:
# 데이터 셔플링
np.random.seed(42)
indices = np.arange(data_array.shape[0])
np.random.shuffle(indices)

data_array = data_array[indices]
label_array = label_array[indices]

In [107]:
# 훈련, 테스트, 검증 데이터 분할 (80% 훈련, 10% 테스트, 10% 검증)
n_examples = data_array.shape[0]
n_train = int(0.8 * n_examples)
n_test = int(0.1 * n_examples)
n_val = n_examples - n_train - n_test

# idx
train_idx = indices[:n_train]
test_idx = indices[n_train:n_train + n_test]
val_idx = indices[n_train + n_test:]

# X
X_train = data_array[train_idx]
X_test = data_array[test_idx]
X_val = data_array[val_idx]

# Y
Y_train = label_array[train_idx]
Y_test = label_array[test_idx]
Y_val = label_array[val_idx]


In [121]:
# 원-핫 인코딩 함수
def to_onehot(labels, num_classes):
    onehot_labels = np.zeros((labels.shape[0], num_classes))
    for i, label in enumerate(labels):
        onehot_labels[i, label] = 1
    return onehot_labels

# 레이블의 첫 번째 열(CL)을 원-핫 인코딩=
num_classes = len(set(label_array[:, 0]))  # 실제 클래스 수 계산
Y_train_onehot = to_onehot(Y_train[:, 0], num_classes)
Y_test_onehot = to_onehot(Y_test[:, 0], num_classes)
Y_val_onehot = to_onehot(Y_val[:, 0], num_classes)


KeyError: 'C'

In [111]:
# npy 파일로 저장
np.save(os.path.join(dest, "x_train.npy"), X_train)
np.save(os.path.join(dest, "x_test.npy"), X_test)
np.save(os.path.join(dest, "x_val.npy"), X_val)

np.save(os.path.join(dest, "y_train.npy"), Y_train_onehot)
np.save(os.path.join(dest, "y_test.npy"), Y_test_onehot)
np.save(os.path.join(dest, "y_val.npy"), Y_val_onehot)

np.save(os.path.join(dest, "lbl.npy"), label_array)

# idx도 저장
np.save(os.path.join(dest, "train_idx.npy"), train_idx)
np.save(os.path.join(dest, "test_idx.npy"), test_idx)
np.save(os.path.join(dest, "val_idx.npy"), val_idx)

print("데이터 생성 완료")

데이터 생성 완료


In [115]:
train = np.load(os.path.join(dest, "lbl.npy"))
print(train)

[['CONV' '-4']
 ['CONV' '6']
 ['CONV' '17']
 ...
 ['POLAR' '8']
 ['HAMMING' '20']
 ['HAMMING' '16']]
