In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from torch.utils.data import Dataset, DataLoader
import random
import os, pickle
import h5py
import csv
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Seed every random number generator used in this notebook exactly once,
# from a single constant, so runs are repeatable.
SEED = 1
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


In [None]:
# load data and do FFT

def _append_fft_channels(raw):
    """Return ``raw`` with the real and imaginary parts of its FFT appended.

    ``raw`` is indexed as ``raw[:, :, 0]`` (real part) and ``raw[:, :, 1]``
    (imaginary part); the FFT is taken along axis 1 and its real/imaginary
    parts are concatenated to ``raw`` along the last axis.
    """
    signal = raw[:, :, 0] + 1j * raw[:, :, 1]
    spectrum = np.fft.fft(signal, axis=1)
    fft_channels = np.stack((np.real(spectrum), np.imag(spectrum)), -1)
    return np.concatenate((raw, fft_channels), axis=2)


def loaddata():
    """Load the train/test signals and the training labels from the Kaggle input.

    Returns:
        train_data: float32 array; the raw channels followed by the FFT
            real/imag channels, with axes 1 and 2 swapped so the channel axis
            comes second.
        test_data: same layout as ``train_data`` for the test set.
        train_label: integer class indices, one per recognised CSV row.

    Raises:
        ValueError: if the number of recognised labels differs from the number
            of training samples (labels and samples would be misaligned).
    """
    # Read both datasets into memory, then release the HDF5 handle right away.
    with h5py.File("/kaggle/input/modulation-prediction/data.hdf5", 'r') as f1:
        train_data_raw = np.array(f1['train'])
        test_data_raw = np.array(f1['test'])

    train_data = _append_fft_channels(train_data_raw)
    test_data = _append_fft_channels(test_data_raw)

    class_dic = { 'FM':0, 'OQPSK':1, 'BPSK':2, '8PSK':3, 'AM-SSB-SC':4, '4ASK':5, '16PSK':6, 'AM-DSB-SC':7, 'QPSK':8, 'OOK': 9 }

    with open('/kaggle/input/modulation-prediction/train_labels.csv', newline='') as f2:
        # Rows whose second column is not a known class name (e.g. a header
        # row) and rows that are too short are skipped.
        train_label = np.array([
            class_dic[row[1]]
            for row in csv.reader(f2)
            if len(row) > 1 and row[1] in class_dic
        ])

    if len(train_label) != train_data.shape[0]:
        raise ValueError(
            "label/sample count mismatch: %d labels for %d training samples"
            % (len(train_label), train_data.shape[0])
        )

    train_data = np.float32(train_data).swapaxes(1,2)
    test_data = np.float32(test_data).swapaxes(1,2)
    return train_data, test_data, train_label


# Feeding the FFT channels to the CNN did not give good performance,
# so only the first two channels (the raw data) are used from here on.

train_data, test_data, train_label = loaddata()

# Hold out 1/15 of the training samples for validation.
X_train, X_val, y_train, y_val = train_test_split(
    train_data[:, :2, :],
    train_label,
    test_size=1/15,
    random_state=42,
)
X_train, X_val = np.float32(X_train), np.float32(X_val)
X_test = np.float32(test_data[:, :2, :])

for _split in (X_train, X_val, X_test):
    print(_split.shape)

In [None]:
# convert the data into torch Datasets and DataLoaders

# torchvision pipeline: ToTensor followed by Normalize with mean 0.5 and std 0.5.
# NOTE(review): none of the MyDataset instances created in the visible code is
# given this object, so it appears to be unused — confirm before relying on it.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])
class MyDataset(Dataset):
    """In-memory dataset yielding ``(tensor, label)`` pairs from NumPy arrays.

    Args:
        data: NumPy array whose first axis indexes the samples.
        label: optional sequence of per-sample labels (each converted with
            ``int``). When omitted, every sample gets the placeholder label
            ``MyDataset.UNLABELED`` so loaders still yield ``(data, label)``.
        transform: optional callable applied to each sample tensor in
            ``__getitem__``; ``None`` leaves the tensor untouched.
    """

    # Placeholder label returned for samples of an unlabeled dataset.
    UNLABELED = -1

    def __init__(self, data, label=None, transform=None):
        count = data.shape[0]
        if label is None:
            labels = [self.UNLABELED] * count
        else:
            labels = [int(label[i]) for i in range(count)]
        self.datas = [(data[i], labels[i]) for i in range(count)]
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``index``-th sample as ``(torch tensor, int label)``."""
        fn, label = self.datas[index]
        data = torch.from_numpy(fn)
        if self.transform is not None:
            data = self.transform(data)
        return data,label

    def __len__(self):
        """Number of samples held by the dataset."""
        return len(self.datas)

# Training split: reshaped to (-1, 2, 1024), reshuffled every epoch.
train = MyDataset(np.float32(X_train.reshape(-1, 2, 1024)), y_train)
trainloader = DataLoader(train, batch_size=32, shuffle=True)

# Validation split: same reshape, larger batches.
val = MyDataset(np.float32(X_val.reshape(-1, 2, 1024)), y_val)
valloader = DataLoader(val, batch_size=64, shuffle=True)

# The test set has no labels; all-zero dummy labels are supplied so the same
# Dataset class can be used. The loader keeps the original sample order.
y_test = np.zeros(len(X_test))
test = MyDataset(np.float32(X_test.reshape(-1, 2, 1024)), y_test)
testloader = DataLoader(test, batch_size=32)

In [None]:
# Define Neural Network

class FeedFrwdNet(nn.Module):
    """1-D CNN with two 10-way classification heads.

    Seven conv stages (Conv1d -> BatchNorm1d -> ReLU -> AvgPool1d(2)) each
    halve the sequence length. One head reads the output of the 7th stage,
    the other reads the output of the 4th stage. ``forward`` returns both
    sets of logits.
    """

    @staticmethod
    def _conv_stage(in_channels, kernel_size, padding=0):
        # Conv -> BN -> ReLU -> AvgPool; always 32 output channels, stride 1.
        return nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=32,
                      kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=2),
        )

    @staticmethod
    def _hidden(in_features):
        # Fully connected layer to 128 units followed by ReLU.
        return nn.Sequential(nn.Linear(in_features, 128), nn.ReLU())

    @staticmethod
    def _run_head(features, dropout, layers):
        # Dropout, flatten to (batch, -1), then apply the dense layers in order.
        out = dropout(features)
        out = out.view(out.size(0), -1)
        for layer in layers:
            out = layer(out)
        return out

    def __init__(self):
        super().__init__()

        # Length comments assume an input of shape (batch, 2, 1024).
        self.conv1 = self._conv_stage(2, kernel_size=5, padding=2)   # -> (32, 512)
        self.conv2 = self._conv_stage(32, kernel_size=3, padding=1)  # -> (32, 256)
        self.conv3 = self._conv_stage(32, kernel_size=3, padding=1)  # -> (32, 128)
        self.conv4 = self._conv_stage(32, kernel_size=1)             # -> (32, 64)
        self.conv5 = self._conv_stage(32, kernel_size=1)             # -> (32, 32)
        self.conv6 = self._conv_stage(32, kernel_size=1)             # -> (32, 16)
        self.conv7 = self._conv_stage(32, kernel_size=1)             # -> (32, 8)

        # Head one ("loss one"): fed by the output of conv7.
        self.drop11 = nn.Dropout(0.5)
        self.dense11 = self._hidden(32 * 8)
        self.dense21 = self._hidden(128)
        self.dense31 = nn.Sequential(nn.Linear(128, 10))

        # Head two ("loss two"): fed by the output of conv4.
        self.drop12 = nn.Dropout(0.5)
        self.dense12 = self._hidden(32 * 64)
        self.dense22 = self._hidden(128)
        self.dense32 = nn.Sequential(nn.Linear(128, 10))

    def forward(self,input):
        """Return ``(out1, out2)``: logits of head one and head two."""
        features = input
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = stage(features)
        after_conv4 = features
        for stage in (self.conv5, self.conv6, self.conv7):
            features = stage(features)

        # Head one is evaluated before head two, as in the layer definitions.
        out1 = self._run_head(features, self.drop11,
                              (self.dense11, self.dense21, self.dense31))
        out2 = self._run_head(after_conv4, self.drop12,
                              (self.dense12, self.dense22, self.dense32))
        return out1, out2

    
# Evaluation function for the train and validation loaders.
# It also keeps the three best-scoring models on disk.

def test(acc_max1, acc_max2, acc_max3, loader, kind):
    """Evaluate the global ``model`` on ``loader`` and maintain top-3 checkpoints.

    Predictions are the argmax of ``outputs[0] * 1 + outputs[1] * 0.5``.

    Args:
        acc_max1, acc_max2, acc_max3: best, second-best and third-best
            accuracies (in percent) seen so far.
        loader: iterable of ``(inputs, labels)`` batches.
        kind: name of the split, used only in the printed message.

    Returns:
        ``(accuracy, acc_max1, acc_max2, acc_max3)`` with the updated ranking.
        ``accuracy`` is a Python float in percent (0.0 for an empty loader).

    Side effects:
        Writes 'model1.pkl' / 'model2.pkl' / 'model3.pkl' in the working
        directory. When a new score enters the ranking, lower-ranked
        checkpoints are shifted down one slot so the files always match
        ``acc_max1`` >= ``acc_max2`` >= ``acc_max3``.
    """
    import os  # local import keeps this block self-contained

    def _demote(src, dst):
        # Move a checkpoint one rank down; nothing to do if it was never written.
        if os.path.exists(src):
            os.replace(src, dst)

    correct = 0
    total = 0
    model.eval()
    # Evaluation needs no gradients; skipping autograd saves memory and time.
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs[0] * 1 + outputs[1] * 0.5, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100.0 * correct / total if total else 0.0
    print("acc of " + kind + " is "+ str(accuracy))
    if accuracy > acc_max1:
        # New best: the old best and second best each move down one rank.
        _demote('model2.pkl', 'model3.pkl')
        _demote('model1.pkl', 'model2.pkl')
        acc_max1, acc_max2, acc_max3 = accuracy, acc_max1, acc_max2
        print("acc_max1:", acc_max1)
        torch.save(model, 'model1.pkl')
    elif accuracy > acc_max2:
        # New second best: the old second best becomes third.
        _demote('model2.pkl', 'model3.pkl')
        acc_max2, acc_max3 = accuracy, acc_max2
        print("acc_max2:", acc_max2)
        torch.save(model, 'model2.pkl')
    elif accuracy > acc_max3:
        acc_max3 = accuracy
        print("acc_max3:", acc_max3)
        torch.save(model, 'model3.pkl')
    return accuracy, acc_max1, acc_max2, acc_max3


# Create the network, the classification loss and the Adam optimizer.

model = FeedFrwdNet()
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.99),
    eps=1e-8,
    weight_decay=1e-3,
)

In [None]:
#Training the model

def train(epoch, log_interval=100):
    """Run one pass over ``trainloader``, updating the global ``model``.

    The loss is the cross-entropy of the first head plus half the
    cross-entropy of the second head. ``epoch`` and ``log_interval`` are
    accepted but not used by this function.
    """
    for inputs, labels in trainloader:
        model.train()
        optimizer.zero_grad()
        head_one, head_two = model(inputs)
        loss_one = loss(head_one, labels)
        loss_two = loss(head_two, labels)
        loss_v = 1 * loss_one + 0.5 * loss_two
        loss_v.backward()
        optimizer.step()

# Training is reported in five stages. Each entry is the last epoch (inclusive)
# of a stage, so the stages cover epochs 1-60, 61-80, 81-100, 101-120 and
# 121-140, i.e. 60/20/20/20/20 epochs.
STAGE_ENDS = (60, 80, 100, 120, 140)

acc_60 = 0
acc_80 = 0
acc_100 = 0
acc_120 = 0
acc_140 = 0
epochs= 140
acc_max1 = 0
acc_max2 = 0
acc_max3 = 0
# NOTE(review): model_save is not referenced anywhere in the visible code. It is
# kept because building a network presumably draws from the random number
# generator, so removing it could change the training run — confirm before deleting.
model_save = FeedFrwdNet()

stage_sums = [0] * len(STAGE_ENDS)    # summed validation accuracy per stage
stage_counts = [0] * len(STAGE_ENDS)  # number of epochs accumulated per stage

for epoch in range(1, epochs + 1):
    print(epoch)
    train(epoch)
    val_acc, acc_max1, acc_max2, acc_max3 = test(acc_max1, acc_max2, acc_max3, valloader, 'val')
    # First stage whose end is not before this epoch; any epoch beyond the
    # last boundary is counted in the final stage.
    stage = next(
        (i for i, end in enumerate(STAGE_ENDS) if epoch <= end),
        len(STAGE_ENDS) - 1,
    )
    stage_sums[stage] += val_acc
    stage_counts[stage] += 1

acc_60, acc_80, acc_100, acc_120, acc_140 = stage_sums

# Print the average validation accuracy of each stage, dividing by the number
# of epochs that actually ran in it.
for stage_sum, stage_count in zip(stage_sums, stage_counts):
    print(stage_sum / stage_count if stage_count else 0)

In [None]:
# In the previous training process, we stored the top 3 models.
# Here, we load one of those three models.
# Because the third-best model achieved the best score on the private leaderboard,
# we use that model, model3.pkl.
# If you don't want to wait more than 2 hours for the model to train,
# you can directly use the model attached in the Zip file,
# which is the same as the one produced here.

def test(loader):
    correct = 0
    total = 0
    model = FeedFrwdNet()
    model = torch.load('model3.pkl')
    result = []
    for data in loader:
        inputs, labels = data
        model.eval()
        outputs = model(inputs)
        _, predicted = torch.max(outputs[0].data * 1 + outputs[1].data * 0.5, 1)
        result.extend(predicted.numpy().tolist())
    return result

# Predicted class indices for the test set, one per sample, in testloader order.
test_result = test(testloader) # result of test data

In [None]:
# Translate the predicted class indices back to modulation names and
# write them to the submission CSV.

name_dic = { 0:'FM', 1:'OQPSK', 2:'BPSK', 3:'8PSK', 4:'AM-SSB-SC', 5:'4ASK', 6:'16PSK', 7:'AM-DSB-SC', 8:'QPSK', 9:'OOK' }
test_pre = test_result
rows_write = [name_dic[class_idx] for class_idx in test_pre]
submit_df = pd.DataFrame({"Id": range(len(test_pre)), "Category": rows_write})
submit_df.to_csv("submission3.csv", index=False)