In [1]:
# standard library
import os

# for processing numbers and files csv
import numpy as np
import pandas as pd

# preprocessing 
from sklearn.preprocessing import MinMaxScaler

# model related
import tensorflow as tf
from tensorflow.keras.utils import Sequence

In [2]:
# File name of the trained Keras model that is loaded for inference.
model_name = "cnn_model.keras"

# Kaggle locations: the mounted competition dataset and the writable output directory.
_KAGGLE_PATH_IN = "/kaggle/input/mars-spec-train/"
_KAGGLE_PATH_OUT = "/kaggle/working/"

# Pick the path prefixes automatically instead of commenting lines in and out:
# use the Kaggle paths when the dataset directory is present, otherwise fall
# back to empty prefixes so files resolve against the current working directory.
if os.path.isdir(_KAGGLE_PATH_IN):
    kag_path_in = _KAGGLE_PATH_IN
    kag_path_out = _KAGGLE_PATH_OUT
else:
    kag_path_in = ""
    kag_path_out = ""

In [3]:
# Grid dimensions of the per-sample matrix built by create_matrix:
# rows are indexed by scaled time, columns by scaled mass.
num_time_values = 2700
num_mass_values = 1400
matrix_shape = (num_time_values, num_mass_values)

# Sample metadata table; missing entries are replaced with 0.
metadata = pd.read_csv(kag_path_in + 'metadata.csv')
metadata = metadata.fillna(0)

# Training labels table.
train_labels = pd.read_csv(kag_path_in + 'train_labels.csv')

def create_matrix(id):
    """Build the dense time-by-mass intensity matrix for one sample.

    Args:
        id: Integer row position in ``metadata`` (used with ``metadata.iloc``).

    Returns:
        A float array of shape ``matrix_shape`` that is zero everywhere except
        at ``[round(time * 50), round(mass * 2)]``, where it holds the sample's
        intensity min-max scaled per sample (``MinMaxScaler`` defaults).

    Notes:
        * The CSV path is read from the fourth metadata column and resolved
          relative to ``kag_path_in``.
        * Rows that round to the same (time, mass) cell overwrite each other,
          so only one intensity per cell is kept.
        * Assumes every scaled index fits inside ``matrix_shape``; an index
          that is too large raises ``IndexError`` -- TODO confirm the value
          ranges of the input files.
    """
    # Read only the path column by position instead of unpacking the whole row,
    # so the function does not break if metadata gains or loses other columns.
    path = metadata.iloc[id, 3]
    data = pd.read_csv(kag_path_in + path)

    # Quantise the coordinates onto the integer grid of the matrix.
    time_idx = (data['time'] * 50).round().astype(int)
    mass_idx = (data['mass'] * 2).round().astype(int)

    # MinMaxScaler works on 2-D input; flatten the single scaled column back to 1-D.
    scaled = MinMaxScaler().fit_transform(data['intensity'].values.reshape(-1, 1))
    intensity = scaled.reshape(-1)

    matrix = np.zeros(matrix_shape)
    matrix[time_idx, mass_idx] = intensity

    return matrix

class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves batches of sample matrices (inputs only).

    Each item of ``data`` is handed to ``create_matrix``, so it is expected to
    be a row position in ``metadata``. No labels are returned, which makes the
    generator suitable for ``model.predict``.

    Args:
        data: Sequence of sample identifiers supporting ``len()`` and slicing
            (e.g. a ``range`` or a list).
        batch_size: Number of samples per batch.
        **kwargs: Forwarded to the ``Sequence`` base-class constructor.
    """

    def __init__(self, data, batch_size, **kwargs):
        # Newer Keras versions expect the base constructor to be called.
        super().__init__(**kwargs)
        self.data = data
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceiling division: flooring here would silently drop the last
        # len(data) % batch_size samples from every pass.
        return (len(self.data) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Return batch ``idx`` as an array stacking one matrix per sample."""
        low = idx * self.batch_size
        # Clamp so the final batch may be shorter than batch_size.
        high = min(low + self.batch_size, len(self.data))
        return np.array(
            [self.preprocess_data(data_sample) for data_sample in self.data[low:high]]
        )

    def preprocess_data(self, data_sample):
        """Convert one sample identifier into its matrix via ``create_matrix``."""
        return create_matrix(data_sample)


In [4]:
# Submission template: later cells take the sample_id column and the
# label column names from it.
submission_format_path = kag_path_in + "submission_format.csv"
sample_predictions = pd.read_csv(submission_format_path)

In [5]:
# Empty list; nothing in the visible cells appends to it.
preds = list()

# Load the trained network from the file named by `model_name`.
model = tf.keras.models.load_model(model_name)

In [6]:
# Metadata row positions 809..1120 (the range end is exclusive), served one
# sample per batch. NOTE(review): presumably the validation split, judging by
# the output file name -- confirm against metadata.
val_rows = range(809, 1121)
val_batches = DataGenerator(val_rows, 1)
y_pred = model.predict(val_batches, verbose=0)

In [None]:
# Label columns are every column of the submission template after the first.
label_columns = sample_predictions.columns[1:]
predictions = pd.DataFrame(np.array(y_pred), columns=label_columns)

# Index-aligned assignment: prediction row i receives the sample_id of
# template row i.
predictions["sample_id"] = sample_predictions["sample_id"]

In [None]:
# Make sample_id the row index so it is written as the leading column of the CSV.
# Guarded so that re-running this cell is a no-op: after the first run
# "sample_id" is the index, no longer a column, and set_index would raise KeyError.
if predictions.index.name != "sample_id":
    predictions = predictions.set_index("sample_id")

In [None]:
# Write the predictions under the configured output prefix; with an empty
# prefix the file lands in the current working directory.
predictions.to_csv(kag_path_out + "val_labels_Shlorp.csv")