# Introduction
Hey, thanks for viewing my Kernel!

If you like my work, please leave an upvote: it is really appreciated and motivates me to offer more content to the Kaggle community! :)

In [None]:
import pandas as pd
import numpy as np
import warnings 
import matplotlib.pyplot as plt
import seaborn as sns
import time

# Apply seaborn's default plot theme to subsequent matplotlib figures.
sns.set()
# Silence every warning for the rest of the session (filter action "ignore").
warnings.simplefilter("ignore")

In [None]:
# Load the raw data table (kept as `data_null`; its missing values are counted
# and filled in the cells below) and the sample submission file.
data_null = pd.read_csv("../input/tabular-playground-series-jun-2022/data.csv")
sub = pd.read_csv("../input/tabular-playground-series-jun-2022/sample_submission.csv")
# Preview the first rows of both tables.
display(data_null.head())
display(sub.head())

In [None]:
# Report the (rows, columns) shape of both tables.
print("data.shape:", data_null.shape)
print("sub.shape:", sub.shape)

In [None]:
# Total number of missing cells across the whole raw table.
display(data_null.isna().sum().sum())

In [None]:
# Replace every missing cell with the constant 1; `fillna` returns a new frame,
# so `data_null` itself is left unchanged.
data = data_null.fillna(1)

In [None]:
# Sanity check: number of missing cells remaining after the fill.
display(data.isna().sum().sum())

# Automated Submission Function

In [None]:
def automated_sub_func_iterrows(data, sub):
    """Fill ``sub['value']`` by walking ``sub`` with ``DataFrame.iterrows``.

    Each ``'row-col'`` entry is split on ``"-"`` into a row id and a column
    name; the value is the first cell of ``data[column]`` among the rows whose
    ``'row_id'`` equals that id.

    Args:
        data: Frame with a ``'row_id'`` column plus the referenced columns.
        sub: Frame with a ``'row-col'`` column; modified in place.

    Returns:
        The same ``sub`` object, with its ``'value'`` column overwritten.
    """
    # Lazily parse every key into a (row id text, column name) pair.
    parsed_keys = (entry['row-col'].split("-") for _, entry in sub.iterrows())
    sub['value'] = [
        data.loc[data['row_id'] == int(row_text), col_name].values[0]
        for row_text, col_name in parsed_keys
    ]
    return sub

In [None]:
# Time the iterrows variant on a copy so the original `sub` stays untouched.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# high-resolution elapsed-time clock intended for benchmarking.
start_time = time.perf_counter()
new_sub = automated_sub_func_iterrows(data, sub.copy())
print(time.perf_counter() - start_time, "seconds")
new_sub.head()

In [None]:
def automated_sub_func_apply(data, sub):
    """Fill ``sub['value']`` using a row-wise ``DataFrame.apply``.

    Works on a copy of ``sub`` in which ``'row-col'`` is split on ``'-'`` into
    an integer ``'row'`` and a string ``'col'``. For every row, the value is
    the first cell of ``data[col]`` among rows whose ``'row_id'`` equals
    ``row``.

    Args:
        data: Frame with a ``'row_id'`` column plus the referenced columns.
        sub: Frame with a ``'row-col'`` column; only its ``'value'`` column is
            written, the helper columns stay on the internal copy.

    Returns:
        The same ``sub`` object, with its ``'value'`` column overwritten.
    """
    work = sub.copy()
    key_parts = work['row-col'].str.split('-', expand=True)
    work[['row', 'col']] = key_parts
    work['row'] = work['row'].astype(int)

    def _lookup(entry):
        # First matching cell for this (row id, column name) pair.
        matches = data['row_id'] == entry['row']
        return data.loc[matches, entry['col']].values[0]

    work['value'] = work.apply(_lookup, axis=1)
    sub['value'] = work['value']
    return sub

In [None]:
# Time the apply variant on a copy so the original `sub` stays untouched.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# high-resolution elapsed-time clock intended for benchmarking.
start_time = time.perf_counter()
new_sub = automated_sub_func_apply(data, sub.copy())
print(time.perf_counter() - start_time, "seconds")
new_sub.head()

In [None]:
def automated_sub_func_itertuples(data, sub):
    """Fill ``sub['value']`` by walking a working copy with ``itertuples``.

    ``'row-col'`` is split on ``'-'`` into an integer ``'row'`` and a string
    ``'col'`` on a copy of ``sub``. Each tuple's value is looked up as the
    first cell of ``data[col]`` among rows whose ``'row_id'`` equals ``row``
    and is written into the copy one cell at a time via ``.loc``.

    Args:
        data: Frame with a ``'row_id'`` column plus the referenced columns.
        sub: Frame with a ``'row-col'`` column; only its ``'value'`` column is
            written.

    Returns:
        The same ``sub`` object, with its ``'value'`` column overwritten.
    """
    work = sub.copy()
    key_parts = work['row-col'].str.split('-', expand=True)
    work[['row', 'col']] = key_parts
    work['row'] = work['row'].astype(int)

    for record in work.itertuples():
        matches = data['row_id'] == record.row
        found = data.loc[matches, record.col].values[0]
        # Cell-by-cell write, addressed by the tuple's index label.
        work.loc[record.Index, 'value'] = found

    sub['value'] = work['value']
    return sub

In [None]:
# Time the itertuples variant on a copy so the original `sub` stays untouched.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# high-resolution elapsed-time clock intended for benchmarking.
start_time = time.perf_counter()
new_sub = automated_sub_func_itertuples(data, sub.copy())
print(time.perf_counter() - start_time, "seconds")
new_sub.head()

In [None]:
def automated_sub_func_list_comprehensions(data, sub):
    """Fill ``sub['value']`` by zipping the index with the parsed key columns.

    NOTE: despite the name, no list comprehension is involved. The function
    loops over ``zip(index, row, col)`` of a working copy and writes every
    value into that copy one cell at a time via ``.loc``.

    ``'row-col'`` is split on ``'-'`` into an integer ``'row'`` and a string
    ``'col'``; each value is the first cell of ``data[col]`` among rows whose
    ``'row_id'`` equals ``row``.

    Args:
        data: Frame with a ``'row_id'`` column plus the referenced columns.
        sub: Frame with a ``'row-col'`` column; only its ``'value'`` column is
            written.

    Returns:
        The same ``sub`` object, with its ``'value'`` column overwritten.
    """
    work = sub.copy()
    key_parts = work['row-col'].str.split('-', expand=True)
    work[['row', 'col']] = key_parts
    work['row'] = work['row'].astype(int)

    triples = zip(work.index, work['row'], work['col'])
    for label, wanted_row, wanted_col in triples:
        matches = data['row_id'] == wanted_row
        work.loc[label, 'value'] = data.loc[matches, wanted_col].values[0]

    sub['value'] = work['value']
    return sub

In [None]:
# Time the zip-loop variant on a copy so the original `sub` stays untouched.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# high-resolution elapsed-time clock intended for benchmarking.
start_time = time.perf_counter()
new_sub = automated_sub_func_list_comprehensions(data, sub.copy())
print(time.perf_counter() - start_time, "seconds")
new_sub.head()

In [None]:
def automated_sub_func_melt(data, sub):
    """Fill ``sub['value']`` from ``data`` with one melt and a keyed lookup.

    ``data`` is reshaped to long form (one line per ``row_id``/column pair),
    a ``'<row_id>-<column>'`` key is built for each line, and the values are
    mapped onto ``sub`` by that key. Because the assignment is keyed rather
    than positional, the result does not depend on the order of ``sub``.

    The columns to melt are derived from the keys in ``sub`` instead of a
    hard-coded list, so the function works for any set of feature columns.

    Args:
        data: Frame with an integer-convertible ``'row_id'`` column plus every
            column referenced by ``sub``.
        sub: Frame with a ``'row-col'`` column of ``'<row_id>-<column>'``
            keys; modified in place.

    Returns:
        The same ``sub`` object, with its ``'value'`` column overwritten.

    Raises:
        ValueError: If a key in ``sub`` has no matching ``row_id`` in ``data``.
        KeyError: If a key references a column absent from ``data`` (raised
            by ``pd.melt``).
    """
    if sub.empty:
        sub['value'] = []
        return sub

    keys = sub['row-col'].astype(str)
    # Split only on the first '-' so the column name is taken verbatim.
    features = keys.str.split('-', n=1).str[1].unique().tolist()

    melt_data = pd.melt(data, id_vars='row_id', value_vars=features,
                        var_name='Column', value_name='Value')
    melt_data['row-col'] = (melt_data['row_id'].astype(np.int64).astype(str)
                            + '-' + melt_data['Column'])

    # Shrink to the requested cells before indexing, to keep the lookup small.
    wanted = melt_data.loc[melt_data['row-col'].isin(keys), ['row-col', 'Value']]
    # If a row_id is duplicated in `data`, keep its first occurrence.
    lookup = wanted.drop_duplicates(subset='row-col').set_index('row-col')['Value']

    missing = ~keys.isin(lookup.index)
    if missing.any():
        examples = keys[missing].unique()[:5].tolist()
        raise ValueError(
            f"{int(missing.sum())} submission key(s) not found in data, e.g. {examples}"
        )

    sub['value'] = keys.map(lookup).to_numpy()
    return sub

In [None]:
import gc

# Run a full garbage-collection pass before timing.
gc.collect()

# Time the melt variant on a copy so the original `sub` stays untouched.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# high-resolution elapsed-time clock intended for benchmarking.
start_time = time.perf_counter()
new_sub = automated_sub_func_melt(data, sub.copy())
print(time.perf_counter() - start_time, "seconds")
new_sub.head()

# Conclusions

1. **automated_sub_func_melt**: 103.812 seconds, roughly 10x faster than the next-best approach 👑
2. **automated_sub_func_apply**: 1109.014 seconds
3. **automated_sub_func_iterrows**: 1168.848 seconds
4. **automated_sub_func_list_comprehensions**: 8579.903 seconds
5. **automated_sub_func_itertuples**: 9181.761 seconds