In [None]:
# https://platform.olimpiada-ai.ro/problems/26

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import warnings

# Silence every warning category for the whole session. Note that this also
# hides deprecation notices raised by the libraries used below.
warnings.simplefilter('ignore')

In [None]:
# Load the raw competition tables (train first, then test).
train, test = map(
    pd.read_csv,
    (
        "/kaggle/input/skeletons-dont-lie/train.csv",
        "/kaggle/input/skeletons-dont-lie/test.csv",
    ),
)

def prepare_data(df):
    """Collapse a multi-row-per-sample table into one feature row per `IDSample`.

    Every column other than `IDSample`, `FrameNumber`, `Action` and `Camera`
    is treated as a feature and summarised per sample with mean, std, min,
    max, count, first and last; the results are named `<column>_<statistic>`.
    `Action` and `Camera`, when present, are taken from the first row of each
    sample and keep their original names.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing an `IDSample` column plus the feature columns.

    Returns
    -------
    pandas.DataFrame
        One row per `IDSample`, ordered by `IDSample` (groupby's default
        sort), with `IDSample` as a regular column.

    Notes
    -----
    `first` / `last` follow the row order of `df`; rows are not re-sorted by
    `FrameNumber` here.
    """
    non_feature_cols = {'IDSample', 'FrameNumber', 'Action', 'Camera'}
    stats = ['mean', 'std', 'min', 'max', 'count', 'first', 'last']

    agg_dict = {col: list(stats) for col in df.columns if col not in non_feature_cols}
    for col in ('Action', 'Camera'):
        if col in df.columns:
            agg_dict[col] = ['first']

    grouped = df.groupby('IDSample').agg(agg_dict)
    # Flatten the (column, statistic) MultiIndex into "column_statistic" names.
    grouped.columns = ['_'.join(c) for c in grouped.columns]
    grouped = grouped.reset_index()

    # rename() ignores keys that are absent, so each label column is restored
    # independently: a table that has only one of Action / Camera still gets
    # that column back under its original name, and an unlabeled table is
    # returned unchanged.
    return grouped.rename(columns={'Action_first': 'Action', 'Camera_first': 'Camera'})

# Replace each raw table with its one-row-per-sample aggregate.
train, test = (prepare_data(frame) for frame in (train, test))

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from catboost import Pool

# Model inputs: every aggregated column except the sample id and the two targets.
features = train.drop(columns=['IDSample', 'Action', 'Camera'], errors='ignore').columns.tolist()

# Standardise with statistics computed on the full training table, then apply
# the same transform to the test table.
scaler = StandardScaler()
y = train['Action']
X = scaler.fit_transform(train[features])
X_test = scaler.transform(test[features])

# Hold out 10% of the samples to validate the action classifiers.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, random_state=42, test_size=0.1
)

train_pool, valid_pool = Pool(X_train, y_train), Pool(X_valid, y_valid)

In [None]:
from catboost import CatBoostClassifier

# CatBoost multiclass classifier for the action target.
model_cat = CatBoostClassifier(
    loss_function='MultiClass',
    eval_metric='Accuracy',
    iterations=200,
    learning_rate=0.05,
    max_depth=6,
    metric_period=25,
)

# The hold-out pool is passed as eval_set so the metric is tracked on it during fitting.
model_cat.fit(train_pool, eval_set=valid_pool)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest for the action target. The seed is fixed (same value as the
# train/validation split) so the forest, and therefore the ensemble's accuracy
# and submitted predictions, are identical on every Restart & Run All.
model_frs = RandomForestClassifier(
    random_state=42,
    verbose=True,
)

model_frs.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Soft-voting ensemble: average the per-class probabilities of both models.
probs_cat = model_cat.predict_proba(X_valid)
probs_frs = model_frs.predict_proba(X_valid)
# argmax yields a column position, not a label. Translate it through classes_
# so the comparison with y_valid is correct even when the labels are not
# exactly 0..K-1. (Averaging assumes both models order their probability
# columns identically.)
preds = model_frs.classes_[((probs_cat + probs_frs) / 2).argmax(axis=1)]
print(f"Accuracy: {accuracy_score(y_valid, preds):.5f}")

In [None]:
# Ensemble prediction of the action target on the test samples.
probs_cat = model_cat.predict_proba(X_test)
probs_frs = model_frs.predict_proba(X_test)
# Map the winning probability column back to the actual class label via
# classes_; a bare argmax is only a column position and equals the label only
# when labels happen to be 0..K-1.
action_preds = model_frs.classes_[((probs_cat + probs_frs) / 2).argmax(axis=1)].tolist()

In [None]:
# Switch the target to the camera label; the scaled feature matrix X is reused.
y = train['Camera']

# Same 10% hold-out configuration (and seed) as for the action target.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, random_state=42, test_size=0.1
)

train_pool, valid_pool = Pool(X_train, y_train), Pool(X_valid, y_valid)

In [None]:
from catboost import CatBoostClassifier

# CatBoost multiclass classifier for the camera target (this rebinds model_cat).
model_cat = CatBoostClassifier(
    loss_function='MultiClass',
    eval_metric='Accuracy',
    iterations=1000,
    learning_rate=0.1,
    max_depth=4,
    metric_period=100,
)

# The hold-out pool is passed as eval_set so the metric is tracked on it during fitting.
model_cat.fit(train_pool, eval_set=valid_pool)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest for the camera target (this rebinds model_frs). The seed is
# fixed (same value as the train/validation split) so the forest, and
# therefore the ensemble's accuracy and submitted predictions, are identical
# on every Restart & Run All.
model_frs = RandomForestClassifier(
    random_state=42,
    verbose=True,
)

model_frs.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Soft-voting ensemble: average the per-class probabilities of both models.
probs_cat = model_cat.predict_proba(X_valid)
probs_frs = model_frs.predict_proba(X_valid)
# argmax yields a column position, not a label. Translate it through classes_
# so the comparison with y_valid is correct even when the labels are not
# exactly 0..K-1. (Averaging assumes both models order their probability
# columns identically.)
preds = model_frs.classes_[((probs_cat + probs_frs) / 2).argmax(axis=1)]
print(f"Accuracy: {accuracy_score(y_valid, preds):.5f}")

In [None]:
# Ensemble prediction of the camera target on the test samples.
probs_cat = model_cat.predict_proba(X_test)
probs_frs = model_frs.predict_proba(X_test)
# Map the winning probability column back to the actual class label via
# classes_; a bare argmax is only a column position and equals the label only
# when labels happen to be 0..K-1.
camera_preds = model_frs.classes_[((probs_cat + probs_frs) / 2).argmax(axis=1)].tolist()

In [None]:
import os
import zipfile
import shutil
import pandas as pd

# Re-read the raw test table: the per-sample row counts used for Subtask1 are
# taken from the un-aggregated rows.
orig_test = pd.read_csv("/kaggle/input/skeletons-dont-lie/test.csv")

# groupby sorts by IDSample, which is the same order prepare_data() produced
# for the rows behind action_preds / camera_preds. Taking the ids from this
# index (rather than `.unique()`, which keeps file order) keeps every answer
# attached to the right sample even when test.csv is not sorted by IDSample.
frame_counts = orig_test.groupby('IDSample')['FrameNumber'].size()
unique_ids = frame_counts.index.to_numpy()
submission = pd.DataFrame({
    'IDSample': unique_ids,
    'Subtask1': frame_counts.tolist(),
    'Subtask2': action_preds,
    'Subtask3': camera_preds
})

# Reshape to long format: one record per (sample, subtask), with subtasks 1-3
# listed in order for each sample. itertuples keeps each column's own dtype,
# so integer ids are not upcast when the answer columns have mixed types.
result = [
    {
        'subtaskID': subtask_id,    # subtask number (1 to 3)
        'datapointID': sample_id,   # IDSample of the current row
        'answer': answer            # answer for this subtask
    }
    for sample_id, *answers in submission.itertuples(index=False, name=None)
    for subtask_id, answer in enumerate(answers, start=1)
]

# Build a DataFrame from the collected records.
df_output = pd.DataFrame(result)

# Save the results to a CSV file that can then be submitted on the platform.
df_output.to_csv('submission.csv', index=False)

# Show the first 5 rows (kept as the last expression so the notebook renders it).
df_output.head()