# Pre-configuration steps:
### Google Colab -
1. Go to Kaggle -> Profile -> Settings -> API.
2. Click on "create new API key", it will download a kaggle.json file to your local system.
3. Upload the "kaggle.json" file to the "/content/" folder in the Colab workspace.
4. Run the two cells below to download the competition dataset and extract it into the same "/content/" folder.

### Local -
1. Go to Kaggle -> Profile -> Settings -> API.
2. Click on "create new API key", it will download a kaggle.json file to your local system.
3. Put the "kaggle.json" file in the same directory as this notebook and run the next command.
4. You will also have to install the required libraries (such as NumPy and pandas) on your local system; Colab has them pre-installed.

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

! kaggle competitions download -c widsdatathon2025

In [None]:
# Extract the downloaded dataset
from zipfile import ZipFile

file = "widsdatathon2025.zip"

# Open the archive read-only. The handle is deliberately not named `zip`:
# that name would shadow the builtin zip() for every later cell of the notebook.
with ZipFile(file, 'r') as archive:
    # Print a table of the archive's contents to stdout.
    archive.printdir()

    # Unpack every member into the current working directory.
    print('extraction...')
    archive.extractall()
    print('Done!')

In [None]:
!pip install ydata-profiling

In [None]:
# Imports
import pandas as pd
import numpy as np
from ydata_profiling import ProfileReport
from  scipy.stats import zscore
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the four training tables from the extracted TRAIN/ folder:
# three Excel workbooks and one CSV file.
train_solution = pd.read_excel("TRAIN/TRAINING_SOLUTIONS.xlsx")
train_categorical_meta = pd.read_excel("TRAIN/TRAIN_CATEGORICAL_METADATA.xlsx")
train_quantitative_meta = pd.read_excel("TRAIN/TRAIN_QUANTITATIVE_METADATA.xlsx")
train_function_metric = pd.read_csv("TRAIN/TRAIN_FUNCTIONAL_CONNECTOME_MATRICES.csv")

# Sanity check: print the (rows, columns) shape of each table on one line.
print(
    train_solution.shape,
    train_categorical_meta.shape,
    train_quantitative_meta.shape,
    train_function_metric.shape,
)


In [None]:
# Preview the first rows of the training solutions table (TRAINING_SOLUTIONS.xlsx).
train_solution.head()

In [None]:
# Preview the first rows of the categorical metadata table.
train_categorical_meta.head()

In [None]:
# Preview the first rows of the quantitative metadata table.
train_quantitative_meta.head()

In [None]:
# Preview the first rows of the functional connectome matrices table.
train_function_metric.head()

# Exploratory Data Analysis (EDA)

In [None]:
# Build one profiling report per metadata table; the reports are displayed
# in the cells that follow.
train_categorical_EDA = ProfileReport(
    train_categorical_meta,
    title="Categorical Metadata EDA",
)
train_quantitative_EDA = ProfileReport(
    train_quantitative_meta,
    title="Quantitative Metadata EDA",
)

In [None]:
# Display the categorical metadata profile report as this cell's output.
train_categorical_EDA

In [None]:
# Display the quantitative metadata profile report as this cell's output.
train_quantitative_EDA

In [None]:
# Combine quantitative metadata, categorical metadata and the solutions into a
# single demographic table. Inner joins on participant_id keep only the
# participants present in all three tables.
# NOTE(review): an earlier revision cast participant_id to str before merging;
# confirm the key has the same dtype in every table.
train_demographic_data_with_sol = (
    train_quantitative_meta
    .merge(train_categorical_meta, on="participant_id", how="inner")
    .merge(train_solution, on="participant_id", how="inner")
)

In [None]:
# EDA on the merged demographic data (brain functioning metrics excluded).
# The title is user-facing text in the rendered report; the spelling of
# "Demographic" is corrected here.
train_demographic_EDA = ProfileReport(train_demographic_data_with_sol, title="Demographic data profile")

In [None]:
# Display the demographic data profile report as this cell's output.
train_demographic_EDA

# EDA on functional connectome metrics

In [None]:
# Describe functional metrics: per-column summary statistics of the
# functional connectome table.
train_function_metric.describe()

In [None]:
# Names of the functional connectome columns that contain at least one
# missing value.
missing_columns_function = train_function_metric.columns[
    train_function_metric.isna().any()
].tolist()

print("Missing columns: ")
print(missing_columns_function)

In [None]:
# Attach the solution columns to the functional connectome data (inner join on
# participant_id), then drop the identifier column, which is not used in the
# correlation cells that follow.
train_functional_metric_with_label = (
    train_function_metric
    .merge(train_solution, on="participant_id", how="inner")
    .drop(columns="participant_id")
)

In [None]:
# Column-wise correlation of every column with the ADHD_Outcome column.
# The result also holds ADHD_Outcome's correlation with itself.
corr_with_adhd = train_functional_metric_with_label.corrwith(
    train_functional_metric_with_label["ADHD_Outcome"],
    axis="index",
)
# Show the first few correlation values.
corr_with_adhd.head()

In [None]:
# Column-wise correlation of every column with the Sex_F column.
# The result also holds Sex_F's correlation with itself, so the summary
# below includes that entry.
corr_with_sex_f = train_functional_metric_with_label.corrwith(
    train_functional_metric_with_label["Sex_F"],
    axis="index",
)
# Summary statistics of the correlation values.
corr_with_sex_f.describe()