# Panic Project (DHLAB) - Multiclass Classification PyCaret Model for Panic Severity Prediction

author:  `@cyshin971`  

date:    `2025-06-xx`  

version: `0.1`

In [1]:
# Version tag for this notebook. It corresponds to the "0.1" stated in the header,
# written with a hyphen instead of a dot (presumably so the tag can be embedded in
# output file names — TODO confirm; no cell visible here reads it yet).
version = "0-1"

# 📚 | Import Libraries 

In [2]:
import config as cfg
import logging

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
logging.getLogger('matplotlib').setLevel(logging.WARNING)

from library.pandas_utils import move_column, remove_columns, create_empty_df, read_csv
from library.text_utils import save_as_csv
from library.json_utils import save_dict_to_file, load_dict_from_file
from library.path_utils import get_file_path

from pycaret.classification import *

# 📁 | Path Variables 

In [3]:
# Directory locations used by the notebook. All three are relative paths, so they
# presumably resolve against the working directory the kernel was started in
# (the repository root, judging by the path logged by the loading cell) — confirm.
# DATA_PATH and TMP_PATH are not referenced by any cell visible in this notebook.
DATA_PATH = "./_data"
TMP_PATH = "./cys/_tmp"
# Folder the loading cell reads the features dict and the preprocessed CSVs from.
OUTPUT_PATH = "./cys/_output"

# ⚒️ | Preprocessed Data

In [4]:
# Load the feature dictionary from OUTPUT_PATH. It names the scraped source file and
# the preprocessing / analysis versions, which together select the CSVs to load.
try:
    features_dict = load_dict_from_file(OUTPUT_PATH, 'panic_features_dict')
except FileNotFoundError as err:
    # Re-raise with a hint about which notebook generates the file; chain the
    # original error so the underlying path/traceback is not lost.
    raise FileNotFoundError(
        f"File not found: {get_file_path(OUTPUT_PATH, 'panic_features_dict')}. Please run data_analysis.ipynb first."
    ) from err
print(f"Loaded features dict with {len(features_dict)} keys:")

# The three bookkeeping entries are pulled out of the dict; every other entry is
# summarised by its length. All three start as None so that a missing key is
# reported by the explicit checks below instead of surfacing later as a NameError
# while a file name is being formatted.
scraped_data_filename = None
preproc_version = None
analysis_version = None
for k, v in features_dict.items():
    if k == 'scraped_data_filename':
        print(f"  {k}: {v}.csv")
        scraped_data_filename = v
    elif k == 'preproc_version':
        preproc_version = v
    elif k == 'analysis_version':
        analysis_version = v
    else:
        print(f"  {k}: {len(v)}")

# Fail fast, with the same error style for each required entry.
if scraped_data_filename is None:
    raise ValueError("scraped_data_filename not found in features_dict")
if preproc_version is None:
    raise ValueError("preproc_version not found in features_dict")
if analysis_version is None:
    raise ValueError("analysis_version not found in features_dict")

# File names follow "<prefix>_<version>(<scraped file>).csv". The pre-data and the
# patient analysis use the analysis version; metadata and demography use the
# preprocessing version.
# NOTE(review): the displayed frames carry an "Unnamed: 0" column, which suggests
# the CSV index was written to disk and read back as data — confirm, and drop it
# before these frames are used for modelling.
pre_data = read_csv(get_file_path(OUTPUT_PATH, f'panic_pre_data_filled_{analysis_version}({scraped_data_filename}).csv'))
display(pre_data.head(5))
metadata = read_csv(get_file_path(OUTPUT_PATH, f'panic_metadata_{preproc_version}({scraped_data_filename}).csv'))
display(metadata.head(5))
demography_data = read_csv(get_file_path(OUTPUT_PATH, f'panic_demography_data_{preproc_version}({scraped_data_filename}).csv'))
display(demography_data.head(5))
patient_data = read_csv(get_file_path(OUTPUT_PATH, f'panic_patient_analysis_{analysis_version}({scraped_data_filename}).csv'))
display(patient_data.head(5))

DEBUG - (json_utils.py) load_dict_from_file: Dictionary loaded successfully from C:\Users\cyshi\OneDrive\Documents\GitHub\Panic-Project-CYS\cys\_output\panic_features_dict.json


Loaded features dict with 13 keys:
  scraped_data_filename: final_result_diary_20250617_02.csv
  demography: 8
  dailylog: 13
  lifelog: 37
  questionnaire: 17
  diary: 2
  excluded: 9
  id: 4
  label: 4
  metadata: 10
  metadata_calc: 3


Unnamed: 0,entry_id,dataset,ID,date,panic,PHQ_9,STAI_X2,CSM,CTQ_1,CTQ_2,...,SLT1,SLT2,SLT3,SLT4,SLT5,SLT6,total_sleep,severity,dbp,panic_label
0,PXPN_10006_2024-11-04,PXPN,PXPN_10006,2024-11-04,0.0,0.0,32.0,31.0,11.0,13.0,...,,,,,,,6.95,,,0
1,PXPN_10006_2024-11-05,PXPN,PXPN_10006,2024-11-05,0.0,0.0,32.0,31.0,11.0,13.0,...,0.0,4.47,3.62,4.67,0.65,1.85,15.26,,,0
2,PXPN_10006_2024-11-06,PXPN,PXPN_10006,2024-11-06,1.0,0.0,32.0,31.0,11.0,13.0,...,0.0,0.0,0.2,4.07,1.43,1.68,7.38,,1.0,0
3,PXPN_10006_2024-11-07,PXPN,PXPN_10006,2024-11-07,2.0,0.0,32.0,31.0,11.0,13.0,...,0.0,0.0,0.14,5.08,0.0,0.97,6.19,1.0,0.0,1
4,PXPN_10006_2024-11-08,PXPN,PXPN_10006,2024-11-08,0.0,0.0,32.0,31.0,11.0,13.0,...,,,,,,,6.95,,,0


Unnamed: 0,entry_id,ID,date,dataset,coffee,smoking,total_sleep,dailylog_data,lifelog_data,questionnaire_data,...,diary_data,dbp,panic,n_prior_data,valid_entry_3,valid_entry_2,valid_entry_1,ref_event_id,panic_label,severity
0,SYM2-1-96_2021-08-04,SYM2-1-96,2021-08-04,SYM2,,,,1,1,1,...,0,,0.0,7,1,1,1,,0,
1,SYM2-1-96_2021-08-03,SYM2-1-96,2021-08-03,SYM2,,,,1,1,1,...,0,,0.0,7,1,1,1,,0,
2,SYM2-1-96_2021-08-02,SYM2-1-96,2021-08-02,SYM2,,,,1,1,1,...,0,,0.0,7,1,1,1,,0,
3,SYM2-1-96_2021-08-01,SYM2-1-96,2021-08-01,SYM2,,,,1,0,1,...,0,,0.0,7,1,1,1,,0,
4,SYM2-1-96_2021-07-31,SYM2-1-96,2021-07-31,SYM2,,,,1,0,1,...,0,,0.0,7,1,1,1,,0,


Unnamed: 0,ID,gender,age,marriage,job,smkHx,drinkHx,suicideHx,suicide_need
0,PXPN_10006,0,32.0,0.0,1.0,1.0,1.0,0.0,0.0
1,PXPN_10007,1,38.0,1.0,1.0,0.0,0.0,0.0,0.0
2,PXPN_10008,0,38.0,1.0,0.0,0.0,1.0,0.0,0.0
3,PXPN_10009,1,28.0,0.0,0.0,1.0,0.0,1.0,0.0
4,PXPN_10010,1,21.0,0.0,0.0,1.0,1.0,0.0,0.0


Unnamed: 0,ID,n_entries,n_valid_3_entries,n_valid_2_entries,n_valid_1_entries,n_panic,max_severity,min_severity,mean_severity,n_dailylog,n_lifelog,n_questionnaire,sum_dtype,mean_dtype,n_diary,coffee_mean,coffee_n,smoking_mean,total_sleep_mean
0,PXPN_10006,29,18,21,25,3,2.0,1.0,1.33,29,28,29,86,2.97,0,1.0,4,,6.95
1,PXPN_10007,29,16,20,24,4,2.0,1.0,1.25,29,28,29,86,2.97,0,1.53,17,,
2,PXPN_10008,29,18,21,25,3,2.0,1.0,1.67,29,27,29,85,2.93,0,1.0,1,,
3,PXPN_10009,29,18,20,23,5,4.0,1.0,2.6,29,28,29,86,2.97,0,1.17,12,5.0,
4,PXPN_10010,29,26,27,28,0,,,,29,28,29,86,2.97,0,1.37,19,8.57,


# 

# 🔄️ | Data Processing

In [5]:
# Minimum value of `n_prior_data` a panic entry must have to be kept.
days_before_panic_param = 3

# Step 1: restrict the metadata to rows whose panic_label equals 1.
is_panic_entry = metadata['panic_label'].eq(1)
filtered_panic_data = metadata.loc[is_panic_entry]
print(f"Found {len(filtered_panic_data)} entries with panic label.")

# Step 2: of those, collect the entry ids whose n_prior_data reaches the threshold.
has_enough_history = filtered_panic_data['n_prior_data'].ge(days_before_panic_param)
filtered_data_entry_ids = filtered_panic_data.loc[has_enough_history, 'entry_id'].tolist()
print(f"Found {len(filtered_data_entry_ids)} entries with panic label and at least {days_before_panic_param} days of prior data.")

# Disabled draft of the follow-up filtering, kept as written:
# filtered_metadata = metadata[metadata['ref_event_id'].isin(filtered_data_ids)]

# print(f"Filtered data contains {len(filtered_data)} entries with panic label and valid entry {days_before_panic_param} days before panic event.")
# print(f"Filtered data contains {len(filtered_data_ids)} unique ref_event_ids.")
# display(filtered_data.head(5))

Found 785 entries with panic label.
Found 322 entries with panic label and at least 3 days of prior data.


# 🔪 | Data Split

# 🤖 | Modeling

In [6]:
# NOTE(review): the draft below is still disabled. It reads from `raw_data`, which no
# cell visible in this notebook defines — presumably `pre_data` (loaded above, and
# carrying a `severity` column) is the intended source; confirm before enabling.
# target_col = "severity"
# # filter out rows with NaN in the target column
# data = raw_data[raw_data[target_col].notna()]
# print("Number of data points:", len(data))

# 🚂 | Training

In [7]:
# s = setup(data, target = target_col)

# 📋 | Results

In [8]:
# best_model = compare_models()