<a href="https://colab.research.google.com/github/jacobgreen4477/Construction-Equipment-Oil-Condition-Classification-AI-Competition/blob/main/ETRI_v1_0_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

> title : 제 4회 ETRI 휴먼이해 인공지능 논문경진대회 <br>
> author : hjy <br>

In [6]:
import pandas as pd
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
from tqdm.auto import tqdm
import pandas as pd
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

warnings.filterwarnings('ignore')

In [7]:
# Mount Google Drive at /content/drive; the data files read below live under this mount point.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
# pandas display options: show floats with four decimals, never truncate
# cell contents, and allow up to 999 rows / columns in rendered frames.
pd.set_option('display.float_format', '{:0.4f}'.format)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

In [9]:
def plot_rssi_by_subject_topN(df, TOPN, threshold=-60):
    """
    Plot, per subject, the RSSI time series of its TOPN most frequently seen BSSIDs.

    One figure is drawn for every distinct ``subject_id``. For each scan (row),
    a BSSID contributes its RSSI value only when that value is strictly greater
    than ``threshold``; weaker readings and scans where the BSSID is absent are
    plotted as gaps (``None``). Lines are drawn with 50% opacity.

    Parameters:
    - df: DataFrame with 'subject_id', 'timestamp', and the per-row sequence
      columns 'bssid' and 'rssi' (aligned element by element).
    - TOPN: int, number of most frequent BSSIDs to plot for each subject.
    - threshold: RSSI cut-off; only readings > threshold are plotted (default: -60).

    Returns:
    - None. Figures are displayed through plt.show().
    """
    for subject, group in df.groupby("subject_id"):
        # Count how many scans each BSSID appears in for this subject.
        bssid_counter = Counter()
        for scan_bssids in group['bssid']:
            bssid_counter.update(scan_bssids)

        # Keep only the TOPN most frequent BSSIDs.
        target_bssids = [b for b, _ in bssid_counter.most_common(TOPN)]
        time_series = {bssid: [] for bssid in target_bssids}
        timestamps = []

        # Collect one value (or a gap) per scan for every target BSSID.
        for ts, row_bssid, row_rssi in zip(group['timestamp'], group['bssid'], group['rssi']):
            timestamps.append(pd.to_datetime(ts))

            # Map each BSSID to the first RSSI reported for it in this scan,
            # which is the same entry a list.index() lookup would select.
            first_rssi = {}
            for bssid, rssi in zip(row_bssid, row_rssi):
                first_rssi.setdefault(bssid, rssi)

            for bssid in target_bssids:
                rssi_value = first_rssi.get(bssid)
                if rssi_value is not None and rssi_value > threshold:
                    time_series[bssid].append(rssi_value)
                else:
                    # Weak signal or BSSID not seen in this scan: leave a gap.
                    time_series[bssid].append(None)

        # Draw one figure for this subject.
        plt.figure(figsize=(16, 7))
        for bssid in target_bssids:
            plt.plot(timestamps, time_series[bssid], label=bssid, marker='o', alpha=0.5)

        plt.title(f"[{subject}] 시간별 RSSI 변화 (TOP {TOPN}, {threshold} 이상만)", fontsize=14)
        plt.xlabel("시간")
        plt.ylabel("RSSI (dBm)")
        plt.xticks(rotation=45)
        plt.legend(title="BSSID", bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.grid(True)
        plt.tight_layout()
        plt.show()

In [10]:
def filter_strong_rssi(df, threshold=-60):
    """
    Drop, within every row, the (bssid, rssi) pairs whose RSSI is at or below threshold.

    Parameters:
    - df: DataFrame whose 'bssid' and 'rssi' columns hold aligned sequences
      (one entry per detected access point).
    - threshold: RSSI cut-off; only pairs with rssi > threshold are kept (default: -60).

    Returns:
    - A filtered copy of df (the input frame is not modified). Rows whose
      readings are all at or below the threshold keep empty lists in both columns.
      An empty input frame is returned as an empty copy with the same columns.
    """
    filtered_df = df.copy()

    kept_bssids = []
    kept_rssis = []
    for bssids, rssis in zip(filtered_df['bssid'], filtered_df['rssi']):
        # Keep only the pairs whose RSSI is strictly above the threshold.
        kept = [(b, r) for b, r in zip(bssids, rssis) if r > threshold]
        kept_bssids.append([b for b, _ in kept])
        kept_rssis.append([r for _, r in kept])

    # Build object Series explicitly so each cell stays a Python list and the
    # assignment also works for zero-row frames (row-wise apply does not).
    filtered_df['bssid'] = pd.Series(kept_bssids, index=filtered_df.index, dtype=object)
    filtered_df['rssi'] = pd.Series(kept_rssis, index=filtered_df.index, dtype=object)
    return filtered_df

In [11]:
def daily_wifi_features_by_user(df, strong_threshold=-60):
    """
    Aggregate Wi-Fi scans into one feature row per (subject_id, lifelog_date).

    Parameters:
    - df: DataFrame with 'subject_id', 'lifelog_date', 'timestamp', and the
      per-row sequence columns 'bssid' and 'rssi'.
    - strong_threshold: readings with rssi > strong_threshold count as strong
      signals for 'strong_signal_ratio' (default: -60).

    Returns:
    - DataFrame with the columns
      subject_id, lifelog_date, scan_count, unique_bssid_count, avg_rssi,
      max_rssi, min_rssi, strong_signal_ratio, empty_scan_count, top_bssid,
      top_bssid_count, hour_span_minutes.
      'hour_span_minutes' is the time between the first and last scan of the
      day expressed in minutes. Groups without any reading get None for the
      RSSI statistics and top_bssid, and 0 for the ratio and top_bssid_count.
      An empty input yields an empty frame that still carries these columns.
    """
    feature_columns = [
        'subject_id',
        'lifelog_date',
        'scan_count',
        'unique_bssid_count',
        'avg_rssi',
        'max_rssi',
        'min_rssi',
        'strong_signal_ratio',
        'empty_scan_count',
        'top_bssid',
        'top_bssid_count',
        'hour_span_minutes',
    ]

    df = df.copy()
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    features = []

    # Group by subject and day.
    for (subject_id, date), group in df.groupby(['subject_id', 'lifelog_date']):
        scan_count = len(group)

        # Flatten in linear time; sum(list_of_lists, []) re-copies the
        # accumulated list on every addition.
        bssid_flat = [b for scan in group['bssid'] for b in scan]
        rssi_flat = [r for scan in group['rssi'] for r in scan]

        unique_bssid_count = len(set(bssid_flat))
        if rssi_flat:
            avg_rssi = sum(rssi_flat) / len(rssi_flat)
            max_rssi = max(rssi_flat)
            min_rssi = min(rssi_flat)
            strong_rssi_ratio = sum(1 for r in rssi_flat if r > strong_threshold) / len(rssi_flat)
        else:
            avg_rssi = max_rssi = min_rssi = None
            strong_rssi_ratio = 0
        empty_scan_count = sum(1 for b in group['bssid'] if len(b) == 0)

        # Most frequently detected BSSID of the day.
        bssid_counter = Counter(bssid_flat)
        top_bssid, top_bssid_count = bssid_counter.most_common(1)[0] if bssid_counter else (None, 0)

        first_time = group['timestamp'].min()
        last_time = group['timestamp'].max()
        span_minutes = (last_time - first_time).total_seconds() / 60  # minutes

        features.append({
            'subject_id': subject_id,
            'lifelog_date': date,
            'scan_count': scan_count,
            'unique_bssid_count': unique_bssid_count,
            'avg_rssi': avg_rssi,
            'max_rssi': max_rssi,
            'min_rssi': min_rssi,
            'strong_signal_ratio': strong_rssi_ratio,
            'empty_scan_count': empty_scan_count,
            'top_bssid': top_bssid,
            'top_bssid_count': top_bssid_count,
            'hour_span_minutes': span_minutes
        })

    # Passing the column list keeps the schema stable even when no group exists.
    return pd.DataFrame(features, columns=feature_columns)

### 데이터 읽기

In [12]:
# Folder on the mounted Drive that holds one parquet file per data item.
path = '/content/drive/MyDrive/data/ch2025_data_items/'

# 1: data items, each loaded into a DataFrame named after its file
ch2025_mACStatus = pd.read_parquet(path+'ch2025_mACStatus.parquet')
ch2025_mActivity = pd.read_parquet(path+'ch2025_mActivity.parquet')
ch2025_mAmbience = pd.read_parquet(path+'ch2025_mAmbience.parquet')
ch2025_mBle = pd.read_parquet(path+'ch2025_mBle.parquet')
ch2025_mGps = pd.read_parquet(path+'ch2025_mGps.parquet')
ch2025_mLight = pd.read_parquet(path+'ch2025_mLight.parquet')
ch2025_mScreenStatus = pd.read_parquet(path+'ch2025_mScreenStatus.parquet')
ch2025_mUsageStats = pd.read_parquet(path+'ch2025_mUsageStats.parquet')
ch2025_mWifi = pd.read_parquet(path+'ch2025_mWifi.parquet')
ch2025_wHr = pd.read_parquet(path+'ch2025_wHr.parquet')
ch2025_wLight = pd.read_parquet(path+'ch2025_wLight.parquet')
ch2025_wPedo = pd.read_parquet(path+'ch2025_wPedo.parquet')

# 2: training metrics and the submission sample (used as the test frame);
# note these two CSVs sit one level above `path`.
train = pd.read_csv('/content/drive/MyDrive/data/ch2025_metrics_train.csv')
test = pd.read_csv('/content/drive/MyDrive/data/ch2025_submission_sample.csv')

### mWifi
- Wi-Fi devices detected around each individual subject.

In [13]:
def extract_wifi_info(row):
    """Split the records in row['m_wifi'] into parallel 'bssid' and 'rssi' lists.

    Returns a two-entry pd.Series so that a row-wise DataFrame.apply expands
    the result into the columns 'bssid' and 'rssi'.
    """
    parsed = {'bssid': [], 'rssi': []}
    for access_point in row['m_wifi']:
        parsed['bssid'].append(access_point['bssid'])
        parsed['rssi'].append(access_point['rssi'])
    return pd.Series(parsed)

# Expand the nested 'm_wifi' records into flat 'bssid' / 'rssi' list columns.
ch2025_mWifi[['bssid', 'rssi']] = ch2025_mWifi.apply(extract_wifi_info, axis=1)

# Derive the day key from the first 10 characters of the timestamp text and
# drop the raw column. Re-running this cell fails once 'm_wifi' is gone.
ch2025_mWifi = (
    ch2025_mWifi
    .assign(lifelog_date=ch2025_mWifi['timestamp'].astype(str).str[:10])
    .drop(columns=['m_wifi'])
)
ch2025_mWifi.head(1)

Unnamed: 0,subject_id,timestamp,bssid,rssi,lifelog_date
0,id01,2024-06-26 12:03:00,"[a0:0f:37:9a:5d:8b, a0:0f:37:9a:5d:8c, a0:0f:37:9a:5d:8d, a0:0f:37:9a:5d:8e, a0:0f:37:9a:5d:8f, a0:0f:37:96:56:ef, 88:36:6c:86:75:84, a0:0f:37:96:56:ee, a0:0f:37:96:56:ed, 86:25:19:b5:b2:a5, a0:0f:37:96:56:ec, 1e:39:29:8e:fb:e9, 52:c2:e8:c7:9b:e4, a0:0f:37:96:56:eb, 12:e3:c7:09:20:34, 58:86:94:4a:08:b8, 90:9f:33:28:d0:2e, 00:26:66:bc:4e:18, f6:0a:f4:43:4b:ba, 10:e3:c7:09:20:35, 10:e3:c7:09:20:34, 1c:39:29:48:04:92, 12:e3:c7:07:9d:df, 86:25:19:c3:44:07, a0:0f:37:9a:37:2f, a0:0f:37:9a:37:2e, a0:0f:37:9a:37:2d, 0a:09:b4:74:05:ec, a0:0f:37:9a:37:2c, a0:0f:37:9a:37:2b, 0a:09:b4:74:05:eb, c0:25:2f:d8:c1:a6, 16:7f:67:bb:fa:f8, 3c:f3:92:ff:00:01, 06:09:b4:74:05:ec, 06:09:b4:74:05:eb, 12:e3:c7:0a:74:d1, 88:36:6c:a9:6f:8e, 02:e3:c7:09:20:34, 00:09:b4:74:05:eb, 00:09:b4:74:05:ec, 00:1d:93:93:cf:fe, 8e:e2:ac:a5:9d:15]","[-78, -78, -78, -78, -78, -58, -72, -58, -58, -61, -58, -71, -82, -58, -88, -82, -78, -85, -45, -63, -89, -82, -83, -84, -76, -76, -76, -72, -76, -76, -59, -82, -79, -82, -72, -59, -78, -63, -88, -60, -72, -19, -72]",2024-06-26


In [None]:
# wifi 약신호 제거
# ch2025_mWifi = filter_strong_rssi(ch2025_mWifi, threshold=-60)
# ch2025_mWifi.head(1)

In [14]:
# Aggregate the Wi-Fi scans into one feature row per (subject_id, lifelog_date).
ch2025_mWifi_daily = daily_wifi_features_by_user(ch2025_mWifi)
ch2025_mWifi_daily.head()

Unnamed: 0,subject_id,lifelog_date,scan_count,unique_bssid_count,avg_rssi,max_rssi,min_rssi,strong_signal_ratio,empty_scan_count,top_bssid,top_bssid_count,hour_span_minutes
0,id01,2024-06-26,69,393,-70.1964,-19,-91,0.2309,0,86:25:19:9f:9b:be,19,716.0
1,id01,2024-06-27,126,357,-69.0629,-26,-92,0.2701,0,04:09:a5:3a:c8:6a,54,1430.0
2,id01,2024-06-28,118,376,-69.0941,-26,-92,0.2594,0,04:09:a5:3a:c8:6a,47,1430.0
3,id01,2024-06-29,134,258,-67.7897,-24,-91,0.3063,0,04:09:a5:3a:c8:6a,117,1420.0
4,id01,2024-06-30,108,242,-68.2999,-23,-90,0.2946,0,04:09:a5:3a:c8:6a,70,1310.0


### mAmbience
- Ambient sound identification labels and their respective probabilities.

In [15]:
# - mAmbience: Ambient sound identification labels and their respective probabilities.

def extract_labels_and_probs(row):
    """Split the pairs in row['m_ambience'] into parallel 'labels' and 'prob' lists.

    Element 0 of every pair goes to 'labels' and element 1 to 'prob'. The
    two-entry pd.Series lets a row-wise DataFrame.apply expand the result
    into two columns.
    """
    parsed = {'labels': [], 'prob': []}
    for pair in row['m_ambience']:
        parsed['labels'].append(pair[0])
        parsed['prob'].append(pair[1])
    return pd.Series(parsed)

# Expand the nested 'm_ambience' pairs into flat 'labels' / 'prob' list columns.
ch2025_mAmbience[['labels', 'prob']] = ch2025_mAmbience.apply(extract_labels_and_probs, axis=1)

# Derive the day key from the first 10 characters of the timestamp text and
# drop the raw column. Re-running this cell fails once 'm_ambience' is gone.
ch2025_mAmbience = (
    ch2025_mAmbience
    .assign(lifelog_date=ch2025_mAmbience['timestamp'].astype(str).str[:10])
    .drop(columns=['m_ambience'])
)
ch2025_mAmbience.head(1)

Unnamed: 0,subject_id,timestamp,labels,prob,lifelog_date
0,id01,2024-06-26 13:00:10,"[Music, Vehicle, Motor vehicle (road), Outside, urban or manmade, Outside, rural or natural, Car, Speech, Inside, large room or hall, Truck, Sound effect]","[0.30902618, 0.081680894, 0.04035286, 0.037144363, 0.032663062, 0.03199804, 0.029806137, 0.01684492, 0.016206821, 0.01591479]",2024-06-26


In [16]:
# Preview the first two rows of the parsed ambience frame.
ch2025_mAmbience.head(2)

Unnamed: 0,subject_id,timestamp,labels,prob,lifelog_date
0,id01,2024-06-26 13:00:10,"[Music, Vehicle, Motor vehicle (road), Outside, urban or manmade, Outside, rural or natural, Car, Speech, Inside, large room or hall, Truck, Sound effect]","[0.30902618, 0.081680894, 0.04035286, 0.037144363, 0.032663062, 0.03199804, 0.029806137, 0.01684492, 0.016206821, 0.01591479]",2024-06-26
1,id01,2024-06-26 13:02:10,"[Music, Vehicle, Bell, Hiss, Jingle bell, Chime, Car, Motor vehicle (road), Bicycle, Chink, clink]","[0.62307084, 0.021118319, 0.018510727, 0.013137147, 0.012887808, 0.012369333, 0.011385, 0.0107314605, 0.010630278, 0.010412726]",2024-06-26


In [25]:
import pandas as pd
from scipy.stats import entropy

def generate_derived_features(df,
                             calculate_mean=True,
                             calculate_max=True,
                             calculate_entropy=True,
                             calculate_presence=False,
                             target_label=None):
    """
    Build daily ambience features per (subject_id, lifelog_date).

    Parameters:
    - df: DataFrame with 'subject_id', 'lifelog_date', and the aligned
      per-row columns 'labels' and 'prob' (lists, or their bracketed string form).
    - calculate_mean: add 'mean_probability', a wide frame with one column per
      label holding the mean probability of that label for the day.
    - calculate_max: add 'max_prob_label', exactly one row per day holding the
      single (label, prob) pair with the highest probability.
    - calculate_entropy: add 'label_entropy', the scipy.stats.entropy of the
      per-label mean probabilities of the day (column 'entropy').
    - calculate_presence: together with target_label, add 'label_presence',
      a 0/1 column 'has_<target_label>' telling whether the label occurred.
    - target_label: label checked by calculate_presence.

    Returns:
    - dict mapping the feature-set names above to DataFrames; only the
      requested entries are present.
    """

    def parse_list(x):
        """Turn a bracketed string into a list; non-string values pass through."""
        if isinstance(x, str):
            # 1. Strip the surrounding brackets.
            cleaned = x.strip('[]')
            # 2. Split on commas, ignoring surrounding whitespace.
            # NOTE(review): labels that themselves contain commas would be
            # split apart here; this path is only safe for comma-free items.
            items = [item.strip() for item in cleaned.split(',')]
            # 3. Numeric-looking input becomes floats, labels stay strings.
            if '.' in cleaned:
                try:
                    return [float(item) for item in items]
                except ValueError:
                    return items
            return items
        return x

    # Common preprocessing: one row per (scan, label) pair.
    # ignore_index=True gives every exploded row a unique index label; with
    # the repeated original index, the idxmax lookup below would return all
    # rows of the winning scan instead of only the max-probability row.
    df = df.assign(
        labels=df['labels'].apply(parse_list),
        prob=df['prob'].apply(parse_list)
    ).explode(['labels', 'prob'], ignore_index=True)

    # Force a numeric type and drop rows without a usable probability.
    df['prob'] = pd.to_numeric(df['prob'], errors='coerce')
    df = df.dropna(subset=['prob'])

    # Grouping keys.
    group_keys = ['subject_id', 'lifelog_date']

    # Collected outputs.
    results = {}

    # 1. Mean probability per label.
    if calculate_mean:
        mean_prob = df.groupby(group_keys + ['labels'])['prob'].mean().unstack().reset_index()
        results['mean_probability'] = mean_prob

    # 2. Label with the highest probability of the day.
    if calculate_max:
        top_rows = df.groupby(group_keys)['prob'].idxmax()
        max_prob = df.loc[top_rows, group_keys + ['labels', 'prob']]
        results['max_prob_label'] = max_prob

    # 3. Entropy of the per-label mean probabilities (diversity measure).
    if calculate_entropy:
        label_means = df.groupby(group_keys + ['labels'])['prob'].mean()
        entropy_df = label_means.groupby(level=group_keys).apply(entropy).reset_index(name='entropy')
        results['label_entropy'] = entropy_df

    # 4. Presence of one specific label.
    if calculate_presence and target_label:
        presence_df = df.groupby(group_keys)['labels'] \
                      .apply(lambda x: int((x == target_label).any())) \
                      .reset_index(name=f'has_{target_label}')
        results['label_presence'] = presence_df

    return results

# 사용 예시
# Example usage: compute every feature set for the ambience data and flag
# the days on which the 'Music' label occurs.
results = generate_derived_features(
    ch2025_mAmbience,
    calculate_mean=True,
    calculate_max=True,
    calculate_entropy=True,
    calculate_presence=True,
    target_label='Music'
)

In [26]:
# Preview the per-day presence flag for the target label.
results['label_presence'].head()

Unnamed: 0,subject_id,lifelog_date,has_Music
0,id01,2024-06-26,1
1,id01,2024-06-27,1
2,id01,2024-06-28,1
3,id01,2024-06-29,1
4,id01,2024-06-30,1


In [27]:
# Preview the wide per-day mean-probability table (one column per label).
results['mean_probability'].head()

labels,subject_id,lifelog_date,A capella,"Accelerating, revving, vroom",Accordion,Acoustic guitar,Afrobeat,Air brake,Air conditioning,"Air horn, truck horn",Aircraft,Aircraft engine,Alarm,Alarm clock,Ambient music,Ambulance (siren),Animal,Applause,Arrow,Artillery fire,Babbling,"Baby cry, infant cry",Baby laughter,Background music,Bagpipes,Bang,Banjo,Bark,Basketball bounce,Bass drum,Bass guitar,Bathtub (filling or washing),Beatboxing,"Bee, wasp, etc.","Beep, bleep",Bell,Bellow,Belly laugh,Bicycle,Bicycle bell,Bird,"Bird flight, flapping wings","Bird vocalization, bird call, bird song",Biting,Bleat,Blender,Bluegrass,Blues,"Boat, Water vehicle",Boiling,Boing,Boom,Bouncing,Bow-wow,Bowed string instrument,Brass instrument,Breaking,Breathing,"Burping, eructation","Burst, pop",Bus,Busy signal,Buzz,Buzzer,Cacophony,Camera,"Canidae, dogs, wolves",Cap gun,Car,Car alarm,Car passing by,Carnatic music,Cash register,Cat,Caterwaul,"Cattle, bovinae",Caw,Cello,Chainsaw,Change ringing (campanology),Chant,Chatter,Cheering,"Chewing, mastication","Chicken, rooster",Child singing,"Child speech, kid speaking",Children playing,Children shouting,Chime,"Chink, clink",Chirp tone,"Chirp, tweet",Choir,Chop,Chopping (food),Christian music,Christmas music,"Chuckle, chortle",Church bell,Civil defense siren,Clang,Clapping,Clarinet,Classical music,Clatter,Clickety-clack,Clicking,Clip-clop,Clock,Cluck,Coin (dropping),Computer keyboard,Conversation,Coo,Cough,Country,Cowbell,Crack,Crackle,Creak,Cricket,Croak,Crow,Crowd,"Crowing, cock-a-doodle-doo","Crumpling, crinkling",Crunch,Crushing,"Crying, sobbing",Cupboard open or close,"Cutlery, silverware",Cymbal,Dance music,"Dental drill, dentist's drill",Dial tone,Didgeridoo,Ding,Ding-dong,Disco,"Dishes, pots, and pans",Distortion,Dog,"Domestic animals, pets",Door,Doorbell,Double bass,Drawer open or close,Drill,Drip,Drum,Drum and bass,Drum kit,Drum machine,Drum roll,Dubstep,Duck,Echo,Effects unit,Electric guitar,Electric piano,"Electric shaver, electric 
razor",Electric toothbrush,Electronic dance music,Electronic music,Electronic organ,Electronic tuner,Electronica,Emergency vehicle,Engine,Engine knocking,Engine starting,Environmental noise,Eruption,Exciting music,Explosion,Fart,Field recording,Filing (rasp),Fill (with liquid),Finger snapping,Fire,Fire alarm,"Fire engine, fire truck (siren)",Firecracker,Fireworks,"Fixed-wing aircraft, airplane",Flamenco,Flap,Flute,"Fly, housefly",Foghorn,Folk music,Fowl,Frog,Frying (food),Funk,Fusillade,Gargling,Gasp,Gears,Giggle,Glass,Glockenspiel,Goat,Gobble,Gong,Goose,Gospel music,Groan,Growling,Grunt,Guitar,"Gunshot, gunfire",Gurgling,Gush,Hair dryer,Hammer,Hammond organ,Hands,Happy music,Harmonic,Harmonica,Harp,Harpsichord,Heart murmur,"Heart sounds, heartbeat",Heavy engine (low frequency),Heavy metal,Helicopter,Hi-hat,Hiccup,Hip hop music,Hiss,Honk,Hoot,Horse,House music,Howl,"Hubbub, speech noise, speech babble",Hum,Humming,"Ice cream truck, ice cream van",Idling,Independent music,Insect,"Inside, large room or hall","Inside, public space","Inside, small room",Jackhammer,Jazz,Jet engine,Jingle (music),Jingle bell,"Jingle, tinkle",Keyboard (musical),Keys jangling,Knock,Laughter,Lawn mower,Light engine (high frequency),Liquid,"Livestock, farm animals, working animals",Lullaby,Machine gun,Mains hum,Mallet percussion,Mandolin,Mantra,Maraca,"Marimba, xylophone",Mechanical fan,Mechanisms,Medium engine (mid frequency),Meow,Microwave oven,Middle Eastern music,Moo,Mosquito,Motor vehicle (road),"Motorboat, speedboat",Motorcycle,Mouse,Music,Music for children,Music of Africa,Music of Asia,Music of Bollywood,Music of Latin America,Musical instrument,"Narration, monologue","Neigh, whinny",New-age music,Noise,Ocean,Oink,Opera,Orchestra,Organ,"Outside, rural or natural","Outside, urban or manmade",Owl,Pant,Patter,Percussion,Piano,Pig,"Pigeon, dove",Ping,Pink noise,Pizzicato,Plop,Plucked string instrument,Police car (siren),Pop music,Pour,Power tool,"Power windows, electric 
windows",Printer,Progressive rock,"Propeller, airscrew",Psychedelic rock,Pulleys,Pulse,Pump (liquid),Punk rock,Purr,Quack,"Race car, auto racing",Radio,Rail transport,"Railroad car, train wagon",Rain,Rain on surface,Raindrop,Rapping,"Ratchet, pawl",Rattle,Rattle (instrument),Reggae,Reverberation,Reversing beeps,Rhythm and blues,Rimshot,Ringtone,Roar,"Roaring cats (lions, tigers)",Rock and roll,Rock music,"Rodents, rats, mice",Roll,"Rowboat, canoe, kayak",Rub,Rumble,Run,Rustle,Rustling leaves,Sad music,"Sailboat, sailing ship",Salsa music,Sampler,Sanding,Sawing,Saxophone,Scary music,Scissors,Scrape,Scratch,Scratching (performance technique),Screaming,Sewing machine,Shatter,Sheep,Ship,Shofar,Shout,Shuffle,Shuffling cards,Sidetone,Sigh,Silence,Sine wave,Singing,Singing bowl,Single-lens reflex camera,Sink (filling or washing),Siren,Sitar,Sizzle,Ska,Skateboard,Skidding,Slam,"Slap, smack",Sliding door,Slosh,"Smash, crash","Smoke detector, smoke alarm",Snake,Snare drum,Sneeze,Snicker,Sniff,Snoring,Snort,Sonar,Song,Soul music,Sound effect,Soundtrack music,Speech,Speech synthesizer,"Splash, splatter",Splinter,Spray,Squawk,Squeak,Squeal,Squish,Static,Steam,Steam whistle,"Steel guitar, slide guitar",Steelpan,Stir,Stomach rumble,Stream,String section,Strum,"Subway, metro, underground",Swing music,Synthesizer,Synthetic singing,Tabla,Tambourine,Tap,Tapping (guitar technique),Tearing,Techno,Telephone,Telephone bell ringing,"Telephone dialing, DTMF",Television,Tender music,Theme music,Theremin,Throat clearing,Throbbing,"Thump, thud",Thunder,Thunderstorm,Thunk,Tick,Tick-tock,Timpani,Tire squeal,Toilet flush,Tools,Toot,Toothbrush,Traditional music,"Traffic noise, roadway noise",Train,Train horn,Train wheels squealing,Train whistle,Trance music,"Trickle, dribble",Trombone,Truck,Trumpet,Tubular bells,Tuning fork,Turkey,Typewriter,Typing,Ukulele,Vacuum cleaner,Vehicle,"Vehicle horn, car horn, honking",Vibraphone,Vibration,Video game music,"Violin, fiddle",Vocal music,"Wail, 
moan","Walk, footsteps",Water,"Water tap, faucet",Waterfall,"Waves, surf",Wedding music,"Whack, thwack",Whale vocalization,Wheeze,Whimper,Whimper (dog),Whip,Whir,Whispering,Whistle,Whistling,White noise,Whoop,"Whoosh, swoosh, swish",Wild animals,Wind,Wind chime,"Wind instrument, woodwind instrument",Wind noise (microphone),Wood,Wood block,Writing,Yell,Yip,Yodeling,Zing,Zipper (clothing),Zither
0,id01,2024-06-26,,0.114,,,,,0.0053,0.0696,0.1184,0.0383,0.2309,,0.0251,,0.0643,,0.0541,0.0976,0.0,0.0033,,0.0057,,0.0825,,,0.0072,0.043,,0.0107,,0.2037,,0.0312,0.0906,,0.0315,,0.0507,,,,,0.038,,,0.0426,0.0533,,,,0.0174,,,0.1025,0.0432,,0.0836,0.0277,,0.0432,,0.1229,0.0417,,0.0409,0.0713,,0.0251,,0.0422,0.0319,,,,,,,,0.0044,0.0551,,0.0569,0.0066,0.0038,0.0032,,0.0318,0.0581,,,0.0053,0.0809,,0.0159,0.0113,0.0064,,,0.0279,,,0.0069,0.0397,,,0.049,0.0434,,0.0714,0.1791,0.0042,0.0269,0.118,,,0.2082,0.1004,0.0112,0.0401,,,0.0438,,,,0.2347,0.011,0.0448,0.0504,,,,,,0.0526,,,0.0358,0.0112,0.0535,0.0536,0.0513,0.0139,,0.0566,,0.0622,0.0696,,0.0487,,,,,,0.0066,,,0.1318,0.0086,,0.0086,,,,,0.1493,,0.0581,,0.0501,,0.1208,0.0475,,,0.1121,0.0336,0.052,,,0.1136,0.1665,0.0919,0.0082,0.0394,,0.1689,,,0.0664,0.0135,0.1263,,,,0.106,,,0.0473,,,,,,0.0199,,,,0.0168,0.0655,0.0187,0.0681,,0.0576,,0.0179,,,,,,,,0.0816,,0.073,,0.0401,,0.0395,,,0.0577,,,0.0395,0.6191,,0.0345,0.0561,,0.0823,0.0209,0.0121,0.0415,,,0.129,0.0044,0.0175,,,0.1149,,0.0079,,,0.0585,0.0497,0.009,0.0649,0.6394,,,,,,0.0579,0.0543,0.0879,,0.1003,,,,0.0737,0.0821,0.0395,0.0486,0.1185,,,0.0178,,0.0133,0.0135,0.0076,,0.0088,0.1223,0.1484,,,,,0.066,0.0456,0.0392,,0.0677,0.0289,,,0.0642,0.0155,0.1194,,,0.0163,,0.01,0.0536,,0.1202,0.0425,,0.069,,,,0.0834,,,,0.0508,0.0016,0.0753,0.1114,,,,,,0.0384,,,,,,,,,,0.0181,0.0096,0.07,,0.0061,,0.0963,0.0081,0.054,0.0598,0.0044,,,,,,,0.268,0.0646,0.0119,,,0.0488,0.0601,,,0.0841,,0.0049,0.0083,0.0593,,0.0224,0.8241,,0.0302,,,0.032,0.0616,,0.0988,,0.1141,,0.0319,0.0309,0.0688,0.0307,0.0247,,0.0412,0.0351,0.0603,0.0017,,0.027,0.0782,,,0.0084,0.0575,0.0236,0.4213,0.0003,0.0464,,0.05,,,,0.0395,0.1554,0.0375,0.4027,,,0.1075,,,,,,,,,,0.0234,0.0086,,,,0.008,,,0.004,0.0137,0.0066,,0.0711,0.02,0.0348,,,,0.0292,,,,,0.047,0.0349,0.02,,0.1291,0.077,0.0511,0.1433,0.007,,0.0122,,0.0348,,,,,,0.0922,,0.0155,0.1282,0.0653,,0.0634,,,,,0.07,0.0419,0.026,0.0735,0.1888,0.0117,0.0668,,0.0439,0.0193,0.0485,0.2212,
,,0.3458,,0.0869,0.0287,0.0424,0.0584,0.0601,0.0599,,0.1009,0.0465,,0.0708,,,,,0.0108,
1,id01,2024-06-27,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0257,,,,,,,,0.2451,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0463,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,0.0251,,0.0378,,,,0.2301,,,,0.0,,,,,,,0.0,,,,,,,,,,0.0938,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0252,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3812,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,id01,2024-06-28,,,,,,,,,,,,,,,0.1039,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3623,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,0.1165,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0603,,,,,,0.139,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0007,,0.0,,,,,,,,,,,,,,0.0637,,,,,,,,,,,,,,,,,0.0267,0.4613,0.0314,,0.0,,,,,,,0.0,,,,,,,,,0.1175,0.0003,,,0.0535,,,,,,,,,,,,,,,,,,,,,,,,,,0.0008,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0744,,0.0383,,,,,,,,,,,,,,,,,0.0,,,,,1.0,,,,,,,,,,,,,,,,,,0.0477,,,,,,,,,,,,0.0014,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3165,,,,,,,,0.1345,,,,,,,,,,,,,,,,,,,0.0452,0.0952,,,0.1167,,,,,,,,,
3,id01,2024-06-29,,,,,,,,,,,,,,,0.273,,,,0.0,,,,,,,,,,,,,0.2488,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,0.0855,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.1922,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3187,,,0.0004,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.034,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,1.0,,,,,,,,,,,,,,,,,,0.026,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2903,,,,,,,0.0359,,,,,,
4,id01,2024-06-30,,,,,,,,,,,,,,,0.063,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,0.0,,,,0.0388,,,,,0.2222,0.0728,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0675,,,,,,,,,,,,,,,,,,,0.0,,,,,0.9987,,,,,,,,,,,,,,,0.0368,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0439,,,,,,,,,0.0481,,,,,,,,,,,,,,,,,,,0.0792,,,,,,,,,,,,


In [28]:
# Preview the per-day entropy of the label distribution.
results['label_entropy'].head()

Unnamed: 0,subject_id,lifelog_date,entropy
0,id01,2024-06-26,5.0665
1,id01,2024-06-27,1.6084
2,id01,2024-06-28,2.4522
3,id01,2024-06-29,1.8299
4,id01,2024-06-30,1.4793


In [29]:
# Preview the ambience frame again after the feature functions have run.
ch2025_mAmbience.head()

Unnamed: 0,subject_id,timestamp,labels,prob,lifelog_date
0,id01,2024-06-26 13:00:10,"[Music, Vehicle, Motor vehicle (road), Outside, urban or manmade, Outside, rural or natural, Car, Speech, Inside, large room or hall, Truck, Sound effect]","[0.30902618, 0.081680894, 0.04035286, 0.037144363, 0.032663062, 0.03199804, 0.029806137, 0.01684492, 0.016206821, 0.01591479]",2024-06-26
1,id01,2024-06-26 13:02:10,"[Music, Vehicle, Bell, Hiss, Jingle bell, Chime, Car, Motor vehicle (road), Bicycle, Chink, clink]","[0.62307084, 0.021118319, 0.018510727, 0.013137147, 0.012887808, 0.012369333, 0.011385, 0.0107314605, 0.010630278, 0.010412726]",2024-06-26
2,id01,2024-06-26 13:04:10,"[Horse, Animal, Clip-clop, Outside, rural or natural, Livestock, farm animals, working animals, Rustling leaves, Wind, Wild animals, Snake, Bicycle]","[0.25209898, 0.24263993, 0.24169719, 0.20218423, 0.15339553, 0.09363359, 0.07172977, 0.06615251, 0.0627117, 0.05555296]",2024-06-26
3,id01,2024-06-26 13:06:10,"[Speech, Inside, large room or hall, Narration, monologue, Inside, public space, Conversation, Music, Door, Crowd, Clip-clop, Tap]","[0.93433166, 0.041821606, 0.015862875, 0.0069918185, 0.004214324, 0.0014499724, 0.0013527935, 0.0010621995, 8.6553075E-4, 8.336997E-4]",2024-06-26
4,id01,2024-06-26 13:08:10,"[Speech, Inside, small room, Inside, large room or hall, Conversation, Outside, urban or manmade, Outside, rural or natural, Narration, monologue, Inside, public space, Animal, Child speech, kid speaking]","[0.8935082, 0.019385122, 0.01352074, 0.0076918676, 0.005634144, 0.005461581, 0.0052098036, 0.003650546, 0.0031810105, 0.002692297]",2024-06-26


### 학습 & 테스트 데이터

In [None]:
# Left-join the daily Wi-Fi features onto the training rows; a row with no
# matching (subject_id, lifelog_date) keeps NaN in the feature columns.
train2 = train.merge(ch2025_mWifi_daily,on=['subject_id','lifelog_date'],how='left')
train2.head()

Unnamed: 0,subject_id,sleep_date,lifelog_date,Q1,Q2,Q3,S1,S2,S3,scan_count,unique_bssid_count,avg_rssi,max_rssi,min_rssi,strong_signal_ratio,empty_scan_count,top_bssid,top_bssid_count,hour_span_minutes
0,id01,2024-06-27,2024-06-26,0,0,0,0,0,1,69.0,393.0,-70.1964,-19.0,-91.0,0.2309,0.0,86:25:19:9f:9b:be,19.0,716.0
1,id01,2024-06-28,2024-06-27,0,0,0,0,1,1,126.0,357.0,-69.0629,-26.0,-92.0,0.2701,0.0,04:09:a5:3a:c8:6a,54.0,1430.0
2,id01,2024-06-29,2024-06-28,1,0,0,1,1,1,118.0,376.0,-69.0941,-26.0,-92.0,0.2594,0.0,04:09:a5:3a:c8:6a,47.0,1430.0
3,id01,2024-06-30,2024-06-29,1,0,1,2,0,0,134.0,258.0,-67.7897,-24.0,-91.0,0.3063,0.0,04:09:a5:3a:c8:6a,117.0,1420.0
4,id01,2024-07-01,2024-06-30,0,1,1,1,1,1,108.0,242.0,-68.2999,-23.0,-90.0,0.2946,0.0,04:09:a5:3a:c8:6a,70.0,1310.0


In [None]:
# Same left join for the test (submission sample) rows.
test2 = test.merge(ch2025_mWifi_daily,on=['subject_id','lifelog_date'],how='left')
test2.head()

Unnamed: 0,subject_id,sleep_date,lifelog_date,Q1,Q2,Q3,S1,S2,S3,scan_count,unique_bssid_count,avg_rssi,max_rssi,min_rssi,strong_signal_ratio,empty_scan_count,top_bssid,top_bssid_count,hour_span_minutes
0,id01,2024-07-31,2024-07-30,0,0,0,0,0,0,115.0,910.0,-73.8566,-29.0,-93.0,0.1451,0.0,86:25:19:9f:9b:be,42.0,1400.0
1,id01,2024-08-01,2024-07-31,0,0,0,0,0,0,135.0,492.0,-69.2624,-33.0,-93.0,0.2579,0.0,04:09:a5:3a:c8:6a,48.0,1430.0
2,id01,2024-08-02,2024-08-01,0,0,0,0,0,0,124.0,397.0,-72.1787,-33.0,-92.0,0.2053,0.0,86:25:19:9f:9b:be,49.0,1428.0
3,id01,2024-08-03,2024-08-02,0,0,0,0,0,0,132.0,366.0,-69.7404,-27.0,-93.0,0.2188,0.0,04:09:a5:3a:c8:6a,53.0,1430.0
4,id01,2024-08-04,2024-08-03,0,0,0,0,0,0,107.0,312.0,-69.6403,-21.0,-93.0,0.2214,0.0,04:09:a5:3a:c8:6a,77.0,1420.0
