In [10]:
import json
import os
from datetime import datetime
from glob import glob

import pandas as pd

In [11]:
def clean_csv(filename, output_path=None):
    """Reshape one raw room export into a flat table and save it as CSV.

    Layout read from the raw file, using the positions pandas assigns after it
    has consumed the first line as the header:

    * the last column is discarded;
    * rows 1, 2 and 3 of the second column hold the room area, the doors area
      and the panels area (each parsed with ``float``);
    * row 4 holds the column labels for the records that follow;
    * everything from row 5 on is kept as data.

    The room id is the part of the file's base name before the first ``-``.
    Labels are lower-cased and their whitespace runs replaced by ``_``; one of
    them must come out as ``timestamp`` and hold ``%m/%d/%y %H:%M`` strings.
    The room metadata and the hour, day and month of every timestamp are
    inserted right after the first column, then every column that still
    contains a missing value is dropped before the table is written.

    Parameters
    ----------
    filename : str
        Path of the raw CSV export.
    output_path : str, optional
        Where to write the cleaned table. Defaults to ``filename``, which
        overwrites the raw export; a second call on that same path would then
        parse the cleaned file as if it were raw. Pass a different path to
        keep the raw file intact.

    Returns
    -------
    list of str
        Labels of the columns that contained at least one missing value and
        were therefore left out of the written file.

    Raises
    ------
    ValueError
        If one of the three area cells is not numeric text or a timestamp
        does not match ``%m/%d/%y %H:%M``.
    """
    timestamp_format = "%m/%d/%y %H:%M"

    raw = pd.read_csv(filename).iloc[:, :-1]

    # basename() understands the platform's path separators, so a path that
    # glob returns on Windows (dir\101-x.csv) yields the same room id as the
    # POSIX form (dir/101-x.csv).
    room_no = os.path.basename(filename).split('-')[0]
    area = float(raw.iloc[1, 1])
    doors_area = float(raw.iloc[2, 1])
    panels_area = float(raw.iloc[3, 1])

    # "CO2 Level" -> "co2_level"
    labels = ['_'.join(label.lower().split()) for label in raw.iloc[4].tolist()]

    df = raw.drop(range(5))
    df.columns = labels

    # Walk the timestamp column directly; indexing df.iloc[i] would build a
    # whole row Series for every record just to read one cell.
    parsed = [datetime.strptime(stamp, timestamp_format) for stamp in df["timestamp"]]

    # Scalars are broadcast to every row by DataFrame.insert.
    df.insert(1, "room_id", room_no)
    df.insert(2, "area", area)
    df.insert(3, "doors_area", doors_area)
    df.insert(4, 'panels_area', panels_area)
    df.insert(5, 'hours', [moment.hour for moment in parsed])
    df.insert(6, 'days', [moment.day for moment in parsed])
    df.insert(7, 'months', [moment.month for moment in parsed])

    # Record the incomplete columns before they are removed.
    na_columns = df.columns[df.isna().any()].tolist()

    target = filename if output_path is None else output_path
    df.dropna(axis="columns").to_csv(target, index=False)

    return na_columns

In [12]:
# Clean every export under "./DTFL - Data" and remember, per file, which
# columns had missing values. clean_csv is called with the path only, so each
# file is rewritten where it sits: this cell is not safe to run a second time
# on the same directory.
filenames = glob("./DTFL - Data/*.csv")
na_hashmap = {filename: clean_csv(filename) for filename in filenames}

# Persist the per-file report next to the notebook.
with open('data_info.json', 'w') as fp:
    json.dump(na_hashmap, fp)

In [13]:
# Tally the files by how many columns had to be dropped from each of them.
# NOTE(review): only the *number* of dropped columns is inspected, not their
# names. The tallies assume one dropped column is always the CO2 column and two
# dropped columns are CO2 plus humidity - TODO confirm against the labels
# stored in na_hashmap.
co2_count = 0
hum_count = 0
complete = 0
unclassified = 0  # files with three or more dropped columns

for nas in na_hashmap.values():
    dropped = len(nas)
    if dropped == 0:
        # Only a file that lost no column at all is complete.
        complete += 1
    elif dropped == 1:
        co2_count += 1
    elif dropped == 2:
        co2_count += 1
        hum_count += 1
    else:
        # Previously these fell through to `complete`, inflating that tally.
        unclassified += 1

In [14]:
# Show the three tallies as a tuple: (co2_count, hum_count, complete).
co2_count, hum_count, complete

(56, 8, 10)