# Data Analysis

<p>This file is used to analyse the data files generated using the OpenStudio software as part of the DMX9213 research. The steps below will be followed for the data analysis.</p>

- Step 1: Open .csv file and preprocess the data.
- Step 2: Calculate the PMV values for each hour.
- Step 3: Identify the month with the highest cumulative PMV values averaged over 30 days from the baselines.
- Step 4: Check if the selected months differ between the baselines. If they differ, use the month that is selected by the majority of models.
- Step 5: Average the temperature, humidity and PMV values for the selected month. Average values should represent 24 hour variation of temperature and humidity for the selected month.
- Step 6: Generate graphs to compare the temperature, humidity and PMV values for the baseline and the passive cooling implemented models. The comparison should be performed for the selected month.


### General

This section will be used to import the data necessary for all simulations.

In [15]:
%%time 

# install packages
%pip install pandas

CPU times: total: 15.6 ms
Wall time: 2.76 s



[notice] A new release of pip is available: 23.2.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip





In [30]:
# import packages
import math
import pandas as pd
import os

In [17]:
# paths
BASELINE_PATH = "input/baseline/"
IMPROVED_PATH = "input/improved/"

# os.listdir returns every entry of the folder (including non-.csv entries
# such as a .ipynb_checkpoints folder) in arbitrary order, so keep only the
# .csv files and sort them to get the same list on every run
baseline_csv_files = sorted(
    file_name for file_name in os.listdir(BASELINE_PATH) if file_name.lower().endswith(".csv")
)
improved_csv_files = sorted(
    file_name for file_name in os.listdir(IMPROVED_PATH) if file_name.lower().endswith(".csv")
)

In [18]:
# path of the sample .csv file that the test cells pass to preprocess_data
# (the file itself is only read when a test cell runs)
TEST_FILE_PATH = "input/test.csv"

In [36]:
# parameter to run the test cells: the cells guarded by "if run_test:" only
# execute when this flag is True
run_test = True

### Step 1


Open .csv file and preprocess the data.

In [50]:
def preprocess_data(file_path: str) -> pd.DataFrame:
    """
    Load a .csv file and tidy up its columns.

    Every column whose name contains "PLENUM" is dropped. The text
    'LIVINGTHERMALZONE:Zone' and any leading/trailing whitespace are then
    removed from the names of the remaining columns.

    Args:
        file_path (str): path of the .csv file to read

    Return:
        pd.DataFrame: the loaded data without the plenum columns and with the
        shortened column names
    """
    df = pd.read_csv(file_path)

    # remove columns relevant to plenum space
    plenum_columns = [column_name for column_name in df.columns if "PLENUM" in column_name]
    df = df.drop(columns=plenum_columns)

    # rename all the columns in one pass instead of issuing a separate
    # in-place rename (and rebuilding the column index) for every column
    return df.rename(
        columns=lambda column_name: column_name.replace('LIVINGTHERMALZONE:Zone', '').strip()
    )

In [76]:
# step 1 test: preprocess the test file and show its first rows
# (only runs when run_test is True)
if run_test:
    step_1_test_df = preprocess_data(TEST_FILE_PATH)
    display(step_1_test_df.head())

Unnamed: 0,Hourly,Air Relative Humidity[%],Air Temperature[C],Mean Radiant Temperature[C],Outdoor Air Drybulb Temperature[C],Outdoor Air Wetbulb Temperature[C]
0,2006-Jan-01 01:00:00,79.261238,27.116869,27.077136,24.642456,23.291442
1,2006-Jan-01 02:00:00,82.436045,26.61654,26.63398,23.984123,23.241227
2,2006-Jan-01 03:00:00,84.435859,26.223098,26.240367,23.842456,23.174662
3,2006-Jan-01 04:00:00,86.096696,25.895915,25.903493,23.742456,23.127976
4,2006-Jan-01 05:00:00,87.458615,25.580347,25.581124,23.584123,23.043825


### Step 2

Calculate PMV values for each hour. \
Refer to the following link for more details on the PMV calculation. \
https://github.com/chathusha/PmvCalculator

In [52]:
def calculate_clo(m: float, icl: float) -> float:
    """
    Work out the clo value to feed into the PMV calculator (ASHRAE 55:2023).

    Below 1.2 met the clothing insulation is returned unchanged. From 1.2 met
    upwards it is scaled by the factor (0.6 + 0.4 / m).

    Args:
        m (float): metabolic rate in met unit
        icl (float): clothing insulation in clo unit

    Return:
        float: clo value to use in the PMV calculator
    """
    # low activity: use the clothing insulation as it is
    if m < 1.2:
        return icl

    # otherwise scale the insulation with the metabolic-rate dependent factor
    activity_factor = 0.6 + 0.4 / m
    return icl * activity_factor

In [74]:
def calculate_pmv(ta: float, tr: float, vel: float, rh: float, met: float, clo: float, wme: float) -> dict:
    """
    Calculate the PMV and the PPD for the given inputs using ASHRAE 55:2023.

    The air speed shouldn't be above 0.1 m/s. The calculation is only carried
    out when met is above 1. Otherwise, or when the iterative part needs more
    than 150 passes, a message is printed and an error result is returned.

    Args:
        ta (float): air temperature in C
        tr (float): mean radiant temperature in C
        vel (float): air speed in m/s
        rh (float): relative humidity in %
        met (float): metabolic rate in met unit
        clo (float): clothing factor in clo unit (calculated using calculate_clo method)
        wme (float): external work in met

    Return:
        dict: {
            pmv: Predicted Mean Vote (PMV), 0 on error,
            ppd: Predicted Percentage of Dissatisfied (PPD), 0 on error,
            status: status of the calculation, 0 succeed, 1 error
            }
    """
    # guard clause: nothing is calculated unless met is above 1
    if not met > 1:
        print("ASHRAE 55:2023 calculation does not available for metabolic rate less than 1")
        return {'pmv': 0, 'ppd': 0, 'status': 1}

    pa = rh * 10. * math.exp(16.6536 - 4030.183 / (ta + 235.))
    icl = 0.155 * clo  # thermal insulation of the clothing in m2K/W
    m = met * 58.15  # metabolic rate in W/m2
    w = wme * 58.15  # external work in W/m2
    mw = m - w  # internal heat production in the human body

    # clothing factor, the formula depends on the clothing insulation
    fcl = 1 + (1.29 * icl) if icl <= 0.078 else 1.05 + (0.645 * icl)

    # heat transfer coefficient by forced convection
    hcf = 12.1 * math.sqrt(vel)
    taa = ta + 273.
    tra = tr + 273.
    tcla = taa + (35.5 - ta) / (3.5 * icl + 0.1)

    # constant terms of the iteration
    p1 = icl * fcl
    p2 = p1 * 3.96
    p3 = p1 * 100.
    p4 = p1 * taa
    p5 = 308.7 - 0.028 * mw + (p2 * math.pow(tra / 100, 4))

    # iterate until two successive estimates are within the tolerance
    x_next = tcla / 100.
    x_curr = tcla / 50.
    tolerance = 0.00015
    passes = 0
    while abs(x_next - x_curr) > tolerance:
        x_curr = (x_curr + x_next) / 2
        hcn = 2.38 * math.pow(abs(100. * x_curr - taa), 0.25)
        # use the larger of the two convection coefficients
        if hcf > hcn:
            hc = hcf
        else:
            hc = hcn
        x_next = (p5 + p4 * hc - p2 * math.pow(x_curr, 4)) / (100. + p3 * hc)
        passes += 1
        if passes > 150:
            print("Max iterations exceeded")
            return {'pmv': 0, 'ppd': 0, 'status': 1}

    tcl = 100. * x_next - 273.

    # heat loss diff. through skin
    skin_diffusion_loss = 3.05 * 0.001 * (5733 - (6.99 * mw) - pa)

    # heat loss by sweating
    if mw > 58.15:
        sweating_loss = 0.42 * (mw - 58.15)
    else:
        sweating_loss = 0

    # latent respiration heat loss
    latent_respiration_loss = 1.7 * 0.00001 * m * (5867. - pa)

    # dry respiration heat loss
    dry_respiration_loss = 0.0014 * m * (34. - ta)

    # heat loss by radiation
    radiation_loss = 3.96 * fcl * (math.pow(x_next, 4) - math.pow(tra / 100., 4))

    # heat loss by convection
    convection_loss = fcl * hc * (tcl - ta)

    ts = 0.303 * math.exp(-0.036 * m) + 0.028
    pmv = ts * (mw - skin_diffusion_loss - sweating_loss - latent_respiration_loss
                - dry_respiration_loss - radiation_loss - convection_loss)
    ppd = 100 - 95 * math.exp(-0.03353 * math.pow(pmv, 4) - 0.2179 * math.pow(pmv, 2))

    return {'pmv': pmv, 'ppd': ppd, 'status': 0}

In [78]:
def add_pmv_column(df: pd.DataFrame, vel: float = 0.1, met: float = 1.2, icl: float = 0.36,
                   wme: float = 0) -> pd.DataFrame:
    """
    Add a 'PMV' column holding the PMV value calculated for every row.

    The PMV of a row is calculated with calculate_pmv from its
    'Air Temperature[C]', 'Mean Radiant Temperature[C]' and
    'Air Relative Humidity[%]' values. Only the 'pmv' entry of the result is
    stored; the 'status' entry is not checked, so a row for which
    calculate_pmv reports an error gets the pmv value returned with that error.

    The data frame is modified in place and also returned.

    Args:
        df (pd.DataFrame): data with the three columns listed above
        vel (float): air speed in m/s used for every row
        met (float): metabolic rate in met unit used for every row
        icl (float): clothing insulation in clo unit, passed to calculate_clo
        wme (float): external work in met used for every row

    Return:
        pd.DataFrame: the same data frame with the 'PMV' column added
    """
    # the clo value does not depend on the row, so calculate it only once
    clo = calculate_clo(m=met, icl=icl)

    # walk over the three needed columns directly; unlike a row-wise
    # df.apply this also works for a data frame without rows
    df['PMV'] = [
        calculate_pmv(ta=ta, tr=tr, vel=vel, rh=rh, met=met, clo=clo, wme=wme)['pmv']
        for ta, tr, rh in zip(
            df['Air Temperature[C]'],
            df['Mean Radiant Temperature[C]'],
            df['Air Relative Humidity[%]'],
        )
    ]

    return df

In [79]:
# step 2 test: add the PMV column to the step 1 test data and show its first rows
# (only runs when run_test is True)
# NOTE: add_pmv_column adds the column to the data frame it receives and
# returns that same data frame, so step_1_test_df gets the PMV column too
if run_test:
    step_2_test_df = add_pmv_column(step_1_test_df)
    display(step_2_test_df.head())

Unnamed: 0,Hourly,Air Relative Humidity[%],Air Temperature[C],Mean Radiant Temperature[C],Outdoor Air Drybulb Temperature[C],Outdoor Air Wetbulb Temperature[C],PMV
0,2006-Jan-01 01:00:00,79.261238,27.116869,27.077136,24.642456,23.291442,0.701558
1,2006-Jan-01 02:00:00,82.436045,26.61654,26.63398,23.984123,23.241227,0.559119
2,2006-Jan-01 03:00:00,84.435859,26.223098,26.240367,23.842456,23.174662,0.435502
3,2006-Jan-01 04:00:00,86.096696,25.895915,25.903493,23.742456,23.127976,0.331001
4,2006-Jan-01 05:00:00,87.458615,25.580347,25.581124,23.584123,23.043825,0.228766
