In [None]:
import numpy as np 
import pandas as pd 
from statistics import mean
import math
import requests
import json 
from datetime import datetime,timedelta
import os
import math
import sys
from scipy.optimize import minimize_scalar

In [None]:
# File paths to merge for given month
files = [
    'MOD-PM-00562-ID-CLASSROOM.csv' 
]

# Classroom numbers for merging (placeholder value: replace with one label per entry in `files`)
classrooms=  ['clasroom numbers']

# Classroom Ids for merging. There are 50 classrooms. 38 from one school and 12 from other school.
# NOTE(review): range(1, 39) yields only the 38 ids 1..38 - confirm whether the other 12 are handled elsewhere.
classroomIds = list(range(1, 39))

# Every file is labelled by position, so fail early (before any file is read)
# when a file would have no classroom number or id to pair with.
if len(files) > min(len(classrooms), len(classroomIds)):
    raise ValueError(
        'Each entry in `files` needs a matching entry in `classrooms` and `classroomIds` '
        '(got %d files, %d classrooms, %d ids)' % (len(files), len(classrooms), len(classroomIds))
    )

# The empty template goes first so its column order leads the merged output.
frames = [pd.DataFrame(columns=['Timestamp', 'PM1','PM25','PM10','sample_rh','sample_temp','Id','classroom'])]

for f, path in enumerate(files):
    file = pd.read_csv(path)
    # Label every row of this file with the classroom it came from.
    file.insert(6, "Id", classroomIds[f])
    file.insert(7, "classroom", classrooms[f])
    frames.append(file)

# Concatenate once, after the loop: calling pd.concat per file re-copies the
# accumulated rows on every iteration.
new_df = pd.concat(frames)

# Saving the file 
new_df.to_csv('Month_Name.csv',index=False)

In [None]:
# This function takes the merged CSV file generated by the code above as input and prints the monthly average for every classroom Id.
def monthly_avg(filePath):
    """Print the per-classroom mean of PM1, PM25 and PM10 for a merged CSV.

    The file at ``filePath`` must contain the columns ``Id``, ``PM1``,
    ``PM25`` and ``PM10``. Rows are grouped by ``Id`` in first-seen order.
    The ids are printed first, then one banner per pollutant followed by the
    NaN-ignoring mean of each group. When a mean is NaN, the id and its raw
    readings are printed before the mean so the gap can be inspected.

    Nothing is returned; the report goes to standard output.
    """
    readings = pd.read_csv(filePath)
    class_ids = readings['Id'].to_numpy()

    # column name -> {classroom id -> list of readings}
    grouped = {}
    for column in ('PM25', 'PM1', 'PM10'):
        per_class = {}
        for class_id, value in zip(class_ids, readings[column].to_numpy()):
            per_class.setdefault(class_id, []).append(value)
        grouped[column] = per_class

    for class_id in grouped['PM25']:
        print(class_id)

    sections = (
        ('PM1', '************************** PM 1 Averages **************************'),
        ('PM25', '************************** PM 25 Averages **************************'),
        ('PM10', '************************** PM 10 Averages **************************'),
    )
    for column, banner in sections:
        print(banner)
        for class_id, values in grouped[column].items():
            average = np.nanmean(values)
            if math.isnan(average):
                # No usable reading for this classroom: show what was collected.
                print(class_id)
                print(values)
            print(average)



In [None]:
# This function takes a CSV file as input and generates the daily averages of a classroom's readings, grouped by date.
def daily_avg(filePath):
    """Print and return the daily mean of PM1, PM25 and PM10 for one CSV.

    The file at ``filePath`` must contain a ``Timestamp`` column formatted as
    ``%Y-%m-%dT%H:%M:%SZ`` plus ``PM1``, ``PM25`` and ``PM10`` columns. Rows
    are grouped by calendar date in first-seen order. The dates are printed
    first, then one banner per pollutant followed by the NaN-ignoring mean of
    each day. When a mean is NaN, the date and its raw readings are printed
    before the mean.

    Returns:
        tuple: ``(pm1, pm25, pm10)``, three lists holding the daily means in
        the same order in which the dates are printed.

    Raises:
        ValueError: if a timestamp does not match the expected format.
    """
    file = pd.read_csv(filePath)

    # `datetime` is the class (from datetime import datetime), so strptime is
    # called on it directly; `datetime.datetime.strptime` raises AttributeError.
    days = [datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ").date()
            for stamp in file['Timestamp']]

    def _group_by_day(column):
        # date -> list of that day's readings for one pollutant column
        grouped = {}
        for day, value in zip(days, file[column].to_numpy()):
            grouped.setdefault(day, []).append(value)
        return grouped

    def _report(banner, grouped):
        # Print one pollutant section and collect the daily means it shows.
        print(banner)
        averages = []
        for day, values in grouped.items():
            m = np.nanmean(values)
            if math.isnan(m):
                # No usable reading on this day: show what was collected.
                print(day)
                print(values)
            print(m)
            averages.append(m)
        return averages

    classDictPm1 = _group_by_day('PM1')
    classDictPm25 = _group_by_day('PM25')
    classDictPm10 = _group_by_day('PM10')

    for k in classDictPm1:
        print(k)

    # The returned lists used to stay empty; they now carry the printed means.
    pm1 = _report('************************** PM 1 Averages **************************', classDictPm1)
    pm25 = _report('************************** PM 25 Averages **************************', classDictPm25)
    pm10 = _report('************************** PM 10 Averages **************************', classDictPm10)

    return pm1,pm25,pm10

In [None]:
# This function takes the path of the CSV file to read, the dates that need to be removed from the data, and the path where the cleaned data is saved.
def remove_dates(filePath,dates, savePath):
    """Copy a CSV while dropping every row recorded on one of ``dates``.

    Args:
        filePath: CSV to read. It must contain ``Timestamp`` (formatted as
            ``%Y-%m-%dT%H:%M:%S``), ``PM1``, ``PM25``, ``PM10``,
            ``sample_rh`` and ``sample_temp`` columns.
        dates: container of ``datetime.date`` objects; rows whose timestamp
            falls on any of them are removed.
        savePath: where the filtered CSV is written (without the index).

    Only the six columns listed above are written, in the order
    ``Timestamp, PM1, PM25, PM10, sample_rh, sample_temp``; any other column
    in the input is dropped.

    Raises:
        ValueError: if a timestamp does not match the expected format.
    """
    columns = ['Timestamp', 'PM1', 'PM25', 'PM10', 'sample_rh', 'sample_temp']
    file = pd.read_csv(filePath)

    # `datetime` is the class (from datetime import datetime), so strptime is
    # called on it directly; `datetime.datetime.strptime` raises AttributeError.
    keep = pd.Series(
        [datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S").date() not in dates
         for stamp in file['Timestamp']],
        index=file.index,
        dtype=bool,
    )

    # One boolean selection replaces six parallel per-row lists.
    newFile = file.loc[keep, columns]
    newFile.to_csv(savePath,index=False)

In [None]:
# This function reads the CSV at the given path, keeps only the readings taken up to 14:00 on Fridays and up to 15:00 on other days, and saves the result to the save path.
def clean_data(path,savePath):
    """Keep only the readings taken up to the daily cutoff and save them.

    A row is kept when its timestamp is not later than 14:00:00 on a Friday
    (``weekday() == 4``) or 15:00:00 on any other day; the cutoff instant
    itself is included.

    Args:
        path: CSV to read. It must contain ``Timestamp`` (formatted as
            ``%Y-%m-%dT%H:%M:%S``), ``PM1``, ``PM25``, ``PM10``,
            ``sample_rh`` and ``sample_temp`` columns.
        savePath: where the filtered CSV is written (without the index),
            restricted to the six columns listed above.

    Raises:
        ValueError: if a timestamp does not match the expected format.
    """
    columns = ['Timestamp', 'PM1', 'PM25', 'PM10', 'sample_rh', 'sample_temp']
    df = pd.read_csv(path)

    def _before_cutoff(stamp):
        # Both day types share the same check; only the cutoff hour differs.
        dateTime = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
        cutoff_hour = 14 if dateTime.weekday() == 4 else 15
        return dateTime <= dateTime.replace(hour=cutoff_hour, minute=0, second=0)

    keep = pd.Series(
        [_before_cutoff(stamp) for stamp in df['Timestamp']],
        index=df.index,
        dtype=bool,
    )

    # Select the surviving rows in one step; appending them one at a time with
    # `sv.loc[len(sv)] = ...` re-allocates the frame for every kept row.
    sv = df.loc[keep, columns]
    sv.to_csv(savePath,index=False)
            

In [None]:
# This function is used to calculate the ACH value from CO2 data.
def CO2_ACH(path, outside_co2=415, sample_interval=6, step=0.00001):
    """Estimate the ACH value from a CO2 log by a brute-force least-squares fit.

    The second column of the CSV at ``path`` is used as the measured CO2
    series. Row ``i`` is assigned the time ``sample_interval * i`` and the
    model

        c(t) = outside_co2 - (outside_co2 - c0) * exp(-rate * t)

    (``c0`` being the first reading) is compared with the measurements.
    ``rate`` starts at 0 and grows by ``step`` for as long as the sum of
    squared errors keeps strictly decreasing.

    Args:
        path: CSV file whose second column holds the CO2 readings.
        outside_co2: CO2 level the model decays towards (default 415).
        sample_interval: time between consecutive rows (default 6).
        step: increment of the rate search (default 0.00001); must be > 0.

    Returns:
        float: the rate at which the search stopped, multiplied by 60. As
        before, this is the first rate that no longer improved the fit, i.e.
        one ``step`` beyond the best rate tried.
        NOTE(review): the factor 60 gives a per-hour figure only if the row
        spacing is expressed in minutes - confirm the unit of the sampling
        interval.

    Raises:
        IndexError: if the file has no data rows.
        ValueError: if ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError('step must be positive')

    df = pd.read_csv(path)
    measured = df.iloc[:, 1].to_numpy(dtype=float)
    elapsed = np.arange(len(measured)) * sample_interval
    fix = measured[0]  # the model is anchored at the first reading

    val = 0
    previous_error = None  # None marks the first pass; an error of exactly 0 is a valid value
    while True:
        # Evaluate the whole curve at once. The former per-row chained
        # assignment (df['new vals'].iloc[i] = v) is not guaranteed to write
        # back into the frame and never does under pandas Copy-on-Write.
        modelled = outside_co2 - (outside_co2 - fix) * np.exp(-val * elapsed)
        error = np.sum((measured - modelled) ** 2)

        # Stop as soon as the fit stops improving. Using None instead of the
        # old `X == 0` sentinel lets a perfect fit (error 0, e.g. a one-row
        # file) end the search rather than loop forever.
        if previous_error is not None and not previous_error > error:
            break
        previous_error = error
        val = val + step
    return val * 60
