In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from pptx import Presentation
from pptx.util import Inches

In [47]:
def exponential_function(x, a, b):
    """Evaluate the exponential model ``a * exp(b * x)``.

    This is the model function handed to ``curve_fit``: ``x`` is the
    independent variable, ``a`` the scale factor and ``b`` the rate in the
    exponent.
    """
    exponent = b * x
    return a * np.exp(exponent)

def linear_function(x, a, b):
    """Evaluate the straight line ``a * x + b`` (slope ``a``, intercept ``b``)."""
    scaled = a * x
    return scaled + b

def save_plot_as_image(time_column, pm25_column, fitted_pm25, params, filename):
    """Display measured values together with a fitted curve.

    Despite its name, this function only shows the figure: nothing is written
    to disk and nothing is returned.

    Parameters
    ----------
    time_column : array-like
        x values shared by the scatter points and the fitted line.
    pm25_column : array-like
        Measured y values, drawn as scatter points.
    fitted_pm25 : array-like
        Fitted y values, drawn as a red line.
    params : any
        Accepted for call compatibility; not used.
    filename : str
        Text appended to the plot title.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(time_column, pm25_column, label='Original Data')
    ax.plot(time_column, fitted_pm25, label='Fitted Curve', color='red')
    ax.set_xlabel('Time')
    ax.set_ylabel('PM 2.5')
    # The trailing space keeps the label from running into the fixed title text.
    ax.set_title('Fitted Curve of PM 2.5 Data ' + filename)
    ax.legend()
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def cal_stats(time_column, pm25_column, ln_pm25_column):
    """Fit an exponential and a straight-line model and report rate and R-squared.

    Two least-squares fits are made against ``time_column``:

    * ``exponential_function`` (``a * exp(b * x)``) against ``pm25_column``
    * ``linear_function`` (``a * x + b``) against ``ln_pm25_column``

    Parameters
    ----------
    time_column : array-like
        Independent variable for both fits.
    pm25_column : array-like
        Values fitted with the exponential model.
    ln_pm25_column : array-like
        Values fitted with the straight line.

    Returns
    -------
    tuple
        ``(abs(b), r2_exponential, abs(slope), r2_linear)`` where ``b`` is the
        fitted exponent of the exponential model and ``slope`` is the fitted
        slope of the straight line.
    """
    # The straight-line fit needs no starting guess, so it runs first.
    params_l, _ = curve_fit(linear_function, time_column, ln_pm25_column)

    # Without p0, curve_fit starts the non-linear fit from a = b = 1. The line
    # fit gives a better start: ln(y) = slope * x + intercept corresponds to
    # y = exp(intercept) * exp(slope * x).
    # NOTE(review): assumes ln_pm25_column is the natural log of pm25_column;
    # if it is not, this is still only a starting point for the optimiser.
    with np.errstate(over='ignore'):
        initial_guess = np.array([np.exp(params_l[1]), params_l[0]])
    if not np.all(np.isfinite(initial_guess)):
        initial_guess = None  # fall back to curve_fit's default start
    params, _ = curve_fit(
        exponential_function, time_column, pm25_column, p0=initial_guess
    )

    # Evaluate both fitted models on the measured time points.
    fitted_pm25 = exponential_function(time_column, *params)
    linear_fitted_ln_pm25 = linear_function(time_column, *params_l)

    # Each R-squared is computed in the space its model was fitted in.
    r_squared = r2_score(pm25_column, fitted_pm25)
    r_squared_l = r2_score(ln_pm25_column, linear_fitted_ln_pm25)

    return abs(params[1]), r_squared, abs(params_l[0]), r_squared_l

def exponetial_fit_all(file_path, file_list, image_name=''):
    """Fit every row of one CSV file and screen the result.

    The file must provide the columns ``'Time In Hour'``, ``'PM 2.5'`` and
    ``'LN(PM 2.5)'``; they are passed unchanged to ``cal_stats``.

    A result is rejected when both R-squared values are below 0.9, or when
    the two rate estimates differ by more than 1. For a rejected file,
    ``file_path`` is appended to ``file_list`` and four zeros are returned.

    Parameters
    ----------
    file_path : str
        CSV file to read.
    file_list : list
        Collects the paths of rejected files (mutated in place).
    image_name : str, optional
        Not used.

    Returns
    -------
    tuple
        The four values returned by ``cal_stats``, or ``(0, 0, 0, 0)`` when
        the file was rejected.
    """
    frame = pd.read_csv(file_path)
    ach_exp, r2_exp, ach_lin, r2_lin = cal_stats(
        frame['Time In Hour'], frame['PM 2.5'], frame['LN(PM 2.5)']
    )

    both_fits_poor = r2_exp < 0.9 and r2_lin < 0.9
    if not both_fits_poor and not abs(ach_exp - ach_lin) > 1:
        return ach_exp, r2_exp, ach_lin, r2_lin

    # Rejected: remember the file and return placeholder zeros.
    file_list.append(file_path)
    return 0, 0, 0, 0


In [51]:
def exponetial_fit(file_path, file_list, image_name='', tolerance=20):
    """Fit only the leading part of a CSV file, up to the first flagged row.

    The percentage change of ``'PM 2.5'`` between consecutive rows is
    computed. A row is flagged when the change to the *next* row is not at
    most ``tolerance`` percent; this includes the final row, which has no
    next row. Rows whose own change is undefined (such as the first row) are
    never flagged. Rows from the start up to and including the first flagged
    row are passed to ``cal_stats``.

    Unlike ``exponetial_fit_all``, the fitted values are returned even when
    the quality check fails; the file is only recorded in ``file_list``.

    Parameters
    ----------
    file_path : str
        CSV file providing ``'Time In Hour'``, ``'PM 2.5'`` and
        ``'LN(PM 2.5)'`` columns.
    file_list : list
        Collects paths whose fit fails the quality check (mutated in place).
    image_name : str, optional
        Not used.
    tolerance : float, optional
        Largest row-to-row percentage change that does not flag a row.
        Defaults to 20.

    Returns
    -------
    tuple
        ``(ach_exponential, r2_exponential, ach_linear, r2_linear,
        index_to_slice, length)`` where ``index_to_slice`` is the index label
        of the last row used and ``length`` is the row count of the file.

    Raises
    ------
    ValueError
        If the file contains no data rows.
    """
    df = pd.read_csv(file_path)
    length = len(df['PM 2.5'])

    pct_change = df['PM 2.5'].pct_change() * 100
    # shift(-1) lines each row up with the change that happens after it.
    within_tolerance = (pct_change.shift(-1) <= tolerance) | pct_change.isna()

    flagged = df.index[(~within_tolerance).to_numpy()]
    if len(flagged) > 0:
        index_to_slice = flagged[0]
    elif length > 0:
        # Nothing flagged (e.g. a single-row file): keep every row.
        index_to_slice = df.index[-1]
    else:
        raise ValueError(f"No data rows to fit in {file_path}")

    # .loc slicing is label based and includes the end label.
    # NOTE(review): presumably the frame keeps read_csv's default integer index.
    pm25_column = df.loc[:index_to_slice, 'PM 2.5']
    time_column = df.loc[:index_to_slice, 'Time In Hour']
    ln_pm25_column = df.loc[:index_to_slice, 'LN(PM 2.5)']

    params, r_squared, params_l, r_squared_l = cal_stats(
        time_column, pm25_column, ln_pm25_column
    )

    # Same quality check as exponetial_fit_all, but the values are still returned.
    if (r_squared < 0.9 and r_squared_l < 0.9) or abs(params - params_l) > 1:
        file_list.append(file_path)

    return params, r_squared, params_l, r_squared_l, index_to_slice, length

In [49]:
# Fit every CSV below the two "Processed" folders and write one summary file
# per group. Files rejected by exponetial_fit_all are collected in file_list.
folders = ['C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\CBHS\\Processed\\',
           'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\']
# Month-day keys (no zero padding) of the dates written to 'ACH with C-R.csv'.
cr = [ '12-23', '1-13', '1-27', '2-10', '2-24', '3-10', '3-24', '4-7','4-21','4-28', '5-5', '5-12','5-19', '2-16', '2-20', '2-22' ]

ex_ach_list = []
ln_ach_list = []
ex_r_list = []
ln_r_list = []
date_list = []
id_list = []

ex_ach_list_w = []
ln_ach_list_w = []
ex_r_list_w = []
ln_r_list_w = []
date_list_w = []
id_list_w = []

file_list = []

# Destination lists per group, in the order the row values are built below.
with_cr_columns = (id_list, ex_ach_list, ex_r_list, ln_ach_list, ln_r_list, date_list)
without_cr_columns = (id_list_w, ex_ach_list_w, ex_r_list_w, ln_ach_list_w, ln_r_list_w, date_list_w)

for folder in folders:
    subfolders = [f for f in os.listdir(folder) if os.path.isdir(os.path.join(folder, f))]

    for subfolder in subfolders:
        path = os.path.join(folder, subfolder)
        files = [f for f in os.listdir(path) if f.lower().endswith('.csv')]
        for file in files:
            exACH, exR, lnACH, lnR = exponetial_fit_all(os.path.join(path, file), file_list)

            # The 4th and 5th dash-separated fields of the file name are read
            # as month and day. Converting to int drops zero padding, so a
            # name ending in '4-07.csv' yields the key '4-7' used in `cr`.
            name_parts = file.split('-')
            month = int(name_parts[3])
            day = int(name_parts[4].split('.')[0])
            date = f'{month}-{day}'
            # December files are dated 2022, every other month 2023.
            year = 2022 if month == 12 else 2023
            date_object = pd.to_datetime(f'{date}-{year}', format='%m-%d-%Y')

            row = (subfolder, exACH, exR, lnACH, lnR, date_object)
            columns = with_cr_columns if date in cr else without_cr_columns
            for column, value in zip(columns, row):
                column.append(value)

data = {
  'ID' : id_list,
  'ACH Exponetial Fit' : ex_ach_list ,
  'R Sqaure Exponetial Fit' : ex_r_list,
  'ACH Linear Fit' : ln_ach_list,
  'R Square Linear Fit' : ln_r_list,
  'Date' : date_list,
  'Diff ACH' : np.abs(np.array(ex_ach_list)-np.array(ln_ach_list))
}

data_w = {
  'ID' : id_list_w,
  'ACH Exponetial Fit' : ex_ach_list_w,
  'R Sqaure Exponetial Fit' : ex_r_list_w,
  'ACH Linear Fit' : ln_ach_list_w,
  'R Square Linear Fit' : ln_r_list_w,
  'Date' : date_list_w,
  'Diff ACH' : np.abs(np.array(ex_ach_list_w)-np.array(ln_ach_list_w))
}

df = pd.DataFrame(data)
df_w = pd.DataFrame(data_w)

df.to_csv('ACH with C-R.csv',index=False)

df_w.to_csv('ACH without C-R.csv',index=False)




In [52]:
# Re-fit the files collected in file_list with exponetial_fit, which only
# uses the leading rows of each file, and write one summary file per group.
ex_ach_list = []
ln_ach_list = []
ex_r_list = []
ln_r_list = []
date_list = []
id_list = []
slice_list = []
len_list = []

ex_ach_list_w = []
ln_ach_list_w = []
ex_r_list_w = []
ln_r_list_w = []
date_list_w = []
id_list_w = []
slice_list_w = []
len_list_w = []

# Receives the paths that still fail the quality check; not used afterwards.
b = []

# Destination lists per group, in the order the row values are built below.
with_cr_columns = (ex_ach_list, ln_ach_list, ex_r_list, ln_r_list,
                   date_list, slice_list, len_list, id_list)
without_cr_columns = (ex_ach_list_w, ln_ach_list_w, ex_r_list_w, ln_r_list_w,
                      date_list_w, slice_list_w, len_list_w, id_list_w)

for file in file_list:
    exACH, exR, lnACH, lnR, slc, n_rows = exponetial_fit(file, b)

    # The last path component is the file name, the one before it the ID folder.
    path_parts = file.split('\\')
    file_name = path_parts[-1]
    source_id = path_parts[-2]

    # The 4th and 5th dash-separated fields of the file name are read as
    # month and day. Converting to int drops zero padding, so a name ending
    # in '4-07.csv' yields the key '4-7' used in `cr`.
    name_parts = file_name.split('-')
    month = int(name_parts[3])
    day = int(name_parts[4].split('.')[0])
    date = f'{month}-{day}'
    # December files are dated 2022, every other month 2023.
    year = 2022 if month == 12 else 2023
    date_object = pd.to_datetime(f'{date}-{year}', format='%m-%d-%Y')

    row = (exACH, lnACH, exR, lnR, date_object, slc, n_rows, source_id)
    columns = with_cr_columns if date in cr else without_cr_columns
    for column, value in zip(columns, row):
        column.append(value)


data = {
  'ID' : id_list,
  'ACH Exponetial Fit' : ex_ach_list ,
  'R Sqaure Exponetial Fit' : ex_r_list,
  'ACH Linear Fit' : ln_ach_list,
  'R Square Linear Fit' : ln_r_list,
  'Date' : date_list,
  'Diff ACH' : np.abs(np.array(ex_ach_list)-np.array(ln_ach_list)),
  'length' : len_list,
  'slice index': slice_list
}

data_w = {
  'ID' : id_list_w,
  'ACH Exponetial Fit' : ex_ach_list_w,
  'R Sqaure Exponetial Fit' : ex_r_list_w,
  'ACH Linear Fit' : ln_ach_list_w,
  'R Square Linear Fit' : ln_r_list_w,
  'Date' : date_list_w,
  'Diff ACH' : np.abs(np.array(ex_ach_list_w)-np.array(ln_ach_list_w)),
  'length' : len_list_w,
  'slice index': slice_list_w
}

df = pd.DataFrame(data)
df_w = pd.DataFrame(data_w)

df.to_csv('ACH with C-R-High-R.csv',index=False)

df_w.to_csv('ACH without C-R-High-R.csv',index=False)

In [None]:
# Fit a hand-picked list of files from folder 45 and group the results by date.
files = [
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-2-3.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-2-16.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-2-23.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-2-24.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-3-09.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-3-10.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-3-23.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-3-24.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-4-06.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-4-07.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-4-20.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-4-21.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-5-4.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-5-5.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-5-11.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-5-12.csv',

    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-5-18.csv',
    'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\45\\MOD-PM-00566-5-19.csv',
]

# Month-day key for each entry of `files`, in the same order (no zero padding).
dates = [
    '2-3',
    '2-16',
    '2-23',
    '2-24',
    '3-9',
    '3-10',
    '3-23',
    '3-24',
    '4-6',
    '4-7',
    '4-20',
    '4-21',
    '5-4',
    '5-5',
    '5-11',
    '5-12',
    '5-18',
    '5-19'
]
# The two lists are matched by position, so they must stay the same length.
assert len(files) == len(dates), "files and dates must line up one-to-one"

# Month-day keys of the dates that go into the first group of result lists.
cr = [ '12-23', '1-13', '1-27', '2-10', '2-24', '3-10', '3-24', '4-7','4-21','4-28', '5-5', '5-12','5-19', '2-16', '2-20', '2-22' ]

ex_ach_list = []
ln_ach_list = []
ex_r_list = []
ln_r_list = []
date_list = []

ex_ach_list_w = []
ln_ach_list_w = []
ex_r_list_w = []
ln_r_list_w = []
date_list_w = []

# exponetial_fit appends to its second argument when a fit fails the quality
# check, so it must be given a list.
rejected_files = []

# Destination lists per group, in the order the row values are built below.
with_cr_columns = (ex_ach_list, ln_ach_list, ex_r_list, ln_r_list, date_list)
without_cr_columns = (ex_ach_list_w, ln_ach_list_w, ex_r_list_w, ln_r_list_w, date_list_w)

for file, date in zip(files, dates):
    if os.path.exists(file):
        print('File name:- ', file.split('\\')[-1])
        # exponetial_fit returns six values; only the first four (rate and
        # R-squared of each fit) are needed here.
        exACH, exR, lnACH, lnR = exponetial_fit(file, rejected_files, 'ID-45 D-' + date)[:4]

        row = (exACH, lnACH, exR, lnR, date)
        columns = with_cr_columns if date in cr else without_cr_columns
        for column, value in zip(columns, row):
            column.append(value)
    else:
        print('************************************************************************')
        print('Path not exits:- ', file.split('\\')[-1])
        print('************************************************************************')

# NOTE(review): the ID is hard-coded to 47 although every path above is in
# folder '45' and the label passed to exponetial_fit says 'ID-45' — confirm
# which one is intended.
data = {
  'ID' : [47] * len(ex_ach_list),
  'ACH Exponetial Fit' : [abs(number) for number in ex_ach_list] ,
  'R Sqaure Ex' : ex_r_list,
  'ACH Linear Fir' : [abs(number) for number in ln_ach_list],
  'R Square Ln' : ln_r_list,
  'date_list' : date_list
}

data_w = {
  'ID' : [47] * len(ex_ach_list_w),
  'ACH Exponetial Fit' : [abs(number) for number in ex_ach_list_w],
  'R Sqaure Ex' : ex_r_list_w,
  'ACH Linear Fir' : [abs(number) for number in ln_ach_list_w],
  'R Square Ln' : ln_r_list_w,
  'date_list' : date_list_w
}

# df = pd.DataFrame(data)
# df_w = pd.DataFrame(data_w)

# if os.path.exists('C-R ACH.csv'):
#     cr_ach = pd.read_csv('C-R ACH.csv')
# else:
#     cr_ach = pd.DataFrame()
# merged_df = pd.concat([cr_ach,df], axis=0)
# merged_df.to_csv('C-R ACH.csv',index=False)

# if os.path.exists('ACH.csv'):
#     ach = pd.read_csv('ACH.csv')
# else:
#     ach = pd.DataFrame()

# merged_df = pd.concat([ach,df_w], axis=0)
# merged_df.to_csv('ACH.csv',index=False)        

In [123]:
# Save one exponential-fit plot and one linear-fit plot for every CSV found
# below the CBHS "Processed" folder.
folder = 'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\CBHS\\Processed\\'


subfolders = [f for f in os.listdir(folder) if os.path.isdir(os.path.join(folder, f))]

# NOTE: this definition replaces the earlier five-argument save_plot_as_image
# defined higher up in the notebook; the extra `path` argument is required.
def save_plot_as_image(time_column, pm25_column, fitted_pm25, params, filename, path):
    """Plot measured values and a fitted curve, and save the figure as a PNG.

    Parameters
    ----------
    time_column : array-like
        x values shared by the scatter points and the fitted line.
    pm25_column : array-like
        Measured y values, drawn as scatter points.
    fitted_pm25 : array-like
        Fitted y values, drawn as a red line.
    params : any
        Accepted for call compatibility; not used.
    filename : str
        Appended to the plot title and used as the PNG file name.
    path : str
        Output directory; created if it does not exist yet.

    Returns
    -------
    str
        Path of the written image.
    """
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(path, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(time_column, pm25_column, label='Original Data')
    ax.plot(time_column, fitted_pm25, label='Fitted Curve', color='red')
    ax.set_xlabel('Time')
    ax.set_ylabel('PM 2.5')
    ax.set_title('Fitted Curve of PM 2.5 Data ' + filename)
    ax.legend()
    image_path = os.path.join(path, f"{filename}.png")
    fig.savefig(image_path)
    # Close the figure: this runs once per file and figures would pile up.
    plt.close(fig)
    return image_path

for subfolder in subfolders:
    path = os.path.join(folder, subfolder)
    files = [f for f in os.listdir(path) if f.lower().endswith('.csv')]
    for file in files:
        # The 4th and 5th dash-separated fields of the file name are read as
        # month and day; December files are dated 2022, all others 2023.
        name_parts = file.split('-')
        year_suffix = '-2023' if name_parts[3] != '12' else '-2022'
        date = name_parts[3] + '-' + name_parts[4].split('.')[0] + year_suffix
        image_name = 'ID - {} Date - {}'.format(subfolder, date)
        df = pd.read_csv(os.path.join(path, file))

        time_column = df['Time In Hour']
        pm25_column = df['PM 2.5']
        ln_pm25_column = df['LN(PM 2.5)']

        # The straight-line fit needs no starting guess, so it runs first.
        params_l, covariance_l = curve_fit(linear_function, time_column, ln_pm25_column)

        # Without p0, curve_fit starts the non-linear fit from a = b = 1. The
        # line fit gives a better start: ln(y) = slope * x + intercept
        # corresponds to y = exp(intercept) * exp(slope * x).
        # NOTE(review): assumes 'LN(PM 2.5)' is the natural log of 'PM 2.5';
        # if it is not, this is still only a starting point for the optimiser.
        with np.errstate(over='ignore'):
            initial_guess = np.array([np.exp(params_l[1]), params_l[0]])
        if not np.all(np.isfinite(initial_guess)):
            initial_guess = None  # fall back to curve_fit's default start
        params, covariance = curve_fit(exponential_function, time_column, pm25_column, p0=initial_guess)

        # Calculate the fitted values using the obtained parameters
        fitted_pm25 = exponential_function(time_column, *params)
        liner_fiited_pm25 = linear_function(time_column, *params_l)

        save_plot_as_image(time_column,pm25_column,fitted_pm25,params,image_name, 'Exponential')
        save_plot_as_image(time_column,ln_pm25_column,liner_fiited_pm25,params_l,image_name, 'Linear')


In [4]:
# Add an 'Id' column holding a short label for each numeric 'ID', then write
# the table back to the file it was read from.
file = 'C:\\Users\\vrana7\\Desktop\\CIA\\Data Visualization\\data\\ach-no-cr.csv'
df = pd.read_csv(file)

# Numeric ID -> label. Values missing from this table are left unchanged by
# Series.replace.
ID = {
    10: 'A1', 14: 'A2', 15: 'A3', 16: 'A4', 18: 'A5',
    19: 'A6', 20: 'A7', 22: 'A8', 33: 'A9', 34: 'A10',
    39: 'B1', 40: 'B2', 41: 'B3', 42: 'B4', 43: 'B5',
    44: 'B6', 45: 'B7', 46: 'B8', 47: 'B9',
}

df = df.assign(Id=df['ID'].replace(ID))
df.to_csv(file, index=False)