In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from pptx import Presentation
from pptx.util import Inches

In [35]:
def linear_function(x, ach, interSection):
    """Evaluate the straight line ``ach * x + interSection``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the line is evaluated.
    ach : float
        Slope of the line.
    interSection : float
        Intercept of the line (its value at ``x == 0``).

    Returns
    -------
    Same kind as ``x``
        The line evaluated at ``x``.
    """
    slope_term = ach * x
    return slope_term + interSection

def linear_fit(file_path, tolerance=0, count=0,
               retry_tolerances=(70, 60, 50, 40, 30, 20), r2_threshold=0.95):
    """Fit a straight line to ``log(PM 2.5)`` versus ``Time In Hour`` for one CSV.

    The first attempt (``count == 0``) fits every row of the file.  While the
    fit's R² stays below ``r2_threshold`` the data are truncated and refitted:
    the series is cut after the first row whose change to the *next* sample
    exceeds ``tolerance`` percent, and ``tolerance`` steps through
    ``retry_tolerances``.  The result of the last attempt is returned even if
    it never reaches the threshold.

    The file is read once; every retry reuses the same frame.

    Parameters
    ----------
    file_path : str or path-like
        CSV file with the columns ``'Time In Hour'`` and ``'PM 2.5'``.
    tolerance : float, default 0
        Maximum allowed percentage change between consecutive ``'PM 2.5'``
        samples.  Ignored while ``count`` is 0, because that attempt uses all
        rows.
    count : int, default 0
        Position in the retry schedule.  0 means "fit all rows"; a value ``k``
        greater than 0 truncates with ``tolerance`` and, if that fit is still
        poor, continues with ``retry_tolerances[k]``.
    retry_tolerances : sequence of float, default (70, 60, 50, 40, 30, 20)
        Tolerances tried, in order, after a poor fit.
    r2_threshold : float, default 0.95
        A fit with R² below this value triggers the next retry.

    Returns
    -------
    tuple
        ``(abs(slope), r_squared, index_to_slice, length)`` where
        ``index_to_slice`` is the label of the last row used (0 when all rows
        were fitted) and ``length`` is the number of rows in the file.
    """
    df = pd.read_csv(file_path)
    length = len(df)
    time_all = df['Time In Hour']
    log_pm25_all = np.log(df['PM 2.5'])
    pct_change = None  # computed lazily: only needed once a retry truncates the data

    while True:
        if count != 0:
            if pct_change is None:
                pct_change = df['PM 2.5'].pct_change() * 100
            # A row is "within tolerance" when the change to the NEXT sample is
            # small enough; the first row always qualifies (its own change is NaN).
            within_tolerance = (pct_change.shift(-1) <= tolerance) | pct_change.isna()
            flagged = within_tolerance[~within_tolerance].index
            # If no row is flagged, keep the whole series instead of raising IndexError.
            index_to_slice = flagged[0] if len(flagged) else df.index[-1]
            # .loc slicing is label-based and includes the flagged row itself.
            time_column = time_all.loc[:index_to_slice]
            pm25_column = log_pm25_all.loc[:index_to_slice]
            # NOTE(review): the slice can be as short as two rows, where a line
            # fits exactly and R² is trivially 1.0 — consider a minimum length.
        else:
            index_to_slice = 0
            time_column = time_all
            pm25_column = log_pm25_all

        params, _ = curve_fit(linear_function, time_column, pm25_column)
        fitted_pm25 = linear_function(time_column, *params)
        r_squared = r2_score(pm25_column, fitted_pm25)

        can_retry = 0 <= count < len(retry_tolerances)
        # "r_squared < threshold" (not ">=") so a NaN score ends the loop.
        if r_squared < r2_threshold and can_retry:
            tolerance = retry_tolerances[count]
            count += 1
            continue

        return abs(params[0]), r_squared, index_to_slice, length

In [36]:
# Root folders; each immediate sub-folder is scanned for CSV files and its name
# is recorded as the row's 'ID'.
folders = ['C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\CBHS\\Processed\\',
           'C:\\Users\\vrana7\\Documents\\CIA\\Humidifier-Experiment\\RDRK\\Processed\\']
# Month-day labels whose results go to the "cr" table; every other date goes
# to the "no-cr" table.
cr = [ '12-23', '1-13', '1-27', '2-10', '2-24', '3-10', '3-24', '4-7','4-21','4-28', '5-5', '5-12','5-19', '2-16', '2-20', '2-22' ]

# Per-column result lists for the "cr" table.
cr_ach = []
cr_error = []      # holds the R² returned by linear_fit, despite the name
date_list = []
id_list = []
index = []
length = []

# Per-column result lists for the "no-cr" table.
no_cr_ach = []
no_cr_error = []   # holds the R² returned by linear_fit, despite the name
date_list_w = []
id_list_w = []
index_w = []
length_w = []

# Same column order for both groups: ID, ACH, R², date, total rows, slice index.
cr_columns = (id_list, cr_ach, cr_error, date_list, length, index)
no_cr_columns = (id_list_w, no_cr_ach, no_cr_error, date_list_w, length_w, index_w)

# Paths of files whose final R² is below 0.9.
file_list = []
for folder in folders:
    # os.listdir returns entries in arbitrary order; sort so the output rows
    # are reproducible from run to run.
    subfolders = sorted(f for f in os.listdir(folder) if os.path.isdir(os.path.join(folder, f)))

    for subfolder in subfolders:
        path = os.path.join(folder, subfolder)
        files = sorted(f for f in os.listdir(path) if f.lower().endswith('.csv'))
        for file in files:
            file_path = os.path.join(path, file)
            ach, error, index_to_slice, total_length = linear_fit(file_path)

            # The 4th and 5th dash-separated fields of the file name are read as
            # month and day (the day field also carries the file extension).
            name_parts = file.split('-')
            month = name_parts[3]
            day = name_parts[4].split('.')[0]
            date = month + '-' + day
            # The year is not in the name: month 12 is taken as 2022, all others as 2023.
            date_string = date + ('-2022' if month == '12' else '-2023')
            date_object = pd.to_datetime(date_string, format='%m-%d-%Y')

            target_columns = cr_columns if date in cr else no_cr_columns
            row = (subfolder, ach, error, date_object, total_length, index_to_slice)
            for column, value in zip(target_columns, row):
                column.append(value)

            if error < 0.9:
                file_list.append(file_path)

data = {
  'ID' : id_list,
  'ACH Linear Fit' : cr_ach,
  'R Square Linear Fit' : cr_error,
  'Date' : date_list,
  'Total Data Point' : length,
  'Index to slice' : index
}

data_w = {
  'ID' : id_list_w,
  'ACH Linear Fit' : no_cr_ach,
  'R Square Linear Fit' : no_cr_error,
  'Date' : date_list_w,
  'Total Data Point' : length_w,
  'Index to slice' : index_w
}

df = pd.DataFrame(data)

df_w = pd.DataFrame(data_w)

df.to_csv('C:\\Users\\vrana7\\Desktop\\CIA\\Data Visualization\\data\\ach-cr-1.csv',index=False)
df_w.to_csv('C:\\Users\\vrana7\\Desktop\\CIA\\Data Visualization\\data\\ach-no-cr-1.csv',index=False)

