In [5]:
import os
import glob
import pandas as pd
from pathlib import Path

In [1]:
class Combine:
    """Combine pollutant CSV files into one-species and multiple-species files.

    This class can be used to do two things:

    * Combine multiple data files for one species into a single
      "one-species combo file". Call, in order:
      ``__init__``, ``specombine``, ``colrename``, ``specsv``.
    * Combine multiple one-species combo files into a single long-format
      "multiple-species combo file". Call, in order:
      ``__init__`` (see the note on ``species``), ``allcombine``,
      ``alltidy``, ``allcsv``.
    """

    def __init__(self, species, path_in, path_out1, path_out2):
        """Step 1: record which species to process and where files live.

        Args:
            species: Name of the pollutant species. It is used as the prefix
                of the output file name (``<species>_combined.csv``). When
                combining multiple species, pass ``"ALL"``.
            path_in: Folder holding the data files that ``specombine`` reads.
            path_out1: Folder where ``specsv`` writes the one-species combo
                file. ``allcombine`` reads its inputs from this same folder.
            path_out2: Folder where ``allcsv`` writes the multiple-species
                combo file. Only used by ``allcsv``; any value works if that
                method is never called.
        """
        self.species = species
        self.pathin = path_in
        self.pathout1 = path_out1
        self.pathout2 = path_out2

    @staticmethod
    def _read_tagged(directory, tag_column):
        """Read every ``*.csv`` directly inside ``directory`` into one frame.

        Each file's rows get an extra ``tag_column`` holding that file's name.

        Returns:
            A ``(files, frame)`` tuple: the sorted list of ``Path`` objects
            that were read, and their row-wise concatenation with a fresh
            0..n-1 index (an empty DataFrame when the folder has no CSVs).

        Raises:
            FileNotFoundError: ``directory`` does not exist.
            NotADirectoryError: ``directory`` exists but is not a folder.
        """
        folder = Path(directory)
        if not folder.is_dir():
            # Fail loudly on a bad path instead of silently producing an
            # empty result.
            error = NotADirectoryError if folder.exists() else FileNotFoundError
            raise error("Not a readable folder: {}".format(folder))

        files = sorted(folder.glob('*.csv'))
        frames = []
        for file in files:
            frame = pd.read_csv(file)
            frame[tag_column] = file.name
            frames.append(frame)

        # A single concat replaces repeated DataFrame.append calls, which were
        # removed in pandas 2.0. pd.concat rejects an empty list, hence the
        # explicit fallback.
        combined = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        return files, combined

    def specombine(self):
        """Step 2: combine all files for one species into ``self.df_append``.

        Reads every ``*.csv`` in ``self.pathin`` and adds a ``source`` column
        with the originating file name. The working directory is left
        untouched. Also sets ``self.source_files`` (sorted paths) and
        ``self.csv_files`` (the matching file names).
        """
        self.source_files, self.df_append = self._read_tagged(self.pathin, 'source')
        self.csv_files = [file.name for file in self.source_files]

    def colrename(self, subout, subin):
        """Step 3: rename columns of the one-species combo file if necessary.

        Every occurrence of the literal text ``subout`` in any column name of
        ``self.df_append`` is replaced with ``subin``.

        Args:
            subout: Text to remove from the column names (matched literally,
                not as a regular expression).
            subin: Text to put in its place.
        """
        # regex=False pins literal matching; the default differs between
        # pandas 1.x and 2.x.
        self.df_append.columns = self.df_append.columns.str.replace(
            subout, subin, regex=False
        )

    def _write_combined(self, frame, out_dir):
        """Write ``frame`` to ``<out_dir>/<species>_combined.csv``; return the path."""
        os.makedirs(out_dir, exist_ok=True)
        target = os.path.join(out_dir, self.species + "_combined.csv")
        # The index is written on purpose: alltidy() drops the first column of
        # the re-read file, which is this index column.
        frame.to_csv(target)
        return target

    def specsv(self):
        """Step 4: write the one-species combo file to csv.

        Writes ``self.df_append`` to ``<path_out1>/<species>_combined.csv``
        (creating the folder if needed) and stores that path in
        ``self.csvpath``.
        """
        self.csvpath = self._write_combined(self.df_append, self.pathout1)

    def allcombine(self):
        """Step 5 (step 2 for the multiple-species combo file).

        Combines every ``*.csv`` in ``self.pathout1`` into
        ``self.ALLdf_append`` and adds a ``species`` column holding each row's
        originating file name. Note that the input folder is ``path_out1``,
        not ``path_in``. Also sets ``self.ALLsource_files`` (sorted paths) and
        ``self.ALLcsv_files`` (the matching file names).
        """
        self.ALLsource_files, self.ALLdf_append = self._read_tagged(
            self.pathout1, 'species'
        )
        self.ALLcsv_files = [file.name for file in self.ALLsource_files]

    def alltidy(self):
        """Step 6 (step 3 for the multiple-species combo file): tidy the data.

        Replaces ``self.ALLdf_append`` with a long-format frame:

        * column ``x`` is renamed to ``site``;
        * the first column is dropped by position (see the note below);
        * ``date`` is taken from ``source``: the text after the last ``_`` and
          before the first following ``.``;
        * ``species`` is reduced to the text before its last ``_``
          (``"CO_combined.csv"`` becomes ``"CO"``);
        * every column containing ``spec_`` is melted into an ``hour`` column
          (the column name with ``spec_T`` removed, kept as text) and a
          ``concentration`` column.

        Also sets ``self.spec_cols`` and ``self.id_cols`` to the melted and the
        identifier column names.

        Note: the first column is dropped unconditionally, so call this once
        per ``allcombine()``. It expects files written by ``specsv()``, whose
        leading column is the saved index.
        """
        # .copy() so the column assignments below act on an independent frame
        # rather than on a slice of the combined data.
        tidy = self.ALLdf_append.rename(columns={'x': 'site'}).iloc[:, 1:].copy()

        last_name_part = tidy["source"].str.rsplit('_', n=1).str[-1]
        tidy["date"] = last_name_part.str.split('.', n=1).str[0]
        tidy["species"] = tidy["species"].str.rsplit('_', n=1).str[0]

        self.spec_cols = [col for col in tidy.columns if 'spec_' in col]
        self.id_cols = [col for col in tidy.columns if 'spec_' not in col]
        long_form = pd.melt(tidy, id_vars=self.id_cols, value_vars=self.spec_cols)

        long_form['variable'] = long_form['variable'].str.replace(
            "spec_T", "", regex=False
        )
        self.ALLdf_append = long_form.rename(
            columns={'variable': 'hour', 'value': 'concentration'}
        )

    def allcsv(self):
        """Step 7 (step 4 for the multiple-species combo file).

        Writes ``self.ALLdf_append`` to ``<path_out2>/<species>_combined.csv``
        (creating the folder if needed) and stores that path in
        ``self.ALLcsvpath``.
        """
        self.ALLcsvpath = self._write_combined(self.ALLdf_append, self.pathout2)

In [194]:
# Directory layout shared by every Combine instance below. Change OUTPUT_ROOT
# to point the whole notebook at a different data location.
OUTPUT_ROOT = "/nas/longleaf/home/adolwick/Output_CSVs"
COMBINED_DIR = OUTPUT_ROOT + "/combined"
ALL_DIR = COMBINED_DIR + "/ALL"


def make_combiner(species, path_in=None):
    """Build a Combine for ``species`` using the shared output folders.

    Args:
        species: Species name passed to Combine.
        path_in: Input folder; defaults to ``OUTPUT_ROOT/<species>``.

    Returns:
        A Combine whose path_out1 is COMBINED_DIR and path_out2 is ALL_DIR.
    """
    if path_in is None:
        path_in = OUTPUT_ROOT + "/" + species
    return Combine(species=species,
                   path_in=path_in,
                   path_out1=COMBINED_DIR,
                   path_out2=ALL_DIR)


COcombine = make_combiner("CO")
NOcombine = make_combiner("NO")
O3combine = make_combiner("O3")
NO2combine = make_combiner("NO2")
SO2combine = make_combiner("SO2")
# The all-species instance takes the one-species combo folder as its input.
ALLcombine = make_combiner("ALL", path_in=COMBINED_DIR)

# Read and stack the per-day files for each individual species.
for single_species in (COcombine, NOcombine, O3combine, NO2combine, SO2combine):
    single_species.specombine()

O3combine.df_append

Unnamed: 0,x,Lat,Lon,O3_T0,O3_T1,O3_T2,O3_T3,O3_T4,O3_T5,O3_T6,...,O3_T15,O3_T16,O3_T17,O3_T18,O3_T19,O3_T20,O3_T21,O3_T22,O3_T23,source
0,0,34.311798,-79.408539,0.017219,0.015469,0.013620,0.011783,0.009890,0.008464,0.009457,...,0.036316,0.037223,0.036859,0.035112,0.032097,0.027198,0.022723,0.020858,0.020588,O3_data_24hr_20130601.csv
1,1,34.417950,-79.383331,0.017266,0.015324,0.013123,0.010585,0.008716,0.007182,0.008983,...,0.036405,0.037088,0.037238,0.035530,0.032291,0.027430,0.022796,0.020883,0.020501,O3_data_24hr_20130601.csv
2,2,34.397034,-79.254700,0.017226,0.015218,0.013085,0.011680,0.009872,0.008439,0.009371,...,0.036476,0.036946,0.036620,0.035013,0.031536,0.026322,0.021979,0.021050,0.020481,O3_data_24hr_20130601.csv
3,3,34.375969,-79.126099,0.017124,0.015139,0.013004,0.011458,0.010306,0.008796,0.009061,...,0.036255,0.037244,0.036954,0.035701,0.031652,0.025922,0.021648,0.021036,0.020621,O3_data_24hr_20130601.csv
4,4,34.354759,-78.997620,0.016360,0.014824,0.013291,0.012041,0.010808,0.009477,0.009520,...,0.038020,0.037716,0.037024,0.035596,0.031735,0.025767,0.021051,0.020262,0.020072,O3_data_24hr_20130601.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1345,25,34.779041,-78.893860,0.019743,0.017631,0.015352,0.013220,0.012163,0.011606,0.011257,...,0.032069,0.031717,0.030533,0.028818,0.025046,0.022393,0.021786,0.020670,0.019165,O3_data_24hr_20130715and0531.csv
1346,26,34.948822,-79.256104,0.020195,0.018776,0.016957,0.015111,0.013997,0.013171,0.012893,...,0.040240,0.039797,0.038877,0.036956,0.030772,0.026216,0.023580,0.021622,0.020826,O3_data_24hr_20130715and0531.csv
1347,27,34.927742,-79.126587,0.019436,0.017606,0.015608,0.013940,0.013041,0.012358,0.011753,...,0.037800,0.037359,0.036132,0.033486,0.027163,0.025732,0.023108,0.021228,0.020680,O3_data_24hr_20130715and0531.csv
1348,28,34.906509,-78.997131,0.019341,0.018066,0.016080,0.013907,0.012465,0.011278,0.009966,...,0.034863,0.034719,0.033892,0.031703,0.026435,0.023275,0.021589,0.020609,0.019856,O3_data_24hr_20130715and0531.csv


In [195]:
# Give every species the same "spec" column prefix (e.g. "O3_T0" -> "spec_T0")
# so the one-species files share column names when they are stacked later.
for combiner, prefix in (
    (COcombine, "CO"),
    (NOcombine, "NO"),
    (O3combine, "O3"),
    (NO2combine, "NO2"),
    (SO2combine, "SO2"),
):
    combiner.colrename(subout=prefix, subin="spec")

O3combine.df_append

Unnamed: 0,x,Lat,Lon,spec_T0,spec_T1,spec_T2,spec_T3,spec_T4,spec_T5,spec_T6,...,spec_T15,spec_T16,spec_T17,spec_T18,spec_T19,spec_T20,spec_T21,spec_T22,spec_T23,source
0,0,34.311798,-79.408539,0.017219,0.015469,0.013620,0.011783,0.009890,0.008464,0.009457,...,0.036316,0.037223,0.036859,0.035112,0.032097,0.027198,0.022723,0.020858,0.020588,O3_data_24hr_20130601.csv
1,1,34.417950,-79.383331,0.017266,0.015324,0.013123,0.010585,0.008716,0.007182,0.008983,...,0.036405,0.037088,0.037238,0.035530,0.032291,0.027430,0.022796,0.020883,0.020501,O3_data_24hr_20130601.csv
2,2,34.397034,-79.254700,0.017226,0.015218,0.013085,0.011680,0.009872,0.008439,0.009371,...,0.036476,0.036946,0.036620,0.035013,0.031536,0.026322,0.021979,0.021050,0.020481,O3_data_24hr_20130601.csv
3,3,34.375969,-79.126099,0.017124,0.015139,0.013004,0.011458,0.010306,0.008796,0.009061,...,0.036255,0.037244,0.036954,0.035701,0.031652,0.025922,0.021648,0.021036,0.020621,O3_data_24hr_20130601.csv
4,4,34.354759,-78.997620,0.016360,0.014824,0.013291,0.012041,0.010808,0.009477,0.009520,...,0.038020,0.037716,0.037024,0.035596,0.031735,0.025767,0.021051,0.020262,0.020072,O3_data_24hr_20130601.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1345,25,34.779041,-78.893860,0.019743,0.017631,0.015352,0.013220,0.012163,0.011606,0.011257,...,0.032069,0.031717,0.030533,0.028818,0.025046,0.022393,0.021786,0.020670,0.019165,O3_data_24hr_20130715and0531.csv
1346,26,34.948822,-79.256104,0.020195,0.018776,0.016957,0.015111,0.013997,0.013171,0.012893,...,0.040240,0.039797,0.038877,0.036956,0.030772,0.026216,0.023580,0.021622,0.020826,O3_data_24hr_20130715and0531.csv
1347,27,34.927742,-79.126587,0.019436,0.017606,0.015608,0.013940,0.013041,0.012358,0.011753,...,0.037800,0.037359,0.036132,0.033486,0.027163,0.025732,0.023108,0.021228,0.020680,O3_data_24hr_20130715and0531.csv
1348,28,34.906509,-78.997131,0.019341,0.018066,0.016080,0.013907,0.012465,0.011278,0.009966,...,0.034863,0.034719,0.033892,0.031703,0.026435,0.023275,0.021589,0.020609,0.019856,O3_data_24hr_20130715and0531.csv


In [196]:
# Write one "<species>_combined.csv" per species into the shared path_out1 folder.
for combiner in (COcombine, NOcombine, O3combine, NO2combine, SO2combine):
    combiner.specsv()

In [197]:
# Stack every *.csv found in ALLcombine's path_out1 folder into ALLdf_append,
# tagging each row with its file name in a "species" column.
ALLcombine.allcombine()

# Inspect the stacked (still wide-format) frame.
ALLcombine.ALLdf_append

Unnamed: 0.1,Unnamed: 0,x,Lat,Lon,spec_T0,spec_T1,spec_T2,spec_T3,spec_T4,spec_T5,...,spec_T16,spec_T17,spec_T18,spec_T19,spec_T20,spec_T21,spec_T22,spec_T23,source,species
0,0,0,34.311798,-79.408539,0.088209,0.081118,0.073615,0.071216,0.071763,0.073557,...,0.082697,0.082952,0.082191,0.081597,0.081015,0.079645,0.077133,0.074063,CO_data_24hr_20130601.csv,CO_combined.csv
1,1,1,34.417950,-79.383331,0.086638,0.085448,0.081550,0.079113,0.080889,0.085779,...,0.084494,0.085838,0.085522,0.085358,0.084805,0.083148,0.080689,0.076966,CO_data_24hr_20130601.csv,CO_combined.csv
2,2,2,34.397034,-79.254700,0.071846,0.070907,0.069899,0.069123,0.068527,0.068304,...,0.083975,0.084393,0.083158,0.082167,0.080683,0.079592,0.077936,0.075034,CO_data_24hr_20130601.csv,CO_combined.csv
3,3,3,34.375969,-79.126099,0.075366,0.073556,0.071086,0.069397,0.069020,0.069467,...,0.086382,0.085569,0.084422,0.083480,0.081230,0.080317,0.078809,0.076369,CO_data_24hr_20130601.csv,CO_combined.csv
4,4,4,34.354759,-78.997620,0.079879,0.078802,0.077132,0.075806,0.075510,0.077173,...,0.086434,0.084834,0.084226,0.083452,0.081623,0.081043,0.080653,0.080620,CO_data_24hr_20130601.csv,CO_combined.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6745,1345,25,34.779041,-78.893860,0.000082,0.000096,0.000100,0.000105,0.000110,0.000094,...,0.000049,0.000053,0.000053,0.000052,0.000036,0.000040,0.000042,0.000040,SO2_data_24hr_20130715and0531.csv,SO2_combined.csv
6746,1346,26,34.948822,-79.256104,0.000048,0.000063,0.000081,0.000101,0.000120,0.000115,...,0.000040,0.000044,0.000048,0.000057,0.000044,0.000043,0.000044,0.000051,SO2_data_24hr_20130715and0531.csv,SO2_combined.csv
6747,1347,27,34.927742,-79.126587,0.000044,0.000061,0.000079,0.000113,0.000141,0.000132,...,0.000043,0.000045,0.000047,0.000049,0.000038,0.000038,0.000039,0.000042,SO2_data_24hr_20130715and0531.csv,SO2_combined.csv
6748,1348,28,34.906509,-78.997131,0.000061,0.000089,0.000125,0.000171,0.000191,0.000171,...,0.000046,0.000048,0.000051,0.000058,0.000038,0.000039,0.000041,0.000041,SO2_data_24hr_20130715and0531.csv,SO2_combined.csv


In [198]:
# Reshape ALLdf_append in place to long format (one row per hour/concentration value).
# alltidy() drops the first column on every call, so run this cell only once
# after allcombine().
ALLcombine.alltidy()

# Inspect the tidied frame.
ALLcombine.ALLdf_append

Unnamed: 0,site,Lat,Lon,source,species,date,hour,concentration
0,0,34.311798,-79.408539,CO_data_24hr_20130601.csv,CO,20130601,0,0.088209
1,1,34.417950,-79.383331,CO_data_24hr_20130601.csv,CO,20130601,0,0.086638
2,2,34.397034,-79.254700,CO_data_24hr_20130601.csv,CO,20130601,0,0.071846
3,3,34.375969,-79.126099,CO_data_24hr_20130601.csv,CO,20130601,0,0.075366
4,4,34.354759,-78.997620,CO_data_24hr_20130601.csv,CO,20130601,0,0.079879
...,...,...,...,...,...,...,...,...
161995,25,34.779041,-78.893860,SO2_data_24hr_20130715and0531.csv,SO2,20130715and0531,23,0.000040
161996,26,34.948822,-79.256104,SO2_data_24hr_20130715and0531.csv,SO2,20130715and0531,23,0.000051
161997,27,34.927742,-79.126587,SO2_data_24hr_20130715and0531.csv,SO2,20130715and0531,23,0.000042
161998,28,34.906509,-78.997131,SO2_data_24hr_20130715and0531.csv,SO2,20130715and0531,23,0.000041


In [199]:
# Write the tidied frame to "<path_out2>/ALL_combined.csv".
ALLcombine.allcsv()