# C2-C6

In [20]:
import pandas as pd
import os
import pathlib

try:
    FACTEURS = pd.read_csv("../facteurs_de_conversions_C2-C20.csv", sep=";")
except FileNotFoundError:
    print("facteurs_de_conversions_C2-C20.csv not found.")

def apply_conversion(
        data: pd.DataFrame,
) -> pd.DataFrame:
    print(data)
    cols = data.drop(['Volume'], axis=1).columns
    for head in cols:
        poll = FACTEURS[FACTEURS['Composé'] == head]
        x_facteur = poll["Facteur de conversion"].values[0]
        data[head] = data[head].apply(
            lambda x:
                x*x_facteur
            )
    return (data)

def fill_quarts(
        data: pd.DataFrame,
        type_appareil: str,
) -> pd.DataFrame:
    if type_appareil == 'C2-C6':

        shift_data = data.shift(freq='15min')
        data = pd.concat([data, shift_data], axis=0)

    if type_appareil == 'C6-C20':
        shift_data = data.shift(freq='15min')
        data = pd.concat([data, shift_data], axis=0)
        for i in range(2):
            
            shift_data = shift_data.shift(freq='15min')

            data = pd.concat([data, shift_data], axis=0)

    return (data.sort_index())


month = '02'
year = '2022'
type_appareil = [
    'C2-C6',
    'C6-C20',
]

file_directories = [f for f in pathlib.Path().glob(f'../test_data/{year}/{month}/*{type_appareil[0]}.Asc')]

C2C6_data = pd.DataFrame()
for file in file_directories:
    asc_data = pd.read_table(file)
    asc_data['Sampling date'] = pd.to_datetime(asc_data['Sampling date']).dt.round('15min')
    asc_data.set_index('Sampling date', inplace=True)

    asc_data = asc_data.drop(
    list(asc_data.filter(regex='Unnamed').columns),
    axis=1,
    )
    print(asc_data)
    asc_data = apply_conversion(asc_data)

    C2C6_data = pd.concat([C2C6_data, asc_data])

C2C6_data.sort_index(inplace=True)

C2C6_data = fill_quarts(
    data=C2C6_data,
    type_appareil=type_appareil[0]
    )
C2C6_data = C2C6_data[~C2C6_data.index.duplicated(keep='first')]

                     Volume  ETHANE  ETHYLENE  PROPANE  PROPENE  I-BUTANE  \
Sampling date                                                               
2022-01-31 23:30:00  100.88    1.78      0.98     0.96     0.04      0.10   
2022-02-01 00:00:00  100.91    1.77      0.91     0.39     0.00      0.11   
2022-02-01 00:30:00  100.95    1.80      0.89     0.40     0.00      0.10   
2022-02-01 01:00:00  101.00    1.85      0.98     0.28     0.00      0.12   
2022-02-01 01:30:00  101.09    1.72      0.86     0.34     0.00      0.08   
...                     ...     ...       ...      ...      ...       ...   
2022-02-28 21:00:00  101.72    3.30      8.98     2.29     0.54      0.32   
2022-02-28 21:30:00  101.72    2.79      8.42     1.20     0.52      0.20   
2022-02-28 22:00:00  101.65    2.87      8.44     1.20     0.32      0.24   
2022-02-28 22:30:00  101.65    2.79      8.54     1.19     0.46      0.19   
2022-02-28 23:00:00  101.61    4.17      9.68     2.40     0.63      0.48   

TypeError: can't multiply sequence by non-int of type 'float'

In [7]:
C2C6_data["2022-02-01 19":"2022-02-02"]["BENZENE"]

Sampling date
2022-02-01 19:00:00    0.03
2022-02-01 19:15:00    0.00
2022-02-01 19:30:00    0.00
2022-02-01 19:45:00    0.00
2022-02-01 20:00:00    0.00
                       ... 
2022-02-02 22:45:00    0.70
2022-02-02 23:00:00    0.68
2022-02-02 23:15:00    0.68
2022-02-02 23:30:00    0.63
2022-02-02 23:45:00    0.63
Name: BENZENE, Length: 115, dtype: float64

# C6-C20

In [15]:
import pandas as pd
import os
import pathlib

type_appareil = [
    'C2-C6',
    'C6-C20',
]

file_directories = [f for f in pathlib.Path().glob(f'../test_data/{year}/{month}/*{type_appareil[1]}.Asc')]

C6C20_data = pd.DataFrame()
for file in file_directories:
    asc_data = pd.read_table(file)
    asc_data['Sampling date'] = pd.to_datetime(asc_data['Sampling date']).dt.round('30min')
    asc_data.set_index('Sampling date', inplace=True)
    if "CAL60" in str(file):
        asc_data = asc_data.shift(periods=-1, freq='15min')
    C6C20_data = pd.concat([C6C20_data, asc_data])

C6C20_data.sort_index(inplace=True)

C6C20_data = fill_quarts(
    data=C6C20_data,
    type_appareil=type_appareil[1]
    )
C6C20_data = C6C20_data[~C6C20_data.index.duplicated(keep='first')]

In [16]:
import datetime as dt
import calendar

def filter_month(
        data: pd.DataFrame,
        year: int,
        month: int,
        day: int = 1,
):
    start_date = dt.datetime(year, month, day)
    day_before_start_date = start_date - dt.timedelta(days=1)

    last_day_of_month = calendar.monthrange(year, month)[1]
    end_date = dt.datetime(
        year,
        month,
        last_day_of_month
        )
    data = data[day_before_start_date:end_date]
    return (data)

C2C6_data = filter_month(data=C2C6_data, year=2022, month=2)
C6C20_data = filter_month(data=C6C20_data, year=2022, month=2)


In [17]:

xair_data = pd.concat([C2C6_data, C6C20_data], axis=1).sort_index()
xair_data = filter_month(
    data=xair_data,
    year=2022,
    month=2
    )
xair_data[xair_data.index.duplicated(keep='first')]

Unnamed: 0_level_0,Volume,ETHANE,ETHYLENE,PROPANE,PROPENE,I-BUTANE,N-BUTANE,ACETYLENE,TRANS-2-BUTENE,1-BUTENE,...,N-HEXADECANE,N-HEPTADECANE,PHENANTHRENE,N-OCTADECANE,ANTHRACENE,N-NONADECANE,N-EICOSANE,FLUORANTHENE,PYRENE,Unnamed: 63
Sampling date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


In [18]:
xair_data["2022-02-01 19":"2022-02-02"]["Volume"]

Unnamed: 0_level_0,Volume,Volume
Sampling date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-02-01 19:00:00,101.62,1155.7
2022-02-01 19:15:00,105.40,400.9
2022-02-01 19:30:00,105.40,400.9
2022-02-01 19:45:00,105.12,400.9
2022-02-01 20:00:00,105.12,400.9
...,...,...
2022-02-02 22:45:00,101.07,1153.6
2022-02-02 23:00:00,101.19,1153.6
2022-02-02 23:15:00,101.19,1153.6
2022-02-02 23:30:00,101.15,1154.0


In [104]:
def get_cols(
        d1: pd.DataFrame,
        d2: pd.DataFrame,
) -> list:
    col1 = d1.columns.to_list()
    col2 = d2.columns.to_list()

    header = list(dict.fromkeys(col1 + col2))

    return(header)

header = get_cols(C2C6_data, C6C20_data)
xair_data = pd.DataFrame(columns=header)

xair_data = pd.concat([xair_data, C2C6_data]).sort_index()
xair_data = pd.concat([xair_data, C6C20_data]).sort_index()

xair_data = filter_month(
    data=xair_data,
    year=2022,
    month=2
    )
# xair_data.to_csv('../output/test.csv')

  xair_data = pd.concat([xair_data, C2C6_data]).sort_index()
  xair_data = pd.concat([xair_data, C2C6_data]).sort_index()
  xair_data = pd.concat([xair_data, C6C20_data]).sort_index()


In [105]:
xair_data

Unnamed: 0,Volume,ETHANE,ETHYLENE,PROPANE,PROPENE,I-BUTANE,N-BUTANE,ACETYLENE,TRANS-2-BUTENE,1-BUTENE,...,N-HEXADECANE,N-HEPTADECANE,PHENANTHRENE,N-OCTADECANE,ANTHRACENE,N-NONADECANE,N-EICOSANE,FLUORANTHENE,PYRENE,Unnamed: 63
2022-01-31 00:15:00,1155.20,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2022-01-31 00:30:00,1155.20,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2022-01-31 00:45:00,1155.20,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2022-01-31 01:00:00,1155.20,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2022-01-31 23:30:00,100.88,1.78,0.98,0.96,0.04,0.10,0.32,0.0,0.03,0.00,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-27 23:30:00,102.40,5.21,9.45,4.50,0.54,0.83,2.47,0.0,0.21,0.27,...,,,,,,,,,,
2022-02-27 23:45:00,102.40,5.21,9.45,4.50,0.54,0.83,2.47,0.0,0.21,0.27,...,,,,,,,,,,
2022-02-27 23:45:00,1167.50,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2022-02-28 00:00:00,102.36,4.74,9.21,2.70,0.51,0.93,2.62,0.0,0.19,0.12,...,,,,,,,,,,


In [96]:
c1 = C2C6_data.columns.to_list()
c2 = C6C20_data.columns.to_list()

header = list(dict.fromkeys(c1 + c2))
print(header)


['Volume', 'ETHANE', 'ETHYLENE', 'PROPANE', 'PROPENE', 'I-BUTANE', 'N-BUTANE', 'ACETYLENE', 'TRANS-2-BUTENE', '1-BUTENE', 'VINYLCHLORIDE', 'CIS-2-BUTENE', 'CYCLOPENTANE', 'I-PENTANE', 'N-PENTANE', '1-3-BUTADIENE', 'TRANS-2-PENTENE', '1-PENTENE', 'CIS-2-PENTENE', '2-2-DIME-BUTANE', 'ME-CYCLOPENTANE', 'CYCLOHEXANE', '2-ME-PENTANE', '3-ME-PENTANE', 'N-HEXANE', 'ISOPRENE', '2-ME-1-PENTENE', '2-4-DIME-PENTANE', '2-3-DIME-PENTANE', '2-ME-HEXANE', 'BENZENE', 'Unnamed: 32', 'Unnamed: 5', 'NAPHTHALENE', 'Unnamed: 4', '1-HEXENE', '2-3-DIMEC5+2MEC6', '3-ME-HEXANE', '224-TME-PENTANE', 'N-HEPTANE', 'ME-CYCLOHEXANE', '234-TME-PENTANE', 'TOLUENE', '2-ME-HEPTANE', '3-ME-HEPTANE', 'N-OCTANE', 'ETHYLBENZENE', 'M&P-XYLENES', 'STYRENE', 'O-XYLENE', 'N-NONANE', 'I-PROPYLBENZENE', 'A-PINENE', 'N-PROPYLBENZENE', 'M-ETHYLTOLUENE', 'P-ETHYLTOLUENE', '135-TMB', 'O-ETHYLTOLUENE', 'B-PINENE', '124-TMB', '1-3-DICL-BENZENE', 'N-DECANE', '1-4-DICL-BENZENE', '3-CARENE', '123-TMB', 'LIMONENE', 'M-DIETHYLBENZENE', 'P-D