In [11]:
import sys
from dataclasses import dataclass, field

import numpy as np
import pandas as pd
import rpy2.robjects as R

In [10]:
def read_file(input_path: str):
    """Read a tabular file and return its trailing 8x12 block as a flat list.

    The reader is picked from the text after the last "." in ``input_path``:
    ``xlsx``, ``xls`` and ``ods`` go through ``pd.read_excel``; ``csv`` and
    ``tsv`` go through ``pd.read_csv`` with "," and tab separators. The file
    is always read without a header row.

    Args:
        input_path: Path of the file to read.

    Returns:
        A list with the values of the last 8 rows and last 12 columns,
        flattened row by row. Slicing clamps to the table size, so a smaller
        table simply yields fewer values.

    Raises:
        SystemExit: If the extension is not one of the supported ones
            (a message is printed first).
    """
    # checking extensions
    # The extension must come from the argument. Reading it from a global
    # named `file` raises NameError when no such global exists and otherwise
    # picks the reader from an unrelated path.
    filetype = input_path.split(".")[-1]
    if filetype == "xlsx" or filetype == "xls" or filetype == "ods":
        df = pd.read_excel(input_path, header=None)
    elif filetype == "csv":
        df = pd.read_csv(input_path, sep=",", header=None)
    elif filetype == "tsv":
        df = pd.read_csv(input_path, sep="\t", header=None)
    else:
        print("Filetype not supported.")
        sys.exit()

    # trimming the array: keep only the last 8 rows and last 12 columns
    array = np.asarray(df)
    trimmed = array[-8:, -12:]

    # array to list, row by row
    return [val for row in trimmed for val in row]


def make_col(file: str, var_name: str, var_type: str, unit: str=""):
    """Build one table column: header, type tag, then the values from ``file``.

    Args:
        file: Path handed to ``read_file`` to obtain the column values.
        var_name: Name of the variable; used as the column header.
        var_type: One of "N", "O", "I", "R".
        unit: Optional unit. It is appended to the header as
            ``"name [unit]"`` only for types "I" and "R" and only when it is
            not empty.

    Returns:
        A list ``[header, "var_type:<type>", value, value, ...]``.

    Raises:
        SystemExit: If ``var_type`` is not one of the four accepted letters
            (a message is printed first; the file is not read in that case).
    """
    # checking for NOIR type
    if var_type not in ("N", "O", "I", "R"):
        print("Incorrect variable type.")
        sys.exit()

    # checking for unit
    with_unit = unit != "" and var_type in ("I", "R")
    header = f"{var_name} [{unit}]" if with_unit else var_name

    # creating the final list: two leading cells followed by the file values
    leading = [header, f"var_type:{var_type}"]
    return leading + read_file(file)

def make_table(*col: list, output_path: str):
    """Write the given columns side by side to a CSV file.

    Args:
        *col: Lists whose first element is the column header and whose
            remaining elements are the column cells. If two lists share a
            header, the later one replaces the earlier one.
        output_path: Destination of the CSV file (written without an index).
    """
    by_header = {column[0]: column[1:] for column in col}
    pd.DataFrame(by_header).to_csv(output_path, index=False)
                

In [13]:
# Input paths passed to make_col below.
file = "data/2021.12.13.xlsx"
template = "data/template1.ods"

# Column named "absorbance" with var_type "I"; no unit is passed,
# so the header is the bare variable name.
data = make_col(
    file,
    var_name="absorbance",
    var_type="I"
)

# Column named "column" with var_type "N", read from the template file.
temp1 = make_col(
    template,
    var_name="column",
    var_type="N"
)

# Write both columns (template column first) to a CSV file.
make_table(
    temp1,
    data,
    output_path="out/table1.csv"
)

In [2]:
def read_data(input_path: str):
    """Load a tabular file and flatten its last 8 rows x 12 columns into a list.

    The text after the final "." of ``input_path`` selects the reader:
    ``pd.read_excel`` for ``xlsx``/``xls``/``ods`` and ``pd.read_csv`` for
    ``csv`` (comma separated) and ``tsv`` (tab separated). No header row is
    assumed.

    Args:
        input_path: Path of the file to load.

    Returns:
        A list of the selected values in row-major order. A table smaller
        than 8x12 yields fewer values because slicing clamps to its size.

    Raises:
        SystemExit: For any other extension, after printing a message.
    """
    # checking extensions
    extension = input_path.rsplit(".", 1)[-1]
    if extension in ("xlsx", "xls", "ods"):
        frame = pd.read_excel(input_path, header=None)
    elif extension in ("csv", "tsv"):
        delimiter = "," if extension == "csv" else "\t"
        frame = pd.read_csv(input_path, sep=delimiter, header=None)
    else:
        print("Filetype not supported.")
        sys.exit()

    # trimming the array
    block = np.asarray(frame)[-8:, -12:]

    # array to list
    return [value for line in block for value in line]

@dataclass
class Variable:
    """One variable loaded from a file, with a display header and its values.

    The four constructor arguments are hidden from the repr; only the derived
    ``header`` and ``data`` fields are shown.

    Raises:
        SystemExit: From ``__post_init__`` when ``var_type`` is not one of
            "N", "O", "I", "R" (a message is printed first).
    """
    # Constructor arguments (excluded from repr).
    input_path: str = field(repr=False)
    var_name: str = field(repr=False)
    var_type: str = field(repr=False)
    unit: str = field(repr=False, default="")
    # Derived in __post_init__; not accepted by __init__.
    header: str = field(init=False)
    data: list = field(init=False)

    def __post_init__(self):
        """Validate the type letter, derive ``header`` and load ``data``."""
        # checking for NOIR type
        if self.var_type not in ("N", "O", "I", "R"):
            print("Incorrect variable type. Choose one of these: N, O, I, R.")
            sys.exit()

        # checking for unit: only "I"/"R" variables with a non-empty unit get one
        show_unit = self.unit != "" and self.var_type in ("I", "R")
        self.header = (
            f"{self.var_name} [{self.unit}]" if show_unit else self.var_name
        )

        self.data = read_data(self.input_path)

In [3]:
# Variable of type "R" with a unit, so its header becomes "absorbance [RU]".
var1 = Variable(
    input_path="data/2021.12.13.xlsx",
    var_name="absorbance",
    var_type="R",
    unit="RU"
)

In [4]:
# Show the dataclass repr; only `header` and `data` have repr enabled.
var1

Variable(header='absorbance [RU]', data=[0.786, 0.7, 0.576, 0.462, 0.392, 0.311, 0.836, 0.738, 0.629, 0.511, 0.312, 0.32, 0.733, 0.697, 0.588, 0.52, 0.36, 0.302, 0.805, 0.696, 0.621, 0.551, 0.298, 0.295, 0.422, 0.388, 0.388, 0.37, 0.431, 0.402, 0.395, 0.389, 0.6, 0.466, 0.534, 0.513, 0.455, 0.361, 0.379, 0.367, 0.452, 0.39, 0.388, 0.386, 0.582, 0.531, 0.54, 0.48, 0.422, 0.412, 0.468, 0.396, 0.746, 0.595, 0.683, 0.59, 0.676, 0.637, 0.644, 0.575, 0.353, 0.383, 0.415, 0.359, 0.7, 0.537, 0.648, 0.484, 0.622, 0.558, 0.545, 0.496, 0.849, 0.717, 0.887, 0.772, 0.762, 0.723, 0.786, 0.769, 0.296, 0.288, 0.28, 0.286, 0.803, 0.519, 0.771, 0.685, 0.718, 0.694, 0.755, 0.731, 0.239, 0.226, 0.227, 0.218])

In [5]:
# Variable of type "N" read from the template file; with an empty unit
# the header is just the variable name.
var2 = Variable(
    input_path="data/template1.ods",
    var_name="column",
    var_type="N",
    unit=""
)
# Show the dataclass repr (header and data only).
var2

Variable(header='column', data=['kolumna1', 'kolumna2', 'kolumna3', 'kolumna4', 'kolumna5', 'kolumna6', 'kolumna7', 'kolumna8', 'kolumna9', 'kolumna10', 'kolumna11', 'kolumna12', 'kolumna1', 'kolumna2', 'kolumna3', 'kolumna4', 'kolumna5', 'kolumna6', 'kolumna7', 'kolumna8', 'kolumna9', 'kolumna10', 'kolumna11', 'kolumna12', 'kolumna1', 'kolumna2', 'kolumna3', 'kolumna4', 'kolumna5', 'kolumna6', 'kolumna7', 'kolumna8', 'kolumna9', 'kolumna10', 'kolumna11', 'kolumna12', 'kolumna1', 'kolumna2', 'kolumna3', 'kolumna4', 'kolumna5', 'kolumna6', 'kolumna7', 'kolumna8', 'kolumna9', 'kolumna10', 'kolumna11', 'kolumna12', 'kolumna1', 'kolumna2', 'kolumna3', 'kolumna4', 'kolumna5', 'kolumna6', 'kolumna7', 'kolumna8', 'kolumna9', 'kolumna10', 'kolumna11', 'kolumna12', 'kolumna1', 'kolumna2', 'kolumna3', 'kolumna4', 'kolumna5', 'kolumna6', 'kolumna7', 'kolumna8', 'kolumna9', 'kolumna10', 'kolumna11', 'kolumna12', 'kolumna1', 'kolumna2', 'kolumna3', 'kolumna4', 'kolumna5', 'kolumna6', 'kolumna7', 'k

In [15]:
# Map each header to an R vector: floats for var1, strings for var2.
dataframe = {
        var1.header: R.FloatVector(var1.data),
        var2.header: R.StrVector(var2.data),
}
# NOTE(review): not the last expression of the cell, so with IPython's
# default display settings this line shows nothing.
dataframe

# Build the R data frame from the header -> vector mapping.
df = R.DataFrame(dataframe)

In [21]:
# Print the column names of the R data frame built above.
print(df.colnames)

[1] "absorbance [RU]" "column"         



In [37]:
def make_df(*var: object):
    """Build an R data frame with one column per variable.

    Args:
        *var: Objects exposing ``var_type``, ``header`` and ``data`` (for
            example ``Variable`` instances). ``header`` becomes the column
            name and ``data`` the column values.

    Returns:
        An ``R.DataFrame`` whose columns are ``R.StrVector`` for type "N"
        variables and for type "O" variables whose first value is a string,
        and ``R.FloatVector`` for everything else.
    """
    r_columns = {}
    for v in var:
        # Ordinal data is treated as text only when it starts with a string.
        # isinstance() also accepts str subclasses (e.g. numpy.str_), and the
        # length guard keeps an empty ordinal variable from raising IndexError.
        ordinal_text = (
            v.var_type == "O"
            and len(v.data) > 0
            and isinstance(v.data[0], str)
        )
        if v.var_type == "N" or ordinal_text:
            r_columns[v.header] = R.StrVector(v.data)
        else:
            r_columns[v.header] = R.FloatVector(v.data)
    return R.DataFrame(r_columns)

In [44]:
# Build the R data frame from both variables and print it.
df1 = make_df(var1, var2)
print(df1)

   absorbance [RU]    column
1            0.786  kolumna1
2            0.700  kolumna2
3            0.576  kolumna3
4            0.462  kolumna4
5            0.392  kolumna5
6            0.311  kolumna6
7            0.836  kolumna7
8            0.738  kolumna8
9            0.629  kolumna9
10           0.511 kolumna10
11           0.312 kolumna11
12           0.320 kolumna12
13           0.733  kolumna1
14           0.697  kolumna2
15           0.588  kolumna3
16           0.520  kolumna4
17           0.360  kolumna5
18           0.302  kolumna6
19           0.805  kolumna7
20           0.696  kolumna8
21           0.621  kolumna9
22           0.551 kolumna10
23           0.298 kolumna11
24           0.295 kolumna12
25           0.422  kolumna1
26           0.388  kolumna2
27           0.388  kolumna3
28           0.370  kolumna4
29           0.431  kolumna5
30           0.402  kolumna6
31           0.395  kolumna7
32           0.389  kolumna8
33           0.600  kolumna9
34           0

<module 'rpy2.robjects.vectors' from '/home/f1lem0n/.config/anaconda3/envs/mda/lib/python3.10/site-packages/rpy2/robjects/vectors.py'>