In [None]:
#default_exp datasets

In [None]:
#exporti
import os
import torch
import numpy as np
import pandas as pd
import warnings
from tqdm import tqdm

from dl4to.problem import Problem
from dl4to.solution import Solution

In [None]:
#hide
from nbdev.showdoc import show_doc

# CSV converter

In [None]:
#export
class CSVConverter():
    """
    The purpose of the CSVConverter class is to convert csv files into TO problems. For each problem there should be two csv files:
    one that contains all voxel-wise information (forces, design space, Dirichlet conditions and ground truth densities) and one "_info" file that contains all scalar information (Young's modulus E, Poisson's ratio ν, yield stress σ_ys and voxel size h).
    The names of the files should start with 0 and increase, so the files for the first sample are "0.csv" and "0_info.csv", the files for the second sample are "1.csv" and "1_info.csv" and so on. For more information on the exact formatting we refer to
    the SELTO datasets [1].

    Both file types are read without a header row. The columns of "i.csv" are expected in the order given by `self.column_names`,
    the columns of "i_info.csv" in the order `E, ν, σ_ys, h`.
    """
    def __init__(
        self,
        csv_dir_path:str, # The path to the directory that contains files named as "i.csv" and "i_info.csv" where i is an integer, starting at i=0 for the first sample.
        dtype:torch.dtype=torch.float32, # The datatype into which the values from the csv files are converted.
        verbose:bool=True, # Whether to give the user feedback on the progress.
        pde_solver:"dl4to.pde.PDESolver"=None, # The pde solver that is used to solve the PDE for linear elasticity. Only has an effect if either `solve_pde_for_trivial_solution=True` or `solve_pde_for_gt_solution=True`.
        solve_pde_for_trivial_solution:bool=False, # Whether to solve the PDE for each trivial solution and save the displacements in the solution object. These can later be accessed via `problem.trivial_solution.u`. This is useful if PDE preprocessing is used. Requires a pde solver.
        solve_pde_for_gt_solution:bool=False # Whether to solve the PDE for each ground truth and save the displacements in the solution object. These can later be accessed via `gt_solution.u`. Requires a pde solver.
    ):
        self._csv_dir_path = csv_dir_path
        self._dtype = dtype
        self.verbose = verbose
        self.solve_pde_for_trivial_solution = solve_pde_for_trivial_solution
        self.solve_pde_for_gt_solution = solve_pde_for_gt_solution
        self.pde_solver = pde_solver
        # Column order of the header-less voxel-wise csv files.
        self.column_names = [
            'x', 'y', 'z',
            'design_space',
            'dirichlet_x', 'dirichlet_y', 'dirichlet_z',
            'force_x', 'force_y', 'force_z', 'density'
        ]


    @property
    def csv_dir_path(self):
        """The directory from which the csv files are read."""
        return self._csv_dir_path


    @property
    def dtype(self):
        """The torch datatype used for all floating point tensors that are created."""
        return self._dtype


    @property
    def size(self):
        # NOTE(review): `self._size` is never assigned anywhere in this class, so accessing this
        # property raises an AttributeError. Presumably it was meant to be the number of samples
        # (see `__len__`) - confirm the intended meaning before relying on it.
        return self._size


    @staticmethod
    def _is_data_file(file_name):
        """
        Returns `True` if `file_name` has the form `<integer>.csv`, i.e., if it is a voxel-wise data file that `__getitem__` is able to read.
        """
        stem, extension = os.path.splitext(file_name)
        return extension == '.csv' and stem.isdigit()


    def __len__(self):
        """
        Returns the number of file pairs `(i.csv, i_info.csv)` that are contained in `self.csv_dir_path`.
        """
        # Only the voxel-wise data files `i.csv` are counted. The `i_info.csv` files and any unrelated
        # directory entries (hidden files, previously generated `.pt` files, ...) must not inflate the count.
        return sum(
            1 for file_name in os.listdir(self.csv_dir_path)
            if self._is_data_file(file_name)
        )


    def __getitem__(self,
                    idx:int # The index for which `(problem, gt_solution)` should be returned.
                   ):
        """
        Returns the tuple `(problem, gt_solution)` for index `idx`.
        """
        data = self._get_data_array(idx)
        shape, voxels = self._get_shape_and_voxels(data)
        E, ν, σ_ys, h = self._get_data_info(shape, idx)
        F, Ω_dirichlet, Ω_design = self._get_forces_boundary_conditions_and_design_space(data, shape, voxels)
        θ = self._get_θ(data, shape, voxels)

        problem = Problem(
            E=E, ν=ν, σ_ys=σ_ys, h=h,
            Ω_dirichlet=Ω_dirichlet,
            Ω_design=Ω_design,
            F=F,
            pde_solver=self.pde_solver,
            name=f"problem_{idx}",
            dtype=self.dtype)

        if self.solve_pde_for_trivial_solution:
            # The return value is not needed here; the call is made for its effect on the trivial solution.
            _ = problem.trivial_solution.solve_pde()

        gt_solution = Solution(
            problem=problem,
            θ=θ
        )

        if self.solve_pde_for_gt_solution:
            _ = gt_solution.solve_pde(binary=True)

        return problem, gt_solution


    def _get_data_info(self, shape, i):
        """
        Reads the scalar quantities of sample `i` from `i_info.csv` and returns `(E, ν, σ_ys, [h, h, h])`.
        The argument `shape` is not used; it is kept so that the signature stays unchanged.
        """
        file_path = os.path.join(self.csv_dir_path, f'{i}_info.csv')
        data_info_column_names = ['E', 'ν', 'σ_ys', 'h']
        data_info = pd.read_csv(file_path, names=data_info_column_names)
        # `.item()` requires each column to hold exactly one value, i.e., the info file must consist of a single row.
        E, ν, σ_ys, h = (data_info[column_name].item() for column_name in data_info_column_names)
        # The single voxel size from the file is used for all three spatial directions.
        return E, ν, σ_ys, [h, h, h]


    def _get_data_array(self, i):
        """
        Reads the voxel-wise file `i.csv` into a DataFrame whose columns are named according to `self.column_names`.
        """
        return pd.read_csv(os.path.join(self.csv_dir_path, f'{i}.csv'), names=self.column_names)


    def _get_shape_and_voxels(self, data):
        """
        Returns the grid shape `(n_x, n_y, n_z)` as an integer array and the list `[vox_x, vox_y, vox_z]` of integer voxel index arrays (one entry per csv row).
        """
        voxels = [data[axis].values.astype(np.int64) for axis in ('x', 'y', 'z')]
        # The shape is derived from the largest index per axis instead of from the last row,
        # so the result does not depend on the order of the rows in the csv file.
        shape = np.array([voxel_indices.max() for voxel_indices in voxels], dtype=np.int64) + 1
        return shape, voxels


    def _get_forces_boundary_conditions_and_design_space(self, data, shape, voxels):
        """
        Scatters the force, Dirichlet and design space columns of `data` onto dense voxel grids.
        Returns `(F, Ω_dirichlet, Ω_design)` with shapes `(3, *shape)`, `(3, *shape)` and `(1, *shape)`.
        Voxels that do not occur in `data` stay zero.
        """
        vox_x, vox_y, vox_z = voxels
        F = torch.zeros(3, *shape, dtype=self.dtype)
        Ω_dirichlet = torch.zeros(3, *shape, dtype=self.dtype)

        # Channel k of both tensors holds the component along the k-th spatial axis.
        for component, axis in enumerate(('x', 'y', 'z')):
            F[component, vox_x, vox_y, vox_z] = torch.tensor(data[f'force_{axis}'].values, dtype=self.dtype)
            Ω_dirichlet[component, vox_x, vox_y, vox_z] = torch.tensor(data[f'dirichlet_{axis}'].values, dtype=self.dtype)

        # The design space is stored as an integer tensor, independent of `self.dtype`.
        Ω_design = torch.zeros(1, *shape, dtype=torch.long)
        Ω_design[:, vox_x, vox_y, vox_z] = torch.from_numpy(data['design_space'].values.astype(np.int64))
        return F, Ω_dirichlet, Ω_design


    def _get_θ(self, data, shape, voxels):
        """
        Scatters the `density` column of `data` onto a dense voxel grid of shape `(1, *shape)`.
        """
        θ = torch.zeros(1, *shape, dtype=self.dtype)
        θ[:, voxels[0], voxels[1], voxels[2]] = torch.tensor(data['density'].values, dtype=self.dtype)
        return θ


    def __call__(self,
                 pt_dir_path:str=None # The path where the `.pt` files should be saved.
                ):
        """
        Converts all `(i.csv, i_info.csv)` pairs in `csv_dir_path` and saves them as `.pt` files in the directory `pt_dir_path`.
        If `pt_dir_path` is `None`, then all samples are still converted, but nothing is written to disk.
        """
        if pt_dir_path is not None:
            os.makedirs(pt_dir_path, exist_ok=True)

        iters = range(len(self))
        if self.verbose:
            print("Generating dataset...")
            iters = tqdm(iters)

        for i in iters:
            try:
                sample = self[i]
                if pt_dir_path is not None:
                    torch.save(sample, os.path.join(pt_dir_path, f"{i}.pt"))
            except FileNotFoundError:
                # Best effort: a missing `i.csv` or `i_info.csv` skips this sample instead of aborting the whole conversion.
                if self.verbose:
                    print(f"Could not generate file {i}.")

        if self.verbose:
            print("done!")
            if pt_dir_path is not None:
                print(f"pt dataset generated in directory `{pt_dir_path}`.")

# Methods

In [None]:
show_doc(CSVConverter.__len__)

<h4 id="CSVConverter.__len__" class="doc_header"><code>CSVConverter.__len__</code><a href="__main__.py#L47" class="source_link" style="float:right">[source]</a></h4>

> <code>CSVConverter.__len__</code>()

Returns the number of file pairs `(i.csv, i_info.csv)` that are contained in `self.csv_dir_path`.

In [None]:
show_doc(CSVConverter.__getitem__)

<h4 id="CSVConverter.__getitem__" class="doc_header"><code>CSVConverter.__getitem__</code><a href="__main__.py#L60" class="source_link" style="float:right">[source]</a></h4>

> <code>CSVConverter.__getitem__</code>(**`idx`**:`int`)

Returns the tuple `(problem, gt_solution)` for index `idx`.

||Type|Default|Details|
|---|---|---|---|
|**`idx`**|`int`||The index for which `(problem, gt_solution)` should be returned.|


In [None]:
show_doc(CSVConverter.__call__)

<h4 id="CSVConverter.__call__" class="doc_header"><code>CSVConverter.__call__</code><a href="__main__.py#L142" class="source_link" style="float:right">[source]</a></h4>

> <code>CSVConverter.__call__</code>(**`pt_dir_path`**:`str`=*`None`*)

Converts all `(i.csv, i_info.csv)` pairs in `csv_dir_path` and saves them as `.pt` files in the directory `pt_dir_path`.

||Type|Default|Details|
|---|---|---|---|
|**`pt_dir_path`**|`str`|`None`|The path where the `.pt` files should be saved.|


# References

[1] Dittmer, Sören, Erzmann, David, Harms, Henrik, Falck, Rielson, & Gosch, Marco. (2023). SELTO Dataset [Data set]. Zenodo. https://doi.org/10.5281/zenodo.7781392