# Plan

* [x] Learn Git stuff
* [x] Create local Git repository at EPOM and repo at GitHub
* [ ] Refresh memory about Python
* [ ] Learn the MATLAB language well enough to be able to convert the code
* [ ] Convert `RegGrid3D.m` to `reg_grid_3d.py`
* [ ] Convert `importfileXYZ.m` to `import_xyz.py`
* [ ] Test
* [ ] Re-Implement stuff
* [ ] Test
* [ ] Test
* [ ] Test

**Other**
* [ ] Python paths handling
* [ ] Python linked list concept
* [ ] Something

## Searching for paths to files with a given extension

In [1]:
# Define a function that searches for files with a given extension
# and returns a list of full paths to these files
import os

def find_file_with_extension(extension, path=None):
    """Recursively collect absolute paths of files that have a given extension.

    Args:
        extension: Extension to match, including the leading dot (for
            example ``'.m'``). The comparison is exact and case-sensitive.
        path: Top directory of the search. When omitted (``None``), the
            current working directory at call time is used.

    Returns:
        A list with the absolute path of every matching file, in the order
        ``os.walk`` visits them.

    Raises:
        RuntimeError: If ``path`` does not exist.
    """
    # Resolve the default lazily: a default of ``os.getcwd()`` in the
    # signature would be frozen at definition time.
    if path is None:
        path = os.getcwd()

    # Check the top path existence
    if not os.path.exists(path):
        # Raise a meaningful error
        raise RuntimeError('Path either not exists or not correct ' + path)

    # Pass the path as a format argument instead of splicing it into the
    # format string, so a '%' inside the path cannot break the formatting.
    print('Searching *%s files in directory:%s\n' % (extension, path))

    paths = []
    for dirpath, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            if os.path.splitext(filename)[1] == extension:
                # The running index of a match is the list length before
                # the append.
                print("%d. %s" % (len(paths), filename))
                paths.append(os.path.abspath(os.path.join(dirpath, filename)))
    print("\n")
    return paths

# Smoke test: list every *.m file found below the current working directory
test_paths = find_file_with_extension(extension='.m')
print(test_paths)

Searching *.m files in directory:/home/jupyter-idudkov/IDudkov_Internship/dynamic_surface_class

0. RegGrid3D.m
1. Testgrid.m
2. importfileXYZ.m


['/home/jupyter-idudkov/IDudkov_Internship/dynamic_surface_class/f02_matlab_code/RegGrid3D.m', '/home/jupyter-idudkov/IDudkov_Internship/dynamic_surface_class/f02_matlab_code/Testgrid.m', '/home/jupyter-idudkov/IDudkov_Internship/dynamic_surface_class/f02_matlab_code/importfileXYZ.m']


## XYZ file reading

In [4]:
import os.path
import numpy as np

# Define functions that read XYZ files
# and return a numpy array

# Implementation 1. Probably well optimized, but it crashes the EPOM kernel
def read_xyz_file_r1(xyz_filepath, startRow=0, endRow=None,
                  header_length=0, separator=' '):
    """Read a window of rows from a delimited XYZ text file with numpy.

    Args:
        xyz_filepath: Path to the text file.
        startRow: Zero-based index of the first data row to read, counted
            after the header lines.
        endRow: Zero-based index one past the last data row to read, counted
            after the header lines. ``None`` reads to the end of the file.
        header_length: Number of leading lines to skip as a header.
        separator: Column delimiter passed to ``np.genfromtxt``.

    Returns:
        The array produced by ``np.genfromtxt`` for the selected rows.

    Raises:
        RuntimeError: If ``xyz_filepath`` does not exist.
        ValueError: If ``endRow`` is not greater than ``startRow``.
    """
    # Check the file existence
    if not os.path.exists(xyz_filepath):
        # Raise a meaningful error
        raise RuntimeError('File is not exist or path is not correct')
    print(('Reading file: %s\n') % xyz_filepath)

    max_rows = None
    if endRow is not None:
        if endRow <= startRow:
            raise ValueError('endRow must be greater than startRow')
        max_rows = endRow - startRow

    # Skip the header AND the rows before startRow; previously startRow only
    # reduced max_rows, so the window always started at the first data row.
    return np.genfromtxt(xyz_filepath, delimiter=separator,
                         skip_header=header_length + startRow,
                         missing_values=None, max_rows=max_rows)
        
    
    # Implementation 2. Probably the best implementation.
    # It takes the least amount of memory and works well, but lacks some important features.
    # It doesn't crash the EPOM kernel
def read_xyz_file_r2(xyz_filepath, header_length=0, separator=' '):
    """Read an XYZ text file line by line into a ``(3, N)`` numpy array.

    The file is streamed, so only the parsed values are kept in memory.
    Every value is rounded to 3 decimal places.

    Args:
        xyz_filepath: Path to the text file.
        header_length: Number of leading lines treated as a header and
            skipped; header lines may have any number of fields.
        separator: String that separates the three columns of a data line.

    Returns:
        Array whose rows are the x, y and z columns of the file.

    Raises:
        RuntimeError: If ``xyz_filepath`` does not exist.
        ValueError: If a data line does not hold exactly three numeric
            fields.
    """
    # Check the file existence
    if not os.path.exists(xyz_filepath):
        # Raise a meaningful error
        raise RuntimeError('File is not exist or path is not correct')
    print(('Reading file: %s\n') % xyz_filepath)

    x_list = []
    y_list = []
    z_list = []

    # Number of non-blank lines seen, header lines included.
    count = 0

    with open(xyz_filepath, 'r') as xyz_file:
        for index, line in enumerate(xyz_file):
            # Blank lines (typically a trailing one) carry no data.
            if not line.strip():
                continue
            count += 1
            # Header lines are skipped without parsing, so they need not
            # consist of exactly three fields.
            if index < header_length:
                continue

            fields = line.split(separator)
            if len(fields) != 3:
                raise ValueError('Line %d: expected 3 fields, got %d'
                                 % (index + 1, len(fields)))
            x_list.append(round(float(fields[0]), 3))
            y_list.append(round(float(fields[1]), 3))
            z_list.append(round(float(fields[2]), 3))
    print("Number of Rows: %d" % count)
    xyz_array = np.array([x_list, y_list, z_list])
    return xyz_array



    # Implementation 3. The simplest and the most expensive one
def read_xyz_file_r3(xyz_filepath, startRow=0, endRow=None,
                  header_length=0, separator=' '):
    """Read an XYZ text file fully into memory and parse a range of lines.

    The header occupies lines ``startRow`` to ``startRow + header_length``
    and the data occupies the lines from there up to ``endRow`` (all indices
    are zero-based file line indices, end-exclusive). Every value is rounded
    to 3 decimal places.

    Args:
        xyz_filepath: Path to the text file.
        startRow: Index of the first line to consider.
        endRow: Index one past the last line to consider; ``None`` reads to
            the end of the file.
        header_length: Number of header lines starting at ``startRow``.
        separator: String that separates the fields of a line.

    Returns:
        Tuple ``(header, xyz_array)``: ``header`` is a list with one list of
        fields per header line, ``xyz_array`` is a ``(3, N)`` numpy array
        whose rows are the x, y and z columns.

    Raises:
        RuntimeError: If ``xyz_filepath`` does not exist.
    """
    # Check the file existence
    if not os.path.exists(xyz_filepath):
        # Raise a meaningful error
        raise RuntimeError('File is not exist or path is not correct')
    print(('Reading file: %s\n') % xyz_filepath)

    # The context manager guarantees the file is closed (the former bare
    # ``xyz_file.close`` without parentheses never called the method).
    with open(xyz_filepath, 'r') as xyz_file:
        xyz_lines = xyz_file.read().splitlines()

    data_start = startRow + header_length

    # Header reading
    header = [header_line.split(separator)
              for header_line in xyz_lines[startRow:data_start]]

    x = []
    y = []
    z = []

    # Data reading
    for xyz_line in xyz_lines[data_start:endRow]:
        # Blank lines (typically a trailing one) carry no data.
        if not xyz_line.strip():
            continue
        xyz = xyz_line.split(separator)
        x.append(round(float(xyz[0]), 3))
        y.append(round(float(xyz[1]), 3))
        z.append(round(float(xyz[2]), 3))

    # Report header and data lines actually parsed.
    print("%d rows were read" % (len(header) + len(x)))
    xyz_array = np.array([x, y, z])

    return header, xyz_array

In [3]:
# Smoke test: parse the first *.xyz file found below the working directory
# (comma-separated, one header line) and show the array and its shape
xyz_paths = find_file_with_extension(extension='.xyz')
xyz_array = read_xyz_file_r2(xyz_paths[0], header_length=1, separator=',')
print(xyz_array)
print(xyz_array.shape)

Searching *.xyz files in directory:/home/jupyter-idudkov/IDudkov_Internship/dynamic_surface_class

0. SH2017_MainSurveyLines_1m_CUBE_NOAA.xyz
1. depth.xyz


Reading file: /home/jupyter-idudkov/IDudkov_Internship/dynamic_surface_class/f01_data/SH2017_MainSurveyLines_1m_CUBE_NOAA.xyz

Number of Rows: 10263471
[[ 3.6760350e+05  3.6760450e+05  3.6760550e+05 ...  3.6924750e+05
   3.6924850e+05  3.6923750e+05]
 [ 4.7691565e+06  4.7691565e+06  4.7691565e+06 ...  4.7753925e+06
   4.7753925e+06  4.7753935e+06]
 [-2.8000000e+01 -2.8050000e+01 -2.8150000e+01 ... -2.3990000e+01
  -2.4050000e+01 -2.4030000e+01]]
(3, 10263470)
