# 读取角膜地形图

从数据文件中读取所需要的数据, 并转换成Pandas DataFrame的形式. 

Pandas DataFrame支持多种索引方式, 并且能够方便转换成numpy array进行运算. 

## 必要的函数库

* pandas: 常用数据读写处理的工具包, 如果未安装应考虑使用anaconda安装


In [6]:
import pandas as pd
# import numpy as np
from pandas import DataFrame, Series
import re
import os

# 构造工具


## 构造dlmread
仿照MatLab里面的dlmread
```matlab
M = dlmread(filename,delimiter,[R1 C1 R2 C2])
```
注意: Python版本中R1, C1, R2, C2作为四个独立的参数传入. 行列数字按照Excel表格中的形式写, 首行=1, 首列=1, 并且包含起止行列. 否则一个大的表格数起来太麻烦了. 

In [78]:
def dlmread(filename,delimiter,R1,C1,R2,C2,header=None):
    """Read a rectangular cell range from a delimited text file.

    Modelled on MATLAB's ``dlmread(filename, delimiter, [R1 C1 R2 C2])``,
    except that rows and columns are counted spreadsheet-style: the first
    row is 1, the first column is 1, and both corners are inclusive.

    Args:
        filename: Path or file-like object handed to ``pandas.read_csv``.
        delimiter: Field separator, passed to ``read_csv`` as ``sep``.
        R1: First row of the range (1-based).
        C1: First column of the range (1-based).
        R2: Last row of the range (1-based, inclusive).
        C2: Last column of the range (1-based, inclusive).
        header: Forwarded unchanged to the ``header`` argument of
            ``read_csv``.

    Returns:
        The ``DataFrame`` that ``read_csv`` produces for the requested
        range.
    """
    # Zero-based positions of the columns to keep.
    wanted_cols = range(C1 - 1, C2)
    # Zero-based indices of the lines that precede the first wanted row.
    leading_rows = range(R1 - 1)
    row_count = R2 - R1 + 1

    return pd.read_csv(
        filename,
        sep=delimiter,
        header=header,
        skiprows=leading_rows,
        nrows=row_count,
        usecols=wanted_cols,
    )

In [97]:
# Manual smoke test; switch it off by changing `and True` to `and False`.
if __name__ == "__main__" and True:
    fpath = os.path.join('..', 'testdata')

    # Small reference table: print it whole, then only rows 1-4 of column 2.
    fname = 'standard.csv'
    filename = os.path.join(fpath, fname)
    standard_data = pd.read_csv(filename, sep=';', header=None)
    data = dlmread(filename, ';', 1, 2, 4, 2, header=None)
    print("原始表格")
    print(standard_data)
    print("部分读取")
    print(data)

    # Rows 313-317, columns 1-2 of the PentaCam sample file.
    fname = 'pentacam.csv'
    filename = os.path.join(fpath, fname)
    data = dlmread(filename, ';', 313, 1, 317, 2, header=None)
    print(data)

原始表格
    0   1   2   3   4
0  A1  B1  C1  D1 NaN
1  A2  B2  C2  D2 NaN
2  A3  B3  C3  D3 NaN
3  A4  B4  C4  D4 NaN
4  A5  B5  C5  D5 NaN
部分读取
    1
0  B1
1  B2
2  B3
3  B4
                    0      1
0     Cornea Front Rh   7.81
1     Cornea Front Rv   7.72
2   Cornea Front Axis  13.70
3  Cornea Front Astig  -0.50
4    Cornea Front Exz  -0.41


## 翻译单元格位置
比如给定A1, 应返回R=1,C=1

In [103]:
def col_to_num(col_str):
    """Convert a spreadsheet column label to its 1-based column number.

    The label is read as a bijective base-26 number: ``'A'`` -> 1,
    ``'Z'`` -> 26, ``'AA'`` -> 27. Letter case is ignored.

    Args:
        col_str: Column label made of ASCII letters only.

    Returns:
        The column number as an ``int``; an empty label yields 0.

    Raises:
        ValueError: If the label contains anything but ASCII letters.
    """
    col_num = 0
    for char in col_str:
        if not ('A' <= char <= 'Z' or 'a' <= char <= 'z'):
            raise ValueError("invalid column label: {0!r}".format(col_str))
        # Shift the digits read so far one base-26 place to the left.
        col_num = col_num * 26 + (ord(char.upper()) - ord('A') + 1)
    return col_num


def cell2num(cellname):
    """Translate a spreadsheet cell name into 1-based (row, column) numbers.

    For example ``'A1'`` -> ``(1, 1)`` and ``'AA1'`` -> ``(1, 27)``.
    Column letters may be upper or lower case; surrounding whitespace and
    the ``$`` markers of an absolute reference (``'$A$1'``) are tolerated.

    Args:
        cellname: Cell name consisting of column letters followed by a
            row number.

    Returns:
        A ``(row, col)`` tuple of ``int`` values, ready to be used as the
        row/column arguments of ``dlmread``.

    Raises:
        ValueError: If ``cellname`` is not letters followed by digits.
    """
    match = re.match(r'\s*\$?([A-Za-z]+)\$?([0-9]+)\s*\Z', cellname)
    if match is None:
        raise ValueError("invalid cell name: {0!r}".format(cellname))
    col_letters, row_digits = match.groups()
    # The row must be numeric: callers do arithmetic on it (e.g. R1 - 1).
    return (int(row_digits), col_to_num(col_letters))
    

In [108]:
# Manual check; switch it off by changing `and True` to `and False`.
if __name__ == "__main__" and True:
    r, c = cell2num('AA1')
    print("({0},{1})".format(r, c))

(1,27)


## 翻译单元格范围
例如: A1..B5 -> (r1,c1,r2,c2) = (1,1,5,2)

In [113]:
def cell_block(cell_string):
    """Translate a cell-range string into 1-based corner coordinates.

    For example ``'A1..B5'`` -> ``(1, 1, 5, 2)``. The two corner cells may
    be separated by ``..`` or by the spreadsheet-style ``:``.

    Args:
        cell_string: Range such as ``'A1..B5'`` or ``'A1:B5'``.

    Returns:
        A tuple ``(r1, c1, r2, c2)`` of ``int`` values: row and column of
        the first corner, then row and column of the second corner.

    Raises:
        ValueError: If the string does not consist of exactly two cell
            names joined by one separator, or a row is not numeric.
    """
    # Raw pattern with both dots escaped, so only a literal '..' (or ':')
    # separates the corners.
    corners = re.split(r'\.\.|:', cell_string)
    if len(corners) != 2:
        raise ValueError("invalid cell range: {0!r}".format(cell_string))
    (r1, c1) = cell2num(corners[0])
    (r2, c2) = cell2num(corners[1])
    # int() guarantees numeric output even when cell2num hands back the
    # row as a string of digits.
    return (int(r1), int(c1), int(r2), int(c2))

In [114]:
# Demo: split the range A1..B5 into its corner coordinates and show them.
r1, c1, r2, c2 = cell_block('A1..B5')
print("({0},{1},{2},{3})".format(r1, c1, r2, c2))

['A1', 'B5']
(1,1,5,2)


## 读取 Sirius  角膜地形图

Sirius 角膜地形图. 数据存储为CSV文件. 
除Radii数据之外, 其他数据均以极坐标方式描述角膜, 每一类数据共31行, 256列. 

read_sirius函数需要两个参数: 
* filepath_or_buffer:  一般来说是文件名
* catalog:  需要获取的数据类别. 包含的类别有: 
  * 'Radii' (尚未实现, 目前传入此类别会报错)
  * 'CornealThickness'              
  * 'ElevationAnterior'
  * 'ElevationPosterior'
  * 'RefractiveEquivalentPower'
  * 'RefractiveFrontalPowerAnterior'
  * 'RefractiveFrontalPowerPosterior'
  * 'SagittalAnterior'
  * 'SagittalPosterior'
  * 'TangentialAnterior'
  * 'TangentialPosterior'
  
**务必注意类别名称的大小写**


In [7]:
def read_sirius(filepath_or_buffer,catalog):
    """Read one data map from a Sirius corneal-topography CSV export.

    Each supported catalog is a block of 31 consecutive rows in the
    ';'-separated file; the block is returned without its last column.

    Args:
        filepath_or_buffer: File name or file-like object, passed to
            ``pandas.read_csv``.
        catalog: Case-sensitive name of the map to read. One of
            ``'CornealThickness'``, ``'ElevationAnterior'``,
            ``'ElevationPosterior'``, ``'RefractiveEquivalentPower'``,
            ``'RefractiveFrontalPowerAnterior'``,
            ``'RefractiveFrontalPowerPosterior'``, ``'SagittalAnterior'``,
            ``'SagittalPosterior'``, ``'TangentialAnterior'``,
            ``'TangentialPosterior'``.

    Returns:
        A ``DataFrame`` with integer column labels holding the 31 rows of
        the requested map.

    Raises:
        NotImplementedError: For ``'Radii'``, whose layout differs from the
            31-row maps and is not supported yet.
        KeyError: If ``catalog`` is not a known catalog name.
    """
    # Number of rows in every map block.
    map_rows = 31
    # Number of leading lines to skip before each map block starts.
    map_offsets = {
        'CornealThickness':                3,
        'ElevationAnterior':               35,
        'ElevationPosterior':              67,
        'RefractiveEquivalentPower':       99,
        'RefractiveFrontalPowerAnterior':  131,
        'RefractiveFrontalPowerPosterior': 163,
        'SagittalAnterior':                195,
        'SagittalPosterior':               227,
        'TangentialAnterior':              259,
        'TangentialPosterior':             291,
    }

    if catalog == 'Radii':
        # Fail with an explicit message instead of an opaque lookup error.
        raise NotImplementedError(
            "reading the 'Radii' catalog is not implemented yet")
    if catalog not in map_offsets:
        raise KeyError(
            "unknown Sirius catalog {0!r}; expected one of: {1}".format(
                catalog, ", ".join(sorted(map_offsets))))

    # Skip everything before the block, then read exactly one block.
    sirius_data = pd.read_csv(filepath_or_buffer,
                              skiprows=range(map_offsets[catalog]),
                              header=None,
                              nrows=map_rows,
                              sep=';')

    # Drop the last column (presumably the empty column produced by a
    # trailing ';' on each line -- TODO confirm against a real export).
    return sirius_data.drop(sirius_data.columns[-1], axis=1)


In [8]:
# Manual smoke test; switch it off by changing `and True` to `and False`.
if __name__ == "__main__" and True:
    fpath = os.path.join('..', 'testdata')
    fname = 'sirius.csv'
    filename = os.path.join(fpath, fname)

    # Read the corneal-thickness map from the sample file and show it.
    catalog = 'CornealThickness'
    data = read_sirius(filename, catalog)

    print(data)

          0          1          2          3          4          5    \
0    621.9440   621.9440   621.9440   621.9440   621.9440   621.9440   
1    623.1229   623.1656   623.2072   623.2475   623.2865   623.3240   
2    624.8762   624.9506   625.0231   625.0938   625.1624   625.2291   
3    627.3560   627.4747   627.5919   627.7073   627.8210   627.9326   
4    630.7875   630.9597   631.1305   631.2994   631.4661   631.6304   
5    635.1459   635.3712   635.5947   635.8160   636.0348   636.2505   
6    640.0944   640.3879   640.6793   640.9679   641.2530   641.5339   
7    645.7549   646.1091   646.4610   646.8098   647.1545   647.4940   
8    651.8187   652.2406   652.6607   653.0776   653.4902   653.8972   
9    658.3864   658.8671   659.3480   659.8279   660.3053   660.7786   
10   665.2798   665.8257   666.3752   666.9268   667.4789   668.0296   
11   673.3134   673.8997   674.4913   675.0869   675.6847   676.2828   
12   681.7916   682.4629   683.1432   683.8311   684.5245   685.

## 读取PentaCam角膜地形图数据

PentaCam 角膜地形图. 数据存储为CSV文件. 

* Front, Back以二维矩阵形式存储直角坐标位置数据, 各141行, 141列 
* 其他数据一般标题在第一列, 数据放在第二列. 看起来非常凌乱. 于是读取很费力. 
* 格式这么难看, 德国人真的好意思? ? ? 

read_pentacam函数需要两个参数: 
* filepath_or_buffer:  一般来说是文件名
* catalog:  需要获取的数据类别. 包含的类别有: 
  * 'FRONT'
  * 'BACK'
  * 'Cornea'
  * 'Pachy'
  * 'Chamber'
  * 'K'
  * 'Pupil'
  
**务必注意类别名称的大小写**

列索引目前需要用字符串, 例如'7.000'

In [46]:
def read_pentacam(filepath_or_buffer,catalog):
    """Read one data section from a PentaCam CSV export.

    The section is read from the ';'-separated file, its first column
    becomes the (unnamed) index, only the leading columns configured for
    the section are kept, and those columns are renamed where new names
    are configured.

    Args:
        filepath_or_buffer: File name or file-like object, passed to
            ``pandas.read_csv``.
        catalog: Case-sensitive section name: ``'FRONT'``, ``'BACK'``,
            ``'Cornea'``, ``'Pachy'``, ``'Chamber'``, ``'K'`` or
            ``'Pupil'``.

    Returns:
        A ``DataFrame`` indexed by the section's first column. ``'FRONT'``
        and ``'BACK'`` keep 141 columns labelled by the section's header
        row; the other sections keep one column named ``'value'``.

    Raises:
        KeyError: If ``catalog`` is not one of the names above.
    """
    # Layout of each section in the PentaCam CSV:
    # (lines to skip, rows to read, header argument, columns kept, new names)
    layouts = {
        'FRONT':   (0,   141, 0,    141, []),
        'BACK':    (142, 141, 0,    141, []),
        'Cornea':  (311, 4,   None, 1,   ['value']),
        'Pachy':   (316, 4,   None, 1,   ['value']),
        'Chamber': (320, 2,   None, 1,   ['value']),
        'K':       (325, 3,   None, 1,   ['value']),
        # NOTE: it is unclear why this combination is the one that works;
        # offsets 329 and 330 and other header settings were tried as well.
        'Pupil':   (328, 4,   0,    1,   ['value']),
    }
    skip, n_rows, header, keep, new_names = layouts[catalog]

    # Skip everything before the section, then read just its rows.
    table = pd.read_csv(
        filepath_or_buffer,
        sep=';',
        header=header,
        skiprows=range(skip),
        nrows=n_rows,
    )

    # Promote the first column to the index and blank out the index name.
    table = table.set_index(table.columns[0])
    table.index.name = ''

    # Keep only the leading `keep` columns.
    table = table.iloc[:, range(keep)]

    # Pair the kept columns with their new names, if any are configured.
    renames = dict(zip(table.columns, new_names))
    return table.rename(columns=renames)

In [50]:
# Manual smoke test, currently disabled; enable it by changing
# `and False` to `and True`.
if __name__ == "__main__" and False:
    fpath = os.path.join('..', 'testdata')
    fname = 'pentacam.csv'
    filename = os.path.join(fpath, fname)

    # Section under test; other sections tried here were 'Pupil' and 'Cornea'.
    catalog = 'FRONT'

    data = read_pentacam(filename, catalog)

    print(data)
    print(data.shape)