# admixでの日本、韓国、中国パネルをテスト

In [None]:
import csv
import numpy as np
import os

# check if file exists
def check_file(file_name):
    """Stop the program with a message when ``file_name`` is not a file.

    Args:
        file_name: Path that is expected to point at an existing regular
            file.

    Returns:
        None when the file exists. Otherwise a message is printed and
        ``exit()`` is called.
    """
    if os.path.isfile(file_name):
        return

    message = "Cannot find the file '" + file_name + "'!\n"
    print(message)
    exit()

# convert 23andme raw data
def twenty_three_and_me(data_file_name):
    """Parse a 23andMe raw-data file into a ``{first field: genotype}`` dict.

    The file is read as tab-separated text. A row is kept only when it has
    exactly four fields and the last character of its last field is one of
    ``A``, ``T``, ``G`` or ``C``; every other row (headers, comments,
    no-calls, ...) is ignored.

    Args:
        data_file_name: Path of the raw-data file. ``check_file`` is called
            on it first.

    Returns:
        A dict mapping the first field of each accepted row (presumably the
        SNP id -- confirm against the file format) to its last field.
    """
    check_file(data_file_name)
    processed_data = {}
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation recommends.
    with open(data_file_name, 'r', newline='') as data_file:
        for row in csv.reader(data_file, delimiter='\t'):
            # make sure the genotype is valid; slicing with [-1:] (instead of
            # indexing with [-1]) yields '' for an empty genotype field, so
            # such a row is skipped rather than raising IndexError
            if len(row) == 4 and row[-1][-1:] in ('A', 'T', 'G', 'C'):
                processed_data[row[0]] = row[-1]

    return processed_data

def read_raw_data(data_format, data_file_name=None):
    """Read a raw genotype file in the given format.

    Args:
        data_format: Name of the input format; only ``"23andme"`` is
            handled.
        data_file_name: Path of the raw-data file.

    Returns:
        The result of ``twenty_three_and_me(data_file_name)`` for the
        ``"23andme"`` format. For an unknown format or a missing file name
        a message is printed and ``exit()`` is called.
    """
    # Reject unsupported formats first.
    if data_format != "23andme":
        print("Data format does not exist!")
        exit()
        return None

    # A supported format still needs a file to read.
    if data_file_name is None:
        print("Data file not set!")
        exit()
        return None

    return twenty_three_and_me(data_file_name)
    

# all models
def models():
    """Return a new list with the names of all known models."""
    available = ['EastAsia3', 'K7b', 'K12b']
    return available


def populations(model):
    """Return the population labels of a model.

    Args:
        model: Model name (``'K7b'``, ``'K12b'`` or ``'EastAsia3'``).

    Returns:
        A new list of ``(English name, localized name)`` tuples for the
        model, or None when the model name is not recognised.
    """
    k7b = [
        ('South Asian', '南亚'),
        ('West Asian', '西亚'),
        ('Siberian', '西伯利亚'),
        ('African', '非洲'),
        ('Southern', '地中海－中东'),
        ('Atlantic Baltic', '大西洋波罗的海'),
        ('East Asian', '东亚'),
    ]
    k12b = [
        ('Gedrosia', '格德罗西亚'),
        ('Siberian', '西伯利亚'),
        ('Northwest African', '西北非'),
        ('Southeast Asian', '东南亚'),
        ('Atlantic Med', '大西洋地中海'),
        ('North European', '北欧'),
        ('South Asian', '南亚'),
        ('East African', '东非'),
        ('Southwest Asian', '西南亚'),
        ('East Asian', '东亚'),
        ('Caucasus', '高加索'),
        ('Sub Saharan', '撒哈拉以南非洲'),
    ]
    east_asia3 = [
        ("Han", '中国'),
        ('Japanese', '日本'),
        ('Korean', '韓国'),
    ]

    # Ordered (name, labels) pairs, checked with == in the listed order.
    table = (
        ('K7b', k7b),
        ('K12b', k12b),
        ('EastAsia3', east_asia3),
    )
    for name, labels in table:
        if model == name:
            return labels
    return None

# number of populations in a given model
def n_populations(model):
    """Return how many entries ``populations(model)`` contains."""
    labels = populations(model)
    return len(labels)


# model alleles file names
def snp_file_name(model):
    """Return the alleles file name of a model: ``<model>.alleles``."""
    suffix = ".alleles"
    return model + suffix


# model frequency matrix file names
def frequency_file_name(model):
    """Return the frequency matrix file name: ``<model>.<n populations>.F``."""
    prefix = model + "."
    population_count = str(n_populations(model))
    return prefix + population_count + ".F"




# read the alleles and frequency matrix of a model into NumPy arrays
def read_model(model):
    """Load the alleles file and the frequency matrix of a model.

    Both files are read from a ``data`` directory located next to this file.
    When ``__file__`` is not defined (e.g. in an interactive/notebook
    session) the ``data`` directory is looked up relative to the current
    working directory instead.

    Args:
        model: Model name; it is passed to ``snp_file_name`` and
            ``frequency_file_name`` to obtain the file names.

    Returns:
        A 4-tuple of NumPy arrays ``(snp, minor_alleles, major_alleles,
        frequency)``. The first three hold the first, second and third
        space-separated column of the alleles file; ``frequency`` holds the
        rows of the frequency file converted to floats.
    """
    # obtain model file names
    # The local names must differ from the helper functions: assigning to
    # ``snp_file_name`` / ``frequency_file_name`` here would make them local
    # variables and the calls would raise UnboundLocalError.
    alleles_name = snp_file_name(model)
    matrix_name = frequency_file_name(model)

    try:
        base_dir = os.path.dirname(__file__)
    except NameError:
        # __file__ does not exist in interactive sessions; '' keeps the path
        # relative to the current working directory.
        base_dir = ''
    data_dir = os.path.join(base_dir, 'data')

    # read SNPs
    snp = []
    minor_alleles = []
    major_alleles = []
    with open(os.path.join(data_dir, alleles_name), 'r') as snp_file:
        for row in csv.reader(snp_file, delimiter=' '):
            snp.append(row[0])
            minor_alleles.append(row[1])
            major_alleles.append(row[2])

    # read frequency matrix
    with open(os.path.join(data_dir, matrix_name), 'r') as frequency_file:
        frequency = [[float(f) for f in row]
                     for row in csv.reader(frequency_file, delimiter=' ')]

    return (np.array(snp), np.array(minor_alleles), np.array(major_alleles),
            np.array(frequency))

In [None]:
# Select the "EastAsia3" model (one of the names returned by models()).
model = "EastAsia3"