In [1]:
from Bio import SeqIO
import numpy as np
from PIL import Image
from skimage.filters import gaussian
import pandas as pd

In [4]:
def get_pos(record):
    """Extract the tile number and x/y position from a read's description.

    The first space-separated token of ``record.description`` is split on
    ``':'`` and fields 4, 5 and 6 (0-based) are converted to ``int``.
    NOTE(review): this layout looks like an Illumina-style read header —
    confirm against the sequencer output.

    Args:
        record: any object exposing a ``description`` string.

    Returns:
        tuple: ``(tile_num, x_pos, y_pos)`` as integers.

    Raises:
        IndexError: if the token has fewer than seven ``':'``-separated fields.
        ValueError: if one of the three fields is not an integer literal.
    """
    # Parse the header once instead of re-splitting it for every field.
    header_fields = record.description.split(' ')[0].split(':')
    return (
        int(header_fields[4]),
        int(header_fields[5]),
        int(header_fields[6]),
    )


def generate_img(x, y, x_min, y_min, x_max, y_max, op_path, r, blurred, sigma):
    """Paint each point as a filled square on a blank canvas and save the image.

    Args:
        x, y: equally indexed sequences of integer point coordinates.
        x_min, y_min, x_max, y_max: integer canvas bounds. The canvas has
            shape ``(x_max - x_min + 1, y_max - y_min + 1)`` with x on the
            first array axis.
        op_path: output path handed to ``Image.save``.
        r: square "radius". Every point covers offsets ``-(r-1)..(r-1)`` on
            both axes, i.e. a ``(2r-1) x (2r-1)`` square; ``r <= 0`` paints
            nothing.
        blurred: when truthy, the canvas is passed through ``gaussian``
            before it is saved.
        sigma: sigma forwarded to ``gaussian`` (unused unless ``blurred``).

    Squares are clipped to the canvas on all four sides. Points lying outside
    the given bounds therefore only contribute the part of their square that
    overlaps the canvas (possibly nothing) instead of wrapping around to the
    opposite edge through a negative index or raising ``IndexError``.
    """
    x_range = x_max - x_min + 1
    y_range = y_max - y_min + 1
    # NOTE(review): 2048 is far above the 8-bit range of the "L" mode used
    # below; presumably chosen so that blurred spots stay bright — confirm.
    spot_value = 1024 * 2
    img = np.zeros(shape=(x_range, y_range))

    reach = r - 1  # furthest offset painted on either side of a point
    for i in range(len(x)):
        cx = x[i] - x_min
        cy = y[i] - y_min
        row_lo = max(0, cx - reach)
        row_hi = min(x_range - 1, cx + reach)
        col_lo = max(0, cy - reach)
        col_hi = min(y_range - 1, cy + reach)
        # An empty interval means the square misses the canvas (or r <= 0).
        if row_lo <= row_hi and col_lo <= col_hi:
            # One slice assignment replaces the per-pixel Python loops.
            img[row_lo:row_hi + 1, col_lo:col_hi + 1] = spot_value

    if blurred:
        img = gaussian(img, sigma=sigma)
    im = Image.fromarray(img)
    new_im = im.convert("L")
    new_im.save(op_path, dpi=(300.0, 300.0))


def generate_with_marker(x, y, marker_x, marker_y, op_path, csv_path):
    """Render points in white and marker points in red, then save image + CSV.

    The canvas is the bounding box of ``x``/``y`` (shape
    ``(max(x) - min(x) + 1, max(y) - min(y) + 1, 3)``, x on the first axis).
    Every point and every marker is painted as a 9x9 square (offsets -4..4);
    markers are painted last, so they overwrite white pixels. The canvas is
    blurred with ``gaussian(sigma=3)``, cast to ``uint8``, converted to
    palette mode ``"P"`` and saved to ``op_path``. The marker positions
    relative to ``(min(x), min(y))`` are written to ``csv_path`` with columns
    ``index``, ``x`` and ``y``.

    Args:
        x, y: equally indexed, non-empty sequences of integer coordinates.
        marker_x, marker_y: equally indexed sequences of integer marker
            coordinates in the same coordinate system as ``x``/``y``.
        op_path: output image path.
        csv_path: output CSV path.

    Raises:
        ValueError: if ``x`` or ``y`` is empty (raised by ``min``/``max``).

    Squares are clipped to the canvas on all four sides, so a marker lying
    outside the bounding box of ``x``/``y`` no longer wraps around to the
    opposite edge through a negative index or raises ``IndexError``; its CSV
    row is still written unchanged.
    """
    x_min = min(x)
    x_max = max(x)
    y_min = min(y)
    y_max = max(y)
    x_range = x_max - x_min + 1
    y_range = y_max - y_min + 1
    img = np.zeros(shape=(x_range, y_range, 3))
    reach = 4  # furthest offset painted on either side of a centre

    def _paint_square(cx, cy, rgb):
        # Fill the clipped square around (cx, cy) with one colour.
        row_lo = max(0, cx - reach)
        row_hi = min(x_range - 1, cx + reach)
        col_lo = max(0, cy - reach)
        col_hi = min(y_range - 1, cy + reach)
        if row_lo <= row_hi and col_lo <= col_hi:
            img[row_lo:row_hi + 1, col_lo:col_hi + 1] = rgb

    for i in range(len(x)):
        _paint_square(x[i] - x_min, y[i] - y_min, (255, 255, 255))

    marker_dic = {'index': range(0, len(marker_x)), 'x': [], 'y': []}
    for i in range(len(marker_x)):
        rel_x = marker_x[i] - x_min
        rel_y = marker_y[i] - y_min
        marker_dic['x'].append(rel_x)
        marker_dic['y'].append(rel_y)
        _paint_square(rel_x, rel_y, (255, 0, 0))

    # `multichannel=True` was deprecated in scikit-image 0.19 in favour of
    # `channel_axis`; the last axis holds the RGB channels.
    blurred_img = gaussian(img, sigma=3, channel_axis=-1)
    im = Image.fromarray(blurred_img.astype(np.uint8))
    new_im = im.convert("P")
    new_im.save(op_path, dpi=(300.0, 300.0))
    df = pd.DataFrame(marker_dic)
    df.to_csv(csv_path, index=True, header=True)


def get_coordinates(fastq_path):
    """Collect read positions for tiles 1101 and 1102 from a FASTQ file.

    Each record is parsed with ``SeqIO.parse(fastq_path, "fastq")`` and
    located with ``get_pos``; records on any other tile are ignored. The read
    sequence is not needed here and is no longer materialised per record.

    Args:
        fastq_path: path (or handle) accepted by ``SeqIO.parse``.

    Returns:
        tuple: ``(x_1101, y_1101, x_1102, y_1102)`` — four lists of ints in
        file order.
    """
    # tile number -> (x list, y list)
    by_tile = {1101: ([], []), 1102: ([], [])}
    for record in SeqIO.parse(fastq_path, "fastq"):
        if record is None:
            continue
        tile_num, x_pos, y_pos = get_pos(record)
        bucket = by_tile.get(tile_num)
        if bucket is None:
            continue  # tile not of interest
        bucket[0].append(x_pos)
        bucket[1].append(y_pos)
    print('Coordinates are found.')
    x_coordinate_01, y_coordinate_01 = by_tile[1101]
    x_coordinate_02, y_coordinate_02 = by_tile[1102]
    return x_coordinate_01, y_coordinate_01, x_coordinate_02, y_coordinate_02


def get_xy_coordinates(fastq_path, with_seq):
    """Split read positions on tiles 1101/1102 by exact sequence match.

    Records are read with ``SeqIO.parse(fastq_path, "fastq")`` and located
    with ``get_pos``. A read whose full sequence equals ``with_seq`` goes to
    the "seq" lists of its tile; every other read goes to the plain lists.
    Reads on other tiles are ignored.

    Args:
        fastq_path: path (or handle) accepted by ``SeqIO.parse``.
        with_seq: sequence string compared against the whole read.

    Returns:
        tuple of eight lists, in this order:
        ``x_1101, y_1101, x_1102, y_1102`` (non-matching reads) followed by
        ``x_seq_1101, y_seq_1101, x_seq_1102, y_seq_1102`` (matching reads).
    """
    # tile number -> (x list, y list), one table per match outcome
    plain = {1101: ([], []), 1102: ([], [])}
    exact = {1101: ([], []), 1102: ([], [])}
    for record in SeqIO.parse(fastq_path, "fastq"):
        if record is None:
            continue
        tile_num, x_pos, y_pos = get_pos(record)
        seq = str(record.seq)
        if tile_num != 1101 and tile_num != 1102:
            continue
        table = plain if with_seq != seq else exact
        xs, ys = table[tile_num]
        xs.append(x_pos)
        ys.append(y_pos)
    print('Coordinates are found.')
    return (
        plain[1101][0], plain[1101][1],
        plain[1102][0], plain[1102][1],
        exact[1101][0], exact[1101][1],
        exact[1102][0], exact[1102][1],
    )


def get_seq_coordinates(fastq_path, with_seq_1, with_seq_2):
    """Collect positions of reads containing either query on tiles 1101/1102.

    Records are read with ``SeqIO.parse(fastq_path, "fastq")`` and located
    with ``get_pos``. A read is kept when ``with_seq_1`` or ``with_seq_2``
    occurs anywhere in its sequence and it lies on tile 1101 or 1102.

    ``max_x``/``max_y`` are the largest coordinates seen over *all* records
    (any tile, matching or not), starting from 0. Minimum coordinates are not
    part of this function's return value, so they are no longer tracked here;
    ``get_seq_coordinates_v2`` returns them.

    Args:
        fastq_path: path (or handle) accepted by ``SeqIO.parse``.
        with_seq_1, with_seq_2: substrings searched for in each read.

    Returns:
        tuple: ``(x_1101, y_1101, x_1102, y_1102, max_x, max_y)``.
    """
    # tile number -> (x list, y list)
    by_tile = {1101: ([], []), 1102: ([], [])}
    max_x = 0
    max_y = 0
    for record in SeqIO.parse(fastq_path, "fastq"):
        if record is None:
            continue
        tile_num, x_pos, y_pos = get_pos(record)
        if x_pos > max_x:
            max_x = x_pos
        if y_pos > max_y:
            max_y = y_pos
        bucket = by_tile.get(tile_num)
        if bucket is None:
            continue  # tile not of interest
        seq = str(record.seq)
        if with_seq_1 in seq or with_seq_2 in seq:
            bucket[0].append(x_pos)
            bucket[1].append(y_pos)
    print('Coordinates are found.')
    x_coordinate_01, y_coordinate_01 = by_tile[1101]
    x_coordinate_02, y_coordinate_02 = by_tile[1102]
    return x_coordinate_01, y_coordinate_01, x_coordinate_02, y_coordinate_02, max_x, max_y


# def get_seq_coordinates(fastq_path, lib_seq):
#     x_coordinate_01 = []
#     y_coordinate_01 = []
#     x_coordinate_02 = []
#     y_coordinate_02 = []
#     max_x_1 = 0
#     max_y_1 = 0
#     max_x_2 = 0
#     max_y_2 = 0
    
#     for record in SeqIO.parse(fastq_path, "fastq"):
#         if record is not None:
#             tile_num, x_pos, y_pos = get_pos(record)
#             seq = str(record.seq)
#             if tile_num == 1101:
#                 if 
#                 if lib_seq in seq:
#                     x_coordinate_01.append(x_pos)
#                     y_coordinate_01.append(y_pos)
#             elif tile_num == 1102:
#                 if lib_seq in seq:
#                     x_coordinate_02.append(x_pos)
#                     y_coordinate_02.append(y_pos)
#     print('Coordinates are found.')
#     return x_coordinate_01, y_coordinate_01, x_coordinate_02, y_coordinate_02


In [14]:
# Test: render a 1000-read slice of tile 1101 without blurring.
fastq_path = '/Users/qinhanhou/Desktop/DeindlLab/220722/SeqRes/donuts_1/Donuts1/0722.fastq'
# get_seq_coordinates returns six values (four coordinate lists plus the
# maximum x and y); the maxima are not needed for this slice, so they are
# bound to throwaway names rather than clobbering max_x / max_y.
x_coordinate_01, y_coordinate_01, x_coordinate_02, y_coordinate_02, _max_x, _max_y \
    = get_seq_coordinates(fastq_path, 'A', 'qwwdasdqwraszdqwqw')
x_coordinate_01 = x_coordinate_01[10000:11000]
y_coordinate_01 = y_coordinate_01[10000:11000]


# generate_img requires explicit canvas bounds; use the bounding box of the
# slice being plotted so the canvas is only as large as the data shown.
generate_img(x_coordinate_01, y_coordinate_01,
             min(x_coordinate_01), min(y_coordinate_01),
             max(x_coordinate_01), max(y_coordinate_01),
             '/Users/qinhanhou/Desktop/DeindlLab/220722/SeqImg/test.png', 1, False, 4)

Coordinates are found.


In [8]:
def get_seq_coordinates_v2(fastq_path, with_seq, tile):
    """Collect positions of reads on one tile that contain ``with_seq``.

    Records are read with ``SeqIO.parse(fastq_path, "fastq")`` and located
    with ``get_pos``. The coordinate bounds are accumulated over *every*
    record (any tile, matching or not); only the returned coordinate lists
    are restricted to ``tile`` and to reads containing ``with_seq``.

    Args:
        fastq_path: path (or handle) accepted by ``SeqIO.parse``.
        with_seq: substring searched for in each read.
        tile: tile number to keep.

    Returns:
        tuple: ``(x_coordinate, y_coordinate, max_x, max_y, min_x, min_y)``.
        With no records the maxima stay at 0 and the minima stay at the
        float sentinel ``9e7``.
    """
    xs = []
    ys = []
    max_x = max_y = 0
    min_x = min_y = 9e7
    for record in SeqIO.parse(fastq_path, "fastq"):
        if record is None:
            continue
        tile_num, x_pos, y_pos = get_pos(record)
        # max()/min() return their first argument on ties, which keeps the
        # running value exactly as a strict comparison would.
        max_x = max(max_x, x_pos)
        min_x = min(min_x, x_pos)
        max_y = max(max_y, y_pos)
        min_y = min(min_y, y_pos)
        seq = str(record.seq)
        if tile_num == tile and with_seq in seq:
            xs.append(x_pos)
            ys.append(y_pos)
    print('Coordinates are found.')
    return xs, ys, max_x, max_y, min_x, min_y

In [11]:
fastq_path = '/Users/qinhanhou/Desktop/DeindlLab/220722/SeqRes/donuts_1/Donuts1/0722.fastq'

# Positions of the tile-1101 reads that contain the query sequence, together
# with the coordinate bounds accumulated over every record in the file.
x_coord, y_coord, max_x, max_y, min_x, min_y = get_seq_coordinates_v2(
    fastq_path, 'GGTCTCGTCCAATCTAT', 1101)

# Extent of the x axis covered by the reads.
print(max_x - min_x)

# Render the selected reads on a canvas spanning the full coordinate bounds,
# with radius-3 squares and a Gaussian blur of sigma 4.
generate_img(
    x=x_coord,
    y=y_coord,
    x_min=min_x,
    y_min=min_y,
    x_max=max_x,
    y_max=max_y,
    op_path='/Users/qinhanhou/Desktop/DeindlLab/220722/SeqImg/1101_full_sigma3_v2.png',
    r=3,
    blurred=True,
    sigma=4,
)


Coordinates are found.
27998
