## Tests for converting DB to image

### Imports

In [None]:
import logging
import configparser
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon, Circle

from io import BytesIO

from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.graphics import renderPDF
from reportlab.lib import colors
from svglib.svglib import svg2rlg

from configparser import ConfigParser

%load_ext autoreload
%reload_ext autoreload
%autoreload 2

### Initialize the logging and the config parser

In [None]:
# Configure root logging: "timestamp|LEVEL| message", showing INFO and above.
logging.basicConfig(format='%(asctime)s|%(levelname)s| %(message)s', level=logging.INFO)

# Load the notebook settings. ConfigParser.read() silently skips files it
# cannot open and returns the list of files it actually parsed, so check that
# list to report a missing config.ini here rather than as a KeyError later on.
config = ConfigParser()
if not config.read('config.ini'):
    logging.warning('Could not read "config.ini"; the configuration is empty')

### Read in the Excel file

In [None]:
# Load the "DATABASE" sheet of the workbook whose path is stored under
# [main] db_path in the configuration.
df = pd.read_excel(config['main']['db_path'], sheet_name='DATABASE')
# Drop the rows in which every cell is empty. The arguments are passed by
# keyword because pandas 2.x only accepts them as keywords
# (dropna(0, 'all') raises a TypeError there).
df = df.dropna(axis=0, how='all')
# Keep only the columns used further down and give them short English names.
df = df.loc[:, ['Tiroir', 'Position', 'Name', 'Date_congel', 'Organism', 'Tissue', 'Disease']]
df.columns = ['drawer', 'pos', 'name', 'date', 'organism', 'tissue', 'disease']
# Show the resulting frame as the cell output.
df

### Clean the data to reach a consensus on organisms, tissues and diseases

In [None]:
# Normalise the free-text values of each category column to the consensus keys
# declared in the configuration. Every category has its own config section in
# which each key (like "human", "mouse", etc. for category "organism") maps to
# a comma-separated list of regex fragments (like "mouse" or "musmusculus" for
# key "mouse"). Rows matching none of the fragments are set to "unknown".
categories = ['organism', 'disease', 'tissue']
for category in categories:
    # get the keys for this category
    category_keys = config[category]
    # boolean mask over the rows of df: True once a row matched any pattern
    matched_mask = pd.Series(False, index=df.index)
    # go through each key
    for category_key in category_keys:
        # get the list of pattern fragments for the current key
        raw_patterns = config[category][category_key].split(',')
        # go through each fragment
        for raw_pattern in raw_patterns:
            # tolerate "a, b" style lists: the cleaned values contain no
            # spaces, so a fragment with a leading space could never match
            raw_pattern = raw_pattern.strip()
            if not raw_pattern:
                # an empty entry (e.g. trailing comma) would match every row
                continue
            # create the actual pattern
            pattern = ('.*' + raw_pattern + '.*').replace('.*.*.*', '.*')
            # strip separators/brackets from the current values; regex=True is
            # explicit because pandas 2.x treats the pattern literally by
            # default. This is recomputed for every pattern because earlier
            # matches have already overwritten their rows with the key.
            cleaned_values = df[category].str.replace('[-_^ ()]', '', regex=True)
            # case-insensitive match; missing or non-string values count as
            # non-matching (na=False) so the result is a plain boolean mask
            matching_rows = cleaned_values.str.match(pattern, case=False, na=False)
            n_matching = int(matching_rows.sum())
            # if there was any match
            if n_matching > 0:
                logging.info('Category "{:8s}": found {:3d} rows matching "{}"'.format(category,
                    n_matching, category_key))
                # remember the matched rows and overwrite their value with the key
                matched_mask |= matching_rows
                df.loc[matching_rows, category] = category_key
    # replace all unmatched rows with the "unknown" string
    unmatched_mask = ~matched_mask
    logging.info('Category "{:8s}": found {:3d} rows without match (set to "unknown")'.format(category,
        int(unmatched_mask.sum())))
    df.loc[unmatched_mask, category] = 'unknown'


### Create a PDF for each drawer

In [None]:
# Build the PDF canvas and draw one drawer as a matplotlib figure: a black
# flask-like outline (polygon + ellipse) filled with rows of white circles,
# one circle per position, labelled by row letter and column number.
# The drawer title and the circle contents are still placeholders.
logging.info("Initializing the canvas of the PDF file")
c = canvas.Canvas('celllinedb.pdf', pagesize=A4)
cw, ch = A4

# Translate origin to upper left corner, to make it easier to use the coordinates from the GIMP draft.
#   However, all Y coordinates will need to be negative.
c.translate(0, ch)

# create the matplotlib figure with the right aspect ratio
fig = plt.figure(figsize=[10, 10])
ax = plt.gca()
plt.axis('off')

# add information
plt.text(-0.2, 1.2, 'Tiroir X', fontsize=60)

# draw a base triangle-like polygon
bot_left = [0.054, 0.15]
top_left = [0.44, 0.96]
top_right = [0.56, 0.96]
bot_right = [0.946, 0.15]
# `closed` is passed by keyword: recent matplotlib no longer accepts it positionally
polygon = Polygon([bot_left, top_left, top_right, bot_right], closed=True, facecolor='black')
ax.add_patch(polygon)

# draw an ellipse at the bottom
ellipse_center = [0.5, 0.15]
ellipse_width = 0.89
ellipse_height = 0.3
arc = Ellipse(ellipse_center, ellipse_width, ellipse_height, facecolor='black')
ax.add_patch(arc)

# set parameters for all the circle drawing
circle_top_left = [top_left[0] + 0.02, top_left[1] - 0.05]
circle_pad = [0.026, 0.03]
circle_x_shift = 0.04
n_circles = 1
circle_diameter = 0.07
# NOTE(review): `circle_radius` was used below but never defined in this
# notebook (NameError on a fresh kernel). The value is derived here so that the
# horizontal pitch (2 * circle_radius + circle_pad[0]) equals
# 2 * circle_x_shift: each row shifts left by circle_x_shift and gains one
# circle, so this keeps every row centred and the bottom row inside the
# polygon base. Confirm against the original draft.
circle_radius = circle_x_shift - circle_pad[0] / 2
letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P']
side_label_fontsize = 40
circle_name_fontsize = 7
circle_date_fontsize = 8

# distance between the centers of two neighbouring circles, per axis
x_pitch = 2 * circle_radius + circle_pad[0]
y_pitch = 2 * circle_radius + circle_pad[1]

# go row by row
for i_row in range(11):
    # every row has one more circle
    n_circles += 1
    # for each circle on a row
    for i_circle in range(n_circles):
        # define the coordinates of the current circle's center
        x = circle_top_left[0] + i_circle * x_pitch - i_row * circle_x_shift
        y = circle_top_left[1] - i_row * y_pitch

        # draw the first "column" (circle number) label
        # (`label_fontsize` was undefined; the arrows use the same size as
        #  the numbers they point from)
        if i_circle == 0 and i_row == 0:
            plt.text(x + 0.02, y + 0.07, '↙ ', fontsize=side_label_fontsize)
            plt.text(x + 0.07, y + 0.11, str(i_circle + 1), fontsize=side_label_fontsize)

        # draw the "column" (circle number) labels next to the last circle of
        # every row except the bottom one
        if i_circle == n_circles - 1 and i_row < 10:
            plt.text(x + 0.02, y + 0.04, '↙ ', fontsize=side_label_fontsize)
            plt.text(x + 0.08, y + 0.08, str(i_circle + 1), fontsize=side_label_fontsize)

        # draw the row letter labels (the bottom row label sits closer to the circles)
        if i_circle == 0 and i_row == 10:
            plt.text(x - 0.12, y - 0.02, letters[i_row] + ' →', fontsize=side_label_fontsize)
        elif i_circle == 0:
            plt.text(x - 0.2, y - 0.02, letters[i_row] + ' →', fontsize=side_label_fontsize)

        # last row only has 4 circles, so skip if we are not currently processing those
        if i_row == 10 and i_circle not in [3, 4, 7, 8]:
            continue

        # draw the current circle
        e = Ellipse([x, y], circle_diameter, circle_diameter, linewidth=2, facecolor='white')
        ax.add_patch(e)

        # write the content of the circle (placeholder name and date)
        plt.text(x, y + 0.01, 'SKOV3', fontsize=circle_name_fontsize, horizontalalignment='center')
        plt.text(x, y - 0.01, '11/09/19', fontsize=circle_date_fontsize, horizontalalignment='center')

In [None]:
# Embed the matplotlib figure into the PDF canvas as vector graphics and
# write the PDF file to disk.

# re-position the axes inside the figure ([left, bottom, width, height] in figure fractions)
ax.set_position([0.1, 0, 0.8, 1])
# save the matplotlib figure as SVG to BytesIO object
img_data = BytesIO()
fig.savefig(img_data, format='svg', transparent=True)
# rewind the bytes array and convert to a reportlab Drawing object
img_data.seek(0)
drawing = svg2rlg(img_data)
# svg2rlg returns None when it cannot load the SVG; fail with a clear message
# instead of an AttributeError on `drawing.width` below
if drawing is None:
    raise RuntimeError('svg2rlg could not convert the matplotlib SVG output to a reportlab drawing')
# rescale the drawing to 90% of the page width and 60% of the page height
drawing.scale(0.9 * cw / drawing.width, 0.6 * ch / drawing.height)
# draw the object to the canvas; the canvas origin was moved to the top-left
# corner, so Y coordinates on the page are negative
renderPDF.draw(drawing, c, 50, -ch + 100)

logging.info("Saving PDF file")
c.showPage()
c.save()