In [None]:
import dna_puller.dna_puller as puller
from gnuplot_generator.gnuplot_generator import GnuplotGenerator
import os, csv, json, subprocess

# Size of the sliding window
window_size = 1000
# Folder where the CSV folders are saved
csv_folders = 'csv'
# Postfix for charts (in case you create more charts in the same path)
postfix = 'all'

In [None]:
# List of all species for analysis from the Ensembl database
species = [ 'Esox_lucius' ]

# Keep the DnaPuller instance under its own name. Assigning it back to
# `puller` would shadow the imported module alias, so re-running this cell
# would fail with AttributeError (an instance has no `DnaPuller` attribute).
# NOTE(review): the meaning of the positional True/False flags is defined by
# DnaPuller and is not visible in this notebook -- confirm before changing.
dna_puller = puller.DnaPuller(species, True, False, ['dna_sm'], True, window_size)
dna_puller.download_and_parse_data()

In [None]:
files = os.listdir('jsons')

# Keep only the directory entries whose name ends in '.json'.
json_files = [entry for entry in files if entry.endswith('.json')]

In [None]:
# Convert every parsed JSON file into one space-delimited CSV per sequence
# found under its 'dna_sm' key. Each CSV row is:
#     <window index> <GC fraction> <upper-case fraction>
for file in json_files:
    name = file[:-5]  # strip the '.json' extension
    # Write below `csv_folders`, the same root the later cells read from.
    species_dir = os.path.join(csv_folders, name)
    os.makedirs(species_dir, exist_ok=True)
    with open(os.path.join('jsons', file)) as f:
        data = json.load(f)

    # NOTE(review): `lg` is presumably a linkage group / chromosome name -- confirm.
    for lg, values in data['dna_sm'].items():
        with open(os.path.join(species_dir, lg + '.csv'), 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ',
                                   quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for index, value in values.items():
                # G/C/S counts in both upper and lower case.
                gc_count = value['G'] + value['C'] + value['S'] + value['g'] + value['c'] + value['s']
                # Exclude N/n from the denominator.
                all_count = value['all'] - value['N'] - value['n']

                big_percent = 0
                if value['all_big'] > 0:
                    big_percent = value['all_big'] / float(value['all'])

                # Reset for every window. Without this, a window with
                # all_count == 0 would reuse the previous window's value,
                # or raise NameError if it is the very first window.
                gc_val = 0
                if all_count > 0:
                    gc_val = gc_count / all_count

                csvwriter.writerow([index, gc_val, big_percent])

In [None]:
def file_len(fname):
    """Return the number of newline characters in the file ``fname``.

    This is the same quantity ``wc -l`` reports, but computed in-process so
    it does not depend on an external ``wc`` binary being installed and does
    not spawn one process per file.

    Parameters:
        fname: path of the file to inspect.

    Returns:
        int: count of ``\\n`` bytes in the file (0 for an empty file).

    Raises:
        IOError (OSError): if the file cannot be opened or read.
    """
    newline_count = 0
    with open(fname, 'rb') as handle:
        # Read in fixed-size chunks so large CSVs are never loaded whole.
        for chunk in iter(lambda: handle.read(1 << 20), b''):
            newline_count += chunk.count(b'\n')
    return newline_count

folders = os.listdir(csv_folders)

# Every entry under the CSV root that is not hidden (no leading dot).
species_folders = [entry for entry in folders if entry[0] != '.']

# Per species folder:
#   files         -> list of its '.csv' file names
#   biggest_files -> largest (line count * window_size) among those files
#   sorted_files  -> the file names ordered by line count, longest first
files = {}
biggest_files = {}
sorted_files = {}
for folder in species_folders:
    species_dir = csv_folders + '/' + folder
    csv_files = [entry for entry in os.listdir(species_dir) if entry[-4:] == '.csv']
    line_counts = {entry: file_len(species_dir + '/' + entry) for entry in csv_files}

    files[folder] = csv_files
    # The leading 0 keeps the result at 0 when the folder has no CSV files.
    biggest_files[folder] = max([0] + [count * window_size for count in line_counts.values()])
    sorted_files[folder] = sorted(line_counts, key=line_counts.get, reverse=True)

In [None]:
def create_plot(fname):
    """Run ``gnuplot -p`` on the script file ``fname`` and wait for it to exit.

    stdout and stderr of the gnuplot process are captured rather than shown.

    Raises:
        IOError: if gnuplot exits with a non-zero status; the exception
            carries the captured stderr bytes.
    """
    completed = subprocess.run(['gnuplot', '-p', fname],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    if completed.returncode == 0:
        return
    raise IOError(completed.stderr)

# Build one gnuplot script per species folder and render it to a PNG.
plots_dir = 'plots_' + postfix

for folder in species_folders:
    species_dir = csv_folders + '/' + folder
    gnu_path = species_dir + '/' + folder + '_' + postfix + '.gnu'
    png_path = plots_dir + '/' + folder + '_' + postfix + '.png'

    # The range runs from 0 to the largest (line count * window_size) of the species.
    gen = GnuplotGenerator(folder, '[0:' + str(biggest_files[folder]) + ']')
    gen.add_palette("( 0 'green', 50 'orange', 100 'red')")
    # NOTE(review): the meaning of the 10000 and 0.1 arguments is defined by
    # GnuplotGenerator.set_term and is not visible here -- confirm.
    gen.set_term('png', png_path, 10000, 0.1)

    if not os.path.exists(plots_dir):
        os.makedirs(plots_dir)

    # Files are added in the order stored in sorted_files[folder].
    for file in sorted_files[folder]:
        gen.add_plot(species_dir + '/' + file, '1:2:3', file[:-4])

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.writelines(line + '\n' for line in gen.prepare_definition())

    create_plot(gnu_path)