In [16]:
require 'csv'
require 'distribution'

true

In [7]:
require 'csv'

class SeedStock
  attr_accessor :seed, :gene_id, :last_planted, :storage, :grams_remaining

  def initialize(seed, gene_id, last_planted, storage, grams_remaining)
    @seed = seed
    @gene_id = gene_id
    @last_planted = last_planted
    @storage = storage
    @grams_remaining = grams_remaining.to_i
  end

  # Class method to load data from a file and return an array of SeedStock objects
  def self.load_from_file(file_path)
    seed_stocks = []
    CSV.foreach(file_path, col_sep: "\t", headers: true) do |row|
      seed_stock = SeedStock.new(row['Seed_Stock'], row['Mutant_Gene_ID'], row['Last_Planted'], row['Storage'], row['Grams_Remaining'])
      seed_stocks << seed_stock
    end
    seed_stocks
  end

  # Simulate planting 7 grams of seeds for all SeedStock objects
  def self.planting_simulation(seed_stocks, amount)
    seed_stocks.each do |seed_stock|
      seed_stock.planting_simulation(amount)
    end
  end

  # Write the database to a new file
  def self.write_database(seed_stocks, output_file)
    CSV.open(output_file, 'w', col_sep: "\t") do |csv|
      # Write the header row
      csv << ['Seed_Stock', 'Mutant_Gene_ID', 'Last_Planted', 'Storage', 'Grams_Remaining']
      # Write the data for each SeedStock object to the file
      seed_stocks.each { |seed_stock| csv << seed_stock.to_csv }
    end
    puts "Data saved to #{output_file}"
  end

  def planting_simulation(amount)
    @grams_remaining -= amount
    @grams_remaining = [0, @grams_remaining].max
    @last_planted = Time.now.strftime('%m/%d/%Y')  # Update the last_planted date to today  
    puts "WARNING: we have run out of Seed Stock #{@seed}" if @grams_remaining == 0
  end

  def to_csv
    [@seed, @gene_id, @last_planted, @storage, @grams_remaining]
  end
end

:to_csv

In [8]:
class Gene
  attr_accessor :gene_id, :gene_name, :mutant_phenotype

  def initialize(gene_id, gene_name, mutant_phenotype)
    if valid_gene_id?(gene_id)
      @gene_id = gene_id
      @gene_name = gene_name
      @mutant_phenotype = mutant_phenotype
    else
      puts "Error: Invalid gene ID format"
    end
  end

  def self.build_gene_id_to_gene_name_mapping
    gene_info_data = CSV.read('StockDatabaseDataFiles/gene_information.tsv', col_sep: "\t", headers: true)
    gene_id_to_gene_name = {}
    gene_info_data.each do |row|
      gene_id = row['Gene_ID']
      gene_name = row['Gene_name']
      gene_id_to_gene_name[gene_id] = gene_name
    end
    gene_id_to_gene_name
  end

  private

  def valid_gene_id?(gene_id)
    gene_id.match(/A[Tt]\d[Gg]\d\d\d\d\d/)
  end
end


:valid_gene_id?

In [12]:
require './HybridCross_object.rb'

false

In [18]:
# Check if the correct number of arguments is provided
unless ARGV.length = 4
  puts "Usage: ruby process_database_Jacob.rb gene_information.tsv seed_stock_data.tsv cross_data.tsv new_stock_file.tsv"
  abort
end

# Assign the input file names to variables
gene_information_file, seed_stock_data_file, cross_data_file, output_file = ARGV


NoMethodError: undefined method `length=' for ["/home/osboxes/.local/share/jupyter/runtime/kernel-654ade5c-3b42-4193-8e14-122bcc9b62a1.json"]:Array

In [13]:
# Create an array of SeedStock objects by loading data from the file
my_seed_stock = SeedStock.load_from_file('StockDatabaseDataFiles/seed_stock_data.tsv')

# Simulate planting 7 grams of seeds for all SeedStock objects
SeedStock.planting_simulation(my_seed_stock, 7)

# Specify the path to the output TSV file
output_file = 'mynew_stock_file.tsv'

# Use the class method to write the data to the output file
SeedStock.write_database(my_seed_stock, output_file)



Data saved to mynew_stock_file.tsv


In [17]:
# Read data from seed_stock.tsv AND link with gene names
seed_stock_data = CSV.read('StockDatabaseDataFiles/seed_stock_data.tsv', col_sep: "\t", headers: true)
seed_stocks = seed_stock_data.map do |row|
  gene_id = row['Mutant_Gene_ID']
  seed_stock = SeedStock.new(row['Seed_Stock'], gene_id, row['Last_Planted'], row['Storage'], row['Grams_Remaining'])
  seed_stock
end

# Read data from cross_data.tsv
cross_data = CSV.read('StockDatabaseDataFiles/cross_data.tsv', col_sep: "\t", headers: true)

# Create an array to store instances of the HybridCross class
hybrid_crosses = []

# Get the gene_id to gene_name mapping using the class method
gene_id_to_gene_name = Gene.build_gene_id_to_gene_name_mapping

# Iterate through the CSV data and create instances of HybridCross
cross_data.each do |row|
  hybrid_cross = HybridCross.new(
    row['Parent1'],
    row['Parent2'],
    row['F2_Wild'],
    row['F2_P1'],
    row['F2_P2'],
    row['F2_P1P2']
  )
  hybrid_cross.link_to_seed_stocks_and_genes_names(seed_stocks, gene_id_to_gene_name)
  chi_squared = hybrid_cross.chi_squared_test
  hybrid_cross.chi_squared = chi_squared
  hybrid_crosses << hybrid_cross
end

# Link gene names to hybrid crosses
statistically_significant_genes = hybrid_crosses.select { |gene| gene.statistically_significant }

puts "Recording:"
statistically_significant_genes.each do |gene|
  parent1_gene_name = gene.gene_name_parent1
  parent2_gene_name = gene.gene_name_parent2
  chi_squared = gene.chi_squared
  puts "#{parent1_gene_name} is linked to #{parent2_gene_name} with chi-squared score #{format('%.7f', chi_squared)}"
end

puts
puts "Final Report:"

statistically_significant_genes.each do |gene|
  parent1_gene_name = gene.gene_name_parent1
  parent2_gene_name = gene.gene_name_parent2
  puts "#{parent1_gene_name} is linked to #{parent2_gene_name}"
  puts "#{parent2_gene_name} is linked to #{parent1_gene_name}"
end

Recording:
ufo is linked to pi with chi-squared score 32.2794279

Final Report:
ufo is linked to pi
pi is linked to ufo


[#<HybridCross:0x0000557ebaea5d10 @parent1="A51", @parent2="B52", @f2_wild=152, @f2_p1=26, @f2_p2=22, @f2_p1p2=2, @seed_stock_parent1=#<#<Class:0x0000557ebb903730>::SeedStock:0x0000557ebaf33c00 @seed="A51", @gene_id="AT1G30950", @last_planted="9/2/2013", @storage="cama25", @grams_remaining=5>, @seed_stock_parent2=#<#<Class:0x0000557ebb903730>::SeedStock:0x0000557ebaf33390 @seed="B52", @gene_id="AT5G20240", @last_planted="8/11/2012", @storage="cama16", @grams_remaining=7>, @chi_squared=32.27942794279428, @statistically_significant=true, @gene_name_parent1="ufo", @gene_name_parent2="pi">]