In [1]:
import sys
import collections
from collections import Counter, defaultdict

import datetime
from datetime import datetime, timedelta, timezone
import dateutil.parser

import statistics
from statistics import median

import pandas as pd
import seaborn as sns
pd.options.display.max_rows = 999

import pydent
from pydent import AqSession, models
from pydent.models import Sample, Item, Plan

sys.path.append("ext-plan-pydent")
from plans import ExternalPlan

In [2]:
# Create the shared `session` object that the cells below use for all queries.
# NOTE(review): "production" presumably names a login profile configured in
# ext-plan-pydent — confirm against ExternalPlan.create_session.
session = ExternalPlan.create_session("production")

Logged in as Devin Strickland



In [None]:
def get_completed_jobs(operation_type_name):
    """Return the completed Jobs that ran Operations of the named type.

    Looks up the OperationType by name, collects its Operations whose status
    is 'done', follows their JobAssociations to Jobs, and keeps only the Jobs
    whose last ``state`` entry has ``operation == "complete"``.

    Every query goes through the notebook-level ``session``. The number of
    Operations found and the number of completed Jobs are printed.

    Args:
        operation_type_name: Name of the OperationType to look up.

    Returns:
        list: The matching Job records, each appearing at most once.

    Raises:
        ValueError: If the lookup by name yields no OperationType.
    """
    op_type = session.OperationType.find_by_name(operation_type_name)
    if op_type is None:
        # Fail with a readable message instead of AttributeError on `None.id`.
        raise ValueError(
            "No OperationType named {!r}".format(operation_type_name)
        )

    ops = session.Operation.where({"operation_type_id": op_type.id, 'status': 'done'})
    print("Found {} Operations of type {}".format(len(ops), operation_type_name))

    # Skip the follow-up queries when there is nothing to look up.
    op_ids = [op.id for op in ops]
    job_associations = (
        session.JobAssociation.where({"operation_id": op_ids}) if op_ids else []
    )

    # Several Operations can share one Job; drop repeated ids (order kept) so
    # the same Job is not requested or counted more than once.
    job_ids = list(dict.fromkeys(ja.job_id for ja in job_associations))
    jobs = session.Job.find(job_ids) if job_ids else []

    # A Job with no state entries cannot be "complete"; skip it rather than
    # failing on state[-1].
    jobs = [
        j for j in jobs
        if j.state and j.state[-1].get('operation') == "complete"
    ]
    print("Found {} completed Jobs of type {}".format(len(jobs), operation_type_name))

    return jobs

In [None]:
# get_completed_jobs returns a single list of Jobs, so bind it to one name.
# (Unpacking it into three names fails unless exactly three Jobs come back,
# and would then bind individual Jobs to the wrong names.)
jobs = get_completed_jobs("Challenge and Label")

# Parallel lists: one entry per job, in the same order as `jobs`.
times = []               # (state[2] time, state[-2] time) pairs
lengths = []             # number of entries in job.state
completeness = []        # job.is_complete (was appended to without being defined)
state_completeness = []  # 'operation' field of the last state entry

for job in jobs:
    # NOTE(review): state[2] and state[-2] are used as the start and end marks
    # of the job — confirm these are the intended entries.
    times.append((job.state[2]['time'], job.state[-2]['time']))
    lengths.append(len(job.state))
    completeness.append(job.is_complete)
    state_completeness.append(job.state[-1]['operation'])

In [None]:
# Duration of each job in minutes, in the same order as `times`.
deltas = []

for started_at, finished_at in times:
    start = dateutil.parser.parse(started_at)
    end = dateutil.parser.parse(finished_at)
    # total_seconds() covers the whole interval. timedelta.seconds is only the
    # sub-day remainder, so any job spanning 24 h or more was under-reported.
    delta = (end - start).total_seconds() / 60
    deltas.append(delta)

In [None]:
# Histogram of the job durations (minutes), without the density curve.
# NOTE(review): seaborn deprecated distplot in 0.11 in favour of histplot;
# migrate once the installed seaborn version is confirmed.
sns.distplot(deltas, kde=False)

# Median duration, considering only jobs shorter than 500 minutes.
print(median(d for d in deltas if d < 500))

# Disabled debugging aid: per-job (state length, is_complete, last state
# operation), sorted by state length.
# data = list(zip(lengths, completeness, state_completeness))
# data.sort(key=lambda x: x[0])
# for d in data:
#     print(d)

In [None]:
# Cut-off eight weeks before now (naive local time, as datetime.now() returns).
window = datetime.now() - timedelta(weeks=8)
print(window)  # print() applies str() itself

# Plans on the budget that were created after the cut-off.
# NOTE(review): `budget` is not assigned in any cell visible here, so this
# fails on a fresh kernel unless it is defined elsewhere — confirm.
plans = session.Plan.where(
    "budget_id = {} AND created_at > '{}'".format(budget.id, window)
)
print(len(plans))

In [None]:
# Column names of the progress-report table, in output order.
headers = [
    # What the work was
    "Challenge problem", "Use case/Experiment", "Description",
    # Delivered data volume
    "Data volume delivered to TA4 this month (TBs)",
    "Cumulative data volume delivered to TA4 (TB)",
    # Experiment details
    "Method Development Required?", "Strains", "Protocol Performed",
    "Date Started", "Date Finished",
    "# Sample", "Measurement",
    # Generated data and cost
    "Data Volume generated (GBs)", "Date data delivered to TA4",
    "Data format", "Cost",
]

In [None]:
# Spot-check a single job: print its status, then the first four and the last
# two entries of its state list.
job = session.Job.find(82029)
print(job.status)
for position in (0, 1, 2, 3, -2, -1):
    print(job.state[position])

In [None]:
# Index the plans by id; each entry carries the plan and its operations.
plan_data = {
    plan.id: {"plan": plan, "ops": plan.operations}
    for plan in plans
}

In [None]:
# Lookup table from operation type id to the OperationType record.
all_operation_types = {
    op_type.id: op_type for op_type in session.OperationType.all()
}

In [None]:
# One dict per plan that falls inside the reporting month; keys are report
# column names.
table_data = []

# Reporting window: [2018-10-01, 2018-11-01) at a fixed UTC-07:00 offset.
report_tz = timezone(-timedelta(hours=7))
month_start = datetime(2018, 10, 1, tzinfo=report_tz)
month_end = datetime(2018, 11, 1, tzinfo=report_tz)

for entry in plan_data.values():
    # A plan without operations has no dates; min()/max() would raise on it.
    if not entry["ops"]:
        continue

    # Parse first so min/max order the timestamps chronologically instead of
    # comparing the raw strings character by character.
    dates = [dateutil.parser.parse(op.updated_at) for op in entry["ops"]]
    start = min(dates)
    end = max(dates)

    # Keep only plans whose operations all fall inside the window.
    if start < month_start or end >= month_end:
        continue

    row = {}
    row["Description"] = entry["plan"].name

    row["Date Started"] = start.strftime("%x")
    row["Date Finished"] = end.strftime("%x")

    # Cache the estimate on the plan_data entry so re-running this cell does
    # not call estimate_cost() again for the same plan.
    if not entry.get("cost"):
        entry["cost"] = entry["plan"].estimate_cost()

    row["Cost"] = entry["cost"]

    op_types = {
        all_operation_types[op.operation_type_id].name for op in entry["ops"]
    }

    # Classify the plan by marker operation types. Plans matching none of
    # them get no "Challenge problem" value.
    if op_types & {"Challenge and Label", "Treat With Zymolyase"}:
        row["Challenge problem"] = "Protein Design"
    elif "4. Measure OD and GFP" in op_types:
        row["Challenge problem"] = "YeastSTATES"

    table_data.append(row)

In [None]:
# Assemble the report table; `columns=headers` fixes the column order, and
# header names that no row supplies become empty (NaN) columns.
df = pd.DataFrame(table_data, columns=headers)

In [None]:
# Show the report table as this cell's output.
df

In [None]:
# Write the report to progress_report.csv (relative path), omitting the
# DataFrame index column.
df.to_csv("progress_report.csv", index=False)