# Analysis of intervals between startup milestones

This notebook provides an analysis of the time interval between various milestones in the development of our startups. In particular, it assesses the time between GRA investments (Phase I, II, and III) and how that might relate to participation in I-Corps.

The input data in the notebook was extracted from Salesforce.


In [43]:
%matplotlib inline
import pandas
import numpy as np
import matplotlib
import datetime

In [44]:
# These are the ISO-8601 formatted date fields in the input file; we'll want Pandas to parse these as dates.
date_fields = [
    'first_touch_date',
    'incorporated_date',
    'icorps_end_date',
    'gra_1_inv_date',
    'gra_2_inv_date',
    'gra_3_inv_date',
    'sbir_inv_date',
]

In [45]:
def ndays(d):
    return None if pandas.isnull(d) else d.days

In [46]:
# Date when the I-Corps program got underway; we distinguish between teams before and after this date.
icorps_start_date = datetime.date(2012,1,1)

In [47]:
with open('data_in/vl-intervals.csv') as csv:
    df = pandas.read_csv(csv, parse_dates=date_fields, infer_datetime_format=True)

In [48]:
# Establish the interval columns
df['touch_to_gra_1_days'] = map(ndays, df['gra_1_inv_date'] - df['first_touch_date'])
df['icorps_to_gra_1_days'] = map(ndays, df['gra_1_inv_date'] - df['icorps_end_date'])
df['gra_1_to_gra_2_days'] = map(ndays, df['gra_2_inv_date'] - df['gra_1_inv_date'])
df['gra_2_to_gra_3_days'] = map(ndays, df['gra_3_inv_date'] - df['gra_2_inv_date'])

In [49]:
df

Unnamed: 0,name,origin,first_touch_date,incorporated_date,icorps_inv_date,icorps_end_date,gra_1_inv_date,gra_2_inv_date,gra_3_inv_date,sbir_inv_date,total_investments,touch_to_gra_1_days,icorps_to_gra_1_days,gra_1_to_gra_2_days,gra_2_to_gra_3_days
0,GTronix,School of Electrical and Computer Engineering,2004-07-01,NaT,,NaT,2005-01-01,NaT,NaT,NaT,36290000.0,184.0,,,
1,Singhose - Controll,School of Mechanical Engineering,2016-03-14,NaT,,2016-05-20,NaT,NaT,NaT,NaT,50000.0,,,,
2,ChemCore Therapeutics,Department of Biomedical Engineering,2010-08-16,NaT,,NaT,2010-09-13,NaT,NaT,NaT,25000.0,28.0,,,
3,Orthonics,Department of Biomedical Engineering,2005-01-01,NaT,,NaT,NaT,NaT,NaT,NaT,240000.0,,,,
4,BISmark,School of Computer Science,2012-06-04,NaT,2012-07-01,2012-12-31,NaT,NaT,NaT,NaT,50000.0,,,,
5,GaN bipolar switch,School of Electrical and Computer Engineering,2013-01-29,NaT,,NaT,NaT,NaT,NaT,NaT,25000.0,,,,
6,CameRad Technologies,School of Electrical and Computer Engineering,2014-10-01,2016-02-26,2014-10-01,2015-03-31,NaT,NaT,NaT,NaT,50000.0,,,,
7,Coreopsys Software Labs,College of Computing,2006-12-01,NaT,,NaT,2006-12-01,NaT,NaT,NaT,200000.0,0.0,,,
8,LiquidText,College of Computing,2009-09-22,NaT,,NaT,2009-10-26,NaT,NaT,NaT,43000.0,34.0,,,
9,Social Gaze,School of Interactive Computing,2015-11-03,NaT,,2016-03-04,NaT,NaT,NaT,NaT,50000.0,,,,


In [50]:
def export_as_csv(df, fn):
    with open('data_out/' + fn, 'w') as outcsv:
        df.to_csv(outcsv)

In [51]:
# Slice the subsets -- teams before I-Corps was founded, post-founding teams 
# that did not participate, and teams that did participate.
pre_icorps = df[df.first_touch_date > icorps_start_date]
no_icorps = df[(df.first_touch_date <= icorps_start_date) & (pandas.isnull(df.icorps_end_date))]
did_icorps = df[pandas.notnull(df.icorps_end_date)]

In [52]:
# How many of each are there?
len(pre_icorps), len(no_icorps), len(did_icorps)

(91, 136, 43)

In [53]:
# Save the whole file plus the subsets.
export_as_csv(df, 'all_startups.csv')
export_as_csv(pre_icorps, 'pre_icorps.csv')
export_as_csv(no_icorps, 'no_icorps.csv')
export_as_csv(did_icorps, 'did_icorps.csv')

In [54]:
# Define some convenient functions to give us the intervals we're interested in for each subset.
def avg_days(s):
    return round(s.mean(), 0)
def day_metrics(df):
    return avg_days(df.touch_to_gra_1_days), \
            avg_days(df.icorps_to_gra_1_days), \
            avg_days(df.gra_1_to_gra_2_days), \
            avg_days(df.gra_2_to_gra_3_days)

In [55]:
# averages: from first touch to GRA 1, from end of I-Corps to GR1,from GRA 1 to GRA 2, from GRA 2 to GRA 3

In [56]:
day_metrics(pre_icorps)

(180.0, 52.0, 529.0, 358.0)

In [57]:
day_metrics(no_icorps)

(118.0, nan, 490.0, 559.0)

In [58]:
day_metrics(did_icorps)

(280.0, -151.0, 804.0, nan)