In [2]:
# must go first
# Draw matplotlib figures inline in the notebook output
%matplotlib inline
# Ask the inline backend for 'retina' (high-resolution) figure output
%config InlineBackend.figure_format='retina'

# Reloads functions each time so you can edit a script
# and not need to restart the kernel.
# Mode 2 of the autoreload extension reloads all imported modules
# before each cell is executed.
%load_ext autoreload
%autoreload 2

# plotting
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns

import sys
import os
import datetime

# Seaborn theme. sns.set() re-applies seaborn's default context, style and
# palette, so the poster context and white style must be applied *after* it.
# (An earlier set_context call placed before sns.set() was overwritten by
# it and has been removed; the resulting settings are unchanged.)
sns.set()
sns.set_context('poster', font_scale=1.3)
sns.set_style("white")

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation warnings from the libraries imported here.
warnings.filterwarnings('ignore')

# basic wrangling
import pandas as pd
import numpy as np
import yaml
import json
import re

# eda tools
import missingno as msno
import collections
import itertools
from tqdm import tqdm
import nltk

# pandas display options: cap how many rows / columns are printed and how
# wide the printed representation may be (in characters)
pd.options.display.max_rows = 50
pd.options.display.max_columns = 75
pd.options.display.width = 1000

# Update matplotlib defaults to something nicer.
# The overrides are listed as (rcParam name, value) pairs and applied to the
# global rcParams in one call.
mpl_update = dict([
    # font and tick label sizes
    ('font.size', 16),
    ('xtick.labelsize', 14),
    ('ytick.labelsize', 14),
    # default figure size
    ('figure.figsize', [12.0, 8.0]),
    # axis labels and titles
    ('axes.labelsize', 20),
    ('axes.labelcolor', '#677385'),
    ('axes.titlesize', 20),
    # lines
    ('lines.color', '#0055A7'),
    ('lines.linewidth', 3),
    # text colour and typeface
    ('text.color', '#677385'),
    ('font.family', 'sans-serif'),
    ('font.sans-serif', 'Tahoma'),
])
mpl.rcParams.update(mpl_update)

In [3]:
# Locate the project root and the standard project sub-directories, and
# make the project importable.
# This is a lot at the top of your notebook but if you get the jupyter
# extension for collapsing headings, you can always have this and the
# imports collapsed


def find_project_root(start=os.curdir, max_nest=10):
    """Return the absolute path of the nearest directory holding ``src``.

    ``start`` is checked first, then up to ``max_nest`` of its ancestors,
    nearest first. A directory qualifies when it contains a sub-directory
    named ``src``.

    Parameters
    ----------
    start : str
        Directory to start searching from (default: current directory).
    max_nest : int
        Maximum number of levels to walk up from ``start``.

    Returns
    -------
    str
        Absolute path of the first qualifying directory, or the absolute
        path of ``start`` when none of the checked directories qualifies.
    """
    candidate = start
    # max_nest + 1 checks: `start` itself plus max_nest ancestors. A match
    # on the last ancestor is accepted rather than silently discarded.
    for _ in range(max_nest + 1):
        if os.path.isdir(os.path.join(candidate, "src")):
            return os.path.abspath(candidate)
        # Look one level further up the directory structure
        candidate = os.path.join(candidate, os.pardir)
    # If you don't find the src directory, the root directory is `start`
    return os.path.abspath(start)


def subdir_or_curdir(parent, name):
    """Return ``parent/name`` if it is an existing directory, else ``os.curdir``.

    Requiring a directory (not just any entry with that name) keeps a stray
    file called e.g. ``data`` from being treated as the data directory.
    """
    path = os.path.join(parent, name)
    return path if os.path.isdir(path) else os.curdir


max_nest = 10  # arbitrary, 3 would probably suffice
root_dir = find_project_root(os.curdir, max_nest)

# Add the root directory to be able to import from src, etc.
# Guarded so that re-running this cell does not append duplicate entries.
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Path of the source directory, or None when the project has none, so that
# the name is always defined.
# NOTE(review): only root_dir is put on sys.path; src_dir itself is not.
src_dir = (os.path.join(root_dir, "src")
           if os.path.isdir(os.path.join(root_dir, "src")) else None)

# If data or figures directory don't exist in project directory,
# they will be saved to this directory
data_dir = subdir_or_curdir(root_dir, "data")
external_data_dir = subdir_or_curdir(data_dir, "external")
figure_dir = subdir_or_curdir(root_dir, "figures")
models_dir = subdir_or_curdir(root_dir, "models")

# Prepends the directory path for specifying paths to data or figures
# dataplus("data.csv") -> "/Users/cmawer/project/data/data.csv"
# figplus("cool.png") -> "/Users/cmawer/project/figures/cool.png"
def dataplus(x):
    """Return the path of ``x`` inside ``data_dir``."""
    return os.path.join(data_dir, x)


def dataextplus(x):
    """Return the path of ``x`` inside ``external_data_dir``."""
    return os.path.join(external_data_dir, x)


def figplus(x):
    """Return the path of ``x`` inside ``figure_dir``."""
    return os.path.join(figure_dir, x)


def modelsplus(x):
    """Return the path of ``x`` inside ``models_dir``."""
    return os.path.join(models_dir, x)


# Prepends the date to a string (e.g. to save dated files)
# dateplus("cool-figure.png") -> "2018-12-05-cool-figure.png"
# `now` is computed once, when this cell runs, as a YYYY-MM-DD string.
now = datetime.datetime.now().strftime("%Y-%m-%d")


def dateplus(x):
    """Return ``x`` prefixed with the date string ``now`` and a hyphen."""
    return "%s-%s" % (now, x)

In [1]:
# Names of the selected feature columns. The list is assembled from smaller
# groups (grouped by shared name prefix); the overall order is significant
# and is kept exactly as listed.
selected_features = (
    # funding round count and founded_* columns
    ['funding_rounds', 'founded_month', 'founded_quarter', 'founded_year']
    # country_* columns
    + ['country_esp', 'country_ind', 'country_other', 'country_usa']
    # time-to-fund and time-between-rounds columns
    + ['days_to_fund', 'months_to_fund',
       'days_between_rounds', 'months_between_rounds']
    # round_type_* columns
    + ['round_type_debt_financing',
       'round_type_post_ipo_debt',
       'round_type_post_ipo_equity',
       'round_type_private_equity',
       'round_type_venture']
    # investor, acquisition and IPO columns
    + ['unique_investors', 'median_investor_value',
       'no_acquisitions', 'no_ipos']
    # market_type__* columns (some names contain spaces)
    + ['market_type__biotechnology',
       'market_type__clean technology',
       'market_type__enterprise software',
       'market_type__finance',
       'market_type__health and wellness',
       'market_type__hospitality',
       'market_type__internet',
       'market_type__mobile',
       'market_type__other']
)