In [4]:
import csv
import numpy as np

In [5]:
def intersect(a, b):
    r = [val for val in a if val in b]
    return r

def revenue_transform(range):
    """Return a revenue that is in the middle of the specified range as an integer value"""
    if range == 'Less than $500K':
        return 250000
    elif range == '$500K - $1M':
        return 750000
    elif range == '$1M - $5M':
        return 3500000
    elif range == '$5M - $10M':
        return 7500000
    elif range == '$10M - $25M':
        return 17500000
    elif range == '$25M - $50M':
        return 37500000
    elif range == '$50M - $100M':
        return 75000000
    elif range == '$100M - $250M':
        return 174000000
    elif range == '$250M - $500M':
        return 375000000
    elif range == '$500M - $1B':
        return 750000000
    elif range == '$1B - $5B':
        return 3750000000
    elif range == 'Greater than $5B':
        return 7500000000
    else:
        return 0

In [6]:
class Company: 
    
    def __init__(self, valuation, name, growth_score, mindshare_score, employee_count, employee_range, uniques, months_since_last_funding, founded, stage, investors, total_funding, last_funding_amount, location, revenue_range, business_model, industries):
        self.valuation = valuation
        self.name = name
        self.growth_score = growth_score 
        self.mindshare_score = mindshare_score 
        self.employee_count = employee_count 
        self.employee_range = employee_range 
        self.uniques = uniques
        self.months_since_last_funding = months_since_last_funding
        self.founded = founded
        self.stage = stage 
        self.investors = investors
        self.num_investors = len(investors) 
        self.total_funding = total_funding 
        self.last_funding_amount = last_funding_amount 
        self.location = location 
        self.revenue_range = revenue_range 
        self.business_model = business_model
        self.industries = industries
        
    def compare_to(self, company):
        """Return a value, where the higher the value, the more similar the two companies."""
        comparison = 0
        if abs(self.growth_score-company.growth_score) < 200:
            comparison += 1
        if abs(self.mindshare_score-company.mindshare_score) < 200:
            comparison += 1
        if abs(self.employee_range == self.employee_range):
            comparison += 1
        if abs(self.uniques-company.uniques) < 200:
            comparison += 5
        if self.months_since_last_funding == company.months_since_last_funding:
            comparison += 1
        if self.founded == company.founded:
            comparison += 1
        if self.stage == company.stage:
            comparison += 10
        if self.num_investors == company.num_investors:
            comparison += 1
        if abs(self.total_funding-company.total_funding) < 50000000:
            comparison += 1
        if self.location == company.location:
            comparison += 1
        if self.revenue_range == company.revenue_range:
            comparison += 10
        if self.business_model == company.business_model:
            comparison += 1
        comparison += len(intersect(self.industries, company.industries))
        return comparison
    
    def get_valuation(self):
        return self.valuation

    def generate_valuation(self, company):
        """Return a weighted average valuation estimate based on several different ratios"""
        num_estimates = 0
        total = 0
        #estimate based on revenue range
        if revenue_transform(self.revenue_range) != 0 and revenue_transform(company.revenue_range) != 0:
            num_estimates += 5 #weighting this value 5 times as much as the others, since it is the most important ratio
            revenue_v = (self.valuation/revenue_transform(self.revenue_range)) * revenue_transform(company.revenue_range)
            revenue_v *= 5
            total += revenue_v
        #estimate based on number of employees
        if self.employee_count != 0 and company.employee_count != 0:
            num_estimates += 2
            revenue_e = (self.valuation/self.employee_count) * company.employee_count
            revenue_e *= 2
            total += revenue_e
        #estimate based on growth score
        if self.growth_score != 0 and company.growth_score != 0:
            num_estimates += 1
            revenue_g = (self.valuation/self.growth_score) * company.growth_score
            total += revenue_g
        #estimate based on uniques
        if self.uniques != 0 and company.uniques != 0:
            num_estimates += 1
            revenue_u = (self.valuation/self.uniques) * company.uniques
            total += revenue_u
        #estimate based on total funding
        if self.total_funding != 0 and company.total_funding != 0:
            num_estimates += 1
            revenue_f = (self.valuation/self.total_funding) * company.total_funding
            total += revenue_f
        if num_estimates > 0:
            return round(total/num_estimates)
        return 0

In [7]:
f = open('InternData.csv', encoding="ISO-8859-1")

def get_arguments(row):
    """Return a list that parses a row of the csv, to contain the necessary arguments to create a company object"""
    
    if row[0] != '':
        valuation = int(row[0])
    else:
        valuation = 0
        
    name = row[1]
    
    if row[4] != '':
        growth_score = int(row[4])
    else:
        growth_score = 0
        
    if row[5] != '':
        mindshare_score = int(row[5])
    else:
        mindshare_score = 0
    
    if row[6] != '':
        employee_num = int(row[6])
    else: 
        employee_num = 0
        
    employee_range = 0
    if employee_num < 50:
        employee_range = 1
    elif employee_num < 200:
        employee_range = 2
    elif employee_num < 500:
        employee_range = 3
    else:
        employee_range = 4
        
    if row[7] != '':
        uniques = int(row[7])
    else:
        uniques = 0
        
    if row[8] != '':
        months_since_last_funding = int(row[8])
    else:
        months_since_last_funding = 0
    
    if row[9] != '':
        founded = int(row[9])
    else:
        founded = 0
    
    stage = row[10]
    investors = row[11].split('|')
    
    
    if row[12] != '':
        total_funding = int(row[12])
    else:
        total_funding = 0
    
    if row[14] != '':
        last_funding_amount = int(row[14])
    else:
        last_funding_amount = 0
        
    location = row[15]
    revenue_range = row[18]
    business_model = row[19]
    industries = row[20].split('|') 
    arguments = [valuation, name, growth_score, mindshare_score, employee_num, employee_range, uniques, months_since_last_funding, founded, stage, investors, total_funding, last_funding_amount, location, revenue_range, business_model, industries]
    return arguments
    
intern_data = csv.reader(f)
existing_valuations = {} #A dictionary where the key is the name, and the value is the company object
predicted_valuations = {}

#Loading the company data into company objects
row_num = 0
for row in intern_data:
    if row_num > 0 and row_num < 20:
        existing_valuations[row[1]] = Company(*get_arguments(row))
    elif row_num > 0:
        predicted_valuations[row[1]] = Company(*get_arguments(row))
    row_num += 1

In [8]:
#Generate company valuations based on the company that is most comparable
for key in predicted_valuations:
    
    compare_max = 0
    chosen_company = ''
    
    #finding the most comparable company based on the compare_to method
    for company in existing_valuations:
        comparison = existing_valuations[company].compare_to(predicted_valuations[key])
        if comparison >= compare_max:
            compare_max = comparison
            chosen_company = company
    
    predicted_valuations[key].valuation = existing_valuations[chosen_company].generate_valuation(predicted_valuations[key])

In [9]:
f = open('InternData.csv', encoding="ISO-8859-1")
intern_data = csv.reader(f)

with open('InternDataUpdated.csv', 'w') as csvfile:
    valuations = csv.writer(csvfile)
    num_row = 0
    for row in intern_data:
        if num_row < 20:
            valuations.writerow(row)
        else:
            row[0] = str(predicted_valuations[row[1]].get_valuation())
            valuations.writerow(row)
        num_row += 1

In [10]:
def get_valuation(company_name):
    """Takes in a string of a company name, and returns its valuation in MM"""
    if company_name in existing_valuations:
        return '$' + str(existing_valuations[company_name].get_valuation()) + 'MM'
    elif company_name in predicted_valuations:
        return '$' + str(predicted_valuations[company_name].get_valuation()) + 'MM'
    else:
        return None

In [11]:
#example
get_valuation('Ripple')

'$13936MM'