## JOIN

In [1]:
# Import Libraries
#초기 설정및 시스템 라이브러리
import platform
import warnings

# 데이터 시각화 라이브러리
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
print(platform.system())
warnings.filterwarnings('ignore')

# 행,열,결과값 생략 없이 보기,세팅
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', None)
%matplotlib inline

# 시각화 OS별 한글폰트 설정
if platform.system() == 'Windows':
    plt.rcParams['font.family'] = 'Malgun Gothic'  # Windows 폰트 설정
elif platform.system() == 'Mac':
    plt.rcParams['font.family'] = 'AppleGothic'  # Mac 폰트 설정
    
print("="*60)
print("라이브러리 로드 완료!")
print("한글 폰트 설정 완료!")
print("="*60)

Windows
라이브러리 로드 완료!
한글 폰트 설정 완료!


In [2]:
# Load Dataset
import pandas as pd

df_obj   = pd.read_csv("./data/clean/clean_objects_final.csv")         # objects
df_fr    = pd.read_csv("./data/clean/clean_fr_final.csv")              # funding_rounds
df_rel   = pd.read_csv("./data/clean/clean_relationships_final.csv")   # relationships
df_off   = pd.read_csv("./data/clean/clean_offices_final.csv")         # offices
df_mile  = pd.read_csv("./data/clean/clean_milestones_final.csv")      # milestones
df_acq   = pd.read_csv("./data/clean/clean_acquisitions_final.csv")    # acquisitions
df_ipos  = pd.read_csv("./data/clean/clean_ipos_final.csv")            # ipos
df_people= pd.read_csv("./data/clean/clean_people_final.csv")          # people

In [3]:
display(df_rel["title"].value_counts())

title
CEO                                                                                 20444
Founder                                                                             14389
Co-Founder                                                                           9851
Board Member                                                                         8583
CTO                                                                                  7875
                                                                                    ...  
Director of IP and Licensing - Broadband Business (General Instrument)                  1
Senior Design and Project Engineer                                                      1
Director, Strategy & Business Development, Mobile Communications Innovation Team        1
Vice President of Sales - NextWave Wireless                                             1
Senior Director Strategic Alliances                                                     1
Name

In [4]:
print(df_obj.shape)
print(df_fr.shape)
print(df_off.shape)
print(df_mile.shape)
print(df_mile.shape)
print(df_acq.shape)
print(df_ipos.shape)
print(df_people.shape)

(462620, 45)
(52928, 17)
(112718, 11)
(39456, 5)
(39456, 5)
(9562, 11)
(1259, 16)
(226709, 6)


### 스타트업 성장률(산업)
* objects ↔ funding_rounds

In [5]:
# objects + funding_rounds
obj_fr = df_obj.merge(
    df_fr,
    left_on="objects_cfpr_id",
    right_on="fr_c_id",
    how="left"
)

# 산업별 라운드 수만 먼저 보기
rounds_by_industry = (
    obj_fr
    .groupby("category_code")["funding_round_id"]
    .nunique()
    .reset_index(name="n_rounds")
)

In [6]:
# 내용 확인
print(obj_fr.columns)
display(obj_fr.head())

Index(['objects_cfpr_id', 'entity_type', 'parent_c_id', 'normalized_name',
       'category_code', 'status', 'founded_at', 'closed_at', 'description',
       'overview', 'tag_list', 'country_code', 'state_code', 'city', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_category_filled',
       'is_obj_funding_total_usd_private', 'is_obj_funding_rounds_private',
     

Unnamed: 0,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,last_funding_at,funding_rounds,funding_total_usd,first_milestone_at,last_milestone_at,...,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,funding_round_id,fr_c_id,funded_at,funding_round_type,funding_round_code,raised_amount_usd,pre_money_valuation_usd,post_money_valuation_usd,participants,is_first_round,is_last_round,funded_year,funded_quarter,cat_fr_type,num_fr_type,log_participants,is_fr_raised_private
0,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,888.0,c:1,2005-10-01,series-a,a,5250000.0,,,2.0,0.0,1.0,2005.0,2005Q4,series-a,1.0,1.098612,0.0
1,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,889.0,c:1,2007-01-01,series-b,b,9500000.0,,,3.0,0.0,0.0,2007.0,2007Q1,series-b,2.0,1.386294,0.0
2,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,2312.0,c:1,2008-05-19,series-c+,c,25000000.0,,,4.0,1.0,0.0,2008.0,2008Q2,series-c+,3.0,1.609438,0.0
3,c:10,Company,,flektor,games_video,acquired,,,,"Flektor is a rich-media mash-up platform that enables consumers to create, remix and share photos and videos on the internet without the need for advanced video-editing skills or software.\n\nFox Interactive Media, a division of News Corporation, announced that it had completed the purchase of Flektor on May 30, 2007. The estimated puchase price is $15-20 million.","flektor, photo, video",USA,CA,Culver City,Los Angeles,,,0,0,,,,,,,...,Software & Technology,los angeles,los angeles,CA,culver city,games_video,1,1,,,,,,,,,,,,,,,,,
4,c:100,Company,,there,games_video,acquired,,,,"There.com is an online virtual world where anyone can explore, meet friends and play games. It was founded in 1998 by Will Harvey, a Stanford computer science Ph.D. and game developer, and Jeffrey Ventrella, an expert on artificial life from MIT's Media Lab. The duo raised approximately $37 million - including $20 million from employees, $11 million from angel investors and $6 million from Sutter Hill Ventures. In 2005 the company was spun off under Makena Technologies, and in March 2010 There closed to the public. In May 2011, There announced it would reopen as a 18+ Cloud-based service. As of Nov 2013, There is open.\n\nThere.com is a subscription service with a monthly fee of $10.00. Additional in-game accessories can be purchased for separate fees.\n\nOther online virtual worlds include [Kaneva](http://www.crunchbase.com/company/kaneva), [Second Life](http://www.crunchbase.com/company/secondlife) and [Cyworld](http://www.crunchbase.com/company/cyworld).","virtualworld, there, teens",USA,CA,San Mateo,SF Bay,,,0,0,,,,,2003-02-01,2011-09-23,...,Software & Technology,sf bay area,sf bay area,CA,san mateo,games_video,1,1,,,,,,,,,,,,,,,,,


In [7]:
print(df_obj.shape)
print(df_fr.shape)
print("="*60)
print(obj_fr.shape) #2만행

(462620, 45)
(52928, 17)
(483539, 62)


In [8]:
obj_fr.to_csv("./data/join/growth_industry_final.csv", encoding="utf-8", index=False)
print("="*60)
print("성장률(산업) growth_industry csv 추출 완료!")
print("="*60)

성장률(산업) growth_industry csv 추출 완료!


### 스타트업 성장률(규모)
* objects ↔ relationships ↔ funding_rounds

In [9]:
# objects + relationships
obj_rel = df_obj.merge(
    df_rel,
    left_on="objects_cfpr_id",
    right_on="rel_cf_id",
    how="left"
)

# 거기에 funding_rounds 붙이기
obj_rel_fr = obj_rel.merge(
    df_fr,
    left_on="objects_cfpr_id", 
    right_on="fr_c_id",
    how="left"
)

# 회사별 인원 수 + 라운드 수
company_size_rounds2 = (
    obj_rel_fr
    .groupby("objects_cfpr_id")
    .agg(
        n_people=("rel_p_id", "nunique"), # n_people 구간(소/중/대) 나눠서 성장률 비교 → “어떤 규모의 스타트업이 더 성장 잘하는지” 볼 수 있을 것!
        n_rounds=("funding_round_id", "nunique")
    )
    .reset_index()
)

In [10]:
# 내용 확인
print(obj_rel_fr.columns)
display(obj_rel_fr.head())

Index(['objects_cfpr_id', 'entity_type', 'parent_c_id', 'normalized_name',
       'category_code', 'status', 'founded_at', 'closed_at', 'description',
       'overview', 'tag_list', 'country_code', 'state_code', 'city', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_category_filled',
       'is_obj_funding_total_usd_private', 'is_obj_funding_rounds_private',
     

Unnamed: 0,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,last_funding_at,funding_rounds,funding_total_usd,first_milestone_at,last_milestone_at,...,rel_cf_id,start_at,end_at,is_past,sequence,title,has_end_date,cat_rel_title,funding_round_id,fr_c_id,funded_at,funding_round_type,funding_round_code,raised_amount_usd,pre_money_valuation_usd,post_money_valuation_usd,participants,is_first_round,is_last_round,funded_year,funded_quarter,cat_fr_type,num_fr_type,log_participants,is_fr_raised_private
0,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,0.0,8.0,Co-Founder/CEO/Board of Directors,0.0,Founder,888.0,c:1,2005-10-01,series-a,a,5250000.0,,,2.0,0.0,1.0,2005.0,2005Q4,series-a,1.0,1.098612,0.0
1,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,0.0,8.0,Co-Founder/CEO/Board of Directors,0.0,Founder,889.0,c:1,2007-01-01,series-b,b,9500000.0,,,3.0,0.0,0.0,2007.0,2007Q1,series-b,2.0,1.386294,0.0
2,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,0.0,8.0,Co-Founder/CEO/Board of Directors,0.0,Founder,2312.0,c:1,2008-05-19,series-c+,c,25000000.0,,,4.0,1.0,0.0,2008.0,2008Q2,series-c+,3.0,1.609438,0.0
3,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,1.0,279242.0,VP Marketing,0.0,VP,888.0,c:1,2005-10-01,series-a,a,5250000.0,,,2.0,0.0,1.0,2005.0,2005Q4,series-a,1.0,1.098612,0.0
4,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,1.0,279242.0,VP Marketing,0.0,VP,889.0,c:1,2007-01-01,series-b,b,9500000.0,,,3.0,0.0,0.0,2007.0,2007Q1,series-b,2.0,1.386294,0.0


In [11]:
print(df_obj.shape)
print(df_rel.shape)
print(df_fr.shape)
print("="*60)
print(obj_rel.shape)
print(obj_rel_fr.shape) #44만행

(462620, 45)
(402412, 10)
(52928, 17)
(726575, 55)
(909183, 72)


In [12]:
obj_rel_fr.to_csv("./data/join/growth_size_final.csv", encoding="utf-8", index=False)
print("="*60)
print("성장률(규모) growth_size csv 추출 완료!")
print("="*60)

성장률(규모) growth_size csv 추출 완료!


### 스타트업 성장률(사무실 위치)
* offices ↔ objects ↔ funding_rounds

In [13]:
# offices + objects
off_obj = df_off.merge(
    df_obj,
    left_on="offices_c_id",
    right_on="objects_cfpr_id",
    how="left"
)

# funding_rounds까지 연결
off_obj_fr = off_obj.merge(
    df_fr,
    left_on="offices_c_id",
    right_on="fr_c_id",
    how="left"
)

# 나라별 라운드 수
rounds_by_country = (
    off_obj_fr
    .groupby("country_code_x")["funding_round_id"]
    .nunique()
    .reset_index(name="n_rounds")
)

In [14]:
# 내용 확인
print(off_obj_fr.columns)
display(off_obj_fr.head())

Index(['offices_c_id', 'office_id', 'description_x', 'city_x', 'state_code_x',
       'country_code_x', 'latitude', 'longitude', 'offices_description_fixed',
       'cat_offices_description', 'offices_state_filled', 'objects_cfpr_id',
       'entity_type', 'parent_c_id', 'normalized_name', 'category_code',
       'status', 'founded_at', 'closed_at', 'description_y', 'overview',
       'tag_list', 'country_code_y', 'state_code_y', 'city_y', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overvie

Unnamed: 0,offices_c_id,office_id,description_x,city_x,state_code_x,country_code_x,latitude,longitude,offices_description_fixed,cat_offices_description,offices_state_filled,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description_y,overview,tag_list,country_code_y,state_code_y,city_y,...,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,funding_round_id,fr_c_id,funded_at,funding_round_type,funding_round_code,raised_amount_usd,pre_money_valuation_usd,post_money_valuation_usd,participants,is_first_round,is_last_round,funded_year,funded_quarter,cat_fr_type,num_fr_type,log_participants,is_fr_raised_private
0,c:1,1,,Seattle,WA,USA,47.603122,-122.333253,no office info,No Office Info,WA,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,888.0,c:1,2005-10-01,series-a,a,5250000.0,,,2.0,0.0,1.0,2005.0,2005Q4,series-a,1.0,1.098612,0.0
1,c:1,1,,Seattle,WA,USA,47.603122,-122.333253,no office info,No Office Info,WA,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,889.0,c:1,2007-01-01,series-b,b,9500000.0,,,3.0,0.0,0.0,2007.0,2007Q1,series-b,2.0,1.386294,0.0
2,c:1,1,,Seattle,WA,USA,47.603122,-122.333253,no office info,No Office Info,WA,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,2312.0,c:1,2008-05-19,series-c+,c,25000000.0,,,4.0,1.0,0.0,2008.0,2008Q2,series-c+,3.0,1.609438,0.0
3,c:3,3,Headquarters,Pleasanton,CA,USA,37.692934,-121.904945,headquarter,Headquarters,CA,c:3,Company,,zoho,software,operating,2005-09-15,,Online Business Apps Suite,"Zoho offers a suite of Business, Collaboration & Productivity applications. Apps include CRM, Customer Support, Office Suite, Email Hosting, Project Management, Accounting, App Creator among many others. \n\nZoho's parent company is Zoho Corp (previously AdventNet), ""a software company started in 1996 focusing on building quality and affordable software for businesses."" \n\nZoho's main competitors are [Google](/company/google) Docs & Spreadsheets, Salesforce as well as [Microsoft](/company/microsoft) Office.","zoho, officesuite, spreadsheet, writer, projects, sheet, crm, show, creator, wiki, planner, suite, notebook, chat, meeting, mail",USA,CA,Pleasanton,...,Software & Technology,sf bay area,sf bay area,CA,pleasanton,software,0,0,,,,,,,,,,,,,,,,,
4,c:4,4,,San Francisco,CA,USA,37.764726,-122.394523,no office info,No Office Info,CA,c:4,Company,,digg,news,acquired,2004-10-11,,user driven social content website,"Digg is a user driven social content website. Everything on Digg is user-submitted. After you submit content, other people read your submission and ""Digg"" what they like best. If your story receives enough Diggs, it's promoted to the front page for other visitors to see.\n\n[Kevin Rose](http://www.crunchbase.com/person/kevin-rose) came up with the idea for Digg in the fall of 2004. He found programmer [Owen Byrne](http://www.crunchbase.com/person/owen-byrne) through [eLance](http://www.crunchbase.com/company/elance) and paid him $10/hour to develop the idea. In addition, Rose paid $99 per month for hosting and $1,200 for the Digg.com domain. In December of 2004, Kevin launched his creation to the world through a post on his blog.\n\nIn February of 2005, Paris Hilton's cell phone was hacked. Images and phone numbers from the phone were posted online and it didn't take long for a user to post the link on Digg. The rapidshare downloads site started to receive an enormous amount of traffic and it was then, Rose says, he saw ""the power of breaking stories before anyone else.""\n\nDigg has been a force ever since. Acquisition offers have been made, Rose was on the cover of [BusinessWeek](/company/businessweek) and according to [Alexa](/company/alexa), Digg is in the top 100 most trafficked sites on the internet. The success hasn't come without its share of problems though. The site has had to face [services aimed at gaming](http://www.techcrunch.com/2007/04/02/subvert-and-profit-next-service-to-try-gaming-digg/) the way stories hit the front page, as well as a [user revolt](http://www.techcrunch.com/2007/05/01/digg-surrenders-to-mob/). Digg has however been able to get over these hurdles as it continues to be one of the social news leaders.","community, social, news, bookmark, digg, technology, design",USA,CA,San Francisco,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,news,0,0,1.0,c:4,2006-12-01,series-b,b,8500000.0,,,2.0,0.0,0.0,2006.0,2006Q4,series-b,2.0,1.098612,0.0


In [15]:
print(df_off.shape)
print(df_obj.shape)
print(df_fr.shape)
print("="*60)
print(off_obj.shape)
print(off_obj_fr.shape) #2만행

(112718, 11)
(462620, 45)
(52928, 17)
(112718, 56)
(139191, 73)


In [16]:
off_obj_fr.to_csv("./data/join/growth_addr_final.csv", encoding="utf-8", index=False)
print("="*60)
print("성장률(위치) growth_addr csv 추출 완료!")
print("="*60)

성장률(위치) growth_addr csv 추출 완료!


### 스타트업 성장률(이슈/마일스톤)
* milestones ↔ objects ↔ funding_rounds

In [17]:
# milestones + objects
mile_obj = df_mile.merge(
    df_obj,
    left_on="mile_cfpr_id",
    right_on="objects_cfpr_id",
    how="left"
)

# funding_rounds까지 연결
mile_obj_fr = mile_obj.merge(
    df_fr,
    left_on="mile_cfpr_id",
    right_on="fr_c_id",
    how="left"
)

# 이슈 타입별(카테고리) 라운드 수
rounds_by_issue = (
    mile_obj_fr
    .groupby("cat_mile_description")["funding_round_id"]
    .nunique()
    .reset_index(name="n_rounds")
)

In [18]:
# 내용 확인
print(mile_obj_fr.columns)
display(mile_obj_fr.head())

Index(['Unnamed: 0', 'mile_cfpr_id', 'milestone_at', 'description_x',
       'cat_mile_description', 'objects_cfpr_id', 'entity_type', 'parent_c_id',
       'normalized_name', 'category_code', 'status', 'founded_at', 'closed_at',
       'description_y', 'overview', 'tag_list', 'country_code', 'state_code',
       'city', 'region', 'first_investment_at', 'last_investment_at',
       'investment_rounds', 'invested_companies', 'first_funding_at',
       'last_funding_at', 'funding_rounds', 'funding_total_usd',
       'first_milestone_at', 'last_milestone_at', 'milestones',
       'relationships', 'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_cat

Unnamed: 0.1,Unnamed: 0,mile_cfpr_id,milestone_at,description_x,cat_mile_description,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description_y,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,...,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,funding_round_id,fr_c_id,funded_at,funding_round_type,funding_round_code,raised_amount_usd,pre_money_valuation_usd,post_money_valuation_usd,participants,is_first_round,is_last_round,funded_year,funded_quarter,cat_fr_type,num_fr_type,log_participants,is_fr_raised_private
0,0,c:12,2008-06-09,Survives iPhone 3G Stevenote,survives,c:12,Company,,twitter,social,ipo,2006-03-21,,Real time communication platform,"Created in 2006, Twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world.\n\nWe see a billion tweets every 2.5 days on every conceivable topic. World leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active Twitter accounts through which users can truly get the pulse of the planet.","text, messaging, social, community, twitter, tweet, twttr, microblog, sms",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2007-07-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,social,0.0,0.0,261.0,c:12,2007-07-01,series-a,a,5000000.0,20000000.0,,10.0,0.0,1.0,2007.0,2007Q3,series-a,1.0,2.397895,0.0
1,0,c:12,2008-06-09,Survives iPhone 3G Stevenote,survives,c:12,Company,,twitter,social,ipo,2006-03-21,,Real time communication platform,"Created in 2006, Twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world.\n\nWe see a billion tweets every 2.5 days on every conceivable topic. World leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active Twitter accounts through which users can truly get the pulse of the planet.","text, messaging, social, community, twitter, tweet, twttr, microblog, sms",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2007-07-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,social,0.0,0.0,2106.0,c:12,2008-05-01,series-b,b,15000000.0,,,6.0,0.0,0.0,2008.0,2008Q2,series-b,2.0,1.94591,0.0
2,0,c:12,2008-06-09,Survives iPhone 3G Stevenote,survives,c:12,Company,,twitter,social,ipo,2006-03-21,,Real time communication platform,"Created in 2006, Twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world.\n\nWe see a billion tweets every 2.5 days on every conceivable topic. World leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active Twitter accounts through which users can truly get the pulse of the planet.","text, messaging, social, community, twitter, tweet, twttr, microblog, sms",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2007-07-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,social,0.0,0.0,5082.0,c:12,2009-02-13,series-c+,c,35000000.0,,,6.0,0.0,0.0,2009.0,2009Q1,series-c+,3.0,1.94591,0.0
3,0,c:12,2008-06-09,Survives iPhone 3G Stevenote,survives,c:12,Company,,twitter,social,ipo,2006-03-21,,Real time communication platform,"Created in 2006, Twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world.\n\nWe see a billion tweets every 2.5 days on every conceivable topic. World leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active Twitter accounts through which users can truly get the pulse of the planet.","text, messaging, social, community, twitter, tweet, twttr, microblog, sms",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2007-07-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,social,0.0,0.0,8775.0,c:12,2009-09-25,series-c+,d,100000000.0,,,6.0,0.0,0.0,2009.0,2009Q3,series-c+,3.0,1.94591,0.0
4,0,c:12,2008-06-09,Survives iPhone 3G Stevenote,survives,c:12,Company,,twitter,social,ipo,2006-03-21,,Real time communication platform,"Created in 2006, Twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world.\n\nWe see a billion tweets every 2.5 days on every conceivable topic. World leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active Twitter accounts through which users can truly get the pulse of the planet.","text, messaging, social, community, twitter, tweet, twttr, microblog, sms",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2007-07-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,social,0.0,0.0,11042.0,c:12,2010-01-08,series-c+,e,5166511.0,,,,0.0,0.0,2010.0,2010Q1,series-c+,3.0,,0.0


In [19]:
print(df_mile.shape)
print(df_obj.shape)
print(df_fr.shape)
print("="*60)
print(mile_obj.shape) 
print(mile_obj_fr.shape) #2만행

(39456, 5)
(462620, 45)
(52928, 17)
(39456, 50)
(58605, 67)


In [20]:
mile_obj_fr.to_csv("./data/join/growth_issues_final.csv", encoding="utf-8", index=False)
print("="*60)
print("성장률(규모) growth_issues csv 추출 완료!")
print("="*60)

성장률(규모) growth_issues csv 추출 완료!


### 스타트업 M&A(산업)
* objects ↔ acquisitions
* 인수당한 회사 기준 `acquired_object_id`

In [21]:
# 회사 풀 유지하면서, M&A 여부만 붙이기→ objects 기준으로 LEFT JOIN
obj_acq = df_obj.merge(
    df_acq,
    left_on="objects_cfpr_id",
    right_on="acquired_c_id",
    how="left"
)

# M&A 여부 플래그
obj_acq["is_acquired"] = obj_acq["acquisition_id"].notna().astype(int)

# 산업별 M&A 비율
mna_rate_by_industry = (
    obj_acq
    .groupby("category_code")["is_acquired"]
    .mean()
    .reset_index(name="mna_rate")
)

In [22]:
# 내용 확인
print(obj_acq.columns)
display(obj_acq.head())

Index(['objects_cfpr_id', 'entity_type', 'parent_c_id', 'normalized_name',
       'category_code', 'status', 'founded_at', 'closed_at', 'description',
       'overview', 'tag_list', 'country_code', 'state_code', 'city', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_category_filled',
       'is_obj_funding_total_usd_private', 'is_obj_funding_rounds_private',
     

Unnamed: 0,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,last_funding_at,funding_rounds,funding_total_usd,first_milestone_at,last_milestone_at,...,is_obj_state_missing,is_obj_inv_rounds_missing,is_obj_inv_comp_missing,cat_obj_status,obj_overview_fixed,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,acquisition_id,acquiring_c_id,acquired_c_id,term_code,price_amount,price_currency_code,acquired_at,is_acq_price_private,is_acquisitions_acq_at_missing,price_amount_usd,acqusition_currency_rate,is_acquired
0,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,0,0,0,operating,"wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. wetpaints own online property, wetpaint entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million facebook fans, is a proof point to the companys success in building and engaging audiences. media companies can license wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. founded by internet pioneer ben elowitz, and with offices in new york and seattle, wetpaint is backed by accel partners, the investors behind facebook",Software & Technology,seattle,seattle,WA,seattle,web,0,0,,,,,,,,,,,,0
1,c:10,Company,,flektor,games_video,acquired,,,,"Flektor is a rich-media mash-up platform that enables consumers to create, remix and share photos and videos on the internet without the need for advanced video-editing skills or software.\n\nFox Interactive Media, a division of News Corporation, announced that it had completed the purchase of Flektor on May 30, 2007. The estimated puchase price is $15-20 million.","flektor, photo, video",USA,CA,Culver City,Los Angeles,,,0,0,,,,,,,...,0,0,0,acquired,"flektor is a rich-media mash-up platform that enables consumers to create, remix and share photos and videos on the internet without the need for advanced video-editing skills or software. fox interactive media, a division of news corporation, announced that it had completed the purchase of flektor on may 30, 2007. the estimated puchase price is $15-20 million",Software & Technology,los angeles,los angeles,CA,culver city,games_video,1,1,1.0,c:11,c:10,,20000000.0,USD,2007-05-30,0.0,0.0,20000000.0,1.0,1
2,c:100,Company,,there,games_video,acquired,,,,"There.com is an online virtual world where anyone can explore, meet friends and play games. It was founded in 1998 by Will Harvey, a Stanford computer science Ph.D. and game developer, and Jeffrey Ventrella, an expert on artificial life from MIT's Media Lab. The duo raised approximately $37 million - including $20 million from employees, $11 million from angel investors and $6 million from Sutter Hill Ventures. In 2005 the company was spun off under Makena Technologies, and in March 2010 There closed to the public. In May 2011, There announced it would reopen as a 18+ Cloud-based service. As of Nov 2013, There is open.\n\nThere.com is a subscription service with a monthly fee of $10.00. Additional in-game accessories can be purchased for separate fees.\n\nOther online virtual worlds include [Kaneva](http://www.crunchbase.com/company/kaneva), [Second Life](http://www.crunchbase.com/company/secondlife) and [Cyworld](http://www.crunchbase.com/company/cyworld).","virtualworld, there, teens",USA,CA,San Mateo,SF Bay,,,0,0,,,,,2003-02-01,2011-09-23,...,0,0,0,acquired,"there.com is an online virtual world where anyone can explore, meet friends and play games. it was founded in 1998 by will harvey, a stanford computer science ph.d. and game developer, and jeffrey ventrella, an expert on artificial life from mit's media lab. the duo raised approximately $37 million - including $20 million from employees, $11 million from angel investors and $6 million from sutter hill ventures. in 2005 the company was spun off under makena technologies, and in march 2010 there closed to the public. in may 2011, there announced it would reopen as a 18+ cloud-based service. as of nov 2013, there is open. there.com is a subscription service with a monthly fee of $10.00. additional in-game accessories can be purchased for separate fees. other online virtual worlds include [kaneva](http://www.crunchbase.com/company/kaneva), [second life](http://www.crunchbase.com/company/secondlife) and [cyworld](http://www.crunchbase.com/company/cyworld)",Software & Technology,sf bay area,sf bay area,CA,san mateo,games_video,1,1,20.0,c:377,c:100,cash,,USD,2005-05-29,1.0,0.0,,1.0,1
3,c:10000,Company,,mywebbo,network_hosting,operating,2008-07-26,,,"BRAND NEW ONLINE SOCIAL NETWORKING WEBSITE,FOR MAKING NEW FRIENDS OR CHATTING TO OLD ONE'S.\n\nPACKED WITH NEW FEATURES SUCH AS RATING PROFILES , RATING MUSIC,VIDEO'S AND PICTURES ,UPLOADING MUSIC ,VIDEO'S PICTURES , CREATING CLASSIFIED ADS ,SHOUTOUT BOX!, AND ONLINE CHAT AREA FOR MAKING NEW FRIENDS OR SIMPLY CHATTING TO YOUR OLD ONE'S ,THERE ARE LOADS OF GREAT FEATURES FOR ANYONE TO TRY .. PLUS MANY MORE TO COME .","social-network, new, website, web, friends, chat, people",,,,unknown,,,0,0,,,0.0,0.0,,,...,1,0,0,operating,"brand new online social networking website,for making new friends or chatting to old one's. packed with new features such as rating profiles , rating music,video's and pictures ,uploading music ,video's pictures , creating classified ads ,shoutout box!, and online chat area for making new friends or simply chatting to your old one's ,there are loads of great features for anyone to try .. plus many more to come",Software & Technology,,,,,network_hosting,0,0,,,,,,,,,,,,0
4,c:10001,Company,,the movie streamer,games_video,operating,2008-07-26,,,"This company shows free movies online on their website which, in fact, is not illegal since they are not the ones hosting the videos.","watch, full-length, moives, online, for, free, streaming, videos, tv-shows",,,,unknown,,,0,0,,,0.0,0.0,,,...,1,0,0,operating,"this company shows free movies online on their website which, in fact, is not illegal since they are not the ones hosting the videos",Software & Technology,,,,,games_video,0,0,,,,,,,,,,,,0


In [23]:
print(df_obj.shape)
print(df_acq.shape)
print("="*60)
print(obj_acq.shape) #1백행

(462620, 45)
(9562, 11)
(462770, 57)


In [24]:
obj_acq.to_csv("./data/join/mna_industry_final.csv", encoding="utf-8", index=False)
print("="*60)
print("M&A(산업) mna_industry csv 추출 완료!")
print("="*60)

M&A(산업) mna_industry csv 추출 완료!


### 스타트업 M&A(규모)
* objects ↔ relationships ↔ acquisitions

In [25]:
# objects + relationships
obj_rel = df_obj.merge(
    df_rel,
    left_on="objects_cfpr_id",
    right_on="rel_cf_id",
    how="left"
)

# acquisitions 붙이기
obj_rel_acq = obj_rel.merge(
    df_acq,
    left_on="objects_cfpr_id",
    right_on="acquired_c_id",
    how="left"
)

obj_rel_acq["is_acquired"] = obj_rel_acq["acquisition_id"].notna().astype(int)

# 회사별 인원수/인수여부 요약
mna_size_feat = (
    obj_rel_acq
    .groupby("objects_cfpr_id")
    .agg(
        n_people=("rel_p_id", "nunique"),
        is_acquired=("is_acquired", "max")   # 한 번이라도 인수되면 1
    )
    .reset_index()
)

In [26]:
# 내용 확인
print(obj_rel_acq.columns)
display(obj_rel_acq.head())

Index(['objects_cfpr_id', 'entity_type', 'parent_c_id', 'normalized_name',
       'category_code', 'status', 'founded_at', 'closed_at', 'description',
       'overview', 'tag_list', 'country_code', 'state_code', 'city', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_category_filled',
       'is_obj_funding_total_usd_private', 'is_obj_funding_rounds_private',
     

Unnamed: 0,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,last_funding_at,funding_rounds,funding_total_usd,first_milestone_at,last_milestone_at,...,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,relationship_id,rel_p_id,rel_cf_id,start_at,end_at,is_past,sequence,title,has_end_date,cat_rel_title,acquisition_id,acquiring_c_id,acquired_c_id,term_code,price_amount,price_currency_code,acquired_at,is_acq_price_private,is_acquisitions_acq_at_missing,price_amount_usd,acqusition_currency_rate,is_acquired
0,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,web,0,0,1.0,p:2,c:1,,,0.0,8.0,Co-Founder/CEO/Board of Directors,0.0,Founder,,,,,,,,,,,,0
1,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,web,0,0,2.0,p:3,c:1,,,1.0,279242.0,VP Marketing,0.0,VP,,,,,,,,,,,,0
2,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,web,0,0,7832.0,p:7235,c:1,,,0.0,9.0,Board,0.0,Board,,,,,,,,,,,,0
3,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,web,0,0,11362.0,p:10130,c:1,,,1.0,9.0,VP User Experience,0.0,VP,,,,,,,,,,,,0
4,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,web,0,0,11363.0,p:10131,c:1,,,0.0,5.0,"SVP, Strategy and Business Development",0.0,VP,,,,,,,,,,,,0


In [27]:
print(df_obj.shape)
print(df_rel.shape)
print(df_acq.shape)
print("="*60)
print(obj_rel.shape)
print(obj_rel_acq.shape) #26만행

(462620, 45)
(402412, 10)
(9562, 11)
(726575, 55)
(728207, 67)


In [28]:
obj_rel_acq.to_csv("./data/join/mna_size_final.csv", encoding="utf-8", index=False)
print("="*60)
print("M&A(규모) mna_size csv 추출 완료!")
print("="*60)

M&A(규모) mna_size csv 추출 완료!


### 스타트업 M&A(위치)
* offices ↔ objects ↔ acquisitions

In [29]:
off_obj_acq = (
    df_off
    .merge(df_obj, left_on="offices_c_id", right_on="objects_cfpr_id", how="left")
    .merge(df_acq, left_on="offices_c_id", right_on="acquired_c_id", how="left")
)

off_obj_acq["is_acquired"] = off_obj_acq["acquisition_id"].notna().astype(int)

mna_rate_by_city = (
    off_obj_acq 
    .groupby("city_x")["is_acquired"]
    .mean()
    .reset_index(name="mna_rate")
)

In [30]:
# 내용 확인
print(off_obj_acq.columns)
display(off_obj_acq.head())

Index(['offices_c_id', 'office_id', 'description_x', 'city_x', 'state_code_x',
       'country_code_x', 'latitude', 'longitude', 'offices_description_fixed',
       'cat_offices_description', 'offices_state_filled', 'objects_cfpr_id',
       'entity_type', 'parent_c_id', 'normalized_name', 'category_code',
       'status', 'founded_at', 'closed_at', 'description_y', 'overview',
       'tag_list', 'country_code_y', 'state_code_y', 'city_y', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overvie

Unnamed: 0,offices_c_id,office_id,description_x,city_x,state_code_x,country_code_x,latitude,longitude,offices_description_fixed,cat_offices_description,offices_state_filled,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description_y,overview,tag_list,country_code_y,state_code_y,city_y,...,is_obj_state_missing,is_obj_inv_rounds_missing,is_obj_inv_comp_missing,cat_obj_status,obj_overview_fixed,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,acquisition_id,acquiring_c_id,acquired_c_id,term_code,price_amount,price_currency_code,acquired_at,is_acq_price_private,is_acquisitions_acq_at_missing,price_amount_usd,acqusition_currency_rate,is_acquired
0,c:1,1,,Seattle,WA,USA,47.603122,-122.333253,no office info,No Office Info,WA,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,...,0,0,0,operating,"wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. wetpaints own online property, wetpaint entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million facebook fans, is a proof point to the companys success in building and engaging audiences. media companies can license wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. founded by internet pioneer ben elowitz, and with offices in new york and seattle, wetpaint is backed by accel partners, the investors behind facebook",Software & Technology,seattle,seattle,WA,seattle,web,0,0,,,,,,,,,,,,0
1,c:3,3,Headquarters,Pleasanton,CA,USA,37.692934,-121.904945,headquarter,Headquarters,CA,c:3,Company,,zoho,software,operating,2005-09-15,,Online Business Apps Suite,"Zoho offers a suite of Business, Collaboration & Productivity applications. Apps include CRM, Customer Support, Office Suite, Email Hosting, Project Management, Accounting, App Creator among many others. \n\nZoho's parent company is Zoho Corp (previously AdventNet), ""a software company started in 1996 focusing on building quality and affordable software for businesses."" \n\nZoho's main competitors are [Google](/company/google) Docs & Spreadsheets, Salesforce as well as [Microsoft](/company/microsoft) Office.","zoho, officesuite, spreadsheet, writer, projects, sheet, crm, show, creator, wiki, planner, suite, notebook, chat, meeting, mail",USA,CA,Pleasanton,...,0,0,0,operating,"zoho offers a suite of business, collaboration & productivity applications. apps include crm, customer support, office suite, email hosting, project management, accounting, app creator among many others. zoho's parent company is zoho corp (previously adventnet), ""a software company started in 1996 focusing on building quality and affordable software for businesses."" zoho's main competitors are [google](/company/google) docs & spreadsheets, salesforce as well as [microsoft](/company/microsoft) office",Software & Technology,sf bay area,sf bay area,CA,pleasanton,software,0,0,,,,,,,,,,,,0
2,c:4,4,,San Francisco,CA,USA,37.764726,-122.394523,no office info,No Office Info,CA,c:4,Company,,digg,news,acquired,2004-10-11,,user driven social content website,"Digg is a user driven social content website. Everything on Digg is user-submitted. After you submit content, other people read your submission and ""Digg"" what they like best. If your story receives enough Diggs, it's promoted to the front page for other visitors to see.\n\n[Kevin Rose](http://www.crunchbase.com/person/kevin-rose) came up with the idea for Digg in the fall of 2004. He found programmer [Owen Byrne](http://www.crunchbase.com/person/owen-byrne) through [eLance](http://www.crunchbase.com/company/elance) and paid him $10/hour to develop the idea. In addition, Rose paid $99 per month for hosting and $1,200 for the Digg.com domain. In December of 2004, Kevin launched his creation to the world through a post on his blog.\n\nIn February of 2005, Paris Hilton's cell phone was hacked. Images and phone numbers from the phone were posted online and it didn't take long for a user to post the link on Digg. The rapidshare downloads site started to receive an enormous amount of traffic and it was then, Rose says, he saw ""the power of breaking stories before anyone else.""\n\nDigg has been a force ever since. Acquisition offers have been made, Rose was on the cover of [BusinessWeek](/company/businessweek) and according to [Alexa](/company/alexa), Digg is in the top 100 most trafficked sites on the internet. The success hasn't come without its share of problems though. The site has had to face [services aimed at gaming](http://www.techcrunch.com/2007/04/02/subvert-and-profit-next-service-to-try-gaming-digg/) the way stories hit the front page, as well as a [user revolt](http://www.techcrunch.com/2007/05/01/digg-surrenders-to-mob/). Digg has however been able to get over these hurdles as it continues to be one of the social news leaders.","community, social, news, bookmark, digg, technology, design",USA,CA,San Francisco,...,0,0,0,acquired,"digg is a user driven social content website. everything on digg is user-submitted. after you submit content, other people read your submission and ""digg"" what they like best. if your story receives enough diggs, it's promoted to the front page for other visitors to see. [kevin rose](http://www.crunchbase.com/person/kevin-rose) came up with the idea for digg in the fall of 2004. he found programmer [owen byrne](http://www.crunchbase.com/person/owen-byrne) through [elance](http://www.crunchbase.com/company/elance) and paid him $10/hour to develop the idea. in addition, rose paid $99 per month for hosting and $1,200 for the digg.com domain. in december of 2004, kevin launched his creation to the world through a post on his blog. in february of 2005, paris hilton's cell phone was hacked. images and phone numbers from the phone were posted online and it didn't take long for a user to post the link on digg. the rapidshare downloads site started to receive an enormous amount of traffic and it was then, rose says, he saw ""the power of breaking stories before anyone else."" digg has been a force ever since. acquisition offers have been made, rose was on the cover of [businessweek](/company/businessweek) and according to [alexa](/company/alexa), digg is in the top 100 most trafficked sites on the internet. the success hasn't come without its share of problems though. the site has had to face [services aimed at gaming](http://www.techcrunch.com/2007/04/02/subvert-and-profit-next-service-to-try-gaming-digg/) the way stories hit the front page, as well as a [user revolt](http://www.techcrunch.com/2007/05/01/digg-surrenders-to-mob/). digg has however been able to get over these hurdles as it continues to be one of the social news leaders",Software & Technology,sf bay area,sf bay area,CA,san francisco,news,0,0,7386.0,c:20953,c:4,,500000.0,USD,2012-07-12,0.0,0.0,500000.0,1.0,1
3,c:5,5,Headquarters,Menlo Park,CA,USA,37.41605,-122.151801,headquarter,Headquarters,CA,c:5,Company,,facebook,social,ipo,2004-02-01,,Social network,"Facebook is the world's largest social network, with over [1.15 billion monthly active users](http://techcrunch.com/2013/07/24/facebook-growth-2/).\n\nFacebook was founded by [Mark Zuckerberg](http://www.crunchbase.com/person/mark-zuckerberg) in February 2004, initially as an exclusive network for Harvard students. It was a huge hit: in 2 weeks, half of the schools in the Boston area began demanding a Facebook network. Zuckerberg immediately recruited his friends [Dustin Moskovitz](http://www.crunchbase.com/person/dustin-moskovitz), [Chris Hughes](/person/chris-hughes), and [Eduardo Saverin](/person/eduardo-saverin) to help build Facebook, and within four months, Facebook added 30 more college networks. \n\nThe original idea for the term Facebook came from Zuckerberg's high school (Phillips Exeter Academy). The Exeter Face Book was passed around to every student as a way for students to get to know their classmates for the following year. It was a physical paper book until Zuckerberg brought it to the internet.\n\nWith this success, Zuckerberg, Moskowitz and Hughes moved out to [Palo Alto](/maps/city/Palo%2520Alto) for the summer and rented a sublet. A few weeks later, Zuckerberg ran into the former cofounder of [Napster](/company/napster), [Sean Parker](/person/sean-parker). Parker soon moved in to Zuckerberg's apartment and they began working together. Parker provided the introduction to their first investor, [Peter Thiel](/person/peter-thiel), cofounder of [PayPal](/company/paypal) and managing partner of [The Founders Fund](http://www.crunchbase.com/financial-organization/founders-fund). Thiel invested $500,000 into Facebook. \n\nWith millions more users, [Friendster](/company/friendster) [attempted](http://www.techcrunch.com/2006/12/12/yahoos-project-fraternity-docs-leaked/) to acquire the company for $10 million in mid 2004. Facebook turned down the offer and subsequently received $12.7 million in funding from [Accel Partners](http://www.crunchbase.com/financial-organization/accel-partners), at a valuation of [around $100 million](http://www.techcrunch.com/2005/09/07/85-of-college-students-use-facebook/). Facebook continued to grow, opening up to high school students in September 2005 and adding an immensely popular photo sharing feature the next month. The next spring, Facebook received $25 million in funding from [Greylock Partners](http://www.crunchbase.com/financial-organization/greylock) and [Meritech Capital](http://www.crunchbase.com/financial-organization/meritech-capital-partners), as well as previous investors Accel Partners and Peter Thiel. The pre-money valuation for this deal was about $525 million. Facebook subsequently [opened](http://www.techcrunch.com/2006/04/26/facebook-goes-beyond-college-high-school-markets/) up to work networks, eventually amassing over 20,000 work networks. Finally in September 2006, Facebook [opened](http://www.techcrunch.com/2006/09/26/facebook-just-launched-open-registrations/) to anyone with an email address. \n \nIn the summer of 2006, [Yahoo](/company/yahoo) [attempted to acquire](http://www.techcrunch.com/2006/09/21/facebook-and-yahoo-in-acquisition-talks-for-1-billion/) the company for $1 billion dollars. [Reports](http://www.wired.com/techbiz/startups/news/2007/09/ff_facebook) actually indicated that Zuckerberg made a verbal agreement to sell Facebook to Yahoo. A few days later when Yahoo's stock price took a dive, the offer was lowered to $800 million and Zuckerberg walked away from the deal. Yahoo later [offered](http://www.techcrunch.com/2006/12/12/yahoos-project-fraternity-docs-leaked/) $1 billion again, this time Zuckerberg turned Yahoo down and earned instant notoriety as the ""kid"" who turned down a billion. This was not the first time Zuckerberg turned down an acquisition offer; [Viacom](/company/viacom) had previously [unsuccessfully](http://www.techcrunch.com/2006/03/28/facebook-is-doing-the-skype-dance/) attempted to acquire the company for $750 million in March 2006. \n\nNot long after, in October 2007, [Microsoft invested](http://www.techcrunch.com/2007/10/24/facebook-takes-the-microsoft-money-and-runs/) $240 million into Facebook for 1.6 percent of the company in. This meant a valuation of over $15 billion, making Facebook the [5th most valuable US Internet company](http://www.techcrunch.com/2007/10/25/perspective-facebook-is-now-5th-most-valuable-us-internet-company/), yet with only $150 million in annual revenue. Many explained Microsoft's decision as being solely driven by the desire to outbid Google.\n\nFacebook continued to receive funding, most notably in January 2011 receiving a [$1.5 billion round](http://techcrunch.com/2011/01/21/facebook-raises-1-5-billion-at-50-billion-valuation/), valuing the company at $50 billion. A year later, in February 2012, Facebook [announced](http://techcrunch.com/2012/02/01/facebook-files-for-5-billion-ipo/) that it was filing for its long-anticipated initial public offering. The company went public on May 18, 2012, opening on the NASDAQ with shares trading at [$42.05](http://techcrunch.com/2012/05/18/facebook-share-open-10-5-higher-at-42/).\n\nOne sour note for Facebook has been the [controversy](http://www.techcrunch.com/2007/07/16/the-ghost-of-zuckerbergs-past-may-haunt-facebook-ipo/) with social network ConnectU. The founders of ConnectU, former classmates of Mark Zuckerberg at Harvard, allege that Zuckerberg stole their original source code for Facebook. The ordeal has [gone to court](http://www.techcrunch.com/2007/10/10/facebook-vs-connectu-facebook-makes-untrue-assertions-claims-connectu/), and has now been [resolved](http://techcrunch.com/2011/06/22/after-seven-years-the-winklevosses-give-up-on-battle-with-facebook/). \n\nNotwithstanding this lingering controversy, Facebook's growth has been staggering. Facebook announced [astonishing numbers](http://techcrunch.com/2012/02/01/facebooks-s-1-845-million-users-every-month-more-than-half-daily-and-nearly-half-mobile/) in February 2012 upon filing for its IPO. As of July 2013, over [1.15 billion](http://techcrunch.com/2013/07/24/facebook-growth-2/) users log into Facebook every month, and 669 million users log in daily. Mobile users now make up half of Facebook's user base, with 819 million monthly actives. Facebook has also announced that by the end of 2011 there had been 100 billion friend connections, and in recent months users had been registering 2.7 billion Likes and Comments per day. Facebook is one of the [most trafficked sites](http://techcrunch.com/2010/12/29/hitwise-facebook-overtakes-google-to-become-most-visited-website-in-2010/) in the US, and its international growth has been impressive. Additionally, Facebook is the top photo sharing site with 250 million photos uploaded per day.\n\nFacebook users' [passion](http://www.techcrunch.com/2007/11/13/i-just-cant-be-a-college-student-without-facebook/)or [addiction](http://www.techcrunch.com/2007/03/09/career-advice-dont-choose-facebook-over-your-job/)to the site is unparalleled: more than half use the product every single day and users spend an average of 19 minutes a day on Facebook. The site's popularity has garnered it pop culture fame, so much so that in 2010 a feature film entitled _The Social Network_ was released which chronicled Facebook's inception.\n\nIn its 2012 [IPO filing](http://techcrunch.com/2012/02/01/facebook-files-for-5-billion-ipo/), Facebook announced that it intends to grow in the near future by expanding its global user base, increasing engagement by developing new social tools, improving the mobile experience, and creating more value for advertisers and users.","facebook, college, students, profiles, network, online-communities, social-networking",USA,CA,Menlo Park,...,0,0,0,ipo,"facebook is the world's largest social network, with over [1.15 billion monthly active users](http://techcrunch.com/2013/07/24/facebook-growth-2/). facebook was founded by [mark zuckerberg](http://www.crunchbase.com/person/mark-zuckerberg) in february 2004, initially as an exclusive network for harvard students. it was a huge hit: in 2 weeks, half of the schools in the boston area began demanding a facebook network. zuckerberg immediately recruited his friends [dustin moskovitz](http://www.crunchbase.com/person/dustin-moskovitz), [chris hughes](/person/chris-hughes), and [eduardo saverin](/person/eduardo-saverin) to help build facebook, and within four months, facebook added 30 more college networks. the original idea for the term facebook came from zuckerberg's high school (phillips exeter academy). the exeter face book was passed around to every student as a way for students to get to know their classmates for the following year. it was a physical paper book until zuckerberg brought it to the internet. with this success, zuckerberg, moskowitz and hughes moved out to [palo alto](/maps/city/palo%2520alto) for the summer and rented a sublet. a few weeks later, zuckerberg ran into the former cofounder of [napster](/company/napster), [sean parker](/person/sean-parker). parker soon moved in to zuckerberg's apartment and they began working together. parker provided the introduction to their first investor, [peter thiel](/person/peter-thiel), cofounder of [paypal](/company/paypal) and managing partner of [the founders fund](http://www.crunchbase.com/financial-organization/founders-fund). thiel invested $500,000 into facebook. with millions more users, [friendster](/company/friendster) [attempted](http://www.techcrunch.com/2006/12/12/yahoos-project-fraternity-docs-leaked/) to acquire the company for $10 million in mid 2004. facebook turned down the offer and subsequently received $12.7 million in funding from [accel partners](http://www.crunchbase.com/financial-organization/accel-partners), at a valuation of [around $100 million](http://www.techcrunch.com/2005/09/07/85-of-college-students-use-facebook/). facebook continued to grow, opening up to high school students in september 2005 and adding an immensely popular photo sharing feature the next month. the next spring, facebook received $25 million in funding from [greylock partners](http://www.crunchbase.com/financial-organization/greylock) and [meritech capital](http://www.crunchbase.com/financial-organization/meritech-capital-partners), as well as previous investors accel partners and peter thiel. the pre-money valuation for this deal was about $525 million. facebook subsequently [opened](http://www.techcrunch.com/2006/04/26/facebook-goes-beyond-college-high-school-markets/) up to work networks, eventually amassing over 20,000 work networks. finally in september 2006, facebook [opened](http://www.techcrunch.com/2006/09/26/facebook-just-launched-open-registrations/) to anyone with an email address. in the summer of 2006, [yahoo](/company/yahoo) [attempted to acquire](http://www.techcrunch.com/2006/09/21/facebook-and-yahoo-in-acquisition-talks-for-1-billion/) the company for $1 billion dollars. [reports](http://www.wired.com/techbiz/startups/news/2007/09/ff_facebook) actually indicated that zuckerberg made a verbal agreement to sell facebook to yahoo. a few days later when yahoo's stock price took a dive, the offer was lowered to $800 million and zuckerberg walked away from the deal. yahoo later [offered](http://www.techcrunch.com/2006/12/12/yahoos-project-fraternity-docs-leaked/) $1 billion again, this time zuckerberg turned yahoo down and earned instant notoriety as the ""kid"" who turned down a billion. this was not the first time zuckerberg turned down an acquisition offer; [viacom](/company/viacom) had previously [unsuccessfully](http://www.techcrunch.com/2006/03/28/facebook-is-doing-the-skype-dance/) attempted to acquire the company for $750 million in march 2006. not long after, in october 2007, [microsoft invested](http://www.techcrunch.com/2007/10/24/facebook-takes-the-microsoft-money-and-runs/) $240 million into facebook for 1.6 percent of the company in. this meant a valuation of over $15 billion, making facebook the [5th most valuable us internet company](http://www.techcrunch.com/2007/10/25/perspective-facebook-is-now-5th-most-valuable-us-internet-company/), yet with only $150 million in annual revenue. many explained microsoft's decision as being solely driven by the desire to outbid google. facebook continued to receive funding, most notably in january 2011 receiving a [$1.5 billion round](http://techcrunch.com/2011/01/21/facebook-raises-1-5-billion-at-50-billion-valuation/), valuing the company at $50 billion. a year later, in february 2012, facebook [announced](http://techcrunch.com/2012/02/01/facebook-files-for-5-billion-ipo/) that it was filing for its long-anticipated initial public offering. the company went public on may 18, 2012, opening on the nasdaq with shares trading at [$42.05](http://techcrunch.com/2012/05/18/facebook-share-open-10-5-higher-at-42/). one sour note for facebook has been the [controversy](http://www.techcrunch.com/2007/07/16/the-ghost-of-zuckerbergs-past-may-haunt-facebook-ipo/) with social network connectu. the founders of connectu, former classmates of mark zuckerberg at harvard, allege that zuckerberg stole their original source code for facebook. the ordeal has [gone to court](http://www.techcrunch.com/2007/10/10/facebook-vs-connectu-facebook-makes-untrue-assertions-claims-connectu/), and has now been [resolved](http://techcrunch.com/2011/06/22/after-seven-years-the-winklevosses-give-up-on-battle-with-facebook/). notwithstanding this lingering controversy, facebook's growth has been staggering. facebook announced [astonishing numbers](http://techcrunch.com/2012/02/01/facebooks-s-1-845-million-users-every-month-more-than-half-daily-and-nearly-half-mobile/) in february 2012 upon filing for its ipo. as of july 2013, over [1.15 billion](http://techcrunch.com/2013/07/24/facebook-growth-2/) users log into facebook every month, and 669 million users log in daily. mobile users now make up half of facebook's user base, with 819 million monthly actives. facebook has also announced that by the end of 2011 there had been 100 billion friend connections, and in recent months users had been registering 2.7 billion likes and comments per day. facebook is one of the [most trafficked sites](http://techcrunch.com/2010/12/29/hitwise-facebook-overtakes-google-to-become-most-visited-website-in-2010/) in the us, and its international growth has been impressive. additionally, facebook is the top photo sharing site with 250 million photos uploaded per day. facebook users' [passion](http://www.techcrunch.com/2007/11/13/i-just-cant-be-a-college-student-without-facebook/)or [addiction](http://www.techcrunch.com/2007/03/09/career-advice-dont-choose-facebook-over-your-job/)to the site is unparalleled: more than half use the product every single day and users spend an average of 19 minutes a day on facebook. the site's popularity has garnered it pop culture fame, so much so that in 2010 a feature film entitled _the social network_ was released which chronicled facebook's inception. in its 2012 [ipo filing](http://techcrunch.com/2012/02/01/facebook-files-for-5-billion-ipo/), facebook announced that it intends to grow in the near future by expanding its global user base, increasing engagement by developing new social tools, improving the mobile experience, and creating more value for advertisers and users",Software & Technology,sf bay area,sf bay area,CA,menlo park,social,0,0,,,,,,,,,,,,0
4,c:7,7,,Palo Alto,CA,ISR,0.0,0.0,no office info,No Office Info,CA,c:7,Company,,omnidrive,network_hosting,closed,2005-11-01,2008-09-15,,"Currently in public beta, Omnidrive makes it easy to access, edit and share your files from any computer with a web browser. The site was founded in late 2004 by Nik Cubrilovic and later launched at the November 2006 Web 2.0 Conference. An interesting slew of features make Omnidrive an attractive choice in the online file storage field.\n\nOmnidrive's competitors include [Box.net](http://www.crunchbase.com/company/box-net), [Streamload](http://www.streamload.com) and [Xdrive](http://www.xdrive.com).\n\n","storage, sharing, edit, online",ISR,CA,Palo Alto,...,0,0,0,closed,"currently in public beta, omnidrive makes it easy to access, edit and share your files from any computer with a web browser. the site was founded in late 2004 by nik cubrilovic and later launched at the november 2006 web 2.0 conference. an interesting slew of features make omnidrive an attractive choice in the online file storage field. omnidrive's competitors include [box.net](http://www.crunchbase.com/company/box-net), [streamload](http://www.streamload.com) and [xdrive](http://www.xdrive.com)",Software & Technology,sf bay area,sf bay area,CA,palo alto,network_hosting,0,0,,,,,,,,,,,,0


In [31]:
print(df_off.shape)
print(df_obj.shape)
print(df_acq.shape)
print("="*60)
print(off_obj.shape) 
print(off_obj_acq.shape) #1백행

(112718, 11)
(462620, 45)
(9562, 11)
(112718, 56)
(112870, 68)


In [32]:
off_obj_acq.to_csv("./data/join/mna_addr_final.csv", encoding="utf-8", index=False)
print("="*60)
print("M&A(위치) mna_addr csv 추출 완료!")
print("="*60)

M&A(위치) mna_addr csv 추출 완료!


### 스타트업 M&A(이슈/마일스톤)
* milestones ↔ objects ↔ acquisitions

In [33]:
mile_obj_acq = (
    df_mile
    .merge(df_obj, left_on="mile_cfpr_id", right_on="objects_cfpr_id", how="left")
    .merge(df_acq, left_on="mile_cfpr_id", right_on="acquired_c_id", how="left")
)

mile_obj_acq["is_acquired"] = mile_obj_acq["acquisition_id"].notna().astype(int)

mna_rate_by_issue = (
    mile_obj_acq
    .groupby("cat_mile_description")["is_acquired"]
    .mean()
    .reset_index(name="mna_rate")
)

In [34]:
# 내용 확인
print(mile_obj_acq.columns)
display(mile_obj_acq.head())

Index(['Unnamed: 0', 'mile_cfpr_id', 'milestone_at', 'description_x',
       'cat_mile_description', 'objects_cfpr_id', 'entity_type', 'parent_c_id',
       'normalized_name', 'category_code', 'status', 'founded_at', 'closed_at',
       'description_y', 'overview', 'tag_list', 'country_code', 'state_code',
       'city', 'region', 'first_investment_at', 'last_investment_at',
       'investment_rounds', 'invested_companies', 'first_funding_at',
       'last_funding_at', 'funding_rounds', 'funding_total_usd',
       'first_milestone_at', 'last_milestone_at', 'milestones',
       'relationships', 'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_cat

Unnamed: 0.1,Unnamed: 0,mile_cfpr_id,milestone_at,description_x,cat_mile_description,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description_y,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,...,is_obj_state_missing,is_obj_inv_rounds_missing,is_obj_inv_comp_missing,cat_obj_status,obj_overview_fixed,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,acquisition_id,acquiring_c_id,acquired_c_id,term_code,price_amount,price_currency_code,acquired_at,is_acq_price_private,is_acquisitions_acq_at_missing,price_amount_usd,acqusition_currency_rate,is_acquired
0,0,c:12,2008-06-09,Survives iPhone 3G Stevenote,survives,c:12,Company,,twitter,social,ipo,2006-03-21,,Real time communication platform,"Created in 2006, Twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world.\n\nWe see a billion tweets every 2.5 days on every conceivable topic. World leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active Twitter accounts through which users can truly get the pulse of the planet.","text, messaging, social, community, twitter, tweet, twttr, microblog, sms",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2007-07-01,...,0.0,0.0,0.0,ipo,"created in 2006, twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world. we see a billion tweets every 2.5 days on every conceivable topic. world leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active twitter accounts through which users can truly get the pulse of the planet",Software & Technology,sf bay area,sf bay area,CA,san francisco,social,0.0,0.0,,,,,,,,,,,,0
1,1,c:3138,2008-06-17,Twhirl announces support for Seesmic video playback.,product_update,c:3138,Company,,twhirl,software,acquired,2007-11-12,,Twitter Desktop Client,"Twhirl is a free desktop client for the Twitter microblogging service that is powered by the cross-platform Adobe AIR. The application helps the user sort through incoming ""Tweets"" by color coding them and allowing timeline filtering and visual & auditory notifications.\n\nThere are many additional features that are not available on the standard Twitter site, such as favorites settings, URL shortening, customizable color schemes, multiple Twitter account connections, localization, and cross-posting capabilities to Pownce and Jaiku!","AIR, twitter, pownce, jaiku, microblogging",,,,unknown,,,0.0,0.0,,...,1.0,0.0,0.0,acquired,"twhirl is a free desktop client for the twitter microblogging service that is powered by the cross-platform adobe air. the application helps the user sort through incoming ""tweets"" by color coding them and allowing timeline filtering and visual & auditory notifications. there are many additional features that are not available on the standard twitter site, such as favorites settings, url shortening, customizable color schemes, multiple twitter account connections, localization, and cross-posting capabilities to pownce and jaiku!",Software & Technology,,,,,software,1.0,1.0,239.0,c:200,c:3138,,,USD,2008-04-01,1.0,0.0,,1.0,1
2,2,c:59,2008-06-18,"More than 4 Billion videos viewed at Google Sites, 37.9% according to comScore",more,c:59,Company,,google,search,ipo,1998-09-07,,,"Google provides search and advertising services, which together aim to organize and monetize the world's information. In addition to its dominant search engine, it offers a plethora of online tools and platforms including: [Gmail](http://www.crunchbase.com/product/gmail), [Maps](http://www.crunchbase.com/product/google-maps), [YouTube](http://www.crunchbase.com/company/youtube), and [Google+](/product/google-plus), the company's extension into the social space. Most of its Web-based products are free, funded by Google's highly integrated online advertising platforms [AdWords](/product/google-adwords-2) and [AdSense](/product/google-adsense). Google promotes the idea that advertising should be highly targeted and relevant to users thus providing them with a rich source of information. \n\nGoogle has also made strong moves into the web-based apps space with acquisitions of [Jotspot](http://www.crunchbase.com/company/jotspot) (documents), [2Web Technologies](http://www.crunchbase.com/company/2web-technologies) (spreadsheets) and [Zenter](http://www.crunchbase.com/company/zenter) (presentations). The company has also made a splash with the acquisitions of [YouTube](http://www.crunchbase.com/company/youtube), [DoubleClick](http://www.crunchbase.com/company/doubleclick) and [Feedburner](http://www.crunchbase.com/company/feedburner). \n\n","search, gmail, blogger, find, information, youtube, google",USA,CA,Mountain View,SF Bay,2004-06-18,2013-05-01,29.0,26.0,1998-08-01,...,0.0,0.0,0.0,ipo,"google provides search and advertising services, which together aim to organize and monetize the world's information. in addition to its dominant search engine, it offers a plethora of online tools and platforms including: [gmail](http://www.crunchbase.com/product/gmail), [maps](http://www.crunchbase.com/product/google-maps), [youtube](http://www.crunchbase.com/company/youtube), and [google+](/product/google-plus), the company's extension into the social space. most of its web-based products are free, funded by google's highly integrated online advertising platforms [adwords](/product/google-adwords-2) and [adsense](/product/google-adsense). google promotes the idea that advertising should be highly targeted and relevant to users thus providing them with a rich source of information. google has also made strong moves into the web-based apps space with acquisitions of [jotspot](http://www.crunchbase.com/company/jotspot) (documents), [2web technologies](http://www.crunchbase.com/company/2web-technologies) (spreadsheets) and [zenter](http://www.crunchbase.com/company/zenter) (presentations). the company has also made a splash with the acquisitions of [youtube](http://www.crunchbase.com/company/youtube), [doubleclick](http://www.crunchbase.com/company/doubleclick) and [feedburner](http://www.crunchbase.com/company/feedburner)",Software & Technology,sf bay area,sf bay area,CA,mountain view,search,0.0,0.0,,,,,,,,,,,,0
3,3,c:314,2008-06-18,Reddit goes Open Source,launch,c:314,Company,,reddit,web,acquired,2005-01-01,,Social news website,"Launched in 2005, Reddit is a social news website that displays news based on your personal preferences and what the community likes. Your preferences are determined based on your history of voting stories up or down.\n\nThe company was started by two University of Virginia grads, Alexis Ohanian and Steve Huffman in the Y Combinator program. Two others, Christopher Slowe and Aaron Swartz, later joined the team.\n\nConde Nast, owner of Wired and other magazines/websites, acquired Reddit in October of 2006. Reddit competitors include [Digg](http://www.crunchbase.com/company/digg), [Netscape](http://www.techcrunch.com/tag/netscape/) and [Newsvine](http://www.crunchbase.com/company/newsvine).\n\nIn February 2008, Reddit began experimenting with allowing users to create their own 'subreddits,' such as [shutupandtakemymoney](http://reddit.com/r/shutupandtakemymoney) and [todayilearned](http://reddit.com/r/todayilearned).\n\nIn September 2011, the company changed hands from Condé Nast to its parent company, Advanced Publications. As part of the new structure, reddit incorporated and now reports directly to a board, including Alexis Ohanian , Bob Sauerberg (President), Joe Simon (Chief Technology Officer) from Condé Nast, and Andrew Siegel (Senior Vice President, Strategy and Corporate Development) from Advance.","social, news, reddit, voting",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2005-06-01,...,0.0,0.0,0.0,acquired,"launched in 2005, reddit is a social news website that displays news based on your personal preferences and what the community likes. your preferences are determined based on your history of voting stories up or down. the company was started by two university of virginia grads, alexis ohanian and steve huffman in the y combinator program. two others, christopher slowe and aaron swartz, later joined the team. conde nast, owner of wired and other magazines/websites, acquired reddit in october of 2006. reddit competitors include [digg](http://www.crunchbase.com/company/digg), [netscape](http://www.techcrunch.com/tag/netscape/) and [newsvine](http://www.crunchbase.com/company/newsvine). in february 2008, reddit began experimenting with allowing users to create their own 'subreddits,' such as [shutupandtakemymoney](http://reddit.com/r/shutupandtakemymoney) and [todayilearned](http://reddit.com/r/todayilearned). in september 2011, the company changed hands from condé nast to its parent company, advanced publications. as part of the new structure, reddit incorporated and now reports directly to a board, including alexis ohanian , bob sauerberg (president), joe simon (chief technology officer) from condé nast, and andrew siegel (senior vice president, strategy and corporate development) from advance",Software & Technology,sf bay area,sf bay area,CA,san francisco,web,0.0,0.0,14.0,c:318,c:314,cash,,USD,2006-10-01,1.0,0.0,,1.0,1
4,4,c:314,2008-01-22,Adds the ability to create your own Reddits,product_update,c:314,Company,,reddit,web,acquired,2005-01-01,,Social news website,"Launched in 2005, Reddit is a social news website that displays news based on your personal preferences and what the community likes. Your preferences are determined based on your history of voting stories up or down.\n\nThe company was started by two University of Virginia grads, Alexis Ohanian and Steve Huffman in the Y Combinator program. Two others, Christopher Slowe and Aaron Swartz, later joined the team.\n\nConde Nast, owner of Wired and other magazines/websites, acquired Reddit in October of 2006. Reddit competitors include [Digg](http://www.crunchbase.com/company/digg), [Netscape](http://www.techcrunch.com/tag/netscape/) and [Newsvine](http://www.crunchbase.com/company/newsvine).\n\nIn February 2008, Reddit began experimenting with allowing users to create their own 'subreddits,' such as [shutupandtakemymoney](http://reddit.com/r/shutupandtakemymoney) and [todayilearned](http://reddit.com/r/todayilearned).\n\nIn September 2011, the company changed hands from Condé Nast to its parent company, Advanced Publications. As part of the new structure, reddit incorporated and now reports directly to a board, including Alexis Ohanian , Bob Sauerberg (President), Joe Simon (Chief Technology Officer) from Condé Nast, and Andrew Siegel (Senior Vice President, Strategy and Corporate Development) from Advance.","social, news, reddit, voting",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2005-06-01,...,0.0,0.0,0.0,acquired,"launched in 2005, reddit is a social news website that displays news based on your personal preferences and what the community likes. your preferences are determined based on your history of voting stories up or down. the company was started by two university of virginia grads, alexis ohanian and steve huffman in the y combinator program. two others, christopher slowe and aaron swartz, later joined the team. conde nast, owner of wired and other magazines/websites, acquired reddit in october of 2006. reddit competitors include [digg](http://www.crunchbase.com/company/digg), [netscape](http://www.techcrunch.com/tag/netscape/) and [newsvine](http://www.crunchbase.com/company/newsvine). in february 2008, reddit began experimenting with allowing users to create their own 'subreddits,' such as [shutupandtakemymoney](http://reddit.com/r/shutupandtakemymoney) and [todayilearned](http://reddit.com/r/todayilearned). in september 2011, the company changed hands from condé nast to its parent company, advanced publications. as part of the new structure, reddit incorporated and now reports directly to a board, including alexis ohanian , bob sauerberg (president), joe simon (chief technology officer) from condé nast, and andrew siegel (senior vice president, strategy and corporate development) from advance",Software & Technology,sf bay area,sf bay area,CA,san francisco,web,0.0,0.0,14.0,c:318,c:314,cash,,USD,2006-10-01,1.0,0.0,,1.0,1


In [35]:
print(df_mile.shape)
print(df_obj.shape)
print(df_acq.shape)
print("="*60)
print(mile_obj.shape) 
print(mile_obj_acq.shape) #1백행 미만

(39456, 5)
(462620, 45)
(9562, 11)
(39456, 50)
(39575, 62)


In [36]:
mile_obj_acq.to_csv("./data/join/mna_issues_final.csv", encoding="utf-8", index=False)
print("="*60)
print("M&A(이슈) mna_issues csv 추출 완료!")
print("="*60)

M&A(이슈) mna_issues csv 추출 완료!


### 스타트업 IPO(산업)
* objects ↔ ipos

In [37]:
obj_ipo = df_obj.merge(
    df_ipos,
    left_on="objects_cfpr_id",
    right_on="ipos_c_id",
    how="left"
)

obj_ipo["is_ipo"] = obj_ipo["ipo_id"].notna().astype(int)

ipo_rate_by_industry = (
    obj_ipo
    .groupby("category_code")["is_ipo"]
    .mean()
    .reset_index(name="ipo_rate")
)

In [38]:
# 내용 확인
print(obj_ipo.columns)
display(obj_ipo.head())

Index(['objects_cfpr_id', 'entity_type', 'parent_c_id', 'normalized_name',
       'category_code', 'status', 'founded_at', 'closed_at', 'description',
       'overview', 'tag_list', 'country_code', 'state_code', 'city', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_category_filled',
       'is_obj_funding_total_usd_private', 'is_obj_funding_rounds_private',
     

Unnamed: 0,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,last_funding_at,funding_rounds,funding_total_usd,first_milestone_at,last_milestone_at,...,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,ipo_id,ipos_c_id,valuation_amount,valuation_currency_code,raised_amount,raised_currency_code,public_at,stock_symbol,is_ipos_public_at_missing,stock_normalized,is_ipos_valuation_private,valuation_amount_usd,valuation_currency_rate,is_ipos_raised_private,ipo_raised_amount_usd,ipo_raised_currency_rate,is_ipo
0,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,,,,,,,,,,,,,,,,,0
1,c:10,Company,,flektor,games_video,acquired,,,,"Flektor is a rich-media mash-up platform that enables consumers to create, remix and share photos and videos on the internet without the need for advanced video-editing skills or software.\n\nFox Interactive Media, a division of News Corporation, announced that it had completed the purchase of Flektor on May 30, 2007. The estimated puchase price is $15-20 million.","flektor, photo, video",USA,CA,Culver City,Los Angeles,,,0,0,,,,,,,...,Software & Technology,los angeles,los angeles,CA,culver city,games_video,1,1,,,,,,,,,,,,,,,,,0
2,c:100,Company,,there,games_video,acquired,,,,"There.com is an online virtual world where anyone can explore, meet friends and play games. It was founded in 1998 by Will Harvey, a Stanford computer science Ph.D. and game developer, and Jeffrey Ventrella, an expert on artificial life from MIT's Media Lab. The duo raised approximately $37 million - including $20 million from employees, $11 million from angel investors and $6 million from Sutter Hill Ventures. In 2005 the company was spun off under Makena Technologies, and in March 2010 There closed to the public. In May 2011, There announced it would reopen as a 18+ Cloud-based service. As of Nov 2013, There is open.\n\nThere.com is a subscription service with a monthly fee of $10.00. Additional in-game accessories can be purchased for separate fees.\n\nOther online virtual worlds include [Kaneva](http://www.crunchbase.com/company/kaneva), [Second Life](http://www.crunchbase.com/company/secondlife) and [Cyworld](http://www.crunchbase.com/company/cyworld).","virtualworld, there, teens",USA,CA,San Mateo,SF Bay,,,0,0,,,,,2003-02-01,2011-09-23,...,Software & Technology,sf bay area,sf bay area,CA,san mateo,games_video,1,1,,,,,,,,,,,,,,,,,0
3,c:10000,Company,,mywebbo,network_hosting,operating,2008-07-26,,,"BRAND NEW ONLINE SOCIAL NETWORKING WEBSITE,FOR MAKING NEW FRIENDS OR CHATTING TO OLD ONE'S.\n\nPACKED WITH NEW FEATURES SUCH AS RATING PROFILES , RATING MUSIC,VIDEO'S AND PICTURES ,UPLOADING MUSIC ,VIDEO'S PICTURES , CREATING CLASSIFIED ADS ,SHOUTOUT BOX!, AND ONLINE CHAT AREA FOR MAKING NEW FRIENDS OR SIMPLY CHATTING TO YOUR OLD ONE'S ,THERE ARE LOADS OF GREAT FEATURES FOR ANYONE TO TRY .. PLUS MANY MORE TO COME .","social-network, new, website, web, friends, chat, people",,,,unknown,,,0,0,,,0.0,0.0,,,...,Software & Technology,,,,,network_hosting,0,0,,,,,,,,,,,,,,,,,0
4,c:10001,Company,,the movie streamer,games_video,operating,2008-07-26,,,"This company shows free movies online on their website which, in fact, is not illegal since they are not the ones hosting the videos.","watch, full-length, moives, online, for, free, streaming, videos, tv-shows",,,,unknown,,,0,0,,,0.0,0.0,,,...,Software & Technology,,,,,games_video,0,0,,,,,,,,,,,,,,,,,0


In [39]:
print(df_obj.shape)
print(df_ipos.shape)
print("="*60)
print(obj_ipo.shape) #1행

(462620, 45)
(1259, 16)
(462621, 62)


In [40]:
obj_ipo.to_csv("./data/join/ipo_industry_final.csv", encoding="utf-8", index=False)
print("="*60)
print("IPO(산업) ipo_industry csv 추출 완료!")
print("="*60)

IPO(산업) ipo_industry csv 추출 완료!


### 스타트업 IPO(규모)
* objects ↔ relationships ↔ ipos

In [41]:
# relationships + objects
obj_rel = df_obj.merge(
    df_rel,
    left_on="objects_cfpr_id",
    right_on="rel_cf_id",
    how="left"
)

# ipos 붙이기
obj_rel_ipos = obj_rel.merge(
    df_ipos,
    left_on="objects_cfpr_id",
    right_on="ipos_c_id",
    how="left"
)

obj_rel_ipos["is_ipo"] = obj_rel_ipos["ipo_id"].notna().astype(int)

# 회사별 인원수/인수여부 요약
ma_size_feat = (
    obj_rel_ipos
    .groupby("objects_cfpr_id")
    .agg(
        n_people=("rel_p_id", "nunique"),
        is_acquired=("is_ipo", "max")   # 한 번이라도 인수되면 1
    )
    .reset_index()
)

In [42]:
# 내용 확인
print(obj_rel_ipos.columns)
display(obj_rel_ipos.head())

Index(['objects_cfpr_id', 'entity_type', 'parent_c_id', 'normalized_name',
       'category_code', 'status', 'founded_at', 'closed_at', 'description',
       'overview', 'tag_list', 'country_code', 'state_code', 'city', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_category_filled',
       'is_obj_funding_total_usd_private', 'is_obj_funding_rounds_private',
     

Unnamed: 0,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,last_funding_at,funding_rounds,funding_total_usd,first_milestone_at,last_milestone_at,...,rel_cf_id,start_at,end_at,is_past,sequence,title,has_end_date,cat_rel_title,ipo_id,ipos_c_id,valuation_amount,valuation_currency_code,raised_amount,raised_currency_code,public_at,stock_symbol,is_ipos_public_at_missing,stock_normalized,is_ipos_valuation_private,valuation_amount_usd,valuation_currency_rate,is_ipos_raised_private,ipo_raised_amount_usd,ipo_raised_currency_rate,is_ipo
0,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,0.0,8.0,Co-Founder/CEO/Board of Directors,0.0,Founder,,,,,,,,,,,,,,,,,0
1,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,1.0,279242.0,VP Marketing,0.0,VP,,,,,,,,,,,,,,,,,0
2,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,0.0,9.0,Board,0.0,Board,,,,,,,,,,,,,,,,,0
3,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,1.0,9.0,VP User Experience,0.0,VP,,,,,,,,,,,,,,,,,0
4,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,Seattle,,,0,0,2005-10-01,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,...,c:1,,,0.0,5.0,"SVP, Strategy and Business Development",0.0,VP,,,,,,,,,,,,,,,,,0


In [43]:
print(df_obj.shape)
print(df_rel.shape)
print(df_ipos.shape)
print("="*60)
print(obj_rel.shape)
print(obj_rel_ipos.shape) #26만행

(462620, 45)
(402412, 10)
(1259, 16)
(726575, 55)
(726737, 72)


In [44]:
obj_rel_ipos.to_csv("./data/join/ipo_size_final.csv", encoding="utf-8", index=False)
print("="*60)
print("IPO(규모) ipo_size csv 추출 완료!")
print("="*60)

IPO(규모) ipo_size csv 추출 완료!


### 스타트업 IPO(위치)
* offices ↔ objects ↔ ipos

In [45]:
off_obj_ipos = (
    df_off
    .merge(df_obj, left_on="offices_c_id", right_on="objects_cfpr_id", how="left")
    .merge(df_ipos, left_on="offices_c_id", right_on="ipos_c_id", how="left")
)

off_obj_ipos["is_ipo"] = off_obj_ipos["ipo_id"].notna().astype(int)

ma_rate_by_city = (
    off_obj_ipos
    .groupby("city_x")["is_ipo"]
    .mean()
    .reset_index(name="ipos_rate")
)

In [46]:
# 내용 확인
print(off_obj_ipos.columns)
display(off_obj_ipos.head())

Index(['offices_c_id', 'office_id', 'description_x', 'city_x', 'state_code_x',
       'country_code_x', 'latitude', 'longitude', 'offices_description_fixed',
       'cat_offices_description', 'offices_state_filled', 'objects_cfpr_id',
       'entity_type', 'parent_c_id', 'normalized_name', 'category_code',
       'status', 'founded_at', 'closed_at', 'description_y', 'overview',
       'tag_list', 'country_code_y', 'state_code_y', 'city_y', 'region',
       'first_investment_at', 'last_investment_at', 'investment_rounds',
       'invested_companies', 'first_funding_at', 'last_funding_at',
       'funding_rounds', 'funding_total_usd', 'first_milestone_at',
       'last_milestone_at', 'milestones', 'relationships',
       'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overvie

Unnamed: 0,offices_c_id,office_id,description_x,city_x,state_code_x,country_code_x,latitude,longitude,offices_description_fixed,cat_offices_description,offices_state_filled,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description_y,overview,tag_list,country_code_y,state_code_y,city_y,...,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,ipo_id,ipos_c_id,valuation_amount,valuation_currency_code,raised_amount,raised_currency_code,public_at,stock_symbol,is_ipos_public_at_missing,stock_normalized,is_ipos_valuation_private,valuation_amount_usd,valuation_currency_rate,is_ipos_raised_private,ipo_raised_amount_usd,ipo_raised_currency_rate,is_ipo
0,c:1,1,,Seattle,WA,USA,47.603122,-122.333253,no office info,No Office Info,WA,c:1,Company,,wetpaint,web,operating,2005-10-17,,Technology Platform Company,"Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wetpaints own online property, Wetpaint Entertainment, an entertainment news site that attracts more than 12 million unique visitors monthly and has over 2 million Facebook fans, is a proof point to the companys success in building and engaging audiences. Media companies can license Wetpaints platform which includes a dynamic playbook tailored to their individual needs and comprehensive training. Founded by Internet pioneer Ben Elowitz, and with offices in New York and Seattle, Wetpaint is backed by Accel Partners, the investors behind Facebook.","wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system",USA,WA,Seattle,...,Software & Technology,seattle,seattle,WA,seattle,web,0,0,,,,,,,,,,,,,,,,,0
1,c:3,3,Headquarters,Pleasanton,CA,USA,37.692934,-121.904945,headquarter,Headquarters,CA,c:3,Company,,zoho,software,operating,2005-09-15,,Online Business Apps Suite,"Zoho offers a suite of Business, Collaboration & Productivity applications. Apps include CRM, Customer Support, Office Suite, Email Hosting, Project Management, Accounting, App Creator among many others. \n\nZoho's parent company is Zoho Corp (previously AdventNet), ""a software company started in 1996 focusing on building quality and affordable software for businesses."" \n\nZoho's main competitors are [Google](/company/google) Docs & Spreadsheets, Salesforce as well as [Microsoft](/company/microsoft) Office.","zoho, officesuite, spreadsheet, writer, projects, sheet, crm, show, creator, wiki, planner, suite, notebook, chat, meeting, mail",USA,CA,Pleasanton,...,Software & Technology,sf bay area,sf bay area,CA,pleasanton,software,0,0,,,,,,,,,,,,,,,,,0
2,c:4,4,,San Francisco,CA,USA,37.764726,-122.394523,no office info,No Office Info,CA,c:4,Company,,digg,news,acquired,2004-10-11,,user driven social content website,"Digg is a user driven social content website. Everything on Digg is user-submitted. After you submit content, other people read your submission and ""Digg"" what they like best. If your story receives enough Diggs, it's promoted to the front page for other visitors to see.\n\n[Kevin Rose](http://www.crunchbase.com/person/kevin-rose) came up with the idea for Digg in the fall of 2004. He found programmer [Owen Byrne](http://www.crunchbase.com/person/owen-byrne) through [eLance](http://www.crunchbase.com/company/elance) and paid him $10/hour to develop the idea. In addition, Rose paid $99 per month for hosting and $1,200 for the Digg.com domain. In December of 2004, Kevin launched his creation to the world through a post on his blog.\n\nIn February of 2005, Paris Hilton's cell phone was hacked. Images and phone numbers from the phone were posted online and it didn't take long for a user to post the link on Digg. The rapidshare downloads site started to receive an enormous amount of traffic and it was then, Rose says, he saw ""the power of breaking stories before anyone else.""\n\nDigg has been a force ever since. Acquisition offers have been made, Rose was on the cover of [BusinessWeek](/company/businessweek) and according to [Alexa](/company/alexa), Digg is in the top 100 most trafficked sites on the internet. The success hasn't come without its share of problems though. The site has had to face [services aimed at gaming](http://www.techcrunch.com/2007/04/02/subvert-and-profit-next-service-to-try-gaming-digg/) the way stories hit the front page, as well as a [user revolt](http://www.techcrunch.com/2007/05/01/digg-surrenders-to-mob/). Digg has however been able to get over these hurdles as it continues to be one of the social news leaders.","community, social, news, bookmark, digg, technology, design",USA,CA,San Francisco,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,news,0,0,,,,,,,,,,,,,,,,,0
3,c:5,5,Headquarters,Menlo Park,CA,USA,37.41605,-122.151801,headquarter,Headquarters,CA,c:5,Company,,facebook,social,ipo,2004-02-01,,Social network,"Facebook is the world's largest social network, with over [1.15 billion monthly active users](http://techcrunch.com/2013/07/24/facebook-growth-2/).\n\nFacebook was founded by [Mark Zuckerberg](http://www.crunchbase.com/person/mark-zuckerberg) in February 2004, initially as an exclusive network for Harvard students. It was a huge hit: in 2 weeks, half of the schools in the Boston area began demanding a Facebook network. Zuckerberg immediately recruited his friends [Dustin Moskovitz](http://www.crunchbase.com/person/dustin-moskovitz), [Chris Hughes](/person/chris-hughes), and [Eduardo Saverin](/person/eduardo-saverin) to help build Facebook, and within four months, Facebook added 30 more college networks. \n\nThe original idea for the term Facebook came from Zuckerberg's high school (Phillips Exeter Academy). The Exeter Face Book was passed around to every student as a way for students to get to know their classmates for the following year. It was a physical paper book until Zuckerberg brought it to the internet.\n\nWith this success, Zuckerberg, Moskowitz and Hughes moved out to [Palo Alto](/maps/city/Palo%2520Alto) for the summer and rented a sublet. A few weeks later, Zuckerberg ran into the former cofounder of [Napster](/company/napster), [Sean Parker](/person/sean-parker). Parker soon moved in to Zuckerberg's apartment and they began working together. Parker provided the introduction to their first investor, [Peter Thiel](/person/peter-thiel), cofounder of [PayPal](/company/paypal) and managing partner of [The Founders Fund](http://www.crunchbase.com/financial-organization/founders-fund). Thiel invested $500,000 into Facebook. \n\nWith millions more users, [Friendster](/company/friendster) [attempted](http://www.techcrunch.com/2006/12/12/yahoos-project-fraternity-docs-leaked/) to acquire the company for $10 million in mid 2004. Facebook turned down the offer and subsequently received $12.7 million in funding from [Accel Partners](http://www.crunchbase.com/financial-organization/accel-partners), at a valuation of [around $100 million](http://www.techcrunch.com/2005/09/07/85-of-college-students-use-facebook/). Facebook continued to grow, opening up to high school students in September 2005 and adding an immensely popular photo sharing feature the next month. The next spring, Facebook received $25 million in funding from [Greylock Partners](http://www.crunchbase.com/financial-organization/greylock) and [Meritech Capital](http://www.crunchbase.com/financial-organization/meritech-capital-partners), as well as previous investors Accel Partners and Peter Thiel. The pre-money valuation for this deal was about $525 million. Facebook subsequently [opened](http://www.techcrunch.com/2006/04/26/facebook-goes-beyond-college-high-school-markets/) up to work networks, eventually amassing over 20,000 work networks. Finally in September 2006, Facebook [opened](http://www.techcrunch.com/2006/09/26/facebook-just-launched-open-registrations/) to anyone with an email address. \n \nIn the summer of 2006, [Yahoo](/company/yahoo) [attempted to acquire](http://www.techcrunch.com/2006/09/21/facebook-and-yahoo-in-acquisition-talks-for-1-billion/) the company for $1 billion dollars. [Reports](http://www.wired.com/techbiz/startups/news/2007/09/ff_facebook) actually indicated that Zuckerberg made a verbal agreement to sell Facebook to Yahoo. A few days later when Yahoo's stock price took a dive, the offer was lowered to $800 million and Zuckerberg walked away from the deal. Yahoo later [offered](http://www.techcrunch.com/2006/12/12/yahoos-project-fraternity-docs-leaked/) $1 billion again, this time Zuckerberg turned Yahoo down and earned instant notoriety as the ""kid"" who turned down a billion. This was not the first time Zuckerberg turned down an acquisition offer; [Viacom](/company/viacom) had previously [unsuccessfully](http://www.techcrunch.com/2006/03/28/facebook-is-doing-the-skype-dance/) attempted to acquire the company for $750 million in March 2006. \n\nNot long after, in October 2007, [Microsoft invested](http://www.techcrunch.com/2007/10/24/facebook-takes-the-microsoft-money-and-runs/) $240 million into Facebook for 1.6 percent of the company in. This meant a valuation of over $15 billion, making Facebook the [5th most valuable US Internet company](http://www.techcrunch.com/2007/10/25/perspective-facebook-is-now-5th-most-valuable-us-internet-company/), yet with only $150 million in annual revenue. Many explained Microsoft's decision as being solely driven by the desire to outbid Google.\n\nFacebook continued to receive funding, most notably in January 2011 receiving a [$1.5 billion round](http://techcrunch.com/2011/01/21/facebook-raises-1-5-billion-at-50-billion-valuation/), valuing the company at $50 billion. A year later, in February 2012, Facebook [announced](http://techcrunch.com/2012/02/01/facebook-files-for-5-billion-ipo/) that it was filing for its long-anticipated initial public offering. The company went public on May 18, 2012, opening on the NASDAQ with shares trading at [$42.05](http://techcrunch.com/2012/05/18/facebook-share-open-10-5-higher-at-42/).\n\nOne sour note for Facebook has been the [controversy](http://www.techcrunch.com/2007/07/16/the-ghost-of-zuckerbergs-past-may-haunt-facebook-ipo/) with social network ConnectU. The founders of ConnectU, former classmates of Mark Zuckerberg at Harvard, allege that Zuckerberg stole their original source code for Facebook. The ordeal has [gone to court](http://www.techcrunch.com/2007/10/10/facebook-vs-connectu-facebook-makes-untrue-assertions-claims-connectu/), and has now been [resolved](http://techcrunch.com/2011/06/22/after-seven-years-the-winklevosses-give-up-on-battle-with-facebook/). \n\nNotwithstanding this lingering controversy, Facebook's growth has been staggering. Facebook announced [astonishing numbers](http://techcrunch.com/2012/02/01/facebooks-s-1-845-million-users-every-month-more-than-half-daily-and-nearly-half-mobile/) in February 2012 upon filing for its IPO. As of July 2013, over [1.15 billion](http://techcrunch.com/2013/07/24/facebook-growth-2/) users log into Facebook every month, and 669 million users log in daily. Mobile users now make up half of Facebook's user base, with 819 million monthly actives. Facebook has also announced that by the end of 2011 there had been 100 billion friend connections, and in recent months users had been registering 2.7 billion Likes and Comments per day. Facebook is one of the [most trafficked sites](http://techcrunch.com/2010/12/29/hitwise-facebook-overtakes-google-to-become-most-visited-website-in-2010/) in the US, and its international growth has been impressive. Additionally, Facebook is the top photo sharing site with 250 million photos uploaded per day.\n\nFacebook users' [passion](http://www.techcrunch.com/2007/11/13/i-just-cant-be-a-college-student-without-facebook/)or [addiction](http://www.techcrunch.com/2007/03/09/career-advice-dont-choose-facebook-over-your-job/)to the site is unparalleled: more than half use the product every single day and users spend an average of 19 minutes a day on Facebook. The site's popularity has garnered it pop culture fame, so much so that in 2010 a feature film entitled _The Social Network_ was released which chronicled Facebook's inception.\n\nIn its 2012 [IPO filing](http://techcrunch.com/2012/02/01/facebook-files-for-5-billion-ipo/), Facebook announced that it intends to grow in the near future by expanding its global user base, increasing engagement by developing new social tools, improving the mobile experience, and creating more value for advertisers and users.","facebook, college, students, profiles, network, online-communities, social-networking",USA,CA,Menlo Park,...,Software & Technology,sf bay area,sf bay area,CA,menlo park,social,0,0,847.0,c:5,104000000000.0,USD,18400000000.0,USD,2012-05-18,NASDAQ:FB,0.0,NASDAQ,0.0,104000000000.0,1.0,0.0,18400000000.0,1.0,1
4,c:7,7,,Palo Alto,CA,ISR,0.0,0.0,no office info,No Office Info,CA,c:7,Company,,omnidrive,network_hosting,closed,2005-11-01,2008-09-15,,"Currently in public beta, Omnidrive makes it easy to access, edit and share your files from any computer with a web browser. The site was founded in late 2004 by Nik Cubrilovic and later launched at the November 2006 Web 2.0 Conference. An interesting slew of features make Omnidrive an attractive choice in the online file storage field.\n\nOmnidrive's competitors include [Box.net](http://www.crunchbase.com/company/box-net), [Streamload](http://www.streamload.com) and [Xdrive](http://www.xdrive.com).\n\n","storage, sharing, edit, online",ISR,CA,Palo Alto,...,Software & Technology,sf bay area,sf bay area,CA,palo alto,network_hosting,0,0,,,,,,,,,,,,,,,,,0


In [47]:
print(df_off.shape)
print(df_obj.shape)
print(df_ipos.shape)
print("="*60)
print(off_obj.shape) 
print(off_obj_ipos.shape) #1개행

(112718, 11)
(462620, 45)
(1259, 16)
(112718, 56)
(112719, 73)


In [48]:
off_obj_ipos.to_csv("./data/join/ipo_addr_final.csv", encoding="utf-8", index=False)
print("="*60)
print("IPO(위치) ipo_addr csv 추출 완료!")
print("="*60)

IPO(위치) ipo_addr csv 추출 완료!


### 스타트업 IPO(이슈/마일스톤)
* milestones ↔ objects ↔ ipos

In [49]:
mile_obj_ipos = (
    df_mile
    .merge(df_obj, left_on="mile_cfpr_id", right_on="objects_cfpr_id", how="left")
    .merge(df_ipos, left_on="mile_cfpr_id", right_on="ipos_c_id", how="left")
)

mile_obj_ipos["is_ipo"] = mile_obj_ipos["ipo_id"].notna().astype(int)

ipos_rate_by_issue = (
    mile_obj_ipos
    .groupby("cat_mile_description")["is_ipo"]
    .mean()
    .reset_index(name="ipos_rate")
)

In [50]:
# 내용 확인
print(mile_obj_ipos.columns)
display(mile_obj_ipos.head())

Index(['Unnamed: 0', 'mile_cfpr_id', 'milestone_at', 'description_x',
       'cat_mile_description', 'objects_cfpr_id', 'entity_type', 'parent_c_id',
       'normalized_name', 'category_code', 'status', 'founded_at', 'closed_at',
       'description_y', 'overview', 'tag_list', 'country_code', 'state_code',
       'city', 'region', 'first_investment_at', 'last_investment_at',
       'investment_rounds', 'invested_companies', 'first_funding_at',
       'last_funding_at', 'funding_rounds', 'funding_total_usd',
       'first_milestone_at', 'last_milestone_at', 'milestones',
       'relationships', 'is_obj_parent_id_missing', 'is_obj_category_missing',
       'is_obj_founded_missing', 'is_obj_closed_missing',
       'is_obj_overview_missing', 'is_obj_state_missing',
       'is_obj_inv_rounds_missing', 'is_obj_inv_comp_missing',
       'cat_obj_status', 'obj_overview_fixed', 'cat_obj_overview',
       'obj_region_fixed', 'cat_obj_region', 'obj_state_filled',
       'obj_city_fixed', 'obj_cat

Unnamed: 0.1,Unnamed: 0,mile_cfpr_id,milestone_at,description_x,cat_mile_description,objects_cfpr_id,entity_type,parent_c_id,normalized_name,category_code,status,founded_at,closed_at,description_y,overview,tag_list,country_code,state_code,city,region,first_investment_at,last_investment_at,investment_rounds,invested_companies,first_funding_at,...,cat_obj_overview,obj_region_fixed,cat_obj_region,obj_state_filled,obj_city_fixed,obj_category_filled,is_obj_funding_total_usd_private,is_obj_funding_rounds_private,ipo_id,ipos_c_id,valuation_amount,valuation_currency_code,raised_amount,raised_currency_code,public_at,stock_symbol,is_ipos_public_at_missing,stock_normalized,is_ipos_valuation_private,valuation_amount_usd,valuation_currency_rate,is_ipos_raised_private,ipo_raised_amount_usd,ipo_raised_currency_rate,is_ipo
0,0,c:12,2008-06-09,Survives iPhone 3G Stevenote,survives,c:12,Company,,twitter,social,ipo,2006-03-21,,Real time communication platform,"Created in 2006, Twitter is a global real-time communications platform with 400 million monthly visitors to twitter.com, more than 200 million monthly active users around the world.\n\nWe see a billion tweets every 2.5 days on every conceivable topic. World leaders, major athletes, star performers, news organizations and entertainment outlets are among the millions of active Twitter accounts through which users can truly get the pulse of the planet.","text, messaging, social, community, twitter, tweet, twttr, microblog, sms",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2007-07-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,social,0.0,0.0,1310.0,c:12,18100000000.0,USD,1820000000.0,USD,2013-11-07,NYSE:TWTR,0.0,NYSE,0.0,18100000000.0,1.0,0.0,1820000000.0,1.0,1
1,1,c:3138,2008-06-17,Twhirl announces support for Seesmic video playback.,product_update,c:3138,Company,,twhirl,software,acquired,2007-11-12,,Twitter Desktop Client,"Twhirl is a free desktop client for the Twitter microblogging service that is powered by the cross-platform Adobe AIR. The application helps the user sort through incoming ""Tweets"" by color coding them and allowing timeline filtering and visual & auditory notifications.\n\nThere are many additional features that are not available on the standard Twitter site, such as favorites settings, URL shortening, customizable color schemes, multiple Twitter account connections, localization, and cross-posting capabilities to Pownce and Jaiku!","AIR, twitter, pownce, jaiku, microblogging",,,,unknown,,,0.0,0.0,,...,Software & Technology,,,,,software,1.0,1.0,,,,,,,,,,,,,,,,,0
2,2,c:59,2008-06-18,"More than 4 Billion videos viewed at Google Sites, 37.9% according to comScore",more,c:59,Company,,google,search,ipo,1998-09-07,,,"Google provides search and advertising services, which together aim to organize and monetize the world's information. In addition to its dominant search engine, it offers a plethora of online tools and platforms including: [Gmail](http://www.crunchbase.com/product/gmail), [Maps](http://www.crunchbase.com/product/google-maps), [YouTube](http://www.crunchbase.com/company/youtube), and [Google+](/product/google-plus), the company's extension into the social space. Most of its Web-based products are free, funded by Google's highly integrated online advertising platforms [AdWords](/product/google-adwords-2) and [AdSense](/product/google-adsense). Google promotes the idea that advertising should be highly targeted and relevant to users thus providing them with a rich source of information. \n\nGoogle has also made strong moves into the web-based apps space with acquisitions of [Jotspot](http://www.crunchbase.com/company/jotspot) (documents), [2Web Technologies](http://www.crunchbase.com/company/2web-technologies) (spreadsheets) and [Zenter](http://www.crunchbase.com/company/zenter) (presentations). The company has also made a splash with the acquisitions of [YouTube](http://www.crunchbase.com/company/youtube), [DoubleClick](http://www.crunchbase.com/company/doubleclick) and [Feedburner](http://www.crunchbase.com/company/feedburner). \n\n","search, gmail, blogger, find, information, youtube, google",USA,CA,Mountain View,SF Bay,2004-06-18,2013-05-01,29.0,26.0,1998-08-01,...,Software & Technology,sf bay area,sf bay area,CA,mountain view,search,0.0,0.0,4.0,c:59,,USD,,,2004-08-25,NASDAQ:GOOG,0.0,NASDAQ,1.0,,1.0,1.0,,,1
3,3,c:314,2008-06-18,Reddit goes Open Source,launch,c:314,Company,,reddit,web,acquired,2005-01-01,,Social news website,"Launched in 2005, Reddit is a social news website that displays news based on your personal preferences and what the community likes. Your preferences are determined based on your history of voting stories up or down.\n\nThe company was started by two University of Virginia grads, Alexis Ohanian and Steve Huffman in the Y Combinator program. Two others, Christopher Slowe and Aaron Swartz, later joined the team.\n\nConde Nast, owner of Wired and other magazines/websites, acquired Reddit in October of 2006. Reddit competitors include [Digg](http://www.crunchbase.com/company/digg), [Netscape](http://www.techcrunch.com/tag/netscape/) and [Newsvine](http://www.crunchbase.com/company/newsvine).\n\nIn February 2008, Reddit began experimenting with allowing users to create their own 'subreddits,' such as [shutupandtakemymoney](http://reddit.com/r/shutupandtakemymoney) and [todayilearned](http://reddit.com/r/todayilearned).\n\nIn September 2011, the company changed hands from Condé Nast to its parent company, Advanced Publications. As part of the new structure, reddit incorporated and now reports directly to a board, including Alexis Ohanian , Bob Sauerberg (President), Joe Simon (Chief Technology Officer) from Condé Nast, and Andrew Siegel (Senior Vice President, Strategy and Corporate Development) from Advance.","social, news, reddit, voting",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2005-06-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,web,0.0,0.0,,,,,,,,,,,,,,,,,0
4,4,c:314,2008-01-22,Adds the ability to create your own Reddits,product_update,c:314,Company,,reddit,web,acquired,2005-01-01,,Social news website,"Launched in 2005, Reddit is a social news website that displays news based on your personal preferences and what the community likes. Your preferences are determined based on your history of voting stories up or down.\n\nThe company was started by two University of Virginia grads, Alexis Ohanian and Steve Huffman in the Y Combinator program. Two others, Christopher Slowe and Aaron Swartz, later joined the team.\n\nConde Nast, owner of Wired and other magazines/websites, acquired Reddit in October of 2006. Reddit competitors include [Digg](http://www.crunchbase.com/company/digg), [Netscape](http://www.techcrunch.com/tag/netscape/) and [Newsvine](http://www.crunchbase.com/company/newsvine).\n\nIn February 2008, Reddit began experimenting with allowing users to create their own 'subreddits,' such as [shutupandtakemymoney](http://reddit.com/r/shutupandtakemymoney) and [todayilearned](http://reddit.com/r/todayilearned).\n\nIn September 2011, the company changed hands from Condé Nast to its parent company, Advanced Publications. As part of the new structure, reddit incorporated and now reports directly to a board, including Alexis Ohanian , Bob Sauerberg (President), Joe Simon (Chief Technology Officer) from Condé Nast, and Andrew Siegel (Senior Vice President, Strategy and Corporate Development) from Advance.","social, news, reddit, voting",USA,CA,San Francisco,SF Bay,,,0.0,0.0,2005-06-01,...,Software & Technology,sf bay area,sf bay area,CA,san francisco,web,0.0,0.0,,,,,,,,,,,,,,,,,0


In [51]:
print(df_mile.shape)
print(df_obj.shape)
print(df_ipos.shape)
print("="*60)
print(mile_obj.shape) #3개행
print(mile_obj_ipos.shape) #3개행

(39456, 5)
(462620, 45)
(1259, 16)
(39456, 50)
(39459, 67)


In [52]:
mile_obj_ipos.to_csv("./data/join/ipo_issues_final.csv", encoding="utf-8", index=False)
print("="*60)
print("IPO(이슈/마일스톤) ipo_issues csv 추출 완료!")
print("="*60)

IPO(이슈/마일스톤) ipo_issues csv 추출 완료!
