In [35]:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [36]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [37]:
# Load the unicorn-companies dataset and preview its first five rows.
companies = pd.read_csv(filepath_or_buffer='Modified_Unicorn_Companies.csv')
companies.head(n=5)

Unnamed: 0,Company,Valuation,Date Joined,Industry,City,Country/Region,Continent,Year Founded,Funding,Select Investors
0,Bytedance,180,2017-04-07,Artificial intelligence,Beijing,China,Asia,2012,$8B,"Sequoia Capital China, SIG Asia Investments, S..."
1,SpaceX,100,2012-12-01,Other,Hawthorne,United States,North America,2002,$7B,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,SHEIN,100,2018-07-03,E-commerce & direct-to-consumer,Shenzhen,China,Asia,2008,$2B,"Tiger Global Management, Sequoia Capital China..."
3,Stripe,95,2014-01-23,FinTech,San Francisco,United States,North America,2010,$2B,"Khosla Ventures, LowercaseCapital, capitalG"
4,Klarna,46,2011-12-12,Fintech,Stockholm,Sweden,Europe,2005,$4B,"Institutional Venture Partners, Sequoia Capita..."


In [38]:
# Inspect the dtype of every column as loaded from the CSV.
companies.dtypes

Unnamed: 0,0
Company,object
Valuation,int64
Date Joined,object
Industry,object
City,object
Country/Region,object
Continent,object
Year Founded,int64
Funding,object
Select Investors,object


In [39]:
# Convert the 'Date Joined' column to datetime values.
joined_dates = pd.to_datetime(companies['Date Joined'])
companies['Date Joined'] = joined_dates

In [40]:
# Time to unicorn status: year of 'Date Joined' minus 'Year Founded'.
year_joined = companies['Date Joined'].dt.year
companies['Years To Unicorn'] = year_joined.sub(companies['Year Founded'])

In [41]:
# Summary statistics for the time-to-unicorn column.
years_to_unicorn = companies['Years To Unicorn']
years_to_unicorn.describe()

Unnamed: 0,Years To Unicorn
count,1074.0
mean,7.013035
std,5.331842
min,-3.0
25%,4.0
50%,6.0
75%,9.0
max,98.0


In [42]:
# List companies whose 'Years To Unicorn' is negative (an implausible value).
has_negative_duration = companies['Years To Unicorn'].lt(0)
companies.loc[has_negative_duration]

Unnamed: 0,Company,Valuation,Date Joined,Industry,City,Country/Region,Continent,Year Founded,Funding,Select Investors,Years To Unicorn
527,InVision,2,2017-11-01,Internet software & services,New York,United States,North America,2020,$349M,"FirstMark Capital, Tiger Global Management, IC...",-3


In [43]:
# Correct the founding year recorded for InVision to 2011.
is_invision = companies['Company'].eq('InVision')
companies.loc[is_invision, 'Year Founded'] = 2011

# Display the updated InVision row ('Years To Unicorn' is not recomputed here).
companies.loc[is_invision]

Unnamed: 0,Company,Valuation,Date Joined,Industry,City,Country/Region,Continent,Year Founded,Funding,Select Investors,Years To Unicorn
527,InVision,2,2017-11-01,Internet software & services,New York,United States,North America,2011,$349M,"FirstMark Capital, Tiger Global Management, IC...",-3


In [44]:
# Recompute the time to unicorn status from the current column values.
year_joined = companies['Date Joined'].dt.year
year_founded = companies['Year Founded']
companies['Years To Unicorn'] = year_joined - year_founded

# Show summary statistics for the recomputed column.
companies['Years To Unicorn'].describe()

Unnamed: 0,Years To Unicorn
count,1074.0
mean,7.021415
std,5.323155
min,0.0
25%,4.0
50%,6.0
75%,9.0
max,98.0


In [45]:
# Reference list of the accepted industry category labels.
industry_list = [
    'Artificial intelligence',
    'Other',
    'E-commerce & direct-to-consumer',
    'Fintech',
    'Internet software & services',
    'Supply chain, logistics, & delivery',
    'Consumer & retail',
    'Data management & analytics',
    'Edtech',
    'Health',
    'Hardware',
    'Auto & transportation',
    'Travel',
    'Cybersecurity',
    'Mobile & telecommunications',
]

In [46]:
# Find the 'Industry' values that do not appear in industry_list.
set(companies['Industry']).difference(industry_list)

{'Artificial Intelligence', 'Data management and analytics', 'FinTech'}

In [47]:
# Mapping from each misspelled industry label to its accepted spelling.
replacement_dict = {
    'Artificial Intelligence': 'Artificial intelligence',
    'Data management and analytics': 'Data management & analytics',
    'FinTech': 'Fintech',
}

# Apply the corrections to the 'Industry' column.
companies['Industry'] = companies['Industry'].replace(to_replace=replacement_dict)

# Re-check which 'Industry' values are still missing from industry_list.
set(companies['Industry']).difference(industry_list)

set()

In [48]:
# Show every row whose 'Company' value occurs more than once.
is_repeated = companies.duplicated(subset=['Company'], keep=False)
companies.loc[is_repeated]

Unnamed: 0,Company,Valuation,Date Joined,Industry,City,Country/Region,Continent,Year Founded,Funding,Select Investors,Years To Unicorn
385,BrewDog,2,2017-04-10,Consumer & retail,Aberdeen,United Kingdom,Europe,2007,$233M,"TSG Consumer Partners, Crowdcube",10
386,BrewDog,2,2017-04-10,Consumer & retail,Aberdeen,UnitedKingdom,Europe,2007,$233M,TSG Consumer Partners,10
510,ZocDoc,2,2015-08-20,Health,New York,United States,North America,2007,$374M,"Founders Fund, Khosla Ventures, Goldman Sachs",8
511,ZocDoc,2,2015-08-20,Health,,United States,North America,2007,$374M,Founders Fund,8
1031,SoundHound,1,2018-05-03,Artificial intelligence,Santa Clara,United States,North America,2005,$215M,"Tencent Holdings, Walden Venture Capital, Glob...",13
1032,SoundHound,1,2018-05-03,Other,Santa Clara,United States,North America,2005,$215M,Tencent Holdings,13


In [49]:
# Remove repeated companies, keeping the first row seen for each 'Company'.
companies = companies.drop_duplicates(
    subset='Company',
    keep='first',
    ignore_index=False,
)

In [50]:
# Split companies into two quantile-based groups by 'Valuation',
# labelled 'low' and 'high'.
valuation_labels = ['low', 'high']
companies['High Valuation'] = pd.qcut(x=companies['Valuation'], q=2, labels=valuation_labels)

In [51]:
# Count how many companies are located on each continent.
continent_counts = companies['Continent'].value_counts()
continent_counts

Unnamed: 0_level_0,count
Continent,Unnamed: 1_level_1
North America,586
Asia,310
Europe,143
South America,21
Oceania,8
Africa,3


In [52]:
# Encode each continent name as an integer code.
continent_dict = {
    'North America': 1,
    'Asia': 2,
    'Europe': 3,
    'South America': 4,
    'Oceania': 5,
    'Africa': 6,
}

# Use Series.map for the lookup. Series.replace with string -> integer
# replacements relies on an implicit dtype downcast that pandas has deprecated
# (it emits a FutureWarning). Unlike replace, map yields NaN for any continent
# that is missing from continent_dict.
companies['Continent Number'] = companies['Continent'].map(continent_dict)

# Show the first 5 rows to check the new 'Continent Number' column.
companies.head()

  companies['Continent Number'] = companies['Continent'].replace(continent_dict)


Unnamed: 0,Company,Valuation,Date Joined,Industry,City,Country/Region,Continent,Year Founded,Funding,Select Investors,Years To Unicorn,High Valuation,Continent Number
0,Bytedance,180,2017-04-07,Artificial intelligence,Beijing,China,Asia,2012,$8B,"Sequoia Capital China, SIG Asia Investments, S...",5,high,2
1,SpaceX,100,2012-12-01,Other,Hawthorne,United States,North America,2002,$7B,"Founders Fund, Draper Fisher Jurvetson, Rothen...",10,high,1
2,SHEIN,100,2018-07-03,E-commerce & direct-to-consumer,Shenzhen,China,Asia,2008,$2B,"Tiger Global Management, Sequoia Capital China...",10,high,2
3,Stripe,95,2014-01-23,Fintech,San Francisco,United States,North America,2010,$2B,"Khosla Ventures, LowercaseCapital, capitalG",4,high,1
4,Klarna,46,2011-12-12,Fintech,Stockholm,Sweden,Europe,2005,$4B,"Institutional Venture Partners, Sequoia Capita...",6,high,3


In [53]:
# Encode the categorical 'Country/Region' column as integer category codes.
country_categories = companies['Country/Region'].astype('category')
companies['Country/Region Numeric'] = country_categories.cat.codes

In [54]:
# One-hot encode the 'Industry' column (one indicator column per industry).
industry_encoded = pd.get_dummies(companies['Industry'])

# Append the indicator columns to the right of the existing columns.
companies = pd.concat(objs=[companies, industry_encoded], axis='columns')

In [55]:
# Preview the first five rows, including the newly added encoded columns.
companies.head(n=5)

Unnamed: 0,Company,Valuation,Date Joined,Industry,City,Country/Region,Continent,Year Founded,Funding,Select Investors,Years To Unicorn,High Valuation,Continent Number,Country/Region Numeric,Artificial intelligence,Auto & transportation,Consumer & retail,Cybersecurity,Data management & analytics,E-commerce & direct-to-consumer,Edtech,Fintech,Hardware,Health,Internet software & services,Mobile & telecommunications,Other,"Supply chain, logistics, & delivery",Travel
0,Bytedance,180,2017-04-07,Artificial intelligence,Beijing,China,Asia,2012,$8B,"Sequoia Capital China, SIG Asia Investments, S...",5,high,2,9,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,SpaceX,100,2012-12-01,Other,Hawthorne,United States,North America,2002,$7B,"Founders Fund, Draper Fisher Jurvetson, Rothen...",10,high,1,44,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False
2,SHEIN,100,2018-07-03,E-commerce & direct-to-consumer,Shenzhen,China,Asia,2008,$2B,"Tiger Global Management, Sequoia Capital China...",10,high,2,9,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
3,Stripe,95,2014-01-23,Fintech,San Francisco,United States,North America,2010,$2B,"Khosla Ventures, LowercaseCapital, capitalG",4,high,1,44,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
4,Klarna,46,2011-12-12,Fintech,Stockholm,Sweden,Europe,2005,$4B,"Institutional Venture Partners, Sequoia Capita...",6,high,3,38,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
