In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# reading data files
# using encoding = "ISO-8859-1" to avoid pandas encoding error
rounds = pd.read_csv("./rounds2.csv", encoding = "ISO-8859-1")
companies = pd.read_csv("./companies.txt", sep="\t", encoding = "ISO-8859-1")


In [2]:
rounds.head()

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.0
1,/ORGANIZATION/-QOUNTER,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,A,14-10-2014,
2,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,700000.0
3,/ORGANIZATION/-THE-ONE-OF-THEM-INC-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3406878.0
4,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2000000.0


In [3]:
companies.head()

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at
0,/Organization/-Fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,
1,/Organization/-Qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014
2,/Organization/-The-One-Of-Them-Inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,
3,/Organization/0-6-Com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007
4,/Organization/004-Technologies,004 Technologies,http://004gmbh.de/en/004-interact,Software,operating,USA,IL,"Springfield, Illinois",Champaign,01-01-2010


# Data Cleaning

### We can see that permalink of Companies data frame and Companies_permalink of rounds dataframe are different although they are same
### Let's convert them to lower case

In [4]:
companies["permalink"]=companies["permalink"].str.lower()
rounds["company_permalink"]=rounds["company_permalink"].str.lower()

In [5]:
len(companies["permalink"].unique())

66368

In [6]:
len(rounds["company_permalink"].unique())

66370

Appears like 2 extra companies are there in rounds file which received investment while not listed in companies file let's check them out 

In [7]:
rounds.loc[~rounds["company_permalink"].isin(companies["permalink"])]

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
29597,/organization/e-cãbica,/funding-round/8491f74869e4fe8ba9c378394f8fbdea,seed,,01-02-2015,
31863,/organization/energystone-games-çµç³æ¸¸æ,/funding-round/b89553f3d2279c5683ae93f45a21cfe0,seed,,09-08-2014,
45176,/organization/huizuche-com-æ ç§ÿè½¦,/funding-round/8f8a32dbeeb0f831a78702f83af78a36,seed,,18-09-2014,
58473,/organization/magnet-tech-ç£ç³ç§æ,/funding-round/8fc91fbb32bc95e97f151dd0cb4166bf,seed,,16-08-2014,1625585.0
101036,/organization/tipcat-interactive-æ²èÿä¿¡æ¯ç...,/funding-round/41005928a1439cb2d706a43cb661f60f,seed,,06-09-2010,
109969,/organization/weiche-tech-åè½¦ç§æ,/funding-round/f74e457f838b81fa0b29649740f186d8,venture,A,06-09-2015,
113839,/organization/zengame-ç¦æ¸¸ç§æ,/funding-round/6ba28fb4f3eadf5a9c6c81bc5dde6cdf,seed,,17-07-2010,


Seems like problemiswith encoding.By converting into english

In [8]:
rounds['company_permalink'] = rounds.company_permalink.str.encode('utf-8').str.decode('ascii', 'ignore')
rounds.loc[~rounds['company_permalink'].isin(companies['permalink']), :]

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
77,/organization/10north,/funding-round/b41ff7de932f8b6e5bbeed3966c0ed6a,equity_crowdfunding,,12-08-2014,
729,/organization/51wofang-,/funding-round/346b9180d276a74e0fbb2825e66c6f5b,venture,A,06-07-2015,5000000.0
2670,/organization/adslinked,/funding-round/449ae54bb63c768c232955ca6911dee4,seed,,29-09-2014,100000.0
3166,/organization/aesthetic-everything-social-network,/funding-round/62593455f1a69857ed05d5734cc04132,equity_crowdfunding,,12-10-2014,
3291,/organization/affluent-attach-club-2,/funding-round/626678bdf1654bc4df9b1b34647a4df1,seed,,15-10-2014,100000.0
4568,/organization/allgu-outlet,/funding-round/49e8a9b54ed19c8505ca92dc031a8e9c,venture,,17-03-2015,1400000.0
8097,/organization/asiansbook,/funding-round/3f243ab92b4fe397d41b4734a17ca5f0,seed,,15-09-2014,100000.0
8652,/organization/atlye-gri,/funding-round/75bdeacd95a647108aa4bc480e77894d,grant,,01-11-2012,13940.0
9784,/organization/axgaz,/funding-round/511a41181aaf193bbd419babfb8d66e9,venture,,06-07-2015,4094392.0
14311,/organization/boral-bikes-incorporated,/funding-round/be79575bf4b5b5d6fa64670800a3ca5e,seed,,26-02-2015,


In [9]:
companies['permalink'] = companies.permalink.str.encode('utf-8').str.decode('ascii', 'ignore')


In [10]:
print(len(companies["permalink"].unique()))
print(len(rounds["company_permalink"].unique()))

66368
66368


In [11]:
rounds.to_csv("rounds_new.csv", sep=',', index=False)

# write companies file
companies.to_csv("companies_new.csv", sep='\t', index=False)

In [12]:
rounds = pd.read_csv("rounds_new.csv", encoding = "ISO-8859-1")
companies = pd.read_csv("companies_new.csv", sep="\t", encoding = "ISO-8859-1")

# Missing Value Treatment,Merging and selecting appropriate columns

In [13]:
# missing values in companies df
(companies.isnull().sum()/len(companies.index))*100

permalink         0.000000
name              0.001507
homepage_url      7.621143
category_list     4.743250
status            0.000000
country_code     10.483968
state_code       12.878194
region           12.099204
city             12.096191
founded_at       22.934245
dtype: float64

In [14]:
(rounds.isnull().sum()/len(rounds.index))*100

company_permalink           0.000000
funding_round_permalink     0.000000
funding_round_type          0.000000
funding_round_code         72.909725
funded_at                   0.000000
raised_amount_usd          17.390321
dtype: float64

In [15]:
master = pd.merge(companies, rounds, how="inner", left_on="permalink", right_on="company_permalink")
master.head()

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.0
1,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014,/organization/-qounter,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,A,14-10-2014,
2,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,700000.0
3,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3406878.0
4,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2000000.0


In [16]:
master =  master.drop(['company_permalink'], axis=1) 

In [17]:
(master.isnull().sum()/len(master.index))*100

permalink                   0.000000
name                        0.000870
homepage_url                5.336280
category_list               2.966533
status                      0.000000
country_code                7.549435
state_code                  9.522484
region                      8.844792
city                        8.842182
founded_at                 17.852265
funding_round_permalink     0.000000
funding_round_type          0.000000
funding_round_code         72.909725
funded_at                   0.000000
raised_amount_usd          17.390321
dtype: float64

In [18]:
master =  master.drop(['funding_round_code'], axis=1) 

In [19]:
master = master.drop(['homepage_url', 'founded_at', 'state_code', 'region', 'city'], axis=1)
master.head()

Unnamed: 0,permalink,name,category_list,status,country_code,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd
0,/organization/-fame,#fame,Media,operating,IND,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.0
1,/organization/-qounter,:Qounter,Application Platforms|Real Time|Social Network...,operating,USA,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,14-10-2014,
2,/organization/-qounter,:Qounter,Application Platforms|Real Time|Social Network...,operating,USA,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,01-03-2014,700000.0
3,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",Apps|Games|Mobile,operating,,/funding-round/650b8f704416801069bb178a1418776b,venture,30-01-2014,3406878.0
4,/organization/0-6-com,0-6.com,Curated Web,operating,CHN,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,19-03-2008,2000000.0


In [20]:
master.shape

(114949, 9)

In [21]:
master['raised_amount_usd'].describe()

count    9.495900e+04
mean     1.042687e+07
std      1.148212e+08
min      0.000000e+00
25%      3.225000e+05
50%      1.680511e+06
75%      7.000000e+06
max      2.127194e+10
Name: raised_amount_usd, dtype: float64

In [22]:
master =master[~np.isnan(master['raised_amount_usd'])]
(master.isnull().sum()/len(master.index))*100

permalink                  0.000000
name                       0.001053
category_list              1.099422
status                     0.000000
country_code               6.161607
funding_round_permalink    0.000000
funding_round_type         0.000000
funded_at                  0.000000
raised_amount_usd          0.000000
dtype: float64

In [23]:
country_codes = master['country_code'].astype('category')

# displaying frequencies of each category
100*(master['country_code'].value_counts()/len(master.index))

USA    65.342937
GBR     5.285439
CAN     2.754873
CHN     2.029297
IND     1.736539
FRA     1.528028
ISR     1.436409
ESP     1.131014
DEU     1.097316
AUS     0.683453
RUS     0.619215
IRL     0.592887
SWE     0.589728
SGP     0.574985
NLD     0.560242
JPN     0.510747
BRA     0.508641
ITA     0.508641
CHE     0.460199
KOR     0.454933
CHL     0.454933
FIN     0.402279
DNK     0.330669
ARG     0.312767
BEL     0.308554
HKG     0.263272
TUR     0.206405
NOR     0.201139
BGR     0.200086
MEX     0.199033
         ...    
MCO     0.002106
ZMB     0.002106
TUN     0.002106
BLZ     0.002106
BAH     0.002106
KWT     0.002106
CIV     0.002106
ALB     0.002106
DOM     0.002106
SOM     0.001053
MNE     0.001053
LAO     0.001053
TGO     0.001053
UZB     0.001053
KNA     0.001053
GRD     0.001053
HND     0.001053
QAT     0.001053
DZA     0.001053
OMN     0.001053
SYC     0.001053
GGY     0.001053
MKD     0.001053
DMA     0.001053
PSE     0.001053
IRN     0.001053
SEN     0.001053
PRY     0.0010

In [24]:
master = master[~pd.isnull(master['country_code'])]

In [25]:
(master.isnull().sum()/len(master.index))*100

permalink                  0.000000
name                       0.001122
category_list              0.649773
status                     0.000000
country_code               0.000000
funding_round_permalink    0.000000
funding_round_type         0.000000
funded_at                  0.000000
raised_amount_usd          0.000000
dtype: float64

In [26]:
# writing the clean dataframe to an another file
master.to_csv("master_df.csv", sep=',', index=False)

In [27]:
df = pd.read_csv("master_df.csv", sep=",", encoding="ISO-8859-1")
df.head()

Unnamed: 0,permalink,name,category_list,status,country_code,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd
0,/organization/-fame,#fame,Media,operating,IND,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.0
1,/organization/-qounter,:Qounter,Application Platforms|Real Time|Social Network...,operating,USA,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,01-03-2014,700000.0
2,/organization/0-6-com,0-6.com,Curated Web,operating,CHN,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,19-03-2008,2000000.0
3,/organization/01games-technology,01Games Technology,Games,operating,HKG,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,01-07-2014,41250.0
4,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,Biotechnology,operating,CAN,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,11-09-2009,43360.0


In [28]:
df["funding_round_type"].value_counts()

venture                  48111
seed                     21233
debt_financing            6533
angel                     4427
grant                     1979
private_equity            1835
undisclosed               1352
convertible_note          1326
equity_crowdfunding       1136
post_ipo_equity            601
product_crowdfunding       332
post_ipo_debt              151
non_equity_assistance       63
secondary_market            29
Name: funding_round_type, dtype: int64

## There are number of funding types but we need only "Seed","Private equity", "angel", "venture"

In [29]:
df = df[(df["funding_round_type"]=="seed")|
        (df["funding_round_type"]=="venture") |
        (df["funding_round_type"] == "angel") |
        (df["funding_round_type"] == "private_equity")]

In [30]:
df.groupby("funding_round_type")["raised_amount_usd"].mean()/1000000


funding_round_type
angel              0.968560
private_equity    73.618564
seed               0.748104
venture           11.735780
Name: raised_amount_usd, dtype: float64

Let's go with median  since they are most occuring values and due to it's robust ness

In [31]:
df.groupby("funding_round_type")["raised_amount_usd"].median()/1000000

funding_round_type
angel              0.410737
private_equity    20.000000
seed               0.300000
venture            5.000000
Name: raised_amount_usd, dtype: float64

### We can see that "Private Equity" is beyond while "Angel" and " Seed" are below range. So "Venture" type of funding is more suitable

## Country Wise Analysis

In [32]:
df = df[df.funding_round_type=="venture"]

# group by country codes and compare the total funding amounts
country_wise = df.groupby('country_code')['raised_amount_usd'].sum().sort_values(ascending=False)
print(country_wise)

country_code
USA    4.225108e+11
CHN    3.983542e+10
GBR    2.024563e+10
IND    1.439186e+10
CAN    9.583332e+09
FRA    7.259537e+09
ISR    6.907515e+09
DEU    6.346960e+09
JPN    3.363677e+09
SWE    3.254953e+09
NLD    2.939404e+09
CHE    2.827560e+09
SGP    2.793918e+09
ESP    1.835831e+09
BRA    1.785987e+09
IRL    1.676131e+09
RUS    1.570426e+09
AUS    1.322935e+09
DNK    1.228311e+09
BEL    1.070542e+09
FIN    1.043200e+09
NOR    9.561924e+08
KOR    9.399883e+08
MYS    8.830588e+08
HKG    7.812670e+08
AUT    6.309550e+08
TWN    6.239795e+08
TUR    5.590975e+08
ITA    4.882894e+08
NZL    4.483164e+08
           ...     
KWT    1.400000e+07
LIE    1.309172e+07
MNE    1.220000e+07
SVN    1.201751e+07
BGR    1.130000e+07
KAZ    1.100000e+07
GRC    1.074378e+07
BAH    8.900000e+06
TTO    8.500000e+06
SVK    8.241062e+06
BGD    7.002000e+06
LBN    6.455000e+06
GGY    3.960000e+06
TUN    3.920000e+06
SEN    2.860000e+06
HRV    2.633669e+06
UGA    2.500000e+06
PER    2.469270e+06
BWA    

In [33]:
top9 = country_wise[0:10]
top9

country_code
USA    4.225108e+11
CHN    3.983542e+10
GBR    2.024563e+10
IND    1.439186e+10
CAN    9.583332e+09
FRA    7.259537e+09
ISR    6.907515e+09
DEU    6.346960e+09
JPN    3.363677e+09
SWE    3.254953e+09
Name: raised_amount_usd, dtype: float64

#### Top3 English Speaking Countries : "USA","GBR","IND"

In [34]:
top3_eng = top9.loc[["USA","GBR","IND"]]
top3_eng.sort_values(ascending=False)

country_code
USA    4.225108e+11
GBR    2.024563e+10
IND    1.439186e+10
Name: raised_amount_usd, dtype: float64

In [35]:
df = df[(df.country_code=='USA') | (df.country_code=='GBR') | (df.country_code=='IND')]
df.head()

Unnamed: 0,permalink,name,category_list,status,country_code,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd
0,/organization/-fame,#fame,Media,operating,IND,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.0
7,/organization/0xdata,H2O.ai,Analytics,operating,USA,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,09-11-2015,20000000.0
8,/organization/0xdata,H2O.ai,Analytics,operating,USA,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,03-01-2013,1700000.0
9,/organization/0xdata,H2O.ai,Analytics,operating,USA,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,19-07-2014,8900000.0
15,/organization/1-mainstream,1 Mainstream,Apps|Cable|Distribution|Software,acquired,USA,/funding-round/b952cbaf401f310927430c97b68162ea,venture,17-03-2015,5000000.0


In [36]:
df["category_list"]=df["category_list"].to_string()

In [37]:
df["main_category"]=df["category_list"].apply(lambda x : x.split("|")[0])

In [38]:
df = df.drop('category_list', axis=1)
df.head()

Unnamed: 0,permalink,name,status,country_code,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd,main_category
0,/organization/-fame,#fame,operating,IND,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.0,0 ...
7,/organization/0xdata,H2O.ai,operating,USA,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,09-11-2015,20000000.0,0 ...
8,/organization/0xdata,H2O.ai,operating,USA,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,03-01-2013,1700000.0,0 ...
9,/organization/0xdata,H2O.ai,operating,USA,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,19-07-2014,8900000.0,0 ...
15,/organization/1-mainstream,1 Mainstream,acquired,USA,/funding-round/b952cbaf401f310927430c97b68162ea,venture,17-03-2015,5000000.0,0 ...


In [39]:
mapping = pd.read_csv("mapping.csv", sep=",")
mapping.head()

Unnamed: 0,category_list,Automotive & Sports,Blanks,Cleantech / Semiconductors,Entertainment,Health,Manufacturing,"News, Search and Messaging",Others,"Social, Finance, Analytics, Advertising"
0,,0,1,0,0,0,0,0,0,0
1,3D,0,0,0,0,0,1,0,0,0
2,3D Printing,0,0,0,0,0,1,0,0,0
3,3D Technology,0,0,0,0,0,1,0,0,0
4,Accounting,0,0,0,0,0,0,0,0,1


In [40]:
mapping.isnull().sum()

category_list                              1
Automotive & Sports                        0
Blanks                                     0
Cleantech / Semiconductors                 0
Entertainment                              0
Health                                     0
Manufacturing                              0
News, Search and Messaging                 0
Others                                     0
Social, Finance, Analytics, Advertising    0
dtype: int64

In [41]:
mapping = mapping[~pd.isnull(mapping["category_list"])]
mapping.isnull().sum()

category_list                              0
Automotive & Sports                        0
Blanks                                     0
Cleantech / Semiconductors                 0
Entertainment                              0
Health                                     0
Manufacturing                              0
News, Search and Messaging                 0
Others                                     0
Social, Finance, Analytics, Advertising    0
dtype: int64

In [42]:
mapping['category_list'] = mapping['category_list'].str.lower()
df['main_category'] = df['main_category'].str.lower()

In [43]:
mapping['category_list']

1                                      3d
2                             3d printing
3                           3d technology
4                              accounting
5                        active lifestyle
6                            ad targeting
7                      advanced materials
8                        adventure travel
9                             advertising
10                  advertising exchanges
11                   advertising networks
12                  advertising platforms
13                                 advice
14                              aerospace
15                            agriculture
16                  air pollution control
17                             algorithms
18                            all markets
19                           all students
20                    alter0tive medicine
21                                 alumni
22                               a0lytics
23                                android
24                                

In [44]:
df[~df['main_category'].isin(mapping['category_list'])]


Unnamed: 0,permalink,name,status,country_code,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd,main_category
0,/organization/-fame,#fame,operating,IND,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.0,0 ...
7,/organization/0xdata,H2O.ai,operating,USA,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,09-11-2015,20000000.0,0 ...
8,/organization/0xdata,H2O.ai,operating,USA,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,03-01-2013,1700000.0,0 ...
9,/organization/0xdata,H2O.ai,operating,USA,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,19-07-2014,8900000.0,0 ...
15,/organization/1-mainstream,1 Mainstream,acquired,USA,/funding-round/b952cbaf401f310927430c97b68162ea,venture,17-03-2015,5000000.0,0 ...
21,/organization/10-minutes-with,10 Minutes With,operating,GBR,/funding-round/0faccbbcc5818dc5326469f13f5a8ac8,venture,09-10-2014,4000000.0,0 ...
25,/organization/1000memories,1000memories,acquired,USA,/funding-round/502bd0e50c27616995e4bdad24605ef8,venture,16-02-2011,2520000.0,0 ...
29,/organization/1000museums-com,1000museums.com,operating,USA,/funding-round/13be128d655076a025221d7fddc90d68,venture,14-10-2009,265940.0,0 ...
30,/organization/1000museums-com,1000museums.com,operating,USA,/funding-round/6aeb32ab3fc05db2b606d7a2466e8c09,venture,15-01-2014,2906832.0,0 ...
31,/organization/1000museums-com,1000museums.com,operating,USA,/funding-round/89317984885b44f02e4befb9dc2e588c,venture,01-08-2014,1500000.0,0 ...


In [45]:
mapping[~mapping['category_list'].isin(df['main_category'])]

Unnamed: 0,category_list,Automotive & Sports,Blanks,Cleantech / Semiconductors,Entertainment,Health,Manufacturing,"News, Search and Messaging",Others,"Social, Finance, Analytics, Advertising"
1,3d,0,0,0,0,0,1,0,0,0
2,3d printing,0,0,0,0,0,1,0,0,0
3,3d technology,0,0,0,0,0,1,0,0,0
4,accounting,0,0,0,0,0,0,0,0,1
5,active lifestyle,0,0,0,0,1,0,0,0,0
6,ad targeting,0,0,0,0,0,0,0,0,1
7,advanced materials,0,0,0,0,0,1,0,0,0
8,adventure travel,1,0,0,0,0,0,0,0,0
9,advertising,0,0,0,0,0,0,0,0,1
10,advertising exchanges,0,0,0,0,0,0,0,0,1


In [54]:
mapping['category_list'] = mapping['category_list'].apply(lambda x: x.replace('0', 'na'))

In [55]:
df = pd.merge(df, mapping, how='inner', left_on='main_category', right_on='category_list')

In [56]:
df.shape

(0, 29)