Pipeline for Extracting Keywords from the dataset

In [34]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
#import numpy as np
import pandas as pd
import spacy
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from spacy import displacy

In [2]:
# Load the spaCy English pipeline; it is applied to every job description below.
nlp = spacy.load("en_core_web_sm")

# Build the dataset location from the current user's home directory instead of
# hard-coding one person's account name. For the account that used to be
# spelled out here, this resolves to exactly the same file.
path = str(Path.home() / "Desktop" / "DS105_Dataset" / "6622_jobs.xls")
df_import = pd.read_excel(path)

df_import.shape

(6623, 11)

DATA WRANGLING

In [3]:
df_import.head(2)

Unnamed: 0.1,Unnamed: 0,title,details,deadline,opport_type,commence_date,contract_type,location,Renumeration,company,links
0,0,Rothschild & Co - Private Equity Long-Term Int...,This London-based 6-month internship is an exc...,2023-04-30,Internship,2023-07-01 00:00:00,Temporary,London,,Rothschild & Co,https://careers.lse.ac.uk//students/jobs/detai...
1,1,2023 HSBC Global Graduate Programme (Hong Kong...,You’re excited about starting your career and ...,2023-01-06,Graduate employment,2023-07-03 00:00:00,Temporary,Hong KongSingapore,,HSBC (HSBC) - Hong Kong,https://careers.lse.ac.uk//students/jobs/detai...


In [9]:
# Select the columns needed downstream by name rather than by position, so the
# selection cannot silently shift if the spreadsheet's column order changes.
df1 = df_import.loc[:, ["title", "details", "deadline", "opport_type"]]

df2 = df_import.loc[:, ["location", "company", "links"]]

# Put the two column groups side by side in a single working frame.
pdList = [df1, df2]
df = pd.concat(pdList, axis=1)
df.head()

Unnamed: 0,title,details,deadline,opport_type,location,company,links
0,Rothschild & Co - Private Equity Long-Term Int...,This London-based 6-month internship is an exc...,2023-04-30,Internship,London,Rothschild & Co,https://careers.lse.ac.uk//students/jobs/detai...
1,2023 HSBC Global Graduate Programme (Hong Kong...,You’re excited about starting your career and ...,2023-01-06,Graduate employment,Hong KongSingapore,HSBC (HSBC) - Hong Kong,https://careers.lse.ac.uk//students/jobs/detai...
2,2023 HSBC Global Internship Programme (Hong Ko...,You’re excited about starting your career and ...,2023-01-06,Internship,Hong KongSingapore,HSBC (HSBC) - Hong Kong,https://careers.lse.ac.uk//students/jobs/detai...
3,"Graduate Training Scheme, Capital Markets",Graduate Training Scheme – LondonGreySpark Par...,2022-12-17,Graduate employment,London,GreySpark Partners Ltd,https://careers.lse.ac.uk//students/jobs/detai...
4,6-Months Internship – Sell-side Tech M&A,"At IPTP, we understand software from decades o...",2022-12-31,Internship,France,Inflexion Points Technology Partners (IPTP),https://careers.lse.ac.uk//students/jobs/detai...


In [11]:
df.shape

# Report the dtype of every column in the working frame.
types = df.dtypes
print(types)

# Pull out the pieces used later: titles as a Series, descriptions and
# deadlines as one-column DataFrames.
df_title = df["title"]
df_text = df[["details"]]

df_dline = df[["deadline"]]


title                  object
details                object
deadline       datetime64[ns]
opport_type            object
location               object
company                object
links                  object
dtype: object


In [12]:
df_text.head()

Unnamed: 0,details
0,This London-based 6-month internship is an exc...
1,You’re excited about starting your career and ...
2,You’re excited about starting your career and ...
3,Graduate Training Scheme – LondonGreySpark Par...
4,"At IPTP, we understand software from decades o..."


In [13]:
df_dline.head()

Unnamed: 0,deadline
0,2023-04-30
1,2023-01-06
2,2023-01-06
3,2022-12-17
4,2022-12-31


In [14]:
type(df_text)

pandas.core.frame.DataFrame

In [15]:
len(df_text)

6623

PIPELINE PART 1: NLP() 

In [18]:
len(df_text)

6623

In [20]:
# Converting the column of texts into a list of rows (df_text has a single
# column, so every row is a one-element list).
lst_lst = df_text.values.tolist()

# We expect every job description to be a 'str' object. Collect the index
# *labels* of the rows that hold any non-string value so they can be removed.
# Labels are paired with the rows explicitly because DataFrame.drop removes
# rows by label; enumerate() positions only coincide with the labels while
# df_text still carries a default 0..n-1 index.
non_str_index = [
    label
    for label, row in zip(df_text.index, lst_lst)
    if not all(isinstance(value, str) for value in row)
]

df_text_filtered = df_text.drop(index=non_str_index)
len(df_text_filtered)

6620

In [23]:
# Split the cleaned descriptions into seven consecutive batches by position
# (six batches of 1000 rows plus the remainder); spaCy is run on one batch per cell.
df_text_filtered_1 = df_text_filtered.iloc[:1000]
df_text_filtered_2 = df_text_filtered.iloc[1000:2000]
df_text_filtered_3 = df_text_filtered.iloc[2000:3000]
df_text_filtered_4 = df_text_filtered.iloc[3000:4000]
df_text_filtered_5 = df_text_filtered.iloc[4000:5000]
df_text_filtered_6 = df_text_filtered.iloc[5000:6000]
df_text_filtered_7 = df_text_filtered.iloc[6000:]

In [32]:
# Batch 1 of 7: run the spaCy pipeline on every description in the batch.
# The result is a Series holding one parsed document per job.
docs_1 = df_text_filtered_1['details'].apply(nlp)

In [33]:
type(docs_1)

pandas.core.series.Series

In [35]:
# Batch 2 of 7 (positions 1000-1999): parse each description with spaCy.
docs_2 = df_text_filtered_2['details'].apply(nlp)
len(docs_2)

1000

In [36]:
# Batch 3 of 7 (positions 2000-2999): parse each description with spaCy.
docs_3 = df_text_filtered_3['details'].apply(nlp)
len(docs_3)

1000

In [37]:
# Batch 4 of 7 (positions 3000-3999): parse each description with spaCy.
docs_4 = df_text_filtered_4['details'].apply(nlp)
len(docs_4)

1000

In [38]:
# Batch 5 of 7 (positions 4000-4999): parse each description with spaCy.
docs_5 = df_text_filtered_5['details'].apply(nlp)
len(docs_5)

1000

In [39]:
# Batch 6 of 7 (positions 5000-5999): parse each description with spaCy.
docs_6 = df_text_filtered_6['details'].apply(nlp)
len(docs_6)

1000

In [40]:
# Batch 7 of 7 (position 6000 to the end): parse each description with spaCy.
docs_7 = df_text_filtered_7['details'].apply(nlp)
len(docs_7)

620

In [41]:
# Stitch the seven batches back into one Series of parsed documents.
# ignore_index=True renumbers the result 0..n-1, so its labels no longer match
# the labels of df_text_filtered (which has gaps where rows were dropped).
docs_all = pd.concat([docs_1,docs_2,docs_3,docs_4,docs_5,docs_6,docs_7],
ignore_index = True,
sort = False)

docs_all.shape

(6620,)

In [43]:
len(docs_all)

6620

In [42]:
docs_all.head()

0    (This, London, -, based, 6, -, month, internsh...
1    (You, ’re, excited, about, starting, your, car...
2    (You, ’re, excited, about, starting, your, car...
3    (Graduate, Training, Scheme, –, LondonGreySpar...
4    (At, IPTP, ,, we, understand, software, from, ...
Name: details, dtype: object

In [None]:
# Optional!

# use this to store the list of documents locally (running nlp() again takes time)

#with open("all_spacy_docs.pickle", "wb") as f:
#    pickle.dump(docs_all, f)

# requires GitHub LFS as the resulting pickle object is larger than 100MB

In [None]:
# use this code to load the pickled spaCy documents back from disk
#with open("all_spacy_docs.pickle", "rb") as f:
#    loaded_all_docs = pickle.load(f)

PIPELINE PART 2: Parts-of-Speech Filter 

In [45]:
# Reduce every parsed document to the text of its nouns and proper nouns.

filtered_docs_all = [
    [tok.text for tok in parsed if tok.pos_ == 'PROPN' or tok.pos_ == 'NOUN']
    for parsed in docs_all
]
len(filtered_docs_all)

6620

PIPELINE PART 3: Dictionary Filter 

In [57]:
# Creating the dictionaries for each skill.
# Every category has a list of keywords (*_keys) and a minimum fuzzy-match score
# (*_score_cutoff); the filter functions in this notebook pass both to
# fuzzywuzzy's process.extractOne.
# NOTE(review): later cells in this notebook assign the same names again with a
# cutoff of 75; when cells run top to bottom those later values are the ones
# the filter functions see.

# Filtering for IT Skills: Microsoft
MS_keys = ["Microsoft","MS","MS-Office","Powerpoint","Excel"]
MS_score_cutoff = 80

# Filtering for Financial Modelling
# assumption: occurrence of the 'modelling' string is in the context of 'financial modelling'
FM_keys = ["modelling"]
FM_score_cutoff = 80

# Filtering for Data Platform Skills: Bloomberg and/or FactSet
DATA_keys = ["Bloomberg","FactSet"]
DATA_score_cutoff = 75

# Programming: Python
PY_keys = ["Python"]
PY_score_cutoff = 75

# Programming: Databases
SQL_keys = ["SQL","mySQL"]
SQL_score_cutoff = 75

# Programming: R
R_keys = ["RStudio", "R"]
R_score_cutoff = 80

# Programming: etc.
# assumption for category: these programming languages occur less than Python, R, SQL, etc.
# NOTE(review): the sample output further down lists ('c.', 100) under
# "Other Programming Languages" -- presumably "C++"/"C#" are reduced to "c"
# before scoring, so unrelated tokens can match at 100; confirm and consider
# exact matching for these keys.

PETC_keys = ["C++","C#","JAVA","JavaScript","CSS","HTML","PERL"]
PETC_score_cutoff = 90


In [49]:
# Result tables that the filter functions append to, one row per job posting.
# df_word collects the matched keywords for each skill category.

df_word = pd.DataFrame(columns=['Microsoft Office', 'Financial Modelling', 'Data Platform', 'Python', 'R', 'SQL','Other Programming Languages'])
#df_word #for viewing

# df_bool has the same columns and records whether each category occurred.

df_bool = pd.DataFrame(columns=df_word.columns)
#df_bool # for viewing

In [53]:
# Creating a function to pass the texts through the filter and output the keywords extracted

def dict_filter_word(input_text):
    """Fuzzy-match one job's tokens against every skill dictionary and record the hits.

    For each skill category, every keyword is compared with the tokens in
    ``input_text`` via ``process.extractOne`` (scorer ``fuzz.token_set_ratio``)
    and the best match is kept when it reaches that category's score cutoff.
    The seven per-category match lists are appended as a new row to the global
    ``df_word``.

    Parameters
    ----------
    input_text : list of str
        Tokens of one job description; the notebook passes the noun /
        proper-noun tokens produced by the parts-of-speech filter.

    Returns
    -------
    list of list
        The appended row: match lists ordered as Microsoft Office, Financial
        Modelling, Data Platform, Python, R, SQL, Other Programming Languages.
        Returning it means ``Series.apply(dict_filter_word)`` yields the
        matches rather than a Series of ``None``.
    """
    # (keywords, cutoff) pairs in df_word's column order. The module-level
    # names are looked up on every call, so re-running the dictionary cell
    # changes what the next call uses.
    skill_specs = (
        (MS_keys, MS_score_cutoff),
        (FM_keys, FM_score_cutoff),
        (DATA_keys, DATA_score_cutoff),
        (PY_keys, PY_score_cutoff),
        (R_keys, R_score_cutoff),
        (SQL_keys, SQL_score_cutoff),
        (PETC_keys, PETC_score_cutoff),
    )

    row = []
    for keys, cutoff in skill_specs:
        best_matches = (
            process.extractOne(key, input_text, scorer=fuzz.token_set_ratio, score_cutoff=cutoff)
            for key in keys
        )
        # extractOne returns None when nothing reaches the cutoff.
        row.append([match for match in best_matches if match is not None and match[1] >= cutoff])

    # Append as the next row; len(df_word) is the first unused integer label.
    df_word.loc[len(df_word)] = row
    return row

In [54]:
# Creating a function to pass the texts through the filter and output the occurence of keywords

def dict_filter_bool(input_text):
    """Flag which skill categories occur in one job's tokens and record the flags.

    For each skill category, every keyword is compared with the tokens in
    ``input_text`` via ``process.extractOne`` (scorer ``fuzz.token_set_ratio``).
    The category's flag is 1 when at least one keyword has a best match that
    reaches the category's score cutoff, otherwise 0. The seven flags are
    appended as a new row to the global ``df_bool``.

    Parameters
    ----------
    input_text : list of str
        Tokens of one job description; the notebook passes the noun /
        proper-noun tokens produced by the parts-of-speech filter.

    Returns
    -------
    list of int
        The appended row: 0/1 flags ordered as Microsoft Office, Financial
        Modelling, Data Platform, Python, R, SQL, Other Programming Languages.
        Returning it means ``Series.apply(dict_filter_bool)`` yields the flags
        rather than a Series of ``None``.
    """
    # (keywords, cutoff) pairs in df_bool's column order. The module-level
    # names are looked up on every call.
    skill_specs = (
        (MS_keys, MS_score_cutoff),
        (FM_keys, FM_score_cutoff),
        (DATA_keys, DATA_score_cutoff),
        (PY_keys, PY_score_cutoff),
        (R_keys, R_score_cutoff),
        (SQL_keys, SQL_score_cutoff),
        (PETC_keys, PETC_score_cutoff),
    )

    flags = []
    for keys, cutoff in skill_specs:
        best_matches = (
            process.extractOne(key, input_text, scorer=fuzz.token_set_ratio, score_cutoff=cutoff)
            for key in keys
        )
        hits = [match for match in best_matches if match is not None and match[1] >= cutoff]
        flags.append(int(bool(hits)))

    # Append as the next row; len(df_bool) is the first unused integer label.
    df_bool.loc[len(df_bool)] = flags
    return flags

In [46]:
# Convert the list of per-job token lists into a Series (one token list per job)
# so the filter functions can be applied with Series.apply.
filtered_docs_all_s = pd.Series(filtered_docs_all)
len(filtered_docs_all_s)

6620

In [56]:
# Applying the filters to every job.
# Each call appends one row per job to the global df_word / df_bool, so
# re-running this cell without re-creating those frames adds duplicate rows.
# The two passes repeat the same fuzzy matching, once for each output table.

filtered_docs_all_s.apply(dict_filter_word)

filtered_docs_all_s.apply(dict_filter_bool)


KeyboardInterrupt: 

In [None]:
df_word

In [None]:
df_bool

In [11]:
# Individual job view of NER
# NER = Named Entity Recognition
# Pick one parsed job description from the first batch and highlight the
# entities spaCy recognised in it. displacy is imported in the notebook's
# import cell rather than in the middle of the analysis.
rend_doc_1 = docs_1[73]

displacy.render(rend_doc_1,style="ent")

In [18]:
# Filtering for IT Skills: Microsoft
# Scratch check of the Microsoft dictionary against a single token list.
# NOTE(review): `text` is not assigned in any cell visible in this notebook --
# presumably a token list left over from an earlier session; define it first.
# NOTE(review): this reassigns MS_score_cutoff to 75, overriding the 80 set in
# the dictionary cell for every filter call made afterwards.
MS_keys = ["Microsoft","MS","MS-Office","Powerpoint","Excel"]
MS_score_cutoff = 75

MS_match = [match for match in [process.extractOne(key, text, scorer = fuzz.token_set_ratio, score_cutoff = MS_score_cutoff) 
for key in MS_keys] if match is not None and match[1] >= MS_score_cutoff]

print(MS_match)

[]


In [19]:
# Filtering for Financial Modelling
# assumption: occurrence of the 'modelling' string is in the context of 'financial modelling'
# NOTE(review): `text` is not assigned in any cell visible in this notebook --
# presumably a token list left over from an earlier session; define it first.
# NOTE(review): this reassigns FM_score_cutoff to 75, overriding the 80 set in
# the dictionary cell for every filter call made afterwards.
FM_keys = ["modelling"]
FM_score_cutoff = 75

FM_match = [match for match in [process.extractOne(key, text, scorer = fuzz.token_set_ratio, score_cutoff = FM_score_cutoff)
for key in FM_keys] if match is not None and match[1] >= FM_score_cutoff]

print(FM_match)

[('modelling', 100)]


In [20]:
# Filtering for Data Platform Skills: Bloomberg and/or FactSet
# Scratch check of the data-platform dictionary against a single token list.
# NOTE(review): `text` is not assigned in any cell visible in this notebook --
# presumably a token list left over from an earlier session; define it first.
DATA_keys = ["Bloomberg","FactSet"]
DATA_score_cutoff = 75

DATA_match = [match for match in [process.extractOne(key, text, scorer = fuzz.token_set_ratio, score_cutoff = DATA_score_cutoff) 
for key in DATA_keys] if match is not None and match[1] >= DATA_score_cutoff]

print(DATA_match)

[]


Filtering for Programming Skills: Python, SQL, Java, JavaScript, C++, C#

In [21]:
# Programming: Python
# Scratch check of the Python dictionary against a single token list.
# NOTE(review): `text` is not assigned in any cell visible in this notebook --
# presumably a token list left over from an earlier session; define it first.
PY_keys = ["Python"]
PY_score_cutoff = 75

PY_match = [match for match in [process.extractOne(key, text, scorer = fuzz.token_set_ratio, score_cutoff = PY_score_cutoff) 
for key in PY_keys] if match is not None and match[1] >= PY_score_cutoff]
print(PY_match)

[('Python', 100)]


In [22]:
# Programming: Databases
# Scratch check of the SQL dictionary against a single token list.
# One best match is reported per keyword, so the same token can appear more
# than once in the result.
# NOTE(review): `text` is not assigned in any cell visible in this notebook --
# presumably a token list left over from an earlier session; define it first.
SQL_keys = ["SQL","mySQL"]
SQL_score_cutoff = 75

SQL_match = [match for match in [process.extractOne(key, text, scorer = fuzz.token_set_ratio, score_cutoff = SQL_score_cutoff) 
for key in SQL_keys] if match is not None and match[1] >= SQL_score_cutoff]
print(SQL_match)

[('SQL', 100), ('SQL', 75)]


In [23]:
# Programming: R
# Scratch check of the R dictionary against a single token list.
# NOTE(review): `text` is not assigned in any cell visible in this notebook --
# presumably a token list left over from an earlier session; define it first.
# NOTE(review): this reassigns R_score_cutoff to 75, overriding the 80 set in
# the dictionary cell for every filter call made afterwards.
R_keys = ["RStudio", "R"]
R_score_cutoff = 75

R_match = [match for match in [process.extractOne(key, text, scorer = fuzz.token_set_ratio, score_cutoff = R_score_cutoff) 
for key in R_keys] if match is not None and match[1] >= R_score_cutoff]
print(R_match)

[('R', 100)]


In [24]:
# Programming: other languages -- scratch check against a single token list.
# NOTE(review): `text` is not assigned in any cell visible in this notebook --
# presumably a token list left over from an earlier session; define it first.
# NOTE(review): this reassigns PETC_score_cutoff to 75, overriding the 90 set in
# the dictionary cell for every filter call made afterwards.
PETC_keys = ["C++","C#","JAVA","JavaScript","CSS","HTML","PERL"]
PETC_score_cutoff = 75

PETC_match = [match for match in [process.extractOne(key, text, scorer = fuzz.token_set_ratio, score_cutoff = PETC_score_cutoff) for key in PETC_keys] if match is not None and match[1] >= PETC_score_cutoff]
print(PETC_match)

[('C', 100), ('C', 100)]


In [254]:
# Creating a function to pass the texts through the filter and output the keywords extracted

def dict_filter_word(input_text):
    """Fuzzy-match one job's tokens against every skill dictionary and record the hits.

    This cell defines ``dict_filter_word`` a second time in the notebook; when
    cells run top to bottom, this later definition is the one in effect.

    For each skill category, every keyword is compared with the tokens in
    ``input_text`` via ``process.extractOne`` (scorer ``fuzz.token_set_ratio``)
    and the best match is kept when it reaches that category's score cutoff.
    The seven per-category match lists are appended as a new row to the global
    ``df_word``.

    Parameters
    ----------
    input_text : list of str
        Tokens of one job description; the notebook passes the noun /
        proper-noun tokens produced by the parts-of-speech filter.

    Returns
    -------
    list of list
        The appended row: match lists ordered as Microsoft Office, Financial
        Modelling, Data Platform, Python, R, SQL, Other Programming Languages.
        Returning it means ``Series.apply(dict_filter_word)`` yields the
        matches rather than a Series of ``None``.
    """
    # (keywords, cutoff) pairs in df_word's column order. The module-level
    # names are looked up on every call, so whichever cell assigned them last
    # decides the keywords and cutoffs used.
    skill_specs = (
        (MS_keys, MS_score_cutoff),
        (FM_keys, FM_score_cutoff),
        (DATA_keys, DATA_score_cutoff),
        (PY_keys, PY_score_cutoff),
        (R_keys, R_score_cutoff),
        (SQL_keys, SQL_score_cutoff),
        (PETC_keys, PETC_score_cutoff),
    )

    row = []
    for keys, cutoff in skill_specs:
        best_matches = (
            process.extractOne(key, input_text, scorer=fuzz.token_set_ratio, score_cutoff=cutoff)
            for key in keys
        )
        # extractOne returns None when nothing reaches the cutoff.
        row.append([match for match in best_matches if match is not None and match[1] >= cutoff])

    # Append as the next row; len(df_word) is the first unused integer label.
    df_word.loc[len(df_word)] = row
    return row

In [236]:
# Creating a function to pass the texts through the filter and output the occurence of keywords

def dict_filter_bool(input_text):
    """Flag which skill categories occur in one job's tokens and record the flags.

    This cell defines ``dict_filter_bool`` a second time in the notebook; when
    cells run top to bottom, this later definition is the one in effect.

    For each skill category, every keyword is compared with the tokens in
    ``input_text`` via ``process.extractOne`` (scorer ``fuzz.token_set_ratio``).
    The category's flag is 1 when at least one keyword has a best match that
    reaches the category's score cutoff, otherwise 0. The seven flags are
    appended as a new row to the global ``df_bool``.

    Parameters
    ----------
    input_text : list of str
        Tokens of one job description; the notebook passes the noun /
        proper-noun tokens produced by the parts-of-speech filter.

    Returns
    -------
    list of int
        The appended row: 0/1 flags ordered as Microsoft Office, Financial
        Modelling, Data Platform, Python, R, SQL, Other Programming Languages.
        Returning it means ``Series.apply(dict_filter_bool)`` yields the flags
        rather than a Series of ``None``.
    """
    # (keywords, cutoff) pairs in df_bool's column order. The module-level
    # names are looked up on every call.
    skill_specs = (
        (MS_keys, MS_score_cutoff),
        (FM_keys, FM_score_cutoff),
        (DATA_keys, DATA_score_cutoff),
        (PY_keys, PY_score_cutoff),
        (R_keys, R_score_cutoff),
        (SQL_keys, SQL_score_cutoff),
        (PETC_keys, PETC_score_cutoff),
    )

    flags = []
    for keys, cutoff in skill_specs:
        best_matches = (
            process.extractOne(key, input_text, scorer=fuzz.token_set_ratio, score_cutoff=cutoff)
            for key in keys
        )
        hits = [match for match in best_matches if match is not None and match[1] >= cutoff]
        flags.append(int(bool(hits)))

    # Append as the next row; len(df_bool) is the first unused integer label.
    df_bool.loc[len(df_bool)] = flags
    return flags

In [245]:
# Token lists of the first ten jobs, used to try the filters on a small sample.
# Named tst_txts because the next cell wraps `tst_txts` in a Series; assigning
# it to tst_txts_s here left `tst_txts` undefined on a fresh kernel.
tst_txts = filtered_docs_all[0:10]

In [255]:
# Wrap the sample token lists in a Series and run the occurrence filter on them.
# dict_filter_bool appends one row per job to the global df_bool, so re-running
# this cell adds the same rows again.
tst_txts_s = pd.Series(tst_txts)
tst_txts_s.apply(dict_filter_bool)
df_bool

Unnamed: 0,Microsoft,Financial Modelling,Data Platform,Python,R,SQL,Other Programming Languages
0,1,0,0,0,0,0,1
1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0
3,1,0,0,0,0,0,1
4,1,0,0,0,0,0,0
5,1,1,0,0,0,0,0
6,1,1,0,0,0,0,0
7,1,1,0,0,0,0,0
8,1,0,0,0,0,0,0
9,1,0,0,0,0,0,1


In [256]:
# Run the keyword filter on the same sample; dict_filter_word appends one row
# per job to the global df_word, which is then displayed.
tst_txts_s.apply(dict_filter_word)
df_word

Unnamed: 0,Microsoft,Financial Modelling,Data Platform,Python,R,SQL,Other Programming Languages
0,"[(Microsoft, 100), (Office, 100), (PowerPoint,...",[],[],[],[],[],"[(c., 100), (c., 100)]"
1,[],[],[],[],[],[],[]
2,[],[],[],[],[],[],[]
3,"[(MS, 100), (MS, 100), (PowerPoint, 100), (Exc...",[],[],[],[],[],"[(class, 75)]"
4,"[(Office, 100), (PowerPoint, 100), (Excel, 100)]",[],[],[],[],[],[]
5,"[(Excel, 100)]","[(modeling, 94)]",[],[],[],[],[]
6,"[(Excel, 100)]","[(modeling, 94)]",[],[],[],[],[]
7,"[(Microsoft, 100), (office, 100), (PowerPoint,...","[(modelling, 100)]",[],[],[],[],[]
8,"[(office, 100)]",[],[],[],[],[],[]
9,"[(offices, 75)]",[],[],[],[],[],"[(Class, 75)]"


Below is the code for applying the pipeline to the entire dataset

1. Clean the data
- identify the index of non-text data
- remove NaN (non-text) data

2. Convert each text to a spaCy `Doc` object with `nlp()`


3. Keep only nouns and proper nouns, then apply the dictionary filters

Running Code on the Entire Dataset

In [18]:
# Converting the column of texts into a list of rows (df_text has a single
# column, so every row is a one-element list).
lst_lst = df_text.values.tolist()
len(lst_lst)

# We expect every job description to be a 'str' object. Collect the index
# *labels* of the rows that hold any non-string value so they can be removed.
# Labels are paired with the rows explicitly because DataFrame.drop removes
# rows by label; enumerate() positions only coincide with the labels while
# df_text still carries a default 0..n-1 index.
non_str_index = [
    label
    for label, row in zip(df_text.index, lst_lst)
    if not all(isinstance(value, str) for value in row)
]
non_str_index

len(df_text) # NEW CHUNK

df_text_filtered = df_text.drop(index=non_str_index) # NEW CHUNK
len(df_text_filtered)

df_text_all = df_text_filtered  # NEW CHUNK
len(df_text_all)


# use this to apply the nlp() to the entire dataset

all_docs = df_text_all['details'].apply(nlp)
len(all_docs) 


 # NEW CHUNK

# Optional!

# use this to store the list of documents locally (running nlp() again takes time)

#with open("all_spacy_docs.pickle", "wb") as f:
#    pickle.dump(all_docs, f)

# requires GitHub LFS as the resulting pickle object is larger than 100MB

# NEW CHUNK

# use this code to load the pickled spaCy documents back from disk
# (only unpickle files you created yourself: pickle.load can execute code)
#with open("all_spacy_docs.pickle", "rb") as f:
#    loaded_all_docs = pickle.load(f)


# NEW CHUNK

# Keeping only Propernouns and Nouns

filtered_docs_all = list([[token.text for token in doc if token.pos_ in ['PROPN', 'NOUN']] for doc in all_docs])
len(filtered_docs_all)

# NEW CHUNK

# The Series gets a fresh 0..n-1 index, so its labels are positions in the
# filtered data, not the row labels of the original spreadsheet.
filtered_docs_all_s = pd.Series(filtered_docs_all)
len(filtered_docs_all_s)

# NEW CHUNK

# Each call appends one row to the global df_word; inspect df_word for the
# resulting table of matched keywords.
all_words = filtered_docs_all_s.apply(dict_filter_word)
len(all_words)

# NEW CHUNK

# Each call appends one row to the global df_bool; inspect df_bool for the
# resulting table of 0/1 occurrence flags.
all_bools = filtered_docs_all_s.apply(dict_filter_bool)


pandas.core.frame.DataFrame

END of PIPELINE

In [None]:
# CREATING DATAFRAME FOR R

# 1 Turn the first 101 titles (positions 0-100) into a one-column DataFrame
# NOTE(review): the table displayed further down ends at row 99 -- confirm
# whether 0:101 or 0:100 is the intended range.
df_title_r_1 = df_title.iloc[0:101].to_frame()
type(df_title_r_1)

pandas.core.frame.DataFrame

In [None]:
# CREATING DATAFRAME FOR R
# Place the titles next to the filtered texts, aligned on the index.
# NOTE(review): df_fil_1 is not assigned in any cell visible in this notebook --
# presumably the filtered text of the same jobs from an earlier session; it
# must be defined before this cell can run on a fresh kernel.

df_r_1 = pd.concat([df_title_r_1, df_fil_1], axis = 1)
df_r_1

Unnamed: 0,title,0
0,Rothschild & Co - Private Equity Long-Term Int...,London month internship exciting opportunity R...
1,2023 HSBC Global Graduate Programme (Hong Kong...,excited career many paths possibilities global...
2,2023 HSBC Global Internship Programme (Hong Ko...,excited career many paths possibilities global...
3,"Graduate Training Scheme, Capital Markets",Graduate Training Scheme LondonGreySpark Partn...
4,6-Months Internship – Sell-side Tech M&A,IPTP software decades deep experience technolo...
...,...,...
96,M&A Analyst Intern,MAJOR RESPONSIBILITIESGather financial operat...
97,12 Month Internship - Financial Crimes and San...,Job summaryFinancial Crime Financial Security ...
98,Investment Associate - Fixed Income,Position OverviewPutnam energetic curious indi...
99,12 Month Internship - Central Compliance,SummaryThe Central Compliance team responsible...


In [None]:
df_r_1.iloc[95,1]

'2R Capital Investment Management Limited independent investment company London UK successful credit business process new initiatives equity investing private assets equity primary objective long term capital clients commensurate reasonable risk attention mid - sized European companies fundamental investors extensive research businesses regions industry sectors significant expertise private assets space equity debt small medium sized companies significant growth potential sectors regions Job Opportunities analysts investment opportunities Europe internship full time positions available Targeted training successful candidates self starters activities little supervision keen interest securities investing good research writing financial modelling abilities European languages important Day day activities search origination potential investment opportunities primary research analysis specific sectors companies valuation investment opportunities Direct interaction entrepreneurs managers inve

In [None]:
#Export as .csv file
#df_r_1.to_csv('df_r_1.csv')