In [1]:
import pandas as pd
import numpy as np


In [2]:
# Hard-coded lead records: each entry is a dict with the same six string
# fields, built by pairing the shared field names with one tuple of values.
data = [
    dict(zip(
        ("name", "title", "company", "location", "company_funding", "text"),
        record,
    ))
    for record in (
        (
            "Dr. Alice Smith",
            "Director of Toxicology",
            "BioNova Therapeutics",
            "Boston",
            "Series B",
            "Published research on 3D liver toxicity models",
        ),
        (
            "Dr. John Lee",
            "Senior Scientist",
            "EarlyStage Bio",
            "Texas",
            "Seed",
            "Works on in-vitro cell culture experiments",
        ),
        (
            "Dr. Maria Gomez",
            "Head of Preclinical Safety",
            "PharmaCore",
            "Cambridge",
            "Public",
            "Leads safety assessment and liver injury studies",
        ),
        (
            "Dr. Kevin Brown",
            "VP Preclinical Research",
            "Livera Biotech",
            "San Francisco",
            "Series C",
            "Oversees preclinical programs using 3D hepatic models for drug safety evaluation",
        ),
        (
            "Dr. Sophie Martin",
            "Principal Scientist",
            "HepatoTech",
            "Basel",
            "Series A",
            "Develops in-vitro liver toxicity assays and organ-on-chip platforms",
        ),
        (
            "Dr. Anil Kumar",
            "Director of Translational Safety",
            "NeoPharm Labs",
            "London",
            "Private",
            "Leads translational safety studies with focus on liver injury biomarkers",
        ),
        (
            "Dr. Emily Chen",
            "Senior Toxicologist",
            "CellNova CRO",
            "San Diego",
            "Bootstrapped",
            "Conducts in-vitro toxicity screening for small molecule drug candidates",
        ),
        (
            "Dr. Robert Wilson",
            "Research Scientist",
            "OncoGenix",
            "New York",
            "Public",
            "Works on oncology drug discovery and cancer cell line experiments",
        ),
    )
]
# Build the leads table: one row per record, one column per field.
df = pd.DataFrame(data)

# Preview a single row. The fixed seed keeps the previewed row the same on
# every Restart & Run All; an unseeded sample() picks a different row each run.
df.sample(random_state=42)

Unnamed: 0,name,title,company,location,company_funding,text
2,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge,Public,Leads safety assessment and liver injury studies


In [3]:
def role_score(title):
    """Score a job title by seniority tier.

    Args:
        title: The lead's job title. Matching is case-insensitive.

    Returns:
        1.0 if the title contains "director" or "head", 0.6 if it contains
        "senior", and 0.3 otherwise. Non-string input (for example the
        NaN/None pandas uses for a missing title) carries no seniority
        signal and also scores 0.3 instead of raising TypeError.
    """
    if not isinstance(title, str):
        return 0.3

    lowered = title.lower()
    if "director" in lowered or "head" in lowered:
        return 1.0
    if "senior" in lowered:
        return 0.6
    # NOTE(review): titles such as "VP ..." or "Principal ..." fall through
    # to the lowest tier - confirm that this is intended.
    return 0.3

# Score every title element-wise and store the result as a new column.
df['role_score'] = df['title'].map(role_score)
df

Unnamed: 0,name,title,company,location,company_funding,text,role_score
0,Dr. Alice Smith,Director of Toxicology,BioNova Therapeutics,Boston,Series B,Published research on 3D liver toxicity models,1.0
1,Dr. John Lee,Senior Scientist,EarlyStage Bio,Texas,Seed,Works on in-vitro cell culture experiments,0.6
2,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge,Public,Leads safety assessment and liver injury studies,1.0
3,Dr. Kevin Brown,VP Preclinical Research,Livera Biotech,San Francisco,Series C,Oversees preclinical programs using 3D hepatic...,0.3
4,Dr. Sophie Martin,Principal Scientist,HepatoTech,Basel,Series A,Develops in-vitro liver toxicity assays and or...,0.3
5,Dr. Anil Kumar,Director of Translational Safety,NeoPharm Labs,London,Private,Leads translational safety studies with focus ...,1.0
6,Dr. Emily Chen,Senior Toxicologist,CellNova CRO,San Diego,Bootstrapped,Conducts in-vitro toxicity screening for small...,0.6
7,Dr. Robert Wilson,Research Scientist,OncoGenix,New York,Public,Works on oncology drug discovery and cancer ce...,0.3


In [4]:
# Locations that location_score treats as hubs (exact, case-sensitive match).
biotech_hubs = ['Boston', 'Cambridge', 'San Francisco']


def location_score(location):
    """Return 1.0 when `location` is listed in `biotech_hubs`, else 0.4."""
    return 1.0 if location in biotech_hubs else 0.4

# Score every location element-wise and store the result as a new column.
df['location_score'] = df['location'].map(location_score)
df

Unnamed: 0,name,title,company,location,company_funding,text,role_score,location_score
0,Dr. Alice Smith,Director of Toxicology,BioNova Therapeutics,Boston,Series B,Published research on 3D liver toxicity models,1.0,1.0
1,Dr. John Lee,Senior Scientist,EarlyStage Bio,Texas,Seed,Works on in-vitro cell culture experiments,0.6,0.4
2,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge,Public,Leads safety assessment and liver injury studies,1.0,1.0
3,Dr. Kevin Brown,VP Preclinical Research,Livera Biotech,San Francisco,Series C,Oversees preclinical programs using 3D hepatic...,0.3,1.0
4,Dr. Sophie Martin,Principal Scientist,HepatoTech,Basel,Series A,Develops in-vitro liver toxicity assays and or...,0.3,0.4
5,Dr. Anil Kumar,Director of Translational Safety,NeoPharm Labs,London,Private,Leads translational safety studies with focus ...,1.0,0.4
6,Dr. Emily Chen,Senior Toxicologist,CellNova CRO,San Diego,Bootstrapped,Conducts in-vitro toxicity screening for small...,0.6,0.4
7,Dr. Robert Wilson,Research Scientist,OncoGenix,New York,Public,Works on oncology drug discovery and cancer ce...,0.3,0.4


In [5]:
# Lowercase terms looked up as substrings in each lead's text.
keywords = ["3d","in-vitro","liver","toxicity","safety"]


def text_relevance_score(text):
    """Return the fraction of `keywords` that occur in `text`.

    The text is lowercased before the substring checks, so the result is the
    number of matched keywords divided by the total number of keywords.
    """
    lowered = text.lower()
    hits = sum(1 for term in keywords if term in lowered)
    return hits / len(keywords)

# Score every description element-wise and store the result as a new column.
df['text_relevance'] = df['text'].map(text_relevance_score)
df

Unnamed: 0,name,title,company,location,company_funding,text,role_score,location_score,text_relevance
0,Dr. Alice Smith,Director of Toxicology,BioNova Therapeutics,Boston,Series B,Published research on 3D liver toxicity models,1.0,1.0,0.6
1,Dr. John Lee,Senior Scientist,EarlyStage Bio,Texas,Seed,Works on in-vitro cell culture experiments,0.6,0.4,0.2
2,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge,Public,Leads safety assessment and liver injury studies,1.0,1.0,0.4
3,Dr. Kevin Brown,VP Preclinical Research,Livera Biotech,San Francisco,Series C,Oversees preclinical programs using 3D hepatic...,0.3,1.0,0.4
4,Dr. Sophie Martin,Principal Scientist,HepatoTech,Basel,Series A,Develops in-vitro liver toxicity assays and or...,0.3,0.4,0.6
5,Dr. Anil Kumar,Director of Translational Safety,NeoPharm Labs,London,Private,Leads translational safety studies with focus ...,1.0,0.4,0.4
6,Dr. Emily Chen,Senior Toxicologist,CellNova CRO,San Diego,Bootstrapped,Conducts in-vitro toxicity screening for small...,0.6,0.4,0.4
7,Dr. Robert Wilson,Research Scientist,OncoGenix,New York,Public,Works on oncology drug discovery and cancer ce...,0.3,0.4,0.0


In [6]:
# Keyword-based blend: 0.4 * text relevance + 0.3 * role + 0.3 * location.
df['final_score'] = (
    df['text_relevance'].mul(0.4)
    .add(df['role_score'].mul(0.3))
    .add(df['location_score'].mul(0.3))
)

# Same score multiplied by 100 and rounded to one decimal place.
df['final_probability'] = df['final_score'].mul(100).round(1)

# Show the identifying columns ordered from the highest to the lowest score.
df.sort_values(by='final_probability', ascending=False)[
    ['name', 'title', 'company', 'location', 'final_probability']
]

Unnamed: 0,name,title,company,location,final_probability
0,Dr. Alice Smith,Director of Toxicology,BioNova Therapeutics,Boston,84.0
2,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge,76.0
5,Dr. Anil Kumar,Director of Translational Safety,NeoPharm Labs,London,58.0
3,Dr. Kevin Brown,VP Preclinical Research,Livera Biotech,San Francisco,55.0
6,Dr. Emily Chen,Senior Toxicologist,CellNova CRO,San Diego,46.0
4,Dr. Sophie Martin,Principal Scientist,HepatoTech,Basel,45.0
1,Dr. John Lee,Senior Scientist,EarlyStage Bio,Texas,38.0
7,Dr. Robert Wilson,Research Scientist,OncoGenix,New York,21.0


In [7]:
# Order leads from the highest to the lowest probability, then renumber the
# rows from 0 so the index reflects the sorted position.
output_df = df.sort_values('final_probability', ascending=False)
output_df = output_df.reset_index(drop=True)

# 1-based rank derived from the fresh positional index.
output_df['Rank'] = output_df.index + 1

# Keep only the presentation columns, with the score labelled "Probability".
final_output = output_df.rename(
    columns={'final_probability': 'Probability'}
)[["Rank", "Probability", "name", "title", "company", "location"]]

final_output

Unnamed: 0,Rank,Probability,name,title,company,location
0,1,84.0,Dr. Alice Smith,Director of Toxicology,BioNova Therapeutics,Boston
1,2,76.0,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge
2,3,58.0,Dr. Anil Kumar,Director of Translational Safety,NeoPharm Labs,London
3,4,55.0,Dr. Kevin Brown,VP Preclinical Research,Livera Biotech,San Francisco
4,5,46.0,Dr. Emily Chen,Senior Toxicologist,CellNova CRO,San Diego
5,6,45.0,Dr. Sophie Martin,Principal Scientist,HepatoTech,Basel
6,7,38.0,Dr. John Lee,Senior Scientist,EarlyStage Bio,Texas
7,8,21.0,Dr. Robert Wilson,Research Scientist,OncoGenix,New York


In [8]:
# Write the ranking table to CSV, leaving out the positional index column.
final_output.to_csv(
    path_or_buf='ranked_leads(keywords_matching).csv',
    index=False,
)

In [9]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Sentence-embedding model used to compare each lead's text with the query.
model = SentenceTransformer('all-MiniLM-L6-v2')
target_query = "3D in-vitro liver toxicity models for drug safety and therapy development"

# Encode every lead description, and the query as a one-item batch.
lead_embeddings = model.encode(df['text'].tolist())
target_embedding = model.encode([target_query])

# One similarity per (lead, query) pair; with a single query the result is
# flattened to one score per lead, in the same order as the rows of df.
similarity_scores = cosine_similarity(lead_embeddings, target_embedding).flatten()

df['semantic_similarity'] = similarity_scores

# Only the last expression of a cell is rendered automatically. The focused
# similarity view used to be a bare expression whose result was discarded, so
# it is now shown explicitly (display is provided by the Jupyter/IPython kernel).
display(df[['name','text','semantic_similarity']])
df.head()

Unnamed: 0,name,title,company,location,company_funding,text,role_score,location_score,text_relevance,final_score,final_probability,semantic_similarity
0,Dr. Alice Smith,Director of Toxicology,BioNova Therapeutics,Boston,Series B,Published research on 3D liver toxicity models,1.0,1.0,0.6,0.84,84.0,0.880678
1,Dr. John Lee,Senior Scientist,EarlyStage Bio,Texas,Seed,Works on in-vitro cell culture experiments,0.6,0.4,0.2,0.38,38.0,0.332719
2,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge,Public,Leads safety assessment and liver injury studies,1.0,1.0,0.4,0.76,76.0,0.441446
3,Dr. Kevin Brown,VP Preclinical Research,Livera Biotech,San Francisco,Series C,Oversees preclinical programs using 3D hepatic...,0.3,1.0,0.4,0.55,55.0,0.784506
4,Dr. Sophie Martin,Principal Scientist,HepatoTech,Basel,Series A,Develops in-vitro liver toxicity assays and or...,0.3,0.4,0.6,0.45,45.0,0.666577


In [10]:
# Export the embedding-based ranking.
# The previous call wrote `final_output`, which still holds the keyword-based
# ranking, so this file was an exact copy of the keyword CSV. The ranking is
# now derived from 'semantic_similarity' using the semantic blend weights
# (0.5 similarity, 0.3 role, 0.2 location). It is computed on a copy, so `df`
# and `final_output` are left untouched.
(
    df.assign(
        Probability=(
            (
                0.5 * df['semantic_similarity']
                + 0.3 * df['role_score']
                + 0.2 * df['location_score']
            ) * 100
        ).round(1)
    )
    .sort_values(by='Probability', ascending=False)
    .reset_index(drop=True)
    # 1-based rank taken from the position after sorting.
    .assign(Rank=lambda ranked: ranked.index + 1)
    .loc[:, ["Rank", "Probability", "name", "title", "company", "location"]]
    .to_csv('ranked_leads(embeddings_matching).csv', index=False)
)

In [11]:
# Semantic blend: 0.5 * embedding similarity + 0.3 * role + 0.2 * location.
# NOTE: this overwrites the 'final_score' / 'final_probability' columns that
# were previously filled with the keyword-based blend.
df['final_score'] = (
    0.5*df['semantic_similarity']+
    0.3*df['role_score']+
    0.2*df['location_score']
)

# Same score multiplied by 100 and rounded to one decimal place.
df['final_probability'] = (df['final_score']*100).round(1)

# Only the last expression of a cell is rendered automatically. The sorted
# ranking used to be a bare expression whose result was discarded, so it is
# now shown explicitly (display is provided by the Jupyter/IPython kernel).
display(
    df[[
        'name',
        'title',
        'company',
        'location',
        'final_probability'
    ]].sort_values(by = 'final_probability',ascending = False)
)

df

Unnamed: 0,name,title,company,location,company_funding,text,role_score,location_score,text_relevance,final_score,final_probability,semantic_similarity
0,Dr. Alice Smith,Director of Toxicology,BioNova Therapeutics,Boston,Series B,Published research on 3D liver toxicity models,1.0,1.0,0.6,0.940339,94.0,0.880678
1,Dr. John Lee,Senior Scientist,EarlyStage Bio,Texas,Seed,Works on in-vitro cell culture experiments,0.6,0.4,0.2,0.42636,42.6,0.332719
2,Dr. Maria Gomez,Head of Preclinical Safety,PharmaCore,Cambridge,Public,Leads safety assessment and liver injury studies,1.0,1.0,0.4,0.720723,72.1,0.441446
3,Dr. Kevin Brown,VP Preclinical Research,Livera Biotech,San Francisco,Series C,Oversees preclinical programs using 3D hepatic...,0.3,1.0,0.4,0.682253,68.2,0.784506
4,Dr. Sophie Martin,Principal Scientist,HepatoTech,Basel,Series A,Develops in-vitro liver toxicity assays and or...,0.3,0.4,0.6,0.503288,50.3,0.666577
5,Dr. Anil Kumar,Director of Translational Safety,NeoPharm Labs,London,Private,Leads translational safety studies with focus ...,1.0,0.4,0.4,0.644647,64.5,0.529295
6,Dr. Emily Chen,Senior Toxicologist,CellNova CRO,San Diego,Bootstrapped,Conducts in-vitro toxicity screening for small...,0.6,0.4,0.4,0.544668,54.5,0.569336
7,Dr. Robert Wilson,Research Scientist,OncoGenix,New York,Public,Works on oncology drug discovery and cancer ce...,0.3,0.4,0.0,0.327193,32.7,0.314385


In [12]:
def llm_intent_extraction(text):
    """Derive three boolean intent flags from `text` by substring matching.

    The text is lowercased first; each flag is True when at least one of its
    trigger terms occurs in the lowercased text.

    Returns:
        A dict with the keys 'uses_in_vitro', 'focus_liver' and
        'focus_safety', each mapped to a bool.
    """
    lowered = text.lower()

    def mentions(*terms):
        # True as soon as any trigger term is present in the lowercased text.
        for term in terms:
            if term in lowered:
                return True
        return False

    return {
        'uses_in_vitro': mentions("in-vitro", "3d", "organ-on-chip"),
        "focus_liver": mentions("liver", "hepatic"),
        "focus_safety": mentions("toxicity", "safety"),
    }

# Extract the intent flags for every description and store each dict in a new column.
df['llm_signals'] = df['text'].map(llm_intent_extraction)
df.loc[:, ['name', 'llm_signals']]

Unnamed: 0,name,llm_signals
0,Dr. Alice Smith,"{'uses_in_vitro': True, 'focus_liver': True, '..."
1,Dr. John Lee,"{'uses_in_vitro': True, 'focus_liver': False, ..."
2,Dr. Maria Gomez,"{'uses_in_vitro': False, 'focus_liver': True, ..."
3,Dr. Kevin Brown,"{'uses_in_vitro': True, 'focus_liver': True, '..."
4,Dr. Sophie Martin,"{'uses_in_vitro': True, 'focus_liver': True, '..."
5,Dr. Anil Kumar,"{'uses_in_vitro': False, 'focus_liver': True, ..."
6,Dr. Emily Chen,"{'uses_in_vitro': True, 'focus_liver': False, ..."
7,Dr. Robert Wilson,"{'uses_in_vitro': False, 'focus_liver': False,..."
