# Persona-Maker

The classification logic depends on four parameters:

- Bucket (10 buckets - sub headers below)
- Hierarchy (Decision Maker or Practitioner)
- Persona Bucket (4 persona buckets derived from Buckets)
- Persona (8 personas, based on combination of persona bucket and hierarchy)

This graphic explains the sorting logic:
![Image](https://pages.databricks.com/rs/094-YMS-629/images/Personasv3.jpg)

In [1]:
#Import core libraries. Ensure you have these installed in your work environment.
import numpy as np
import pandas as pd
import re

In [None]:
#Import csv extract. Set encoding to ISO-8859-1 to avoid formatting issues. Substitute in desired input file name
#"--FileName.csv---" is a placeholder path. The loaded frame is bound to df, and the
#later cells read its 'Title' column, so the extract must contain that column.
df = pd.read_csv("--FileName.csv---", encoding = "ISO-8859-1")

In [3]:
#The 'set_bucket' method returns the bucket label for a job title
def set_bucket(title):
    """Return the bucket label for a job title.

    Rules are evaluated top to bottom and the first match wins, so the
    C-level checks take precedence over the functional buckets. Keyword
    lists are matched as plain substrings of the lower-cased title; the
    'BI', 'IT' and 'IS' checks are case-sensitive on the raw title.

    Args:
        title: Job title text. Any non-string value (None, NaN, ...) is
            treated as a missing title.

    Returns:
        One of:
        'C-Level Technology Decision Maker',
        'C-Level Decision Maker',
        'Data Science', 'Engineering', 'Data Analyst', 'Architect',
        'Consultant', 'Information Technology', 'Blank', 'Other'.
    """
    # A missing title has no text to inspect; report it the same way as the
    # 'blank' placeholder instead of failing on .lower().
    if not isinstance(title, str):
        return 'Blank'

    # The misspelled 'artifical' variant is kept so titles carrying that typo
    # are still recognised.
    csuitetech = ('chief information', 'chief techn', 'chief data', 'chief archit', 'chief analyt',
                  'chief artificial intelligence', 'chief artifical intelligence')
    engineer = ('engin', 'develop', 'programmer', 'dev ', 'devops', 'dev ops', 'desarr', 'desen',
                'etl', 'java', 'apache', 'python', 'scala')
    datascientist = ('data scien', 'deep learning', 'decision science', 'machine learning', 'ml',
                     'artificial intelligence', 'iot')
    dataanalyst = ('data analys', 'bi analys', 'statist', 'insight', 'informatics', 'analyt',
                   'intelligen', 'analysis', 'reporting')
    it = ('aws', 'azure', 'agile', 'analista', 'application', 'administra', 'capacity', 'continuity',
          'cyber', 'computing', 'linux', 'unix', 'threat', 'sharepoint', 'itil', 'itsm',
          'disaster recovery', 'delivery', 'deployment', 'desktop', 'dw', 'erp', 'crm', 'enterprise',
          'mainframe', 'server', 'storage', 'middleware', 'implementation', 'informatique',
          'solution', 'computer', 'tech', 'information', 'platform', 'cloud', 'dba', 'data',
          'integration', 'warehouse', 'analyst', 'infrastructure', 'digital', 'software', 'network',
          'system admin', 'systems', 'security', 'end user')

    lowered = title.lower()  # computed once; every rule below reuses it

    def has_any(keywords):
        """True when any keyword is a substring of the lower-cased title."""
        return any(word in lowered for word in keywords)

    def matches(pattern):
        """True when the regex matches anywhere in the lower-cased title."""
        return re.search(pattern, lowered) is not None

    # --- C-level technology roles -------------------------------------
    if has_any(csuitetech):
        return 'C-Level Technology Decision Maker'
    # cio / cdo / cto / caio at the start of the title or after a space,
    # slash or hyphen. No trailing boundary is required.
    if matches(r"(?:^|[ /-])c(?:i|d|t|ai)o"):
        return 'C-Level Technology Decision Maker'
    if matches(r"chief.*architect"):
        return 'C-Level Technology Decision Maker'
    if matches(r"chief.*(?:technology|information|digital|ai|it).*officer"):
        return 'C-Level Technology Decision Maker'

    # --- Remaining C-level roles --------------------------------------
    # Any three-character "c?o" token (ceo, cfo, coo, ...).
    if matches(r"(?:^|[ ,/])c\wo(?:[, ]|$)"):
        return 'C-Level Decision Maker'
    if matches(r"chief.*officer"):
        return 'C-Level Decision Maker'

    # --- Functional buckets -------------------------------------------
    if has_any(datascientist):
        return 'Data Science'
    # Stand-alone "ai" token.
    if matches(r"(?:^|[ ,/])ai(?:[, ]|$)"):
        return 'Data Science'
    if has_any(engineer):
        return 'Engineering'
    # "dev" at the start of the title or of a later word.
    if matches(r"(?:^| )dev"):
        return 'Engineering'
    if has_any(dataanalyst):
        return 'Data Analyst'
    if 'architect' in lowered:
        return 'Architect'
    if 'consult' in lowered:
        return 'Consultant'
    # Case-sensitive on purpose: only an upper-case "BI" counts.
    if 'BI' in title:
        return 'Data Analyst'
    if has_any(it):
        return 'Information Technology'
    # Case-sensitive acronym checks on the raw title.
    if 'IT' in title or 'IS' in title:
        return 'Information Technology'
    # Stand-alone lower/mixed-case "is" token.
    if matches(r"(?:^|[ ,/])is(?:[, ]|$)"):
        return 'Information Technology'

    if 'blank' in lowered:
        return 'Blank'
    return 'Other'

In [4]:
#The 'set_hierarchy' method classifies individuals as decision makers vs practitioners

def set_hierarchy(title):
    """Return the hierarchy label for a job title.

    Args:
        title: Job title text. Any non-string value (None, NaN, ...) is
            treated as a missing title.

    Returns:
        '' for C-level titles (presumably because their bucket label
        already names the decision-maker role), 'Decision Maker' for
        titles containing a seniority keyword, otherwise 'Practitioner'.
    """
    # A missing title carries no seniority keyword, so it is reported the
    # same way as the 'blank' placeholder instead of failing on .lower().
    if not isinstance(title, str):
        return 'Practitioner'

    # The misspelled 'artifical' variant is kept so titles carrying that typo
    # are still recognised.
    csuitetech = ('chief information', 'chief techn', 'chief data', 'chief archit', 'chief analyt',
                  'chief artificial intelligence', 'chief artifical intelligence')
    # Every C-level rule yields the same result, so they are checked as one
    # group. A "chief ... (technology|information|...) ... officer" title is
    # already covered by the broader "chief.*officer" pattern.
    c_level_patterns = (
        r"(?:^|[ /-])c(?:i|d|t|ai)o",   # cio / cdo / cto / caio
        r"chief.*architect",
        r"(?:^|[ ,/])c\wo(?:[, ]|$)",   # any three-character "c?o" token
        r"chief.*officer",
    )
    # 'founder' also matches 'co-founder' because keywords are substrings.
    decisionmaker = ('principal', 'head', 'director', 'vp', 'president', 'founder', 'chief')

    lowered = title.lower()

    if any(word in lowered for word in csuitetech):
        return ''
    if any(re.search(pattern, lowered) for pattern in c_level_patterns):
        return ''
    if any(word in lowered for word in decisionmaker):
        return 'Decision Maker'
    return 'Practitioner'

In [7]:
#The 'set_persona_bucket' method rolls a bucket label up into its persona bucket

def set_persona_bucket(bucket):
    """Map a bucket label to its persona bucket.

    Rules are tried in order and the first rule with a marker contained in
    ``bucket`` (substring test) decides the result; anything unmatched is
    'Other'.
    """
    rules = (
        (('Engineering', 'Architect', 'Information Technology'), 'Data Engineering'),
        (('Data Science', 'Data Analyst'), 'Data Science'),
        (('C-Level Technology Decision Maker',), 'C-Level Technology Decision Maker'),
        (('C-Level Decision Maker',), 'C-Level Decision Maker'),
    )
    for markers, persona_bucket in rules:
        for marker in markers:
            if marker in bucket:
                return persona_bucket
    return 'Other'

In [None]:
#Inspect imported data
#Prints a summary of the loaded frame so the extract can be checked before classification
df.info()

In [None]:
#Fix NULLs
#Missing titles become the literal 'blank', which set_bucket maps to the 'Blank' bucket.
#The filled column is assigned back to df: calling fillna(inplace=True) on the df['Title']
#selection is a chained assignment, which pandas warns about and which does not update df
#when Copy-on-Write is active.
df['Title'] = df['Title'].fillna('blank')
#Every other missing cell receives the same placeholder
df.fillna('blank',inplace=True)
df.info()

In [11]:
#Classify every title into its bucket
df['Bucket'] = df['Title'].apply(set_bucket)

In [12]:
#Derive the hierarchy label from every title
df['Hierarchy'] = df['Title'].apply(set_hierarchy)

In [13]:
#Roll every bucket up into its persona bucket
df['Persona Bucket'] = df['Bucket'].apply(set_persona_bucket)

In [14]:
#Persona = persona bucket followed by hierarchy. set_hierarchy returns '' for C-level titles,
#so trailing whitespace is trimmed to keep those labels from ending in a space.
df['Persona'] = (df['Persona Bucket']+" "+df['Hierarchy']).str.rstrip()

In [17]:
#Write the classified frame to disk.
#"---TargetFileName.csv---" is a placeholder: replace it with the desired output file name before running.
df.to_csv("---TargetFileName.csv---")

In [None]:
#End of Code