# Preparation of Targets

### BC & Melanoma targets Sweden.xlsx
This notebook prepares the targets data from `BC & Melanoma targets Sweden.xlsx`.

We see that the data contains some inconsistencies and eliminate them.

In [1]:
# Load required packages
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os

## Load data

In [3]:
# Auxiliary lookup objects stored as pickles (presumably written by earlier
# processing notebooks — only unpickle files from a trusted source)
mapping = pd.read_pickle(filepath_or_buffer="../processed_data/mapping.pkl")
svenska_stader = pd.read_pickle(filepath_or_buffer="../processed_data/svenska_stader.pkl")

# Raw targets export (Excel workbook)
targets = pd.read_excel(io="../../0_raw_data/novartis_data/BC & Melanoma targets Sweden.xlsx")

# Display the raw frame as loaded
targets

Unnamed: 0,Name,Account Record Type,Account Type,Primary Specialty,Primary Parent: Name,Top Account Name,Country: Name,Products: Product Name,Tier,Segment,Product Metrics Name,Created Date,Last Modified By: Full Name,Last Modified Date,NVS_CORE_Sharing_Code,Product Metrics ID,Account ID,External ID,Salesforce ID
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,SE,BC,T3,S3,M-0001459248,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBH,0012o00002kTDKp,WSEM00021162,0012o00002kTDKpAAO
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,SE,BC,T3,S3,M-0001459249,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBI,0012o00002kTBeA,WSEM00119507,0012o00002kTBeAAAW
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,SE,BC,T2,S2,M-0001459250,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBJ,0012o00002iEZxE,WSEM00027290,0012o00002iEZxEAAW
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,SE,BC,T2,S2,M-0001459251,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBK,0012o00002kTAKC,WSEM00007941,0012o00002kTAKCAA4
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,SE,BC,T2,S2,M-0001459252,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBL,0012o00002iDs1j,WSEM00030426,0012o00002iDs1jAAC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,XN Oncology Targets,,,,,,,,,,,NaT,,NaT,,,,,
231,"Copyright (c) 2000-2021 salesforce.com, inc. A...",,,,,,,,,,,NaT,,NaT,,,,,
232,Confidential Information - Do Not Distribute,,,,,,,,,,,NaT,,NaT,,,,,
233,Generated By: Aswathi Padman 2021-09-17 12:16,,,,,,,,,,,NaT,,NaT,,,,,


## Preparatory steps

In [4]:
# Remove all rows with index > 228 (the trailing rows of the export are
# report-footer lines, not targets).
# `.loc` slices by label, so both start and stop of the slice are included.
# `.copy()` makes `targets` an independent frame; without it the later column
# assignments and drops may act on a view of the raw frame and raise
# SettingWithCopyWarning.
targets = targets.loc[0:228, :].copy()

In [5]:
# Replace the export's column headers with snake_case identifiers
targets = targets.rename(
    columns={
        "Name": "name",
        "Account Record Type": "account_record_type",
        "Account Type": "account_type",
        "Primary Specialty": "primary_specialty",
        "Primary Parent: Name": "primary_parent_name",
        "Top Account Name": "top_account_name",
        "Country: Name": "country_name",
        "Products: Product Name": "product_name",
        "Tier": "tier",
        "Segment": "segment",
        "Product Metrics Name": "product_metrics_name",
        "Created Date": "created_date",
        "Last Modified By: Full Name": "last_modified_by",
        "Last Modified Date": "last_modified_date",
        "NVS_CORE_Sharing_Code": "nvs_core_sharing_code",
        "Product Metrics ID": "product_metrics_id",
        "Account ID": "account_id",
        "External ID": "external_id",
        "Salesforce ID": "salesforce_id",
    }
)

In [6]:
# Display the frame to check the renamed column labels
targets

Unnamed: 0,name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,country_name,product_name,tier,segment,product_metrics_name,created_date,last_modified_by,last_modified_date,nvs_core_sharing_code,product_metrics_id,account_id,external_id,salesforce_id
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,SE,BC,T3,S3,M-0001459248,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBH,0012o00002kTDKp,WSEM00021162,0012o00002kTDKpAAO
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,SE,BC,T3,S3,M-0001459249,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBI,0012o00002kTBeA,WSEM00119507,0012o00002kTBeAAAW
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,SE,BC,T2,S2,M-0001459250,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBJ,0012o00002iEZxE,WSEM00027290,0012o00002iEZxEAAW
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,SE,BC,T2,S2,M-0001459251,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBK,0012o00002kTAKC,WSEM00007941,0012o00002kTAKCAA4
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,SE,BC,T2,S2,M-0001459252,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPBL,0012o00002iDs1j,WSEM00030426,0012o00002iDs1jAAC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,SE,Melanoma,T2,S3,M-0001458277,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IOvV,0012o00002iEaBJ,WSEM00073634,0012o00002iEaBJAA0
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,SE,BC,T3,S3,M-0001459386,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IPDV,0012o00002iEYHR,WSEM00088671,0012o00002iEYHRAA4
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,SE,Melanoma,T2,S3,M-0001458252,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IOv6,0012o00002iETYR,WSEM00047130,0012o00002iETYRAA4
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,SE,Melanoma,T2,S3,M-0001458272,2021-09-09,Ronja Moller Christensen,2021-09-09,:XN::SE:::,a0E2o000023IOvQ,0012o00002kT9l2,WSEM00003776,0012o00002kT9l2AAC


We now drop the irrelevant variables `country_name`, `product_metrics_name`, `created_date`, `last_modified_by`, `last_modified_date` and `nvs_core_sharing_code`.

There are four identifiers, of which we only retain `account_id`. We therefore drop `product_metrics_id`, `external_id` and `salesforce_id`.

In [7]:
# Drop irrelevant columns. Of the four identifier columns only `account_id`
# is kept.
# The result is reassigned instead of using `inplace=True`, so the drop never
# mutates a frame that may still be a view of the raw data.
targets = targets.drop(
    columns=[
        "country_name",
        "product_metrics_name",
        "created_date",
        "last_modified_by",
        "last_modified_date",
        "nvs_core_sharing_code",
        "product_metrics_id",
        "external_id",
        "salesforce_id",
    ]
)

In [8]:
# Display the frame to check which columns remain after the drop
targets

Unnamed: 0,name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,product_name,tier,segment,account_id
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j
...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2


In [9]:
# Give two columns the names used for merging later on
targets = targets.rename(
    {"name": "account_name", "product_name": "indication"},
    axis="columns",
)

In [10]:
# Column dtypes before any casting
targets.dtypes

account_name           object
account_record_type    object
account_type           object
primary_specialty      object
primary_parent_name    object
top_account_name       object
indication             object
tier                   object
segment                object
account_id             object
dtype: object

In [11]:
# Cast every column to its working dtype in one call: free-text columns are
# cast to Python strings, low-cardinality columns to categoricals.
# NOTE(review): casting to 'str' turns missing values into the literal string
# 'nan', so they are no longer counted as null afterwards — confirm this is
# intended.
targets = targets.astype(
    {
        "account_name": "str",
        "account_record_type": "category",
        "account_type": "category",
        "primary_specialty": "category",
        "primary_parent_name": "str",
        "top_account_name": "str",
        "indication": "category",
        "tier": "category",
        "segment": "category",
        "account_id": "str",
    }
)

In [12]:
# Display the frame after the dtype casts
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j
...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2


## Data exploration

To get a better impression of the given data, we compile some simple summary statistics and do some data exploration.

In [13]:
# (number of rows, number of columns)
targets.shape

(229, 10)

In [14]:
# Per-column non-null counts and dtypes
targets.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229 entries, 0 to 228
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   account_name         229 non-null    object  
 1   account_record_type  229 non-null    category
 2   account_type         229 non-null    category
 3   primary_specialty    229 non-null    category
 4   primary_parent_name  229 non-null    object  
 5   top_account_name     229 non-null    object  
 6   indication           229 non-null    category
 7   tier                 228 non-null    category
 8   segment              229 non-null    category
 9   account_id           229 non-null    object  
dtypes: category(6), object(4)
memory usage: 9.7+ KB


In [15]:
# Print the value counts of each categorical column, one block per column
for column in ("account_record_type", "account_type", "primary_specialty", "indication", "tier", "segment"):
    print(f"Frequencies for '{column}':\n{targets[column].value_counts()}\n")

Frequencies for 'account_record_type':
HCP    229
Name: account_record_type, dtype: int64

Frequencies for 'account_type':
Doctor    162
Nurse      67
Name: account_type, dtype: int64

Frequencies for 'primary_specialty':
SE-42-Onkologi                 198
SE-1J-Bröstonkologi             10
SE-11-Kirurgi                    9
SE-99-Övrig                      6
SE-29-Internmedicin              2
SE-7O-Klinisk farmakologi        1
SE-7N-Bröstkirurgi               1
SE-1Y-Gynekologisk onkologi      1
SE-03-Klinisk patologi           1
Name: primary_specialty, dtype: int64

Frequencies for 'indication':
BC          147
Melanoma     82
Name: indication, dtype: int64

Frequencies for 'tier':
T2    84
T3    83
T1    27
T4    21
NT    11
ST     2
Name: tier, dtype: int64

Frequencies for 'segment':
S3    93
S4    66
S2    54
S1    16
Name: segment, dtype: int64



In [16]:
# Number of fully identical rows, then number of rows that repeat an
# account_name seen in an earlier row
print(targets.duplicated(keep="first").sum())
print(targets.duplicated(subset=["account_name"], keep="first").sum())

0
21


In [17]:
# Rows whose account_name already appeared in an earlier row
targets.loc[targets["account_name"].duplicated(keep="first")]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
128,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,Melanoma,T2,S3,0012o00002kTBeA
136,Kilian Bachmeier,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centralsjukhuset i Karlstad,Melanoma,T3,S3,0012o00002iEVn6
137,Per Edlund,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Gävle Sjukhus,Melanoma,T3,S3,0012o00002kTDCr
141,Johan Falkenius,HCP,Doctor,SE-42-Onkologi,ME Huvud- hals- lung- och hudcancer,Karolinska Univ Sjh Solna,Melanoma,T2,S2,0012o00002iETVR
146,Magnus Lagerlund,HCP,Doctor,SE-42-Onkologi,Onkologienheten,Länssjukhuset i Kalmar,Melanoma,T2,S2,0012o00002iESPQ
147,Charlotte Bratthäll,HCP,Doctor,SE-42-Onkologi,Onkologienheten,Länssjukhuset i Kalmar,Melanoma,T3,S3,0012o00002iEU4H
149,Andreas Nearchou,HCP,Doctor,SE-42-Onkologi,Onkologkliniken Sörmland,Mälarsjukhuset,Melanoma,T2,S3,0012o00002iEcW0
152,Sara Wirén,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002kTELw
159,Antonios Valachis,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Örebro,Melanoma,T1,S2,0012o00002iEVfq
161,Kenneth Villman,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Örebro,Melanoma,T2,S2,0012o00002kTArl


In [18]:
# Show duplicates and originals: every row whose account_name occurs more than
# once, grouped by name (rows of a name keep their original order) and with the
# names ordered by where their first repeated row appears.
# This uses one boolean selection plus a stable sort instead of growing a frame
# with DataFrame.append in a loop (deprecated in pandas 1.4, removed in 2.0).
# It also keeps the column dtypes of `targets` and lists the rows of a name
# only once, even if the name occurs three or more times.
repeated_names = targets.loc[targets.duplicated(subset="account_name", keep="first"), "account_name"]
name_rank = {name: position for position, name in enumerate(dict.fromkeys(repeated_names))}

df = (
    targets[targets["account_name"].isin(list(name_rank))]
    .assign(_name_rank=lambda frame: frame["account_name"].map(name_rank))
    .sort_values("_name_rank", kind="mergesort")  # mergesort is stable
    .drop(columns="_name_rank")
)

df

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA
128,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,Melanoma,T2,S3,0012o00002kTBeA
70,Kilian Bachmeier,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centralsjukhuset i Karlstad,BC,T3,S3,0012o00002iEVn6
136,Kilian Bachmeier,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centralsjukhuset i Karlstad,Melanoma,T3,S3,0012o00002iEVn6
99,Per Edlund,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Gävle Sjukhus,BC,T2,S2,0012o00002kTDCr
137,Per Edlund,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Gävle Sjukhus,Melanoma,T3,S3,0012o00002kTDCr
122,Johan Falkenius,HCP,Doctor,SE-42-Onkologi,ME Huvud- hals- lung- och hudcancer,Karolinska Univ Sjh Solna,BC,T2,S2,0012o00002iETVR
141,Johan Falkenius,HCP,Doctor,SE-42-Onkologi,ME Huvud- hals- lung- och hudcancer,Karolinska Univ Sjh Solna,Melanoma,T2,S2,0012o00002iETVR
77,Magnus Lagerlund,HCP,Doctor,SE-42-Onkologi,Onkologienheten,Länssjukhuset i Kalmar,BC,T2,S2,0012o00002iESPQ
146,Magnus Lagerlund,HCP,Doctor,SE-42-Onkologi,Onkologienheten,Länssjukhuset i Kalmar,Melanoma,T2,S2,0012o00002iESPQ


In [19]:
# Are nurses always S4? List all nurse rows to inspect their segment.
targets.loc[targets["account_type"] == "Nurse"]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
7,Ulrika Bergqvist,HCP,Nurse,SE-11-Kirurgi,Avd 8 Kirurg- ortopedi,Skellefteå Lasarett,BC,T4,S4,0012o00002iEdBr
9,Elisabeth Ryd Ausén,HCP,Nurse,SE-11-Kirurgi,Bröstcentrum,Capio S:t Görans Sjukhus AB,BC,T4,S4,0012o00002iEVPo
10,Marie Santonsson,HCP,Nurse,SE-11-Kirurgi,Kirurg och Urologmottagningen,Blekingesjukhuset i Karlskrona,BC,NT,S4,0012o00002iEcUJ
15,Anna Maria Hasselgren Häll,HCP,Nurse,SE-42-Onkologi,Aleris Christinakliniken,Sophiahemmet AB,BC,T4,S4,0012o00002iEQSf
18,Elisabet Karlsson,HCP,Nurse,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,BC,T4,S4,0012o00002kT9HL
...,...,...,...,...,...,...,...,...,...,...
202,Lola Svensson,HCP,Nurse,SE-42-Onkologi,Kirurgklinikens onkologiska dagvård,Blekingesjukhuset i Karlskrona,BC,NT,S4,0012o00002iEbln
209,Harriet Axelsson,HCP,Nurse,SE-42-Onkologi,Onkologenheten,Uddevalla sjukhus,BC,NT,S4,0012o00002iEUUU
212,Carina Larsson,HCP,Nurse,SE-42-Onkologi,Onkologkliniken Sörmland,Mälarsjukhuset,BC,T4,S4,0012o00002iET13
213,Karin Lycknert,HCP,Nurse,SE-42-Onkologi,Onkologmottagningen,Hallands sjukhus Varberg,BC,NT,S4,0012o00002iEbhJ


In [20]:
# Distinct tier values per account type (nurses first, then doctors)
for account_type in ("Nurse", "Doctor"):
    print(targets.loc[targets["account_type"] == account_type, "tier"].unique())

['T4', 'NT', 'T3', 'T2', 'T1']
Categories (5, object): ['T4', 'NT', 'T3', 'T2', 'T1']
['T3', 'T2', 'T1', NaN, 'ST']
Categories (4, object): ['T3', 'T2', 'T1', 'ST']


In [21]:
# Distinct segment values per account type (nurses first, then doctors)
for account_type in ("Nurse", "Doctor"):
    print(targets.loc[targets["account_type"] == account_type, "segment"].unique())

['S4', 'S3']
Categories (2, object): ['S4', 'S3']
['S3', 'S2', 'S1', 'S4']
Categories (4, object): ['S3', 'S2', 'S1', 'S4']


Observations by variable:

* `account_name`: 21 doctors/nurses show up twice: once as a BC and once as a Melanoma doctor/nurse. The two rows of such a doctor/nurse always differ in indication and may differ in tier (doctors and nurses) and in segment (doctors only; nurses never change segment), but they never differ in account_type, primary_specialty, primary_parent_name, top_account_name or account_id.
* `account_record_type`: It always takes the value 'HCP' to indicate the doctor/nurse is a health care provider.
* `account_type`: An HCP can be a doctor (162/229) or a nurse (67/229).
* `primary_specialty`: 9 categories, 'SE-42-Onkologi' is the most common.
* `primary_parent_name`: Seems to indicate the ward that a doctor/nurse works on. However, it sometimes contains the hospital's name instead, e.g., where 'SE-Arbetsplats okänd' is given as top_account_name. In this case, the primary_parent_name is 'Capio S:t Görans Sjukhus AB'.
* `top_account_name`: Indication of hospital that a doctor/nurse works at. However, it sometimes also contains entries which do not refer to a hospital, e.g., 'SE-Arbetsplats okänd'. In some of these cases, the hospital may be found in primary_parent_name, in other cases, further research might be required.
* `indication`: An HCP can be assigned to either BC (147/229) or Melanoma (82/229). 21 HCPs are assigned to both categories.
* `tier`: T2 and T3 most common, one missing value. Nurses can be T1, T2, T3, T4, NT. Doctors can be T1, T2, T3, ST. (One missing value for a doctor.)
* `segment`: S3 most common. Nurses can be S3 or S4. Doctors can be S1, S2, S3 or S4.
* `account_id`: Unique identifier for each doctor/nurse, is of course unchanged by whether a doctor/nurse is associated with BC or Melanoma.

In [22]:
# Display the frame again before the per-variable clean-up below
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j
...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2


## Additional preparatory steps by variable

In the following, we predominantly correct inconsistencies in the data.

### `top_account_name`

Let us first take a look at the different hospitals given in `top_account_name`.

In [23]:
# Distinct top_account_name values in order of first appearance
targets['top_account_name'].drop_duplicates().tolist()

['Södra Älvsborgs Sjukhus Borås',
 'Akademiska sjukhuset',
 'Drottningmottagningen',
 'Universitetssjukhuset Linköping',
 'Hallands sjukhus Halmstad',
 'Lunds Universitet',
 'Mälarsjukhuset',
 'Skellefteå Lasarett',
 'Universitetssjukhuset Örebro',
 'Capio S:t Görans Sjukhus AB',
 'Blekingesjukhuset i Karlskrona',
 'Södersjukhuset',
 'Uddevalla sjukhus',
 'Länssjh Sundsvall-Härnösand',
 'Sophiahemmet AB',
 'Norrlands Universitetssjukhus',
 'Sahlgrenska Univ sjh',
 'Skånes Universitetssjukhus Malmö',
 'Västmanlands sjukhus Västerås',
 'Länssjukhuset i Kalmar',
 'Skånes Universitetssjukhus Lund',
 'SE-Arbetsplats okänd',
 'Karolinska Univ Sjh Solna',
 'Visby lasarett',
 'Länssjukhuset Ryhov',
 'Samverkansnämnden Uppsala Örebro sjv reg',
 'Johan Hartman',
 'Gävle Sjukhus',
 'Centralsjukhuset i Karlstad',
 'Centrallasarettet Växjö',
 'Falu Lasarett',
 'Skaraborgs Sjh Lidköping',
 'Helsingborgs lasarett',
 'Östersunds Sjukhus',
 'Per Karlsson Oncology Consulting',
 'TLV Tandvårds & läkemede

This list contains a few entries that do not identify the hospital (or comparable institution) an HCP works at:
* 'SE-Arbetsplats okänd': workplace unknown
* 'Samverkansnämnden Uppsala Örebro sjv reg': This is a healthcare region, not a hospital. Look [here](https://www.xn--sjukvrdsregionmellan-0zb.se/).
* 'Johan Hartman': name of HCP -> He works in Solna.
* 'Per Karlsson Oncology Consulting': Per Karlsson holds a position as full Professor of Oncology at Sahlgrenska Academy, combined with a position as Senior Consultant in Oncology and Chief Physician at Sahlgrenska University Hospital. Look [here](https://www.gu.se/en/research/breast-cancer-research-per-karlsson-group).
* 'TLV Tandvårds & läkemedelsförmånsverket': The Dental and Pharmaceutical Benefits Agency, TLV, is a central government agency whose remit is to determine whether a pharmaceutical product, medical device or dental care procedure shall be subsidized by the state. It also determines retail margins for all pharmacies in Sweden, regulates the substitution of medicines at the pharmacies and supervises certain areas of the pharmaceutical market. Look [here](https://www.tlv.se/in-english.html).
* 'Olof Bjarnadottir': name of HCP. Olof Bjarnadottir is affiliated with Lund University and is actually called Olöf Bjarnadottir. Look [here](https://portal.research.lu.se/en/persons/ol%C3%B6f-bjarnadottir).

Let us now take a closer look at these cases.

#### SE-Arbetsplats okänd

In [24]:
# Rows whose top_account_name is the "workplace unknown" placeholder
targets.loc[targets['top_account_name'].eq('SE-Arbetsplats okänd')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
28,Christina Linder Stragliotto,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T2,S2,0012o00002kTn1e
42,Evangelos Digkas,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T3,S3,0012o00002iEYoc
46,Gilberto Morgan,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T3,S3,0012o00002kTn3G
93,Nils-Olof Bengtsson,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T3,S3,0012o00002iEWxt


We now check whether any other information on these doctors is given.

In [25]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Christina Linder Stragliotto')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
28,Christina Linder Stragliotto,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T2,S2,0012o00002kTn1e


In [26]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Evangelos Digkas')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
42,Evangelos Digkas,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T3,S3,0012o00002iEYoc


In [27]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Gilberto Morgan')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
46,Gilberto Morgan,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T3,S3,0012o00002kTn3G


In [28]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Nils-Olof Bengtsson')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
93,Nils-Olof Bengtsson,HCP,Doctor,SE-99-Övrig,Capio S:t Görans Sjukhus AB,SE-Arbetsplats okänd,BC,T3,S3,0012o00002iEWxt


No further information than that obtained by filtering for 'SE-Arbetsplats okänd' is given. However, primary_parent_name already tells us which hospital these doctors work at, so 'SE-Arbetsplats okänd' can be replaced by 'Capio S:t Görans Sjukhus AB'.

#### Samverkansnämnden Uppsala Örebro sjv reg

In [29]:
# Rows assigned to the healthcare-region entry
targets.loc[targets['top_account_name'].eq('Samverkansnämnden Uppsala Örebro sjv reg')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
58,Johan Ahlgren,HCP,Doctor,SE-99-Övrig,Styrgruppen,Samverkansnämnden Uppsala Örebro sjv reg,BC,T2,S2,0012o00002iEWKi


In [30]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Johan Ahlgren')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
58,Johan Ahlgren,HCP,Doctor,SE-99-Övrig,Styrgruppen,Samverkansnämnden Uppsala Örebro sjv reg,BC,T2,S2,0012o00002iEWKi


Johan Ahlgren works at Akademiska Sjukhuset Uppsala. Look [here](https://orcid.org/0000-0001-6392-273X).

#### Johan Hartman

In [31]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Johan Hartman')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
59,Johan Hartman,HCP,Doctor,SE-03-Klinisk patologi,ME Klinisk patologi och cytologi Solna,Johan Hartman,BC,T1,S1,0012o00002iEVWw


He works in Solna. ME Klinisk patologi och cytologi Solna belongs to Karolinska Universitetssjukhuset, which is already in the data as 'Karolinska Univ Sjh Solna'.

#### Per Karlsson Oncology Consulting

In [32]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Per Karlsson')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
100,Per Karlsson,HCP,Doctor,SE-42-Onkologi,Per Karlsson Oncology Consulting,Per Karlsson Oncology Consulting,BC,T1,S1,0012o00002kTDQh


Per Karlsson works at Sahlgrenska Universitetssjukhuset, which is already in the data as 'Sahlgrenska Univ sjh'. Look [here](https://www.gu.se/en/about/find-staff/perkarlsson).

#### TLV Tandvårds & läkemedelsförmånsverket

In [33]:
# Rows assigned to the TLV agency entry
targets.loc[targets['top_account_name'].eq('TLV Tandvårds & läkemedelsförmånsverket')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
106,Roger Henriksson,HCP,Doctor,SE-42-Onkologi,Vetenskapliga råd Läkemedel,TLV Tandvårds & läkemedelsförmånsverket,BC,T3,S3,0012o00002iEY4D


Roger Henriksson works at Norrlands Universitetssjukhus. Look [here](https://www.umu.se/en/staff/roger-henriksson/).

#### Olof Bjarnadottir

In [34]:
# All rows recorded for this doctor
targets.loc[targets['account_name'].eq('Olof Bjarnadottir')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
217,Olof Bjarnadottir,HCP,Doctor,SE-42-Onkologi,,Olof Bjarnadottir,BC,T3,S3,0012o00002iER0W


Olof Bjarnadottir, actually Olöf Bjarnadottir, is working at Lunds Universitet. Look [here](https://portal.research.lu.se/en/persons/ol%C3%B6f-bjarnadottir).

With the information found out above, we now do the necessary changes.

In [35]:
# Replace the `top_account_name` entries that do not name a hospital with the
# hospital/institution identified for the affected HCPs.
# `Series.replace` with a mapping swaps whole cell values only. The earlier
# chain of `.str.replace` calls replaced substrings (and, depending on the
# pandas version, treated the search text as a regular expression), which can
# silently rewrite part of an unrelated name.
top_account_fixes = {
    "SE-Arbetsplats okänd": "Capio S:t Görans Sjukhus AB",
    "Samverkansnämnden Uppsala Örebro sjv reg": "Akademiska sjukhuset",
    "Johan Hartman": "Karolinska Univ Sjh Solna",
    "Per Karlsson Oncology Consulting": "Sahlgrenska Univ sjh",
    "TLV Tandvårds & läkemedelsförmånsverket": "Norrlands Universitetssjukhus",
    "Olof Bjarnadottir": "Lunds Universitet",
}
targets['top_account_name'] = targets['top_account_name'].replace(top_account_fixes)

# Sanity check: none of the replaced entries may remain
assert not targets['top_account_name'].isin(list(top_account_fixes)).any()

In [36]:
# Display the frame after the top_account_name replacements
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j
...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2


Check that SE-Arbetsplats okänd, Samverkansnämnden Uppsala Örebro sjv reg, Johan Hartman, Per Karlsson Oncology Consulting, TLV Tandvårds & läkemedelsförmånsverket, Olof Bjarnadottir are no longer present.

In [37]:
# Distinct top_account_name values after the replacements, in order of first appearance
list(pd.unique(targets['top_account_name']))

['Södra Älvsborgs Sjukhus Borås',
 'Akademiska sjukhuset',
 'Drottningmottagningen',
 'Universitetssjukhuset Linköping',
 'Hallands sjukhus Halmstad',
 'Lunds Universitet',
 'Mälarsjukhuset',
 'Skellefteå Lasarett',
 'Universitetssjukhuset Örebro',
 'Capio S:t Görans Sjukhus AB',
 'Blekingesjukhuset i Karlskrona',
 'Södersjukhuset',
 'Uddevalla sjukhus',
 'Länssjh Sundsvall-Härnösand',
 'Sophiahemmet AB',
 'Norrlands Universitetssjukhus',
 'Sahlgrenska Univ sjh',
 'Skånes Universitetssjukhus Malmö',
 'Västmanlands sjukhus Västerås',
 'Länssjukhuset i Kalmar',
 'Skånes Universitetssjukhus Lund',
 'Karolinska Univ Sjh Solna',
 'Visby lasarett',
 'Länssjukhuset Ryhov',
 'Gävle Sjukhus',
 'Centralsjukhuset i Karlstad',
 'Centrallasarettet Växjö',
 'Falu Lasarett',
 'Skaraborgs Sjh Lidköping',
 'Helsingborgs lasarett',
 'Östersunds Sjukhus',
 'Skaraborgs Sjukhus Skövde',
 'Hallands sjukhus Varberg']

This list of hospitals no longer contains the peculiar entries from before.

In order to be able to assign a hospital to a BC or Melanoma region later, we need to find out what city a hospital given in `top_account_name` is located in. 
So, we now create a dictionary that maps each hospital given in the above list to its respective address. The addresses have been looked up by hand on Google Maps (21/02/22, 12:00-13:00).

In [38]:
# These addresses were looked up by hand on Google Maps - 21.02.2022, 12:00-13:00 
# Maps each `top_account_name` value to the address of that hospital.
# Format: '<street>, <postal code> <city>, Schweden' (the country name is spelled in German).
# The city is later taken from the second-to-last comma-separated part, so new entries must
# keep this layout. 'Lunds Universitet' only has a city-level address ('Lund, Schweden').
address_dict = {'Södra Älvsborgs Sjukhus Borås': 'Brämhultsvägen 53, 501 82 Borås, Schweden',
                'Akademiska sjukhuset': 'Sjukhusvägen, 751 85 Uppsala, Schweden',
                'Drottningmottagningen': 'Drottninggatan 68, 111 21 Stockholm, Schweden',
                'Universitetssjukhuset Linköping': 'Universitetssjukhuset, 581 85 Linköping, Schweden',
                'Hallands sjukhus Halmstad': 'Lasarettsvägen, 302 33 Halmstad, Schweden',
                'Lunds Universitet': 'Lund, Schweden',
                'Mälarsjukhuset': 'Kungsvägen 42, 633 49 Eskilstuna, Schweden',
                'Skellefteå Lasarett': 'Lasarettsvägen 29, 931 41 Skellefteå, Schweden',
                'Universitetssjukhuset Örebro': 'Södra Grev Rosengatan, 701 85 Örebro, Schweden',
                'Capio S:t Görans Sjukhus AB': 'Sankt Göransplan 1, 112 19 Stockholm, Schweden',
                'Blekingesjukhuset i Karlskrona': 'Lasarettsvägen, 371 41 Karlskrona, Schweden',
                'Södersjukhuset': 'Sjukhusbacken 10, 118 83 Stockholm, Schweden',
                'Uddevalla sjukhus': 'Fjällvägen 9, 451 53 Uddevalla, Schweden',
                'Länssjh Sundsvall-Härnösand': 'Lasarettsvägen 21, 856 43 Sundsvall, Schweden',
                'Sophiahemmet AB': 'Valhallavägen 91, 114 86 Stockholm, Schweden',
                'Norrlands Universitetssjukhus': 'Daniel Naezéns väg, 907 37 Umeå, Schweden',
                'Sahlgrenska Univ sjh': 'Blå stråket 5, 413 45 Göteborg, Schweden',
                'Skånes Universitetssjukhus Malmö': 'Carl-Bertil Laurells gata 9, 214 28 Malmö, Schweden',
                'Västmanlands sjukhus Västerås': 'Sigtunagatan, 721 89 Västerås, Schweden',
                'Länssjukhuset i Kalmar': 'Lasarettsvägen 8, 392 44 Kalmar, Schweden',
                'Skånes Universitetssjukhus Lund': 'Entrégatan 7, 222 42 Lund, Schweden',
                'Karolinska Univ Sjh Solna': 'Eugeniavägen 3, 171 64 Solna, Schweden',
                'Visby lasarett': 'S:t Göransgatan 5, 621 55 Visby, Schweden',
                'Länssjukhuset Ryhov': 'Sjukhusgatan, 553 05 Jönköping, Schweden',
                'Gävle Sjukhus': 'Lasarettsvägen 5, 803 24 Gävle, Schweden',
                'Centralsjukhuset i Karlstad': 'Rosenborgsgatan 9, 652 30 Karlstad, Schweden',
                'Centrallasarettet Växjö': 'Strandvägen 8, 352 34 Växjö, Schweden',
                'Falu Lasarett': 'Lasarettsvägen 10, 791 82 Falun, Schweden',
                'Skaraborgs Sjh Lidköping': 'Mellbygatan 11, 531 51 Lidköping, Schweden',
                'Helsingborgs lasarett': 'Charlotte Yhlens gata 10, 252 23 Helsingborg, Schweden',
                'Östersunds Sjukhus': 'Kyrkgatan 16, 831 31 Östersund, Schweden',
                'Skaraborgs Sjukhus Skövde': 'Lövängsvägen, 541 42 Skövde, Schweden',
                'Hallands sjukhus Varberg': 'Träslövsvägen 68, 432 37 Varberg, Schweden'
}

In [39]:
# Show the frame once more before the `address` column is added
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j
...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2


We now add a new column `address` with the addresses from `address_dict`.

In [40]:
# Look up the address of each hospital. `Series.map` with a dict leaves hospitals that are
# missing from `address_dict` as NaN, so check the coverage right away instead of failing
# later with an opaque error when the address is split into its parts.
targets["address"] = targets["top_account_name"].map(address_dict)
hospitals_without_address = targets.loc[targets["address"].isna(), "top_account_name"].unique().tolist()
assert not hospitals_without_address, f"No address in address_dict for: {hospitals_without_address}"

In [41]:
# Inspect the frame with the new `address` column
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden"
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden"
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden"
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden"
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden"
...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden"
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden"
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden"
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden"


Both Lunds Universitet and Skånes Universitetssjukhus Lund appear in the data. Let us take a closer look at which HCPs belong to which of the two.

In [42]:
# HCPs whose top account is 'Lunds Universitet'
targets.loc[targets['top_account_name'].eq('Lunds Universitet')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address
5,Ana Bosch Campos,HCP,Doctor,SE-99-Övrig,Sekt V Onkologi och patologi MV,Lunds Universitet,BC,T2,S2,0012o00002kTn92,"Lund, Schweden"
217,Olof Bjarnadottir,HCP,Doctor,SE-42-Onkologi,,Lunds Universitet,BC,T3,S3,0012o00002iER0W,"Lund, Schweden"


In [43]:
# HCPs whose top account is 'Skånes Universitetssjukhus Lund'
targets.loc[targets['top_account_name'].eq('Skånes Universitetssjukhus Lund')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address
27,Christina Haapaniemi Olsson,HCP,Doctor,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,T3,S3,0012o00002kTBTj,"Entrégatan 7, 222 42 Lund, Schweden"
43,Fredrika Killander,HCP,Doctor,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,T2,S2,0012o00002iEVtE,"Entrégatan 7, 222 42 Lund, Schweden"
73,Lars Norberg,HCP,Doctor,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,T3,S3,0012o00002kTA2x,"Entrégatan 7, 222 42 Lund, Schweden"
83,Maria Svensson,HCP,Doctor,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,T3,S3,0012o00002iEWge,"Entrégatan 7, 222 42 Lund, Schweden"
84,Marie Klintman,HCP,Doctor,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,T3,S3,0012o00002kTlCj,"Entrégatan 7, 222 42 Lund, Schweden"
101,Per Malmström,HCP,Doctor,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,T3,S3,0012o00002iERGt,"Entrégatan 7, 222 42 Lund, Schweden"
102,Kala Hatti Önnerfält,HCP,Nurse,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,T4,S4,0012o00002iEPkj,"Entrégatan 7, 222 42 Lund, Schweden"
103,Anna-Karin Åkesson,HCP,Nurse,SE-42-Onkologi,VO Hematologi Onkologi Strålningsfysik,Skånes Universitetssjukhus Lund,BC,NT,S4,0012o00002iEPIp,"Entrégatan 7, 222 42 Lund, Schweden"
126,Helene Almström,HCP,Nurse,SE-1Y-Gynekologisk onkologi,Allmänna onkologmottagningen,Skånes Universitetssjukhus Lund,BC,T4,S4,0012o00002iESbx,"Entrégatan 7, 222 42 Lund, Schweden"
155,Kristin Sigurjonsdottir,HCP,Doctor,SE-42-Onkologi,Onkologimottagning 1 och 4,Skånes Universitetssjukhus Lund,Melanoma,T3,S3,0012o00002iDmiB,"Entrégatan 7, 222 42 Lund, Schweden"


In [44]:
# Re-display the full frame (the two filter cells above did not modify it)
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden"
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden"
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden"
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden"
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden"
...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden"
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden"
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden"
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden"


### Add `locality`, `municipality`, `territory`

We create a new column `locality` that contains the city an HCP belongs to.

In [45]:
# Extract the city from the address: take the second-to-last comma-separated part
# (e.g. '501 82 Borås') and keep its last space-separated token ('Borås').
# The vectorised string methods replace the row-by-row loop and do not rely on the index
# labels being 0..n-1.
# NOTE: a multi-word city name would be cut down to its last word; none occur in
# `address_dict`.
targets['locality'] = (
    targets['address']
    .str.split(', ').str[-2]
    .str.split(' ').str[-1]
)

In [46]:
# Inspect the new `locality` column
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad
...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping


In order to get the variable `municipality` - which unfortunately is not always identical to the brick - we merge `targets` with `svenska_stader`.

In [47]:
# Lookup table from locality to municipality (loaded from svenska_stader.pkl)
svenska_stader

Unnamed: 0,locality,municipality
0,Abbekås,Skurup
1,Abborrberget,Strängnäs
2,Alberga,Eskilstuna
3,Alby,Ånge
4,Alfta,Ovanåker
...,...,...
1911,Övertorneå,Övertorneå
1912,Övertänger,Falu
1913,Överum,Västervik
1914,Öxabäck,Mark


In [48]:
# Merge with 'svenska_stader' to attach the municipality of each locality.
# The left join keeps every HCP row; a locality that is missing from `svenska_stader`
# ends up with a NaN municipality.
targets_with_municipality = pd.merge(
    targets,
    svenska_stader,
    on='locality',
    how='left',
)
# A locality listed more than once in `svenska_stader` would silently duplicate HCP rows,
# so make sure the row count is unchanged before overwriting `targets`.
assert len(targets_with_municipality) == len(targets), "merge on 'locality' duplicated rows"
targets = targets_with_municipality
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality,municipality
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad
...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping


When looking at the values of `municipality` in `targets` and comparing them to the values of `brick` in `mapping`, some things are striking:

* Solna (a suburb of Stockholm) has no mapped municipality
* It turns out that Solna is part of Solna Municipality 
* The city of Solna is part of the Stockholm urban area


* The locality of Visby is assigned to the municipality of Gotland but in mapping, Visby is a brick of its own. Therefore, change the municipality of Visby from Gotland to Visby.
* The locality of Göteborg is assigned to the municipality of Göteborg but in mapping, Göteborg is split into several bricks. Therefore, change the municipality of Göteborg from Göteborg to e.g. 'Göteborg-Centrum + V'.
* The locality of Kalmar is assigned to the municipality of Kalmar but in mapping, Kalmar appears as part of Kalmar/Nybro. Therefore, change the municipality of Kalmar from Kalmar to Kalmar/Nybro.
* The locality of Solna is not present in svenska_stader. As Solna is part of the Stockholm urban area, assign the locality Solna to e.g the municipality of 'Stockholm-NO'.
* The locality of Stockholm is assigned to the municipality of Stockholm but in mapping, Stockholm is split into several bricks. Therefore, change the municipality of Stockholm from Stockholm to e.g. 'Stockholm-NO'.
* The locality of Lidköping is assigned to the municipality of Lidköping but in mapping, Lidköping appears as part of Lidköping/Skara. Therefore, change the municipality of Lidköping from Lidköping to Lidköping/Skara.
* The locality of Helsingborg is assigned to the municipality of Helsingborg but in mapping, Helsingborg appears as part of Helsingborg/Landskrona. Therefore, change the municipality of Helsingborg from Helsingborg to Helsingborg/Landskrona.

* The locality of Falun is assigned to the municipality of Falu but in mapping, Falu appears as part of Borlänge/Falun. Therefore, change the municipality of Falu from Falu to Borlänge/Falun.
* The locality of Varberg is assigned to the municipality of Varberg but in mapping, Varberg appears as part of Falkenberg/Varberg. Therefore, change the municipality of Varberg from Varberg to Falkenberg/Varberg.

In [49]:
# Solna has no municipality after the merge; assign it to the brick 'Stockholm-NO'.
# A boolean mask replaces the row loop and does not rely on the index labels being 0..n-1.
targets.loc[targets['locality'] == 'Solna', 'municipality'] = 'Stockholm-NO'

# Rename municipalities whose name differs from the corresponding brick name in `mapping`.
# Whole values are matched (not substrings). No key equals any value, so a single dict
# replacement gives the same result as applying the replacements one after another.
municipality_to_brick = {
    'Gotland': 'Visby',
    'Göteborg': 'Göteborg-Centrum + V',
    'Kalmar': 'Kalmar/Nybro',
    'Stockholm': 'Stockholm-NO',
    'Lidköping': 'Lidköping/Skara',
    'Helsingborg': 'Helsingborg/Landskrona',
    'Falu': 'Borlänge/Falun',
    'Varberg': 'Falkenberg/Varberg',
}
targets['municipality'] = targets['municipality'].replace(municipality_to_brick)

In [50]:
# Inspect the adjusted `municipality` values
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality,municipality
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm-NO
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad
...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping


We now add the variable `territory`.

In [51]:
# Brick-to-territory table with columns `brick`, `sweden_bc`, `sweden_me` (loaded from mapping.pkl)
mapping

Unnamed: 0,brick,sweden_bc,sweden_me
0,02 Norrtälje,Stockholm,Stockholm ONCO
1,04 Uppsala,Uppsala,Uppsala ONCO
2,03 Enköping,Uppsala,Uppsala ONCO
3,05 Nyköping,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
4,06 Katrineholm,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
...,...,...,...
73,85 Kungälv,Västra Götaland-Göteborg,Västra Götaland-Göteborg ONCO
74,86 Lerum/Alingsås,Västra Götaland-Alingsås,Västra Götaland-SÄS ONCO
75,91 Malmö,Skåne-Lund,Skåne ONCO
76,92 Lund,Skåne-Lund,Skåne ONCO


In order to be able to merge `mapping` to `targets`, we need to ensure that the `brick` variable in `mapping` and the `municipality` variable in `targets` have the same format, i.e., the two digits at the start of any entry in `brick` must be removed.

In [52]:
# Remove the leading numeric code from each brick (e.g. '02 Norrtälje' -> 'Norrtälje').
# Only a digit prefix followed by whitespace is stripped, so re-running the cell leaves
# already-cleaned names untouched instead of dropping their first word.
mapping['brick'] = mapping['brick'].astype(str).str.replace(r'^\d+\s+', '', regex=True)
mapping

Unnamed: 0,brick,sweden_bc,sweden_me
0,Norrtälje,Stockholm,Stockholm ONCO
1,Uppsala,Uppsala,Uppsala ONCO
2,Enköping,Uppsala,Uppsala ONCO
3,Nyköping,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
4,Katrineholm,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
...,...,...,...
73,Kungälv,Västra Götaland-Göteborg,Västra Götaland-Göteborg ONCO
74,Lerum/Alingsås,Västra Götaland-Alingsås,Västra Götaland-SÄS ONCO
75,Malmö,Skåne-Lund,Skåne ONCO
76,Lund,Skåne-Lund,Skåne ONCO


In [53]:
# add territory
# For every HCP, find the brick in `mapping` that equals the HCP's municipality and take
# the BC territory (`sweden_bc`) or the Melanoma territory (`sweden_me`), depending on the
# HCP's indication.

# Keep the first row per brick, which mirrors a first-match lookup.
territory_by_brick = mapping.drop_duplicates(subset='brick').set_index('brick')

is_bc = targets['indication'] == 'BC'
is_melanoma = targets['indication'] == 'Melanoma'

# Fail loudly on an unexpected indication instead of silently reusing the territory of the
# previous row (or hitting an undefined variable on the first row).
unknown_indications = targets.loc[~(is_bc | is_melanoma), 'indication'].unique().tolist()
if unknown_indications:
    raise ValueError(f"Unexpected indication values: {unknown_indications}")

# Every municipality must exist as a brick; report the offending names rather than raising
# a bare IndexError.
has_brick = targets['municipality'].isin(territory_by_brick.index)
municipalities_without_brick = targets.loc[~has_brick, 'municipality'].unique().tolist()
if municipalities_without_brick:
    raise ValueError(f"No brick in mapping for municipalities: {municipalities_without_brick}")

bc_territory = targets['municipality'].map(territory_by_brick['sweden_bc'])
melanoma_territory = targets['municipality'].map(territory_by_brick['sweden_me'])
# Rows with indication 'BC' take the BC territory, all remaining (Melanoma) rows the other.
targets['territory'] = bc_territory.where(is_bc, melanoma_territory)
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality,municipality,territory
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås,Västra Götaland-Borås
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala,Uppsala
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm-NO,Stockholm
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping,Östergötland-Linköping
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad,Halland-Halmstad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå,Västerbotten-Umeå ONCO
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping-Jönköping
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö,Kronoberg-Växjö ONCO
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping ONCO


### `tier`

Hildur Helgadottir has no value for `tier`.

She is in S1, so let us take a look at which tier the other HCPs in S1 have.

In [54]:
# All HCPs in segment S1, to see which tiers occur there
targets.loc[targets['segment'].eq('S1')]

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality,municipality,territory
13,Anna-Karin Tzikas,HCP,Doctor,SE-42-Onkologi,Onkologenheten,Uddevalla sjukhus,BC,T1,S1,0012o00002iEd8I,"Fjällvägen 9, 451 53 Uddevalla, Schweden",Uddevalla,Uddevalla,Västra Götaland-Uddevalla
16,Anne-Kristine Andersson,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,BC,T1,S1,0012o00002iETyo,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå,Västerbotten-Umeå
17,Antonios Valachis,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Örebro,BC,T1,S1,0012o00002iEVfq,"Södra Grev Rosengatan, 701 85 Örebro, Schweden",Örebro,Örebro,Örebro-Örebro
19,Barbro Linderholm,HCP,Doctor,SE-42-Onkologi,VO Onkologi,Sahlgrenska Univ sjh,BC,T1,S1,0012o00002iETcV,"Blå stråket 5, 413 45 Göteborg, Schweden",Göteborg,Göteborg-Centrum + V,Västra Götaland-Göteborg
53,Henrik Lindman,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T1,S1,0012o00002kTAh5,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala,Uppsala
56,Jan Frisell,HCP,Doctor,SE-11-Kirurgi,Bröstcentrum,Karolinska Univ Sjh Solna,BC,T1,S1,0012o00002kTE4Y,"Eugeniavägen 3, 171 64 Solna, Schweden",Solna,Stockholm-NO,Stockholm
59,Johan Hartman,HCP,Doctor,SE-03-Klinisk patologi,ME Klinisk patologi och cytologi Solna,Karolinska Univ Sjh Solna,BC,T1,S1,0012o00002iEVWw,"Eugeniavägen 3, 171 64 Solna, Schweden",Solna,Stockholm-NO,Stockholm
60,Jonas Bergh,HCP,Doctor,SE-42-Onkologi,Bröstcentrum,Karolinska Univ Sjh Solna,BC,T1,S1,0012o00002iEfZT,"Eugeniavägen 3, 171 64 Solna, Schweden",Solna,Stockholm-NO,Stockholm
80,Maria Ekholm,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T1,S1,0012o00002iEezE,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping-Jönköping
100,Per Karlsson,HCP,Doctor,SE-42-Onkologi,Per Karlsson Oncology Consulting,Sahlgrenska Univ sjh,BC,T1,S1,0012o00002kTDQh,"Blå stråket 5, 413 45 Göteborg, Schweden",Göteborg,Göteborg-Centrum + V,Västra Götaland-Göteborg


These HCPs are either in T1 or ST. The vast majority is in T1. We now also assign Hildur Helgadottir to T1.

In [55]:
# Set the tier of every row belonging to Hildur Helgadottir to 'T1'
is_hildur = targets['account_name'].eq('Hildur Helgadottir')
targets.loc[is_hildur, 'tier'] = 'T1'
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality,municipality,territory
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås,Västra Götaland-Borås
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala,Uppsala
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm-NO,Stockholm
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping,Östergötland-Linköping
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad,Halland-Halmstad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå,Västerbotten-Umeå ONCO
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping-Jönköping
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö,Kronoberg-Växjö ONCO
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping ONCO


In [56]:
# Final look at the prepared frame before it is saved
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality,municipality,territory
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås,Västra Götaland-Borås
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala,Uppsala
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm-NO,Stockholm
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping,Östergötland-Linköping
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad,Halland-Halmstad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå,Västerbotten-Umeå ONCO
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping-Jönköping
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö,Kronoberg-Växjö ONCO
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping ONCO


In [57]:
# Save the prepared data frame
route0 = "../processed_data"

# `makedirs(..., exist_ok=True)` also creates missing parent directories and does not fail
# if the directory already exists or is created between a check and the call.
os.makedirs(route0, exist_ok=True)

print("saving file corresponding to targets.pkl")
targets.to_pickle(f"{route0}/targets.pkl")
# Read the file back to confirm that it was written and can be loaded again.
pd.read_pickle(f"{route0}/targets.pkl")

saving file corresponding to targets.pkl


Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id,address,locality,municipality,territory
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås,Västra Götaland-Borås
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala,Uppsala
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm-NO,Stockholm
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping,Östergötland-Linköping
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad,Halland-Halmstad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå,Västerbotten-Umeå ONCO
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping-Jönköping
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö,Kronoberg-Växjö ONCO
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping,Jönköping ONCO
