# Creation of counts of HCPs, hospitals and calls

In this notebook, we calculate the following things for each BC and melanoma region:

* how many HCPs there are 
* how many hospitals there are
* how many promotional calls were made there in each month from 01-2020 to 08-2021

First, we prepare targets and calls in such a way that the HCP and hospital counts can be determined from them. Subsequently, we calculate the call counts and then the HCP counts and hospital counts.

In [1]:
import pandas as pd
import os

#pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)

## Define helper functions

In [2]:
def aggregate_by_regions_and_dates(mapping, data, variable, new_column):
    """
    Aggregate the data by regions to obtain the counts for hospitals or HCPs.
    """
    counts = mapping.copy()

    data_no_duplicates = data[~data.duplicated(subset=['date', variable, 'indication'])]
    
    for date in data['date'].unique():
        
        #counts_total = pd.DataFrame()

        for i in range(len(counts['brick'])):
            brick = counts['brick'][i]
            counts.at[i, 'count_bc'] = len(data_no_duplicates[(data_no_duplicates['date'] == date) & (data_no_duplicates['municipality'] == brick) & (data_no_duplicates['indication'] == 'BC')])
            counts.at[i, 'count_me'] = len(data_no_duplicates[(data_no_duplicates['date'] == date) & (data_no_duplicates['municipality'] == brick) & (data_no_duplicates['indication'] == 'Melanoma')])
    
    # Group and sum by 'count_bc'
    counts_bc = counts.filter(items=['date', 'brick', 'sweden_bc', 'count_bc'])
    counts_bc = counts_bc.rename(columns = {'count_bc': new_column})
    counts_bc['sweden_bc'] = counts_bc['sweden_bc'].cat.remove_unused_categories(inplace = False)
    counts_bc = counts_bc.groupby(by = ['date', 'sweden_bc']).sum().reset_index()

    # Rename 'sweden_bc' to 'territory'
    counts_bc = counts_bc.rename(columns = {"sweden_bc": "territory"})

    # Group and sum by 'count_me'
    counts_me = counts.filter(items=['date', 'brick', 'sweden_me', 'count_me'])
    counts_me = counts_me.rename(columns = {'count_me': new_column})
    counts_me['sweden_me'] = counts_me['sweden_me'].cat.remove_unused_categories(inplace = False)
    counts_me = counts_me.groupby(by = ['date', 'sweden_me']).sum().reset_index()

    # Rename 'sweden_me' to 'territory'
    counts_me = counts_me.rename(columns = {"sweden_me": "territory"})

    # Concatenate counts_bc and counts_me
    counts = pd.concat([counts_bc, counts_me], ignore_index = True)

    return(counts)

In [3]:
def aggregate_by_regions(mapping, data, variable, new_column):
    """
    Aggregate the data by regions to obtain the counts for hospitals or HCPs.
    """
    counts = mapping.copy()

    data_no_duplicates = data[~data.duplicated(subset=[variable, 'indication'])]

    for i in range(len(counts['brick'])):
        brick = counts['brick'][i]
        counts.at[i, 'count_bc'] = len(data_no_duplicates[(data_no_duplicates['municipality'] == brick) & (data_no_duplicates['indication'] == 'BC')])
        counts.at[i, 'count_me'] = len(data_no_duplicates[(data_no_duplicates['municipality'] == brick) & (data_no_duplicates['indication'] == 'Melanoma')])
    
    # Group and sum by 'count_bc'
    counts_bc = counts.filter(items=['brick', 'sweden_bc', 'count_bc'])
    counts_bc = counts_bc.rename(columns = {'count_bc': new_column})
    counts_bc = counts_bc.groupby(by = 'sweden_bc').sum().reset_index()

    # Rename 'sweden_bc' to 'territory'
    counts_bc = counts_bc.rename(columns = {"sweden_bc": "territory"})

    # Group and sum by 'count_me'
    counts_me = counts.filter(items=['brick', 'sweden_me', 'count_me'])
    counts_me = counts_me.rename(columns = {'count_me': new_column})
    counts_me = counts_me.groupby(by = 'sweden_me').sum().reset_index()

    # Rename 'sweden_me' to 'territory'
    counts_me = counts_me.rename(columns = {"sweden_me": "territory"})

    # Concatenate counts_bc and counts_me
    counts = pd.concat([counts_bc, counts_me], ignore_index = True)
    
    return(counts)

## Load data

In [4]:
# Read in datasets
route0 = "../processed_data"

targets = pd.read_pickle(f"{route0}/targets.pkl")
calls = pd.read_pickle(f"{route0}/calls.pkl")
svenska_stader = pd.read_pickle(f"{route0}/svenska_stader.pkl")
mapping = pd.read_pickle(f"{route0}/mapping.pkl")

## 1. Targets: Preparation for HCP and hospital counts

In this part, we prepare the targets to be later used for the HCP and hospital counts.

In [5]:
targets = targets.drop(columns=[ 'address', 'locality', 'municipality', 'territory'])
targets

Unnamed: 0,account_name,account_record_type,account_type,primary_specialty,primary_parent_name,top_account_name,indication,tier,segment,account_id
0,Adel Bader Hamdalla,HCP,Doctor,SE-42-Onkologi,Avd E71 Hematologi onkologi,Södra Älvsborgs Sjukhus Borås,BC,T3,S3,0012o00002kTDKp
1,Aglaia Schiza,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Akademiska sjukhuset,BC,T3,S3,0012o00002kTBeA
2,Agneta Nordin Danfors,HCP,Doctor,SE-1J-Bröstonkologi,Drottningmottagningen,Drottningmottagningen,BC,T2,S2,0012o00002iEZxE
3,Ahmed Abbas Albu-Kareem,HCP,Doctor,SE-42-Onkologi,Onkologiska kliniken,Universitetssjukhuset Linköping,BC,T2,S2,0012o00002kTAKC
4,Alaa Haidar,HCP,Doctor,SE-42-Onkologi,Onkologiska enheten,Hallands sjukhus Halmstad,BC,T2,S2,0012o00002iDs1j
...,...,...,...,...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,HCP,Doctor,SE-42-Onkologi,Cancercentrum,Norrlands Universitetssjukhus,Melanoma,T2,S3,0012o00002iEaBJ
225,Anna Nyberg,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,BC,T3,S3,0012o00002iEYHR
226,Göran Carlstedt,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Centrallasarettet Växjö,Melanoma,T2,S3,0012o00002iETYR
227,Mikael Wallander,HCP,Doctor,SE-42-Onkologi,Onkologkliniken,Länssjukhuset Ryhov,Melanoma,T2,S3,0012o00002kT9l2


In order to be able to assign a hospital to a BC or Melanoma region later, we need to find out what city a hospital given in `top_account_name` is located in. 
So, we now create a dictionary that maps each hospital given in the above list to its respective address. The addresses have been looked up by hand on Google Maps (21/02/22, 12:00-13:00).

In [6]:
# These addresses were looked up by hand on Google Maps - 21.02.2022, 12:00-13:00 
address_dict = {'Södra Älvsborgs Sjukhus Borås': 'Brämhultsvägen 53, 501 82 Borås, Schweden',
                'Akademiska sjukhuset': 'Sjukhusvägen, 751 85 Uppsala, Schweden',
                'Drottningmottagningen': 'Drottninggatan 68, 111 21 Stockholm, Schweden',
                'Universitetssjukhuset Linköping': 'Universitetssjukhuset, 581 85 Linköping, Schweden',
                'Hallands sjukhus Halmstad': 'Lasarettsvägen, 302 33 Halmstad, Schweden',
                'Lunds Universitet': 'Lund, Schweden',
                'Mälarsjukhuset': 'Kungsvägen 42, 633 49 Eskilstuna, Schweden',
                'Skellefteå Lasarett': 'Lasarettsvägen 29, 931 41 Skellefteå, Schweden',
                'Universitetssjukhuset Örebro': 'Södra Grev Rosengatan, 701 85 Örebro, Schweden',
                'Capio S:t Görans Sjukhus AB': 'Sankt Göransplan 1, 112 19 Stockholm, Schweden',
                'Blekingesjukhuset i Karlskrona': 'Lasarettsvägen, 371 41 Karlskrona, Schweden',
                'Södersjukhuset': 'Sjukhusbacken 10, 118 83 Stockholm, Schweden',
                'Uddevalla sjukhus': 'Fjällvägen 9, 451 53 Uddevalla, Schweden',
                'Länssjh Sundsvall-Härnösand': 'Lasarettsvägen 21, 856 43 Sundsvall, Schweden',
                'Sophiahemmet AB': 'Valhallavägen 91, 114 86 Stockholm, Schweden',
                'Norrlands Universitetssjukhus': 'Daniel Naezéns väg, 907 37 Umeå, Schweden',
                'Sahlgrenska Univ sjh': 'Blå stråket 5, 413 45 Göteborg, Schweden',
                'Skånes Universitetssjukhus Malmö': 'Carl-Bertil Laurells gata 9, 214 28 Malmö, Schweden',
                'Västmanlands sjukhus Västerås': 'Sigtunagatan, 721 89 Västerås, Schweden',
                'Länssjukhuset i Kalmar': 'Lasarettsvägen 8, 392 44 Kalmar, Schweden',
                'Skånes Universitetssjukhus Lund': 'Entrégatan 7, 222 42 Lund, Schweden',
                'Karolinska Univ Sjh Solna': 'Eugeniavägen 3, 171 64 Solna, Schweden',
                'Visby lasarett': 'S:t Göransgatan 5, 621 55 Visby, Schweden',
                'Länssjukhuset Ryhov': 'Sjukhusgatan, 553 05 Jönköping, Schweden',
                'Gävle Sjukhus': 'Lasarettsvägen 5, 803 24 Gävle, Schweden',
                'Centralsjukhuset i Karlstad': 'Rosenborgsgatan 9, 652 30 Karlstad, Schweden',
                'Centrallasarettet Växjö': 'Strandvägen 8, 352 34 Växjö, Schweden',
                'Falu Lasarett': 'Lasarettsvägen 10, 791 82 Falun, Schweden',
                'Skaraborgs Sjh Lidköping': 'Mellbygatan 11, 531 51 Lidköping, Schweden',
                'Helsingborgs lasarett': 'Charlotte Yhlens gata 10, 252 23 Helsingborg, Schweden',
                'Östersunds Sjukhus': 'Kyrkgatan 16, 831 31 Östersund, Schweden',
                'Skaraborgs Sjukhus Skövde': 'Lövängsvägen, 541 42 Skövde, Schweden',
                'Hallands sjukhus Varberg': 'Träslövsvägen 68, 432 37 Varberg, Schweden'}

The relevant columns to get the number of hospitals and HCPs in the BC and Melanoma regions are `account_name`, `top_account_name`, `indication`. We therefore select these columns now. 

In [7]:
targets_subset = targets.filter(items=['account_name', 'top_account_name', 'indication'])
targets_subset

Unnamed: 0,account_name,top_account_name,indication
0,Adel Bader Hamdalla,Södra Älvsborgs Sjukhus Borås,BC
1,Aglaia Schiza,Akademiska sjukhuset,BC
2,Agneta Nordin Danfors,Drottningmottagningen,BC
3,Ahmed Abbas Albu-Kareem,Universitetssjukhuset Linköping,BC
4,Alaa Haidar,Hallands sjukhus Halmstad,BC
...,...,...,...
224,Ylva Holmgren Stenlund,Norrlands Universitetssjukhus,Melanoma
225,Anna Nyberg,Länssjukhuset Ryhov,BC
226,Göran Carlstedt,Centrallasarettet Växjö,Melanoma
227,Mikael Wallander,Länssjukhuset Ryhov,Melanoma


We now add a new column `address` with the addresses from `address_dict`.

In [8]:
targets_subset["address"] = targets_subset["top_account_name"].apply(lambda x: address_dict.get(x))
targets_subset

Unnamed: 0,account_name,top_account_name,indication,address
0,Adel Bader Hamdalla,Södra Älvsborgs Sjukhus Borås,BC,"Brämhultsvägen 53, 501 82 Borås, Schweden"
1,Aglaia Schiza,Akademiska sjukhuset,BC,"Sjukhusvägen, 751 85 Uppsala, Schweden"
2,Agneta Nordin Danfors,Drottningmottagningen,BC,"Drottninggatan 68, 111 21 Stockholm, Schweden"
3,Ahmed Abbas Albu-Kareem,Universitetssjukhuset Linköping,BC,"Universitetssjukhuset, 581 85 Linköping, Schweden"
4,Alaa Haidar,Hallands sjukhus Halmstad,BC,"Lasarettsvägen, 302 33 Halmstad, Schweden"
...,...,...,...,...
224,Ylva Holmgren Stenlund,Norrlands Universitetssjukhus,Melanoma,"Daniel Naezéns väg, 907 37 Umeå, Schweden"
225,Anna Nyberg,Länssjukhuset Ryhov,BC,"Sjukhusgatan, 553 05 Jönköping, Schweden"
226,Göran Carlstedt,Centrallasarettet Växjö,Melanoma,"Strandvägen 8, 352 34 Växjö, Schweden"
227,Mikael Wallander,Länssjukhuset Ryhov,Melanoma,"Sjukhusgatan, 553 05 Jönköping, Schweden"


The data frame `targets_subset` is now prepared to be used for the HCP and hospital counts.

## 2. Calls: Preparation for HCP and hospital counts

We now conduct some preparation steps with calls to be used for HCP and hospital counts later, not for the call counts.

The information from calls is meant to complement the information obtained from the targets table.
This means we suppose the hospitals stay the same, only HCPs are added.

In [9]:
hcp_list = targets['account_name'].unique().tolist()

# look only at HCPs we do not know yet from targets (and exclude HCOs)
calls_subset = calls.loc[(~calls['account_name'].isin(hcp_list)) & (calls['account_record_type'] == 'HCP')].reset_index(drop=True)

calls_subset

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
0,C004360771,HCP,0.0,2020-01-16,2020-01-16 21:17:00,David Sterner,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTsAl,Melanoma
1,C004360773,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Lars Sandberg,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVS2,Melanoma
2,C004360776,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Cecilia Kamali,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iDoJT,Melanoma
3,C004360777,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Åsa Jonforsen,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEWSI,Melanoma
4,C004360780,HCP,0.0,2020-01-16,2020-01-16 21:17:00,David Goldstein,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTlW5,Melanoma
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,C010427094,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Pernilla Nilsson,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002kTp20,Melanoma
175,C010427095,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Sandra Sjöstrand,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEdyx,Melanoma
176,C010427096,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Pia Törnblom,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iDrMc,Melanoma
177,C010427097,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Karolina Vernmark,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEQjh,Melanoma


### I. Multiple Assignment

Looking at the data frame, we see that some HCPs have more than one address/hospital assigned to them. The HCPs are:

* Björn Sigurdsson
* Hanna Eriksson (Solna, Lund)
* Lars Larsson (Boras, Lund)
* Muhammad Kadum (Lund, Kristianstad)

Let us take a closer look at these cases.

#### Björn Sigurdsson

In [10]:
calls_subset[calls_subset['account_name'] == 'Björn Sigurdsson']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
35,C004371901,HCP,0.0,2020-06-03,2020-06-03 11:01:00,Björn Sigurdsson,"Barn- och ungdomsmedicin, Jan Waldenströms gat...",Jan Waldenströms gata 18,0.0,Person Account,,Phone,30.0,0012o00002iEYtq,Melanoma
45,C005656684,HCP,1.0,2020-08-25,2020-08-25 14:30:00,Björn Sigurdsson,SE-Arbetsplats okänd Sweden,SE-Arbetsplats okänd,0.0,Person Account,,,30.0,0012o00002iEYtq,Melanoma
65,C005889050,HCP,1.0,2020-09-29,2020-09-29 14:00:00,Björn Sigurdsson,SE-Arbetsplats okänd Sweden,SE-Arbetsplats okänd,0.0,Person Account,,Phone,30.0,0012o00002iEYtq,Melanoma
74,C009587636,HCP,1.0,2020-10-19,2020-10-19 11:00:00,Björn Sigurdsson,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iEYtq,Melanoma


In [11]:
calls_subset['address'][35]

'Barn- och ungdomsmedicin, Jan Waldenströms gata 18, MALMÖ 21428'

Row 35 tells us that Björn Sigurdsson is from Malmö.

#### Hanna Eriksson

In [12]:
calls_subset[calls_subset['account_name'] == 'Hanna Eriksson']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
95,C009537214,HCP,1.0,2021-04-06,2021-04-06 15:29:00,Hanna Eriksson,"Eugeniavägen 3, SOLNA, Stockholms län A-B 1716...",Eugeniavägen 3,0.0,Person Account,Non-promotional discussion,Voice-only Call,30.0,0012o00002iEVhC,Melanoma
97,C009318413,HCP,1.0,2021-04-20,2021-04-20 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Voice-only Call,30.0,0012o00002iELPE,Melanoma
105,C009318244,HCP,1.0,2021-05-05,2021-05-05 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual Other,30.0,0012o00002iELPE,Melanoma
119,C009458860,HCP,0.0,2021-05-25,2021-05-25 15:10:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual MS Teams,,0012o00002iELPE,Melanoma
120,C009485879,HCP,0.0,2021-05-27,2021-05-27 17:00:00,Hanna Eriksson,"Entregatan 7, LUND, Skåne län 22241 Sweden",Entregatan 7,0.0,Person Account,Non-promotional discussion,Virtual MS Teams,30.0,0012o00002iELPE,Melanoma
124,C009541730,HCP,0.0,2021-05-27,2021-05-27 17:00:00,Hanna Eriksson,,Entregatan 7,0.0,Person Account,,Virtual MS Teams,90.0,0012o00002iEVhC,Melanoma
131,C009602973,HCP,0.0,2021-06-02,2021-06-02 09:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Voice-only Call,,0012o00002iELPE,Melanoma
132,C009582426,HCP,1.0,2021-06-04,2021-06-04 16:30:00,Hanna Eriksson,,,0.0,Person Account,,Voice-only Call,10.0,0012o00002iEVhC,Melanoma
136,C009840057,HCP,1.0,2021-06-16,2021-06-16 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual Other,30.0,0012o00002iELPE,Melanoma
147,C009840065,HCP,1.0,2021-06-21,2021-06-21 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual Other,30.0,0012o00002iELPE,Melanoma


Hanna Eriksson works at Karolinska Univ Sjh Solna. Look [here](https://www.aminer.cn/profile/hanna-eriksson/560adc8b45ce1e595ff7d2c1).

#### Muhammad Kadum

In [13]:
calls_subset[calls_subset['account_name'] == 'Muhammad Kadum']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
76,C007594422,HCP,0.0,2020-11-12,2020-11-12 15:00:00,Muhammad Kadum,"J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29...",J A Hedlunds Väg 5,0.0,Person Account,Promotional discussion,Virtual Veeva Engage,,0012o00002kTnsd,BC
115,C009443401,HCP,1.0,2021-05-21,2021-05-21 08:45:00,Muhammad Kadum,"Entregatan 7, LUND, Skåne län 22241 Sweden",Entregatan 7,0.0,Person Account,Promotional discussion,Virtual Other,30.0,0012o00002kTnsd,BC


Muhammad Kadum works at Lund University. Look [here](https://www.lunduniversity.lu.se/lucat/group/v1001367).

### II. NaN's

When looking though the data frame, we see that a lot of missing values exist for the addresses/hospitals.

In [14]:
calls_subset[calls_subset['address'] == 'nan'].sort_values(by='account_name')

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
55,C005688057,HCP,1.0,2020-09-09,2020-09-09 21:25:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,,30.0,0012o00002iERoC,Melanoma
149,C010218246,HCP,1.0,2021-08-24,2021-08-24 14:20:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Voice-only Call,30.0,0012o00002iERoC,Melanoma
148,C009729312,HCP,1.0,2021-06-22,2021-06-22 16:30:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iERoC,Melanoma
86,C009584476,HCP,1.0,2021-02-10,2021-02-10 15:00:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Virtual MS Teams,30.0,0012o00002iERoC,Melanoma
81,C009586775,HCP,1.0,2021-01-28,2021-01-28 16:00:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iERoC,Melanoma
77,C009586858,HCP,1.0,2020-11-23,2020-11-23 14:00:00,Bengt Tholander,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002kTDdr,Melanoma
93,C009587559,HCP,1.0,2021-03-30,2021-03-30 17:00:00,Björn Båtshake,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002kTnIb,Melanoma
68,C009587502,HCP,1.0,2020-10-12,2020-10-12 16:00:00,Björn Båtshake,,,0.0,Person Account,,Voice-only Call,30.0,0012o00002kTnIb,Melanoma
74,C009587636,HCP,1.0,2020-10-19,2020-10-19 11:00:00,Björn Sigurdsson,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iEYtq,Melanoma
102,C009587720,HCP,1.0,2021-04-27,2021-04-27 15:00:00,Eva Marie Erfurth,,,0.0,Person Account,,Voice-only Call,15.0,0012o00002iEPpz,Melanoma


Let us take a closer look at these cases.

#### Ana Sequeira De V. Dias Carneiro

In [15]:
calls_subset[calls_subset['account_name'] == 'Ana Sequeira De V. Dias Carneiro']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
55,C005688057,HCP,1.0,2020-09-09,2020-09-09 21:25:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,,30.0,0012o00002iERoC,Melanoma
81,C009586775,HCP,1.0,2021-01-28,2021-01-28 16:00:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iERoC,Melanoma
86,C009584476,HCP,1.0,2021-02-10,2021-02-10 15:00:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Virtual MS Teams,30.0,0012o00002iERoC,Melanoma
148,C009729312,HCP,1.0,2021-06-22,2021-06-22 16:30:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iERoC,Melanoma
149,C010218246,HCP,1.0,2021-08-24,2021-08-24 14:20:00,Ana Sequeira De V. Dias Carneiro,,,0.0,Person Account,,Voice-only Call,30.0,0012o00002iERoC,Melanoma


Ana Sequeira De V. Dias Carneiro works at Lund Universitet. Look [here](https://portal.research.lu.se/en/persons/ana-carneiro).

#### Bengt Tholander

In [16]:
calls_subset[calls_subset['account_name'] == 'Bengt Tholander']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
77,C009586858,HCP,1.0,2020-11-23,2020-11-23 14:00:00,Bengt Tholander,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002kTDdr,Melanoma


Bengt Tholander works at Akademiska sjukhuset. Look [here](https://katalog.uu.se/profile/?id=N96-5347).

#### Björn Båtshake

In [15]:
calls_subset[calls_subset['account_name'] == 'Björn Båtshake']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
68,C009587502,HCP,1.0,2020-10-12,2020-10-12 16:00:00,Björn Båtshake,,,0.0,Person Account,,Voice-only Call,30.0,0012o00002kTnIb,Melanoma
93,C009587559,HCP,1.0,2021-03-30,2021-03-30 17:00:00,Björn Båtshake,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002kTnIb,Melanoma


Björn Båtshake works at Lund Universitet. Look [here](https://lu.academia.edu/Bj%C3%B6rnB%C3%A5tshake).

#### Björn Sigurdsson (already looked at, but not with respect to NaN)

In [16]:
calls_subset[calls_subset['account_name'] == 'Björn Sigurdsson']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
35,C004371901,HCP,0.0,2020-06-03,2020-06-03 11:01:00,Björn Sigurdsson,"Barn- och ungdomsmedicin, Jan Waldenströms gat...",Jan Waldenströms gata 18,0.0,Person Account,,Phone,30.0,0012o00002iEYtq,Melanoma
45,C005656684,HCP,1.0,2020-08-25,2020-08-25 14:30:00,Björn Sigurdsson,SE-Arbetsplats okänd Sweden,SE-Arbetsplats okänd,0.0,Person Account,,,30.0,0012o00002iEYtq,Melanoma
65,C005889050,HCP,1.0,2020-09-29,2020-09-29 14:00:00,Björn Sigurdsson,SE-Arbetsplats okänd Sweden,SE-Arbetsplats okänd,0.0,Person Account,,Phone,30.0,0012o00002iEYtq,Melanoma
74,C009587636,HCP,1.0,2020-10-19,2020-10-19 11:00:00,Björn Sigurdsson,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iEYtq,Melanoma


In [17]:
calls_subset['address'][35]

'Barn- och ungdomsmedicin, Jan Waldenströms gata 18, MALMÖ 21428'

Björn Sigursson works in Malmö.

#### Eva Marie Erfurth

In [18]:
calls_subset[calls_subset['account_name'] == 'Eva Marie Erfurth']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
102,C009587720,HCP,1.0,2021-04-27,2021-04-27 15:00:00,Eva Marie Erfurth,,,0.0,Person Account,,Voice-only Call,15.0,0012o00002iEPpz,Melanoma


Eva Marie Erfurth works at Lund Universitet. Look [here](https://www.lunduniversity.lu.se/lucat/user/429358f644db831460905685bbd707e1).

#### Hanna Eriksson

In [19]:
calls_subset[calls_subset['account_name'] == 'Hanna Eriksson']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
95,C009537214,HCP,1.0,2021-04-06,2021-04-06 15:29:00,Hanna Eriksson,"Eugeniavägen 3, SOLNA, Stockholms län A-B 1716...",Eugeniavägen 3,0.0,Person Account,Non-promotional discussion,Voice-only Call,30.0,0012o00002iEVhC,Melanoma
97,C009318413,HCP,1.0,2021-04-20,2021-04-20 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Voice-only Call,30.0,0012o00002iELPE,Melanoma
105,C009318244,HCP,1.0,2021-05-05,2021-05-05 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual Other,30.0,0012o00002iELPE,Melanoma
119,C009458860,HCP,0.0,2021-05-25,2021-05-25 15:10:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual MS Teams,,0012o00002iELPE,Melanoma
120,C009485879,HCP,0.0,2021-05-27,2021-05-27 17:00:00,Hanna Eriksson,"Entregatan 7, LUND, Skåne län 22241 Sweden",Entregatan 7,0.0,Person Account,Non-promotional discussion,Virtual MS Teams,30.0,0012o00002iELPE,Melanoma
124,C009541730,HCP,0.0,2021-05-27,2021-05-27 17:00:00,Hanna Eriksson,,Entregatan 7,0.0,Person Account,,Virtual MS Teams,90.0,0012o00002iEVhC,Melanoma
131,C009602973,HCP,0.0,2021-06-02,2021-06-02 09:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Voice-only Call,,0012o00002iELPE,Melanoma
132,C009582426,HCP,1.0,2021-06-04,2021-06-04 16:30:00,Hanna Eriksson,,,0.0,Person Account,,Voice-only Call,10.0,0012o00002iEVhC,Melanoma
136,C009840057,HCP,1.0,2021-06-16,2021-06-16 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual Other,30.0,0012o00002iELPE,Melanoma
147,C009840065,HCP,1.0,2021-06-21,2021-06-21 08:00:00,Hanna Eriksson,"Karolinska Vägen, SOLNA, Stockholms län A-B 17...",Karolinska Vägen,0.0,Person Account,Non-promotional discussion,Virtual Other,30.0,0012o00002iELPE,Melanoma


Hanna Eriksson works at Karolinska Univ Sjh Solna. Look [here](https://www.aminer.cn/profile/hanna-eriksson/560adc8b45ce1e595ff7d2c1).

#### Helena Mörse

In [20]:
calls_subset[calls_subset['account_name'] == 'Helena Mörse']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
57,C005729169,HCP,1.0,2020-09-14,2020-09-14 13:25:00,Helena Mörse,"Lasarettsgatan 48, LUND, Skåne län 22241 Sweden",Lasarettsgatan 48,0.0,Person Account,,Email,30.0,0012o00002iEPCw,Melanoma
98,C009587977,HCP,1.0,2021-04-20,2021-04-20 13:00:00,Helena Mörse,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iEPCw,Melanoma


Helena Mörse works at Lund Universitet. Look [here](https://www.lunduniversity.lu.se/lucat/user/946a897a097432b68336d78e1f42dcb5).

#### Ingrid Rosengren

In [21]:
calls_subset[calls_subset['account_name'] == 'Ingrid Rosengren']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
92,C009588085,HCP,1.0,2021-03-18,2021-03-18 15:30:00,Ingrid Rosengren,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002iERvP,Melanoma


There is no reliable info on the Internet on where Ingrid Rosengren works.

#### Jeffrey Yachnin

In [21]:
calls_subset[calls_subset['account_name'] == 'Jeffrey Yachnin']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
108,C009466569,HCP,1.0,2021-05-17,2021-05-17 10:00:00,Jeffrey Yachnin,,,0.0,Person Account,,Voice-only Call,30.0,0012o00002iEazF,Melanoma


Jeffrey Yachnin works at Karolinska Univ Sjh Solna. Look [here](https://www.researchgate.net/profile/Jeffrey-Yachnin).

#### Johan Malmros

In [22]:
calls_subset[calls_subset['account_name'] == 'Johan Malmros']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
80,C009588246,HCP,1.0,2021-01-13,2021-01-13 11:00:00,Johan Malmros,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002kTCw2,Melanoma


Johan Malmros works at Karolinska Univ Sjh Solna. Look [here](https://www.researchgate.net/profile/Johan-Malmros).

#### Josefin Fernebro

In [23]:
calls_subset[calls_subset['account_name'] == 'Josefin Fernebro']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
100,C009588280,HCP,1.0,2021-04-23,2021-04-23 11:00:00,Josefin Fernebro,,,0.0,Person Account,,Voice-only Call,25.0,0012o00002kT8rS,Melanoma


Josefin Fernebro works at Karolinska Univ Sjh Solna. Look [here](https://www.linkedin.com/in/josefin-fernebro-559264b5/?originalSubdomain=se).

#### Karin Hallen

In [24]:
calls_subset[calls_subset['account_name'] == 'Karin Hallen']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
113,C009466447,HCP,0.0,2021-05-20,2021-05-20 17:00:00,Karin Hallen,,Sjukhusvägen 85,0.0,Person Account,,Virtual MS Teams,90.0,0012o00002kTomM,Melanoma


'Sjukhusvägen 85' tells us that she Karin Hallen works at Akademiska sjukhuset in Uppsala. Look [here](https://www.google.de/search?q=sjukhusv%C3%A4gen+85&sxsrf=APq-WBuV9rhu8CZBdzhet_VC1QrV1zADKQ%3A1645458281521&ei=abMTYrK0H4ng7_UP_9eowA4&ved=0ahUKEwjyjPPikZH2AhUJ8LsIHf8rCugQ4dUDCA4&uact=5&oq=sjukhusv%C3%A4gen+85&gs_lcp=Cgdnd3Mtd2l6EANKBAhBGABKBAhGGABQAFgAYGVoAHAAeACAAQCIAQCSAQCYAQCgAQHAAQE&sclient=gws-wiz).

#### Lars Hjorth

In [25]:
calls_subset[calls_subset['account_name'] == 'Lars Hjorth']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
169,C010445491,HCP,1.0,2021-09-15,2021-09-15 08:30:00,Lars Hjorth,,,0.0,Person Account,,Voice-only Call,15.0,0012o00002iEeIV,Melanoma


Lars Hjorth works at Lund Universitet. Look [here](https://portal.research.lu.se/en/persons/lars-hjorth).

#### Lars Larsson

In [26]:
calls_subset[calls_subset['account_name'] == 'Lars Larsson']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
121,C009485880,HCP,0.0,2021-05-27,2021-05-27 17:00:00,Lars Larsson,"Entregatan 7, LUND, Skåne län 22241 Sweden",Entregatan 7,0.0,Person Account,Non-promotional discussion,Virtual MS Teams,30.0,0012o00002iERpF,Melanoma
122,C009537186,HCP,1.0,2021-05-27,2021-05-27 17:00:00,Lars Larsson,"Brämhultsvägen 53, BORÅS, Västra Götalands län...",Brämhultsvägen 53,0.0,Person Account,Non-promotional discussion,Virtual MS Teams,30.0,0012o00002iERpF,Melanoma
123,C009541764,HCP,0.0,2021-05-27,2021-05-27 17:00:00,Lars Larsson,,Entregatan 7,0.0,Person Account,,Virtual MS Teams,90.0,0012o00002iERpF,Melanoma


Lars Larsson works at Karolinska Univ Sjh Solna. Look [here](https://ki.se/en/mtc/lars-gunnar-larsson-group).

#### Magnus Sabel

In [27]:
calls_subset[calls_subset['account_name'] == 'Magnus Sabel']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
56,C005688076,HCP,1.0,2020-09-09,2020-09-09 21:27:00,Magnus Sabel,,,0.0,Person Account,,,30.0,0012o00002iEVSK,Melanoma


Magnus Sabel works at Sahlgrenska Univ sjh. Look [here](https://www.gu.se/en/about/find-staff/magnussabel).

#### Per Nyman

In [28]:
calls_subset[calls_subset['account_name'] == 'Per Nyman']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
89,C009588533,HCP,1.0,2021-02-22,2021-02-22 09:30:00,Per Nyman,,,0.0,Person Account,,Voice-only Call,15.0,0012o00002iEaA9,Melanoma
90,C009588668,HCP,1.0,2021-03-08,2021-03-08 08:39:00,Per Nyman,,,0.0,Person Account,,Voice-only Call,15.0,0012o00002iEaA9,Melanoma


There is no reliable info on the Internet on where Per Nyman works.

#### Petter Svenberg

In [29]:
calls_subset[calls_subset['account_name'] == 'Petter Svenberg']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
153,C010298252,HCP,1.0,2021-09-02,2021-09-02 10:00:00,Petter Svenberg,,,0.0,Person Account,,Voice-only Call,10.0,0012o00002iEcMf,Melanoma


Petter Svenberg works at Karolinska Univ Sjh Solna. Look [here](https://www.researchgate.net/profile/Petter-Svenberg).

#### Tatiana von Bahr Greenwood

In [30]:
calls_subset[calls_subset['account_name'] == 'Tatiana von Bahr Greenwood']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
79,C009592406,HCP,1.0,2020-12-18,2020-12-18 16:00:00,Tatiana von Bahr Greenwood,,,0.0,Person Account,,Voice-only Call,20.0,0012o00002kTCh9,Melanoma


Tatiana von Bahr Greenwood works at Karolinska Univ Sjh Solna. Look [here](https://www.medifind.com/doctors/tatiana-von-bahr-greenwood/312644782).

### III. Check of Hospitals

Take a look at the different hospitals given in `address`.

In [22]:
calls_subset['address'].unique().tolist()

['Onkologikliniken, Centrallasarettet 1, VÄSTERÅS 72335',
 'Onkologiska kliniken, Södra Grev Rosengatan, ÖREBRO 70362',
 'Medicin- och onkologkliniken, Brämhultsvägen 53, BORÅS 50455',
 'Kirurgkliniken, Lasarettsvägen, KARLSKRONA 37141',
 'Cancercentrum, Akutvägen, UMEÅ 90737',
 'Södra Älvsborgs Sjukhus Borås, Brämhultsvägen 53, BORÅS 50455',
 'Onkologkliniken, Sjukhusvägen 85, UPPSALA 75185',
 'Barn- och ungdomsmedicin, Jan Waldenströms gata 18, MALMÖ 21428',
 'Barn- och ungdomskliniken, Lasarettsvägen 21, SUNDSVALL 85643',
 'Barn- och ungdomskliniken, Lasarettsvägen 1, KALMAR 39244',
 'KFUE, Akademiska sjukhuset Ing 78 5 tr, UPPSALA 75185',
 'Apoteket Berguven, Lasarettsvägen, HALMSTAD 30233',
 'Barncancerenheten Q6:04, Eugeniavägen 23, SOLNA 17164',
 'SE-Arbetsplats okänd Sweden',
 'Akademiska sjukhuset Ing 78 5 tr, UPPSALA, Landstinget i Uppsala län 75185 Sweden',
 'Sjukhusbacken 10, STOCKHOLM, Stockholms läns landsting 11861 Sweden',
 'nan',
 'Lasarettsgatan 48, LUND, Skåne län 22

Looking at the addresses/hospitals, there are several things that come to attention:

* There are unknown hospitals: 'Hantverkargatan 25 B, STOCKHOLM, Stockholms län A-B 11221 Sweden', J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29133 Sweden', 'Hietalahdenkatu 2-4, Vaasa, Vaasa 65130 Finland'. (even a Finnish hospital is given)
* Some hospitals occur several times, but with different addresses. For instance, the hospital in Borås goes by the names of Medicin- och onkologkliniken, Brämhultsvägen 53, BORÅS 50455',  'Södra Älvsborgs Sjukhus Borås, Brämhultsvägen 53, BORÅS 50455' and  'Brämhultsvägen 53, BORÅS, Västra Götalands län 50455 Sweden'. 
* Remark: The 'Apoteket Berguven, Lasarettsvägen, HALMSTAD 30233' refers to Hallands sjukhus Halmstad.

Let us now take a look at these cases.

In [23]:
calls_subset[calls_subset['address'] == 'Hantverkargatan 25 B, STOCKHOLM, Stockholms län A-B 11221 Sweden']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
78,C007811437,HCP,0.0,2020-12-02,2020-12-02 11:00:00,Marit Jenset,"Hantverkargatan 25 B, STOCKHOLM, Stockholms lä...",Hantverkargatan 25 B,0.0,Person Account,,Virtual MS Teams,60.0,0012o00002iEXzz,BC
85,C008324018,HCP,0.0,2021-02-09,2021-02-09 10:00:00,Marit Jenset,"Hantverkargatan 25 B, STOCKHOLM, Stockholms lä...",Hantverkargatan 25 B,0.0,Person Account,,Virtual MS Teams,50.0,0012o00002iEXzz,BC
94,C009187916,HCP,0.0,2021-03-31,2021-03-31 15:08:00,Marit Jenset,"Hantverkargatan 25 B, STOCKHOLM, Stockholms lä...",Hantverkargatan 25 B,0.0,Person Account,,Email,30.0,0012o00002iEXzz,BC
96,C009187924,HCP,0.0,2021-04-18,2021-04-18 12:10:00,Marit Jenset,"Hantverkargatan 25 B, STOCKHOLM, Stockholms lä...",Hantverkargatan 25 B,0.0,Person Account,,Email,30.0,0012o00002iEXzz,BC
112,C009388980,HCP,0.0,2021-05-19,2021-05-19 15:00:00,Marit Jenset,"Hantverkargatan 25 B, STOCKHOLM, Stockholms lä...",Hantverkargatan 25 B,0.0,Person Account,,Virtual MS Teams,30.0,0012o00002iEXzz,BC
133,C009652889,HCP,0.0,2021-06-14,2021-06-14 15:00:00,Marit Jenset,"Hantverkargatan 25 B, STOCKHOLM, Stockholms lä...",Hantverkargatan 25 B,0.0,Person Account,,Virtual Other,30.0,0012o00002iEXzz,BC


Marit Jenset is not an HCP. Look [here](https://www.linkedin.com/in/marit-jenset-65570212/?originalSubdomain=se).

In [24]:
calls_subset[calls_subset['address'] == 'J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29133 Sweden']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
75,C007594421,HCP,0.0,2020-11-12,2020-11-12 15:00:00,Lena Axelsson,"J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29...",J A Hedlunds Väg 5,0.0,Person Account,Promotional discussion,Virtual Veeva Engage,,0012o00002kTBuS,BC
76,C007594422,HCP,0.0,2020-11-12,2020-11-12 15:00:00,Muhammad Kadum,"J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29...",J A Hedlunds Väg 5,0.0,Person Account,Promotional discussion,Virtual Veeva Engage,,0012o00002kTnsd,BC


This address belongs to Centralsjukhuset Kristianstad. 

In [25]:
calls_subset[calls_subset['account_name'] == 'Lena Axelsson']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
75,C007594421,HCP,0.0,2020-11-12,2020-11-12 15:00:00,Lena Axelsson,"J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29...",J A Hedlunds Väg 5,0.0,Person Account,Promotional discussion,Virtual Veeva Engage,,0012o00002kTBuS,BC


Lena Axelsson works at Lund university. Look [here](https://portal.research.lu.se/en/persons/lena-axelsson).

In [26]:
calls_subset[calls_subset['account_name'] == 'Muhammad Kadum']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
76,C007594422,HCP,0.0,2020-11-12,2020-11-12 15:00:00,Muhammad Kadum,"J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29...",J A Hedlunds Väg 5,0.0,Person Account,Promotional discussion,Virtual Veeva Engage,,0012o00002kTnsd,BC
115,C009443401,HCP,1.0,2021-05-21,2021-05-21 08:45:00,Muhammad Kadum,"Entregatan 7, LUND, Skåne län 22241 Sweden",Entregatan 7,0.0,Person Account,Promotional discussion,Virtual Other,30.0,0012o00002kTnsd,BC


Muhammad Kadum works at Lund university. Look [here](https://www.lunduniversity.lu.se/lucat/group/v1001367).

In [27]:
calls_subset[calls_subset['address'] == 'Hietalahdenkatu 2-4, Vaasa, Vaasa 65130 Finland']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
160,C010353070,HCP,0.0,2021-09-08,2021-09-08 11:00:00,Jonathan Lind,"Hietalahdenkatu 2-4, Vaasa, Vaasa 65130 Finland",Hietalahdenkatu 2-4,0.0,Person Account,Promotional discussion,Virtual MS Teams,30.0,0012o00002iEYBQ,BC


In [28]:
calls_subset[calls_subset['account_name'] == 'Jonathan Lind']

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
160,C010353070,HCP,0.0,2021-09-08,2021-09-08 11:00:00,Jonathan Lind,"Hietalahdenkatu 2-4, Vaasa, Vaasa 65130 Finland",Hietalahdenkatu 2-4,0.0,Person Account,Promotional discussion,Virtual MS Teams,30.0,0012o00002iEYBQ,BC


In our project, we are only looking at Sweden. So, Jonathan Lind and the corresponding Finnish hospital are removed.
In addition, there is no reliable info on the internet on where Jonathan Lind is working (as BC HCP in Sweden or Finland).

In order to make sure that two different addresses which refer to the same hospital are identical, we create `address_mapping_dict`, a dictionary that assigns the same address to addresses which refer to the same hospital. These addresses are the same also used above for the targets data frame.

In [29]:
address_mapping_dict = {
    'Onkologikliniken, Centrallasarettet 1, VÄSTERÅS 72335': 'Sigtunagatan, 721 89 Västerås, Schweden',
    'Onkologiska kliniken, Södra Grev Rosengatan, ÖREBRO 70362': 'Södra Grev Rosengatan, 701 85 Örebro, Schweden',
    'Medicin- och onkologkliniken, Brämhultsvägen 53, BORÅS 50455': 'Brämhultsvägen 53, 501 82 Borås, Schweden',
    'Kirurgkliniken, Lasarettsvägen, KARLSKRONA 37141': 'Lasarettsvägen, 371 41 Karlskrona, Schweden',
    'Cancercentrum, Akutvägen, UMEÅ 90737': 'Daniel Naezéns väg, 907 37 Umeå, Schweden',
    'Södra Älvsborgs Sjukhus Borås, Brämhultsvägen 53, BORÅS 50455': 'Brämhultsvägen 53, 501 82 Borås, Schweden',
    'Onkologkliniken, Sjukhusvägen 85, UPPSALA 75185': 'Sjukhusvägen, 751 85 Uppsala, Schweden',
    'Barn- och ungdomsmedicin, Jan Waldenströms gata 18, MALMÖ 21428': 'Carl-Bertil Laurells gata 9, 214 28 Malmö, Schweden',
    'Barn- och ungdomskliniken, Lasarettsvägen 21, SUNDSVALL 85643': 'Lasarettsvägen 21, 856 43 Sundsvall, Schweden',
    'Barn- och ungdomskliniken, Lasarettsvägen 1, KALMAR 39244': 'Lasarettsvägen 8, 392 44 Kalmar, Schweden',
    'KFUE, Akademiska sjukhuset Ing 78 5 tr, UPPSALA 75185': 'Sjukhusvägen, 751 85 Uppsala, Schweden',
    'Apoteket Berguven, Lasarettsvägen, HALMSTAD 30233': 'Lasarettsvägen, 302 33 Halmstad, Schweden',
    'Barncancerenheten Q6:04, Eugeniavägen 23, SOLNA 17164': 'Eugeniavägen 3, 171 64 Solna, Schweden',
    'Akademiska sjukhuset Ing 78 5 tr, UPPSALA, Landstinget i Uppsala län 75185 Sweden': 'Sjukhusvägen, 751 85 Uppsala, Schweden',
    'Sjukhusbacken 10, STOCKHOLM, Stockholms läns landsting 11861 Sweden': 'Sjukhusbacken 10, 118 83 Stockholm, Schweden',
    #'nan',
    'Lasarettsgatan 48, LUND, Skåne län 22241 Sweden': 'Entrégatan 7, 222 42 Lund, Schweden',
    'S:t Göransgatan 5, VISBY, Gotlands kommun 62155 Sweden': 'S:t Göransgatan 5, 621 55 Visby, Schweden',
    'Eugeniavägen 23, SOLNA, Stockholms läns landsting 17164 Sweden': 'Eugeniavägen 3, 171 64 Solna, Schweden',
    'Sankt Göransplan 1, STOCKHOLM, Stockholms läns landsting 11281': 'Sankt Göransplan 1, 112 19 Stockholm, Schweden',
    'Rosenborgsgatan 2, KARLSTAD, Landstinget i Värmland 65230 Sweden': 'Rosenborgsgatan 9, 652 30 Karlstad, Schweden',
    'J A Hedlunds Väg 5, KRISTIANSTAD, Skåne län 29133 Sweden':  'Lund, Schweden', 
    'Mellbygatan 11-13, LIDKÖPING, Västra Götalands län 53151 Sweden': 'Mellbygatan 11, 531 51 Lidköping, Schweden',
    'Kungsvägen, ESKILSTUNA, Södermanlands län D 63340 Sweden': 'Kungsvägen 42, 633 49 Eskilstuna, Schweden',
    'Blå Stråket 6, GÖTEBORG, Västra Götalands län 41346 Sweden': 'Blå stråket 5, 413 45 Göteborg, Schweden',
    'Eugeniavägen 3, SOLNA, Stockholms län A-B 17164 Sweden': 'Eugeniavägen 3, 171 64 Solna, Schweden',
    'Karolinska Vägen, SOLNA, Stockholms län A-B 17165 Sweden': 'Eugeniavägen 3, 171 64 Solna, Schweden',
    'Rosenborgsgatan 2, KARLSTAD, Värmlands län S 65230 Sweden': 'Rosenborgsgatan 9, 652 30 Karlstad, Schweden',
    'Akutvägen, UMEÅ, Västerbottens län AC 90737 Sweden': 'Daniel Naezéns väg, 907 37 Umeå, Schweden',
    'Entregatan 7, LUND, Skåne län 22241 Sweden': 'Entrégatan 7, 222 42 Lund, Schweden',
    'Lasarettsvägen, KARLSKRONA, Blekinge län K 37141 Sweden': 'Lasarettsvägen, 371 41 Karlskrona, Schweden',
    'Charlotte Yhlens gata 10, HELSINGBORG, Skåne län 25223 Sweden': 'Charlotte Yhlens gata 10, 252 23 Helsingborg, Schweden',
    'Strandvägen 8, VÄXJÖ, Kronobergs län G 35234 Sweden': 'Strandvägen 8, 352 34 Växjö, Schweden',
    'Blå stråket 6, GÖTEBORG, Västra Götalands län 41346 Sweden': 'Blå stråket 5, 413 45 Göteborg, Schweden',
    'Brämhultsvägen 53, BORÅS, Västra Götalands län 50455 Sweden': 'Brämhultsvägen 53, 501 82 Borås, Schweden',
    'Lövängsvägen, SKÖVDE, Västra Götalands län 54142 Sweden': 'Lövängsvägen, 541 42 Skövde, Schweden',
    'Valhallavägen 91, STOCKHOLM, Stockholms län A-B 11428 Sweden': 'Valhallavägen 91, 114 86 Stockholm, Schweden',
    'Lasarettsvägen 1, KALMAR, Kalmar län H 39244 Sweden': 'Lasarettsvägen 8, 392 44 Kalmar, Schweden',
    'Centrallasarettet 1, VÄSTERÅS, Västmanlands län U 72335 Sweden': 'Sigtunagatan, 721 89 Västerås, Schweden',
    'Lasarettsvägen 21, SUNDSVALL, Västernorrlands län Y 85643 Sweden': 'Lasarettsvägen 21, 856 43 Sundsvall, Schweden',
    'Garnisonsvägen 10, LINKÖPING, Östergötlands län E 58750 Sweden': 'Universitetssjukhuset, 581 85 Linköping, Schweden'}

We now assign the new addresses to the old addresses.

In [30]:
calls_subset['address'] = calls_subset['address'].apply(lambda x: address_mapping_dict.get(x))

In addition, we do the following steps:
* correct multiple assigments
* plug in information for NaN's in `address` and delete HCPs for whom no information could be found
* correct unknown hospitals (delete Marit Jenset, correct hospital of Lena Axelsson, delete Jonathan Lind)

In [31]:
# correct multiple assigments
calls_subset.loc[calls_subset['account_name'] == 'Björn Sigurdsson', ['address']] = 'Carl-Bertil Laurells gata 9, 214 28 Malmö, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Hanna Eriksson', ['address']] = 'Eugeniavägen 3, 171 64 Solna, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Muhammad Kadum', ['address']] = 'Lund, Schweden'

# plug in information for nan's
calls_subset.loc[calls_subset['account_name'] == 'Ana Sequeira De V. Dias Carneiro', ['address']] = 'Lund, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Bengt Tholander', ['address']] = 'Sjukhusvägen, 751 85 Uppsala, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Björn Båtshake', ['address']] = 'Lund, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Eva Marie Erfurth', ['address']] = 'Lund, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Helena Mörse', ['address']] = 'Lund, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Jeffrey Yachnin', ['address']] = 'Eugeniavägen 3, 171 64 Solna, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Johan Malmros', ['address']] = 'Eugeniavägen 3, 171 64 Solna, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Josefin Fernebro', ['address']] = 'Eugeniavägen 3, 171 64 Solna, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Karin Hallen', ['address']] = 'Sjukhusvägen, 751 85 Uppsala, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Lars Hjorth', ['address']] = 'Lund, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Lars Larsson', ['address']] = 'Eugeniavägen 3, 171 64 Solna, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Magnus Sabel', ['address']] = 'Blå stråket 5, 413 45 Göteborg, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Petter Svenberg', ['address']] = 'Eugeniavägen 3, 171 64 Solna, Schweden'
calls_subset.loc[calls_subset['account_name'] == 'Tatiana von Bahr Greenwood', ['address']] = 'Eugeniavägen 3, 171 64 Solna, Schweden'
# delete 'Ingrid Rosengren'
calls_subset = calls_subset[calls_subset['account_name'] != 'Ingrid Rosengren']
# delete 'Per Nyman'
calls_subset = calls_subset[calls_subset['account_name'] != 'Per Nyman']

# correct unknown hospitals
# delete Marit Jenset
calls_subset = calls_subset[calls_subset['account_name'] != 'Marit Jenset']
# correct hospital for Lena Axelsson
calls_subset.loc[calls_subset['account_name'] == 'Lena Axelsson', ['address']] = 'Lund, Schweden'
# delete Jonathan Lind
calls_subset = calls_subset[calls_subset['account_name'] != 'Jonathan Lind']

calls_subset

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
0,C004360771,HCP,0.0,2020-01-16,2020-01-16 21:17:00,David Sterner,"Sigtunagatan, 721 89 Västerås, Schweden",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTsAl,Melanoma
1,C004360773,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Lars Sandberg,"Sigtunagatan, 721 89 Västerås, Schweden",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVS2,Melanoma
2,C004360776,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Cecilia Kamali,"Sigtunagatan, 721 89 Västerås, Schweden",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iDoJT,Melanoma
3,C004360777,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Åsa Jonforsen,"Sigtunagatan, 721 89 Västerås, Schweden",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEWSI,Melanoma
4,C004360780,HCP,0.0,2020-01-16,2020-01-16 21:17:00,David Goldstein,"Sigtunagatan, 721 89 Västerås, Schweden",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTlW5,Melanoma
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,C010427094,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Pernilla Nilsson,"Universitetssjukhuset, 581 85 Linköping, Schweden",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002kTp20,Melanoma
175,C010427095,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Sandra Sjöstrand,"Universitetssjukhuset, 581 85 Linköping, Schweden",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEdyx,Melanoma
176,C010427096,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Pia Törnblom,"Universitetssjukhuset, 581 85 Linköping, Schweden",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iDrMc,Melanoma
177,C010427097,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Karolina Vernmark,"Universitetssjukhuset, 581 85 Linköping, Schweden",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEQjh,Melanoma


The relevant columns to get the number of hospitals and HCPs in the BC and Melanoma regions are `account_name`, `address`, `indication`. We therefore select these columns now.

In [32]:
calls_subset = calls_subset.filter(items=['account_name', 'address', 'indication'])
calls_subset

Unnamed: 0,account_name,address,indication
0,David Sterner,"Sigtunagatan, 721 89 Västerås, Schweden",Melanoma
1,Lars Sandberg,"Sigtunagatan, 721 89 Västerås, Schweden",Melanoma
2,Cecilia Kamali,"Sigtunagatan, 721 89 Västerås, Schweden",Melanoma
3,Åsa Jonforsen,"Sigtunagatan, 721 89 Västerås, Schweden",Melanoma
4,David Goldstein,"Sigtunagatan, 721 89 Västerås, Schweden",Melanoma
...,...,...,...
174,Pernilla Nilsson,"Universitetssjukhuset, 581 85 Linköping, Schweden",Melanoma
175,Sandra Sjöstrand,"Universitetssjukhuset, 581 85 Linköping, Schweden",Melanoma
176,Pia Törnblom,"Universitetssjukhuset, 581 85 Linköping, Schweden",Melanoma
177,Karolina Vernmark,"Universitetssjukhuset, 581 85 Linköping, Schweden",Melanoma


The data frame `calls_subset` is now prepared to be used for the HCP and hospital counts.

## 3. Calls: Count of calls for all the BC and Melanoma regions

In [33]:
targets_list = targets['account_name'].unique().tolist()

calls_to_targets = calls[calls['account_name'].isin(targets_list)].reset_index(drop=True)
calls_to_targets

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
0,C004360772,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Michael Sihver,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVSM,Melanoma
1,C004360774,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Cecilia Nilsson,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEZpE,Melanoma
2,C004360775,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Zuzana Lovasová,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEYvH,Melanoma
3,C004360778,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Paul Holmer,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTA44,Melanoma
4,C004360779,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Helena Granstam Björneklett,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEeBl,Melanoma
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,C010414398,HCP,0.0,2021-09-14,2021-09-14 12:00:00,Elin Jänes,"Lasarettsvägen 21, SUNDSVALL, Västernorrlands ...",Lasarettsvägen 21,0.0,Person Account,Promotional discussion,Virtual MS Teams,30.0,0012o00002iEVqj,Melanoma
245,C010432790,HCP,0.0,2021-09-14,2021-09-14 12:00:00,Anna-Karin Wennstig,"Lasarettsvägen 21, SUNDSVALL, Västernorrlands ...",Lasarettsvägen 21,0.0,Person Account,Promotional discussion,Virtual MS Teams,60.0,0012o00002kTCww,Melanoma
246,C010432734,HCP,0.0,2021-09-14,2021-09-14 12:00:00,Petra Flygare,"Lasarettsvägen 21, SUNDSVALL, Västernorrlands ...",Lasarettsvägen 21,0.0,Person Account,Promotional discussion,Virtual MS Teams,60.0,0012o00002iEU5t,Melanoma
247,C010432747,HCP,0.0,2021-09-14,2021-09-14 12:00:00,Elin Jänes,"Lasarettsvägen 21, SUNDSVALL, Västernorrlands ...",Lasarettsvägen 21,0.0,Person Account,Promotional discussion,Virtual MS Teams,60.0,0012o00002iEVqj,Melanoma


In [34]:
# take all calls at the moment, can later filter for 'promotional discussion' 
calls_to_targets = calls_to_targets[calls_to_targets['call_focus'] == 'Promotional discussion'].reset_index(drop=True)
# filter out all calls after 2021-08-31
calls_to_targets = calls_to_targets[calls_to_targets['date'] < "2021-09-01"]
calls_to_targets

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
0,C004360772,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Michael Sihver,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVSM,Melanoma
1,C004360774,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Cecilia Nilsson,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEZpE,Melanoma
2,C004360775,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Zuzana Lovasová,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEYvH,Melanoma
3,C004360778,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Paul Holmer,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTA44,Melanoma
4,C004360779,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Helena Granstam Björneklett,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEeBl,Melanoma
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,C010242106,HCP,0.0,2021-08-27,2021-08-27 10:00:00,Maria Ekholm,"Sjukhusgatan, JÖNKÖPING, Jönköpings län F 5518...",Sjukhusgatan,0.0,Person Account,Promotional discussion,Virtual Other,10.0,0012o00002iEezE,BC
113,C010242107,HCP,0.0,2021-08-27,2021-08-27 10:00:00,Christine Lundgren,"Sjukhusgatan, JÖNKÖPING, Jönköpings län F 5518...",Sjukhusgatan,0.0,Person Account,Promotional discussion,Virtual Other,10.0,0012o00002kT9YL,BC
114,C010242108,HCP,0.0,2021-08-27,2021-08-27 10:00:00,Ida Spång Rosén,"Sjukhusgatan, JÖNKÖPING, Jönköpings län F 5518...",Sjukhusgatan,0.0,Person Account,Promotional discussion,Virtual Other,10.0,0012o00002kTEOj,BC
115,C010242038,HCP,1.0,2021-08-27,2021-08-27 10:09:00,Henrik Lindman,"Sjukhusvägen 85, UPPSALA, Uppsala län C 75185 ...",Sjukhusvägen 85,0.0,Person Account,Promotional discussion,Voice-only Call,30.0,0012o00002kTAh5,BC


In [35]:
calls_to_targets = calls_to_targets.filter(items=['date', 'account_name', 'address', 'call_focus', 'indication'])
calls_to_targets

Unnamed: 0,date,account_name,address,call_focus,indication
0,2020-01-16,Michael Sihver,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Promotional discussion,Melanoma
1,2020-01-16,Cecilia Nilsson,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Promotional discussion,Melanoma
2,2020-01-16,Zuzana Lovasová,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Promotional discussion,Melanoma
3,2020-01-16,Paul Holmer,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Promotional discussion,Melanoma
4,2020-01-16,Helena Granstam Björneklett,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Promotional discussion,Melanoma
...,...,...,...,...,...
112,2021-08-27,Maria Ekholm,"Sjukhusgatan, JÖNKÖPING, Jönköpings län F 5518...",Promotional discussion,BC
113,2021-08-27,Christine Lundgren,"Sjukhusgatan, JÖNKÖPING, Jönköpings län F 5518...",Promotional discussion,BC
114,2021-08-27,Ida Spång Rosén,"Sjukhusgatan, JÖNKÖPING, Jönköpings län F 5518...",Promotional discussion,BC
115,2021-08-27,Henrik Lindman,"Sjukhusvägen 85, UPPSALA, Uppsala län C 75185 ...",Promotional discussion,BC


In [36]:
# We already know the targets' addresses from targets_subsets
# create a dictionary from account_name and address
address_dict_targets = dict(zip(targets_subset.account_name, targets_subset.address))
address_dict_targets

{'Adel Bader Hamdalla': 'Brämhultsvägen 53, 501 82 Borås, Schweden',
 'Aglaia Schiza': 'Sjukhusvägen, 751 85 Uppsala, Schweden',
 'Agneta Nordin Danfors': 'Drottninggatan 68, 111 21 Stockholm, Schweden',
 'Ahmed Abbas Albu-Kareem': 'Universitetssjukhuset, 581 85 Linköping, Schweden',
 'Alaa Haidar': 'Lasarettsvägen, 302 33 Halmstad, Schweden',
 'Ana Bosch Campos': 'Lund, Schweden',
 'Andreas Nearchou': 'Kungsvägen 42, 633 49 Eskilstuna, Schweden',
 'Ulrika Bergqvist': 'Lasarettsvägen 29, 931 41 Skellefteå, Schweden',
 'Ann Charlotte Dreifaldt': 'Södra Grev Rosengatan, 701 85 Örebro, Schweden',
 'Elisabeth Ryd Ausén': 'Sankt Göransplan 1, 112 19 Stockholm, Schweden',
 'Marie Santonsson': 'Lasarettsvägen, 371 41 Karlskrona, Schweden',
 'Anna Nordenskjöld': 'Brämhultsvägen 53, 501 82 Borås, Schweden',
 'Anna von Wachenfeldt Väppling': 'Sjukhusbacken 10, 118 83 Stockholm, Schweden',
 'Anna-Karin Tzikas': 'Fjällvägen 9, 451 53 Uddevalla, Schweden',
 'Anna-Karin Wennstig': 'Lasarettsvägen 21

In [37]:
calls_to_targets['address'] = calls_to_targets['account_name'].apply(lambda x: address_dict_targets.get(x))
calls_to_targets

Unnamed: 0,date,account_name,address,call_focus,indication
0,2020-01-16,Michael Sihver,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma
1,2020-01-16,Cecilia Nilsson,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma
2,2020-01-16,Zuzana Lovasová,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma
3,2020-01-16,Paul Holmer,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma
4,2020-01-16,Helena Granstam Björneklett,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma
...,...,...,...,...,...
112,2021-08-27,Maria Ekholm,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC
113,2021-08-27,Christine Lundgren,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC
114,2021-08-27,Ida Spång Rosén,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC
115,2021-08-27,Henrik Lindman,"Sjukhusvägen, 751 85 Uppsala, Schweden",Promotional discussion,BC


We create a new column `locality` that contains the city an HCP belongs to.

In [38]:
for i in range(len(calls_to_targets)):
    calls_to_targets.at[i, 'locality'] = calls_to_targets.at[i, 'address'].split(', ')[-2].split(' ')[-1]
    
calls_to_targets

Unnamed: 0,date,account_name,address,call_focus,indication,locality
0,2020-01-16,Michael Sihver,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås
1,2020-01-16,Cecilia Nilsson,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås
2,2020-01-16,Zuzana Lovasová,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås
3,2020-01-16,Paul Holmer,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås
4,2020-01-16,Helena Granstam Björneklett,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås
...,...,...,...,...,...,...
112,2021-08-27,Maria Ekholm,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping
113,2021-08-27,Christine Lundgren,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping
114,2021-08-27,Ida Spång Rosén,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping
115,2021-08-27,Henrik Lindman,"Sjukhusvägen, 751 85 Uppsala, Schweden",Promotional discussion,BC,Uppsala


In order to add `municipality` - which unfortunately is not always identical to the brick - we merge `calls_to_targets` with `svenska_stader`.

In [39]:
# Merge with 'svenska_stader'
calls_to_targets = pd.merge(
    calls_to_targets, 
    svenska_stader, 
    on = 'locality',  
    how = 'left'
)
calls_to_targets

Unnamed: 0,date,account_name,address,call_focus,indication,locality,municipality
0,2020-01-16,Michael Sihver,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
1,2020-01-16,Cecilia Nilsson,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
2,2020-01-16,Zuzana Lovasová,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
3,2020-01-16,Paul Holmer,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
4,2020-01-16,Helena Granstam Björneklett,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
...,...,...,...,...,...,...,...
112,2021-08-27,Maria Ekholm,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
113,2021-08-27,Christine Lundgren,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
114,2021-08-27,Ida Spång Rosén,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
115,2021-08-27,Henrik Lindman,"Sjukhusvägen, 751 85 Uppsala, Schweden",Promotional discussion,BC,Uppsala,Uppsala


When looking at the values of `municipality` in `calls_to_targets` and comparing it to the values of `brick` in `mapping`, some things are striking:

* Solna (a suburb of Stockholm) has no mapped municipality
* It turns out that Solna is part of Solna Municipality 
* The city of Solna is part of the Stockholm urban area


* The locality of Visby is assigned to the municipality of Gotland but in mapping, Visby is a brick of its own. Therefore, change the municipality of Visby from Gotland to Visby.
* The locality of Göteborg is assigned to the municipality of Göteborg but in mapping, Göteborg is split into several bricks. Therefore, change the municipality of Göteborg from Göteborg to e.g. 'Göteborg-Centrum + V'.
* The locality of Kalmar is assigned to the municipality of Kalmar but in mapping, Kalmar appears as part of Kalmar/Nybro. Therefore, change the municipality of Kalmar from Kalmar to Kalmar/Nybro.
* The locality of Solna is not present in svenska_stader. As Solna is part of the Stockholm urban area, assign the locality Solna to e.g the municipality of 'Stockholm-NO'.
* The locality of Stockholm is assigned to the municipality of Stockholm but in mapping, Stockholm is split into several bricks. Therefore, change the municipality of Stockholm from Stockholm to e.g. 'Stockholm-NO'.
* The locality of Lidköping is assigned to the municipality of Lidköping but in mapping, Lidköping appears as part of Lidköping/Skara. Therefore, change the municipality of Lidköping from Lidköping to Lidköping/Skara.
* The locality of Helsingborg is assigned to the municipality of Helsingborg but in mapping, Helsingborg appears as part of Helsingborg/Landskrona. Therefore, change the municipality of Helsingborg from Helsingborg to Helsingborg/Landskrona.

In [40]:
for i in range(len(calls_to_targets)):
    if calls_to_targets.at[i, 'locality'] == 'Solna': #['municipality'] = 'Solna'
        calls_to_targets.at[i, 'municipality'] = 'Stockholm-NO' # instead of 'Solna'
calls_to_targets['municipality'] = calls_to_targets['municipality'].replace('Gotland', 'Visby')
calls_to_targets['municipality'] = calls_to_targets['municipality'].replace('Göteborg', 'Göteborg-Centrum + V')
calls_to_targets['municipality'] = calls_to_targets['municipality'].replace('Kalmar', 'Kalmar/Nybro')
calls_to_targets['municipality'] = calls_to_targets['municipality'].replace('Stockholm', 'Stockholm-NO')
calls_to_targets['municipality'] = calls_to_targets['municipality'].replace('Lidköping', 'Lidköping/Skara')
calls_to_targets['municipality'] = calls_to_targets['municipality'].replace('Helsingborg', 'Helsingborg/Landskrona')

calls_to_targets

Unnamed: 0,date,account_name,address,call_focus,indication,locality,municipality
0,2020-01-16,Michael Sihver,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
1,2020-01-16,Cecilia Nilsson,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
2,2020-01-16,Zuzana Lovasová,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
3,2020-01-16,Paul Holmer,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
4,2020-01-16,Helena Granstam Björneklett,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
...,...,...,...,...,...,...,...
112,2021-08-27,Maria Ekholm,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
113,2021-08-27,Christine Lundgren,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
114,2021-08-27,Ida Spång Rosén,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
115,2021-08-27,Henrik Lindman,"Sjukhusvägen, 751 85 Uppsala, Schweden",Promotional discussion,BC,Uppsala,Uppsala


In [41]:
calls_to_targets['date'] = calls_to_targets['date'].to_numpy().astype('datetime64[M]')
calls_to_targets

Unnamed: 0,date,account_name,address,call_focus,indication,locality,municipality
0,2020-01-01,Michael Sihver,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
1,2020-01-01,Cecilia Nilsson,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
2,2020-01-01,Zuzana Lovasová,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
3,2020-01-01,Paul Holmer,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
4,2020-01-01,Helena Granstam Björneklett,"Sigtunagatan, 721 89 Västerås, Schweden",Promotional discussion,Melanoma,Västerås,Västerås
...,...,...,...,...,...,...,...
112,2021-08-01,Maria Ekholm,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
113,2021-08-01,Christine Lundgren,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
114,2021-08-01,Ida Spång Rosén,"Sjukhusgatan, 553 05 Jönköping, Schweden",Promotional discussion,BC,Jönköping,Jönköping
115,2021-08-01,Henrik Lindman,"Sjukhusvägen, 751 85 Uppsala, Schweden",Promotional discussion,BC,Uppsala,Uppsala


In [42]:
calls_to_targets['date'].unique()

array(['2020-01-01T00:00:00.000000000', '2020-02-01T00:00:00.000000000',
       '2020-03-01T00:00:00.000000000', '2020-05-01T00:00:00.000000000',
       '2020-09-01T00:00:00.000000000', '2020-10-01T00:00:00.000000000',
       '2020-11-01T00:00:00.000000000', '2021-02-01T00:00:00.000000000',
       '2021-03-01T00:00:00.000000000', '2021-04-01T00:00:00.000000000',
       '2021-05-01T00:00:00.000000000', '2021-06-01T00:00:00.000000000',
       '2021-08-01T00:00:00.000000000'], dtype='datetime64[ns]')

In order to be able to merge `mapping` to `calls_to_targets`, we need to ensure that the `brick` variable in `mapping` and the `municipality` variable in `calls_to_targets` have the same format, i.e., the two digits at the start of any entry in `brick` must be removed.

In [43]:
mapping['brick'] = mapping['brick'].astype(str)
for i in range(len(mapping)):
    brick_split_list = mapping['brick'][i].split(' ')[1:]
    mapping.at[i, 'brick'] = ' '.join(brick_split_list)
mapping

Unnamed: 0,brick,sweden_bc,sweden_me
0,Norrtälje,Stockholm,Stockholm ONCO
1,Uppsala,Uppsala,Uppsala ONCO
2,Enköping,Uppsala,Uppsala ONCO
3,Nyköping,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
4,Katrineholm,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
...,...,...,...
73,Kungälv,Västra Götaland-Göteborg,Västra Götaland-Göteborg ONCO
74,Lerum/Alingsås,Västra Götaland-Alingsås,Västra Götaland-SÄS ONCO
75,Malmö,Skåne-Lund,Skåne ONCO
76,Lund,Skåne-Lund,Skåne ONCO


In [44]:
# create mappings table for all dates
counts = mapping.copy()
counts_by_date = pd.DataFrame()
for date in calls_to_targets['date'].unique():
    counts_ex = counts.copy()
    counts_ex['date'] = date
    counts_by_date = pd.concat([counts_by_date, counts_ex], ignore_index=True)
counts_by_date

Unnamed: 0,brick,sweden_bc,sweden_me,date
0,Norrtälje,Stockholm,Stockholm ONCO,2020-01-01
1,Uppsala,Uppsala,Uppsala ONCO,2020-01-01
2,Enköping,Uppsala,Uppsala ONCO,2020-01-01
3,Nyköping,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO,2020-01-01
4,Katrineholm,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO,2020-01-01
...,...,...,...,...
1009,Kungälv,Västra Götaland-Göteborg,Västra Götaland-Göteborg ONCO,2021-08-01
1010,Lerum/Alingsås,Västra Götaland-Alingsås,Västra Götaland-SÄS ONCO,2021-08-01
1011,Malmö,Skåne-Lund,Skåne ONCO,2021-08-01
1012,Lund,Skåne-Lund,Skåne ONCO,2021-08-01


We can now create the call counts for the BC and Melanoma regions using the function `aggregate_by_regions_and_dates()`.

In [45]:
calls_counts = aggregate_by_regions_and_dates(counts_by_date, calls_to_targets, 'address', 'calls')
calls_counts

Unnamed: 0,date,territory,calls
0,2020-01-01,Blekinge,0.0
1,2020-01-01,Dalarna,0.0
2,2020-01-01,Gävleborg-Gävle,0.0
3,2020-01-01,Halland-Halmstad,0.0
4,2020-01-01,Halland-Varberg-Falkenberg,0.0
...,...,...,...
736,2021-08-01,Västmanland-Västerås ONCO,0.0
737,2021-08-01,Västra Götaland-Göteborg ONCO,1.0
738,2021-08-01,Västra Götaland-SÄS ONCO,0.0
739,2021-08-01,Örebro-Örebro ONCO,0.0


In [46]:
calls_counts['calls'].sum()

117.0

In [60]:
# Save the new dataset
    
if not os.path.exists(route0):
    os.mkdir(route0)

print("saving file corresponding to call_counts.pkl")
calls_counts.to_pickle(f"{route0}/call_counts.pkl")
pd.read_pickle(f"{route0}/call_counts.pkl")

saving file corresponding to call_counts.pkl


Unnamed: 0,date,territory,calls
0,2020-01-01,Blekinge,0.0
1,2020-01-01,Dalarna,0.0
2,2020-01-01,Gävleborg-Gävle,0.0
3,2020-01-01,Halland-Halmstad,0.0
4,2020-01-01,Halland-Varberg-Falkenberg,0.0
...,...,...,...
736,2021-08-01,Västmanland-Västerås ONCO,0.0
737,2021-08-01,Västra Götaland-Göteborg ONCO,1.0
738,2021-08-01,Västra Götaland-SÄS ONCO,0.0
739,2021-08-01,Örebro-Örebro ONCO,0.0


## 4. HCP, hospital and calls counts per BC and Melanoma region

In [47]:
targets_subset

Unnamed: 0,account_name,top_account_name,indication,address
0,Adel Bader Hamdalla,Södra Älvsborgs Sjukhus Borås,BC,"Brämhultsvägen 53, 501 82 Borås, Schweden"
1,Aglaia Schiza,Akademiska sjukhuset,BC,"Sjukhusvägen, 751 85 Uppsala, Schweden"
2,Agneta Nordin Danfors,Drottningmottagningen,BC,"Drottninggatan 68, 111 21 Stockholm, Schweden"
3,Ahmed Abbas Albu-Kareem,Universitetssjukhuset Linköping,BC,"Universitetssjukhuset, 581 85 Linköping, Schweden"
4,Alaa Haidar,Hallands sjukhus Halmstad,BC,"Lasarettsvägen, 302 33 Halmstad, Schweden"
...,...,...,...,...
224,Ylva Holmgren Stenlund,Norrlands Universitetssjukhus,Melanoma,"Daniel Naezéns väg, 907 37 Umeå, Schweden"
225,Anna Nyberg,Länssjukhuset Ryhov,BC,"Sjukhusgatan, 553 05 Jönköping, Schweden"
226,Göran Carlstedt,Centrallasarettet Växjö,Melanoma,"Strandvägen 8, 352 34 Växjö, Schweden"
227,Mikael Wallander,Länssjukhuset Ryhov,Melanoma,"Sjukhusgatan, 553 05 Jönköping, Schweden"


We create a new column `locality` that contains the city an HCP belongs to.

In [48]:
# For now, only continue with 'targets_subset'
for i in range(len(targets_subset)):
    targets_subset.at[i, 'locality'] = targets_subset.at[i, 'address'].split(', ')[-2].split(' ')[-1]
    
targets_subset

Unnamed: 0,account_name,top_account_name,indication,address,locality
0,Adel Bader Hamdalla,Södra Älvsborgs Sjukhus Borås,BC,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås
1,Aglaia Schiza,Akademiska sjukhuset,BC,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala
2,Agneta Nordin Danfors,Drottningmottagningen,BC,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm
3,Ahmed Abbas Albu-Kareem,Universitetssjukhuset Linköping,BC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping
4,Alaa Haidar,Hallands sjukhus Halmstad,BC,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad
...,...,...,...,...,...
224,Ylva Holmgren Stenlund,Norrlands Universitetssjukhus,Melanoma,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå
225,Anna Nyberg,Länssjukhuset Ryhov,BC,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping
226,Göran Carlstedt,Centrallasarettet Växjö,Melanoma,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö
227,Mikael Wallander,Länssjukhuset Ryhov,Melanoma,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping


In order to get the corresponding municipality - which unfortunately is not always identical to the brick - we merge `targets_subset` with `svenska_stader`.

In [49]:
# Merge with 'svenska_stader'
targets_subset = pd.merge(
    targets_subset, 
    svenska_stader, 
    on = 'locality',  
    how = 'left'
)
targets_subset

Unnamed: 0,account_name,top_account_name,indication,address,locality,municipality
0,Adel Bader Hamdalla,Södra Älvsborgs Sjukhus Borås,BC,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås
1,Aglaia Schiza,Akademiska sjukhuset,BC,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala
2,Agneta Nordin Danfors,Drottningmottagningen,BC,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm
3,Ahmed Abbas Albu-Kareem,Universitetssjukhuset Linköping,BC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping
4,Alaa Haidar,Hallands sjukhus Halmstad,BC,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad
...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,Norrlands Universitetssjukhus,Melanoma,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå
225,Anna Nyberg,Länssjukhuset Ryhov,BC,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping
226,Göran Carlstedt,Centrallasarettet Växjö,Melanoma,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö
227,Mikael Wallander,Länssjukhuset Ryhov,Melanoma,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping


When looking at the values of `municipality` in `targets_subset` and comparing it to the values of `brick` in `mapping`, some things are striking:

* Solna (a suburb of Stockholm) has no mapped municipality
* It turns out that Solna is part of Solna Municipality 
* The city of Solna is part of the Stockholm urban area


* The locality of Visby is assigned to the municipality of Gotland but in mapping, Visby is a brick of its own. Therefore, change the municipality of Visby from Gotland to Visby.
* The locality of Göteborg is assigned to the municipality of Göteborg but in mapping, Göteborg is split into several bricks. Therefore, change the municipality of Göteborg from Göteborg to e.g. 'Göteborg-Centrum + V'.
* The locality of Kalmar is assigned to the municipality of Kalmar but in mapping, Kalmar appears as part of Kalmar/Nybro. Therefore, change the municipality of Kalmar from Kalmar to Kalmar/Nybro.
* The locality of Solna is not present in svenska_stader. As Solna is part of the Stockholm urban area, assign the locality Solna to e.g the municipality of 'Stockholm-NO'.
* The locality of Stockholm is assigned to the municipality of Stockholm but in mapping, Stockholm is split into several bricks. Therefore, change the municipality of Stockholm from Stockholm to e.g. 'Stockholm-NO'.
* The locality of Lidköping is assigned to the municipality of Lidköping but in mapping, Lidköping appears as part of Lidköping/Skara. Therefore, change the municipality of Lidköping from Lidköping to Lidköping/Skara.
* The locality of Helsingborg is assigned to the municipality of Helsingborg but in mapping, Helsingborg appears as part of Helsingborg/Landskrona. Therefore, change the municipality of Helsingborg from Helsingborg to Helsingborg/Landskrona.

In [50]:
for i in range(len(targets_subset)):
    if targets_subset.at[i, 'locality'] == 'Solna': #['municipality'] = 'Solna'
        targets_subset.at[i, 'municipality'] = 'Stockholm-NO' # instead of 'Solna'
targets_subset['municipality'] = targets_subset['municipality'].replace('Gotland', 'Visby')
targets_subset['municipality'] = targets_subset['municipality'].replace('Göteborg', 'Göteborg-Centrum + V')
targets_subset['municipality'] = targets_subset['municipality'].replace('Kalmar', 'Kalmar/Nybro')
targets_subset['municipality'] = targets_subset['municipality'].replace('Stockholm', 'Stockholm-NO')
targets_subset['municipality'] = targets_subset['municipality'].replace('Lidköping', 'Lidköping/Skara')
targets_subset['municipality'] = targets_subset['municipality'].replace('Helsingborg', 'Helsingborg/Landskrona')

targets_subset

Unnamed: 0,account_name,top_account_name,indication,address,locality,municipality
0,Adel Bader Hamdalla,Södra Älvsborgs Sjukhus Borås,BC,"Brämhultsvägen 53, 501 82 Borås, Schweden",Borås,Borås
1,Aglaia Schiza,Akademiska sjukhuset,BC,"Sjukhusvägen, 751 85 Uppsala, Schweden",Uppsala,Uppsala
2,Agneta Nordin Danfors,Drottningmottagningen,BC,"Drottninggatan 68, 111 21 Stockholm, Schweden",Stockholm,Stockholm-NO
3,Ahmed Abbas Albu-Kareem,Universitetssjukhuset Linköping,BC,"Universitetssjukhuset, 581 85 Linköping, Schweden",Linköping,Linköping
4,Alaa Haidar,Hallands sjukhus Halmstad,BC,"Lasarettsvägen, 302 33 Halmstad, Schweden",Halmstad,Halmstad
...,...,...,...,...,...,...
224,Ylva Holmgren Stenlund,Norrlands Universitetssjukhus,Melanoma,"Daniel Naezéns väg, 907 37 Umeå, Schweden",Umeå,Umeå
225,Anna Nyberg,Länssjukhuset Ryhov,BC,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping
226,Göran Carlstedt,Centrallasarettet Växjö,Melanoma,"Strandvägen 8, 352 34 Växjö, Schweden",Växjö,Växjö
227,Mikael Wallander,Länssjukhuset Ryhov,Melanoma,"Sjukhusgatan, 553 05 Jönköping, Schweden",Jönköping,Jönköping


The table `mapping` is already in the correct format, i.e., the `brick` variable in `mapping` and the `municipality` variable in `targets_subset` have the same format, i.e., the two digits at the start of any entry in `brick` must be removed.

We can now create the hospital and HCP counts for the BC and Melanoma regions using the function `aggregate_by_regions()`.

In [51]:
hospital_counts = aggregate_by_regions(mapping, targets_subset, 'address', 'hospitals')
hospital_counts

Unnamed: 0,territory,hospitals
0,Blekinge,1.0
1,Dalarna,0.0
2,Gävleborg-Gävle,0.0
3,Halland-Halmstad,1.0
4,Halland-Varberg-Falkenberg,0.0
5,Jämtland,1.0
6,Jönköping-Jönköping,1.0
7,Jönköping-Nässjö-Eksjö,0.0
8,Jönköping-Värnamo,0.0
9,Kalmar,1.0


In [53]:
hcp_counts = aggregate_by_regions(mapping, targets_subset, 'account_name', 'hcps')
hcp_counts

Unnamed: 0,territory,hcps
0,Blekinge,5.0
1,Dalarna,0.0
2,Gävleborg-Gävle,0.0
3,Halland-Halmstad,1.0
4,Halland-Varberg-Falkenberg,0.0
5,Jämtland,2.0
6,Jönköping-Jönköping,6.0
7,Jönköping-Nässjö-Eksjö,0.0
8,Jönköping-Värnamo,0.0
9,Kalmar,7.0


In [68]:
# Save the new dataset

if not os.path.exists(route0):
    os.mkdir(route0)
    
print("saving file corresponding to hospital_counts.pkl")
hospital_counts.to_pickle(f"{route0}/hospital_counts.pkl")
pd.read_pickle(f"{route0}/hospital_counts.pkl")

saving file corresponding to hospital_counts.pkl


Unnamed: 0,territory,hospitals
0,Blekinge,1.0
1,Dalarna,0.0
2,Gävleborg-Gävle,0.0
3,Halland-Halmstad,1.0
4,Halland-Varberg-Falkenberg,0.0
5,Jämtland,1.0
6,Jönköping-Jönköping,1.0
7,Jönköping-Nässjö-Eksjö,0.0
8,Jönköping-Värnamo,0.0
9,Kalmar,1.0


In [54]:
# Save the new dataset
    
if not os.path.exists(route0):
    os.mkdir(route0)

print("saving file corresponding to hcp_counts.pkl")
hcp_counts.to_pickle(f"{route0}/hcp_counts.pkl")
pd.read_pickle(f"{route0}/hcp_counts.pkl")

saving file corresponding to hcp_counts.pkl


Unnamed: 0,territory,hcps
0,Blekinge,5.0
1,Dalarna,0.0
2,Gävleborg-Gävle,0.0
3,Halland-Halmstad,1.0
4,Halland-Varberg-Falkenberg,0.0
5,Jämtland,2.0
6,Jönköping-Jönköping,6.0
7,Jönköping-Nässjö-Eksjö,0.0
8,Jönköping-Värnamo,0.0
9,Kalmar,7.0
