# Preparation of Calls

### Calls data BC & Melanoma 2020-2021.xlsx
This notebook prepares the calls data from the sheet `report1631874037203` in `Calls data BC & Melanoma 2020-2021.xlsx`.

In [1]:
# Load required packages
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os

## Load data

In [2]:
# Location of the raw Excel export and the worksheet that holds the call records
raw_workbook = "../../0_raw_data/novartis_data/Calls data BC & Melanoma 2020-2021.xlsx"
raw_sheet = 'report1631874037203'

# Load that worksheet into a data frame
calls = pd.read_excel(raw_workbook, sheet_name=raw_sheet)

# Show the frame exactly as loaded (pandas truncates the rendering)
calls

Unnamed: 0,Call: Call Name,Account: Account Record Type,Is Parent Call?,Date,Datetime,Call: Last Modified Date,Call: Last Modified By,Owner Name,Account: Name,Address,...,Call Subtype,Attestation,Call: Owner Role,Duration (in minutes),Account: Account ID,Account: Salesforce ID,Status,Account: Country,Indication,Account: External ID
0,C004204162,HCO,1.0,2020-01-16,2020-01-16 21:17:00,2020-07-01,Data Migration,Jonas Ljunggren,Onkologikliniken,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",...,Group Call up to 5 persons,Face to Face,ONC-OLD,30.0,0012o00002iEAh0,0012o00002iEAh0AAG,Submitted,SE,Melanoma,WSEH00103558
1,C004360771,HCP,0.0,2020-01-16,2020-01-16 21:17:00,2020-06-30,Data Migration,Jonas Ljunggren,David Sterner,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",...,Group Call up to 5 persons,Face to Face,ONC-OLD,30.0,0012o00002kTsAl,0012o00002kTsAlAAK,Submitted,SE,Melanoma,WSEM00024846
2,C004360772,HCP,0.0,2020-01-16,2020-01-16 21:17:00,2020-06-30,Data Migration,Jonas Ljunggren,Michael Sihver,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",...,Group Call up to 5 persons,Face to Face,ONC-OLD,30.0,0012o00002iEVSM,0012o00002iEVSMAA4,Submitted,SE,Melanoma,WSEM00088101
3,C004360773,HCP,0.0,2020-01-16,2020-01-16 21:17:00,2020-06-30,Data Migration,Jonas Ljunggren,Lars Sandberg,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",...,Group Call up to 5 persons,Face to Face,ONC-OLD,30.0,0012o00002iEVS2,0012o00002iEVS2AAO,Submitted,SE,Melanoma,WSEM00086131
4,C004360774,HCP,0.0,2020-01-16,2020-01-16 21:17:00,2020-06-30,Data Migration,Jonas Ljunggren,Cecilia Nilsson,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",...,Group Call up to 5 persons,Face to Face,ONC-OLD,30.0,0012o00002iEZpE,0012o00002iEZpEAAW,Submitted,SE,Melanoma,WSEM00068109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
625,Oncology Call Report,,,NaT,NaT,NaT,,,,,...,,,,,,,,,,
626,"Copyright (c) 2000-2021 salesforce.com, inc. A...",,,NaT,NaT,NaT,,,,,...,,,,,,,,,,
627,Confidential Information - Do Not Distribute,,,NaT,NaT,NaT,,,,,...,,,,,,,,,,
628,Generated By: Aswathi Padman 2021-09-17 12:20,,,NaT,NaT,NaT,,,,,...,,,,,,,,,,


## Preparatory steps

In [3]:
# Keep only the rows labelled 0 through 623 (label-based slice, end inclusive);
# later rows such as 625-628 contain report footer text rather than call records
last_data_row = 623
calls = calls.loc[0:last_data_row]

In [4]:
# Map every original report header to a short snake_case column name
column_names = {
    "Call: Call Name": "call_name",
    "Account: Account Record Type": "account_record_type",
    "Is Parent Call?": "parent_call",
    "Date": "date",
    "Datetime": "datetime",
    "Call: Last Modified Date": "last_modified_date",
    "Call: Last Modified By": "last_modified_by",
    "Owner Name": "owner_name",
    "Account: Name": "account_name",
    "Address": "address",
    "Address Line 1": "address_line_1",
    "Attendees": "attendees",
    "Attendee Type": "attendee_type",
    "Call Focus": "call_focus",
    "Call Method": "call_method",
    "Call Subtype": "call_subtype",
    "Attestation": "attestation",
    "Call: Owner Role": "owner_role",
    "Duration (in minutes)": "duration_in_minutes",
    "Account: Account ID": "account_id",
    "Account: Salesforce ID": "salesforce_id",
    "Status": "status",
    "Account: Country": "country",
    "Indication": "indication",
    "Account: External ID": "external_id",
}

# Apply the mapping; headers not listed above keep their original name
calls = calls.rename(columns=column_names)

In [5]:
# Drop columns that are not needed for the prepared data set.
# The result is reassigned instead of using inplace=True, so the frame object
# produced by the previous cell is not mutated behind the reader's back.
irrelevant_columns = [
    "last_modified_date", "last_modified_by", "owner_name", "call_subtype",
    "attestation", "owner_role", "salesforce_id", "status", "country",
    "external_id",
]
calls = calls.drop(columns=irrelevant_columns)

In [6]:
# Cast to appropriate data types.
# NOTE(review): astype('str') turns missing values into the literal string 'nan';
# confirm the text columns below contain no NAs or that downstream code expects this.
column_dtypes = {
    "call_name": "str",
    "account_record_type": "category",
    "parent_call": "category",
    "account_name": "str",
    "address": "str",
    "address_line_1": "str",
    "attendees": "float",  # float necessary since int does not work due to NAs
    "attendee_type": "category",
    "call_focus": "category",
    "call_method": "category",
    "duration_in_minutes": "float",  # float necessary since int does not work due to NAs
    "account_id": "str",
    "indication": "category",
}
calls = calls.astype(column_dtypes)

# Ensure both timestamp columns are datetimes. No explicit `format` is passed:
# the values are ISO-style (e.g. 2020-01-16 21:17:00), which pandas parses
# without one, whereas a dotted pattern such as '%Y.%m.%d' would not match them.
calls["date"] = pd.to_datetime(calls["date"])
calls["datetime"] = pd.to_datetime(calls["datetime"])

In [7]:
# Display the data frame after the preparatory steps to check the result
calls

Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
0,C004204162,HCO,1.0,2020-01-16,2020-01-16 21:17:00,Onkologikliniken,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,11.0,Group Account,Promotional discussion,Face to Face,30.0,0012o00002iEAh0,Melanoma
1,C004360771,HCP,0.0,2020-01-16,2020-01-16 21:17:00,David Sterner,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTsAl,Melanoma
2,C004360772,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Michael Sihver,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVSM,Melanoma
3,C004360773,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Lars Sandberg,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVS2,Melanoma
4,C004360774,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Cecilia Nilsson,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEZpE,Melanoma
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
619,C010427095,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Sandra Sjöstrand,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEdyx,Melanoma
620,C010427096,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Pia Törnblom,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iDrMc,Melanoma
621,C010427097,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Karolina Vernmark,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEQjh,Melanoma
622,C010427098,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Maria Östlund,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEdj0,Melanoma


In [8]:
# Save the prepared data frame
route0 = "../processed_data"

# Create the output directory (and any missing parents); exist_ok=True makes
# this a no-op when the directory is already there, so the cell can be re-run.
os.makedirs(route0, exist_ok=True)

print("saving file corresponding to calls.pkl")
calls.to_pickle(f"{route0}/calls.pkl")

# Read the pickle back and display it to confirm the file was written correctly
pd.read_pickle(f"{route0}/calls.pkl")

saving file corresponding to calls.pkl


Unnamed: 0,call_name,account_record_type,parent_call,date,datetime,account_name,address,address_line_1,attendees,attendee_type,call_focus,call_method,duration_in_minutes,account_id,indication
0,C004204162,HCO,1.0,2020-01-16,2020-01-16 21:17:00,Onkologikliniken,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,11.0,Group Account,Promotional discussion,Face to Face,30.0,0012o00002iEAh0,Melanoma
1,C004360771,HCP,0.0,2020-01-16,2020-01-16 21:17:00,David Sterner,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002kTsAl,Melanoma
2,C004360772,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Michael Sihver,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVSM,Melanoma
3,C004360773,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Lars Sandberg,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEVS2,Melanoma
4,C004360774,HCP,0.0,2020-01-16,2020-01-16 21:17:00,Cecilia Nilsson,"Onkologikliniken, Centrallasarettet 1, VÄSTERÅ...",Centrallasarettet 1,0.0,Person Account,Promotional discussion,Face to Face,30.0,0012o00002iEZpE,Melanoma
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
619,C010427095,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Sandra Sjöstrand,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEdyx,Melanoma
620,C010427096,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Pia Törnblom,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iDrMc,Melanoma
621,C010427097,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Karolina Vernmark,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEQjh,Melanoma
622,C010427098,HCP,0.0,2021-09-15,2021-09-15 12:00:00,Maria Östlund,"Garnisonsvägen 10, LINKÖPING, Östergötlands lä...",Garnisonsvägen 10,0.0,Person Account,Promotional discussion,Virtual MS Teams,45.0,0012o00002iEdj0,Melanoma
