# Doctors' locations

- Each location will be assigned to the number of doctors in this location, thus it will be represented with a histogram.

In [50]:
import json
import collections
import datetime
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from scipy.stats import chi2_contingency, fisher_exact

# Define the path to your JSON file
customers_file_path = 'dataset/customers.json'
companies_file_path = 'dataset/companies.json'
visits_file_path = 'dataset/visits.json'


company_id = '5b98d0eafb6fc01dae4341c5'

# Open the file and load the contents as a Python object
with open(visits_file_path) as f:
    visits = json.load(f)

with open(companies_file_path) as f:
    companies = json.load(f)

with open(customers_file_path) as f:
    customers = json.load(f)

# Now you can access the data just like any other Python object


In [51]:
# ========== ----- ========== Service Functions ========== ----- ========== #

def getUserName(id):
    for company in companies:
        if (company['_id']['$oid']) == company_id:
            for user in company['users']:
                if user['_id']['$oid'] == id:
                    return user['name']

    return 'Not Found'


def getCustomerTargetFrequency(ids):

    target_frequencies = []
    for customer in customers:
        if customer['_id']['$oid'] in ids:
            if 'targeted_frequency' in customer and customer['targeted_frequency'] != None:
                if int(customer['targeted_frequency']) > 0:
                    target_frequencies.append(
                        int(customer['targeted_frequency']))
                else:
                    target_frequencies.append(1)
            else:
                target_frequencies.append(1)
    return target_frequencies


def getUserVisits(reps_ids, start_date, end_date):

    reps_visits = {}

    for visit in visits:
        if visit['user']['id']['$oid'] in reps_ids:

            createdAt = datetime.datetime(
                int(visit['createdAt']['$date'][0:4]), int(visit['createdAt']['$date'][5:7]), int(visit['createdAt']['$date'][8:10]))

        if createdAt > start_date and createdAt < end_date:
            if visit['user']['id']['$oid'] in reps_visits:
                reps_visits[visit['user']['id']['$oid']] += 1
            else:
                reps_visits[visit['user']['id']['$oid']] = 1

    return reps_visits

# ========== ----- ========== End ========== ----- ========== #


In [52]:
# ========== ----- ========== Get Sales Rep ========== ----- ========== #

sales_reps = [] # list of sales reps
supervisors = {}  # map between user_id & supervisor_id

for company in companies:
    if (company['_id']['$oid']) == company_id:
        for user in company['users']:
            if ('supervisor_id' in user and user['supervisor_id'] != None and 'isSalesRep' in user and 'isActive' in user and 'country' in user):
                if (user['isSalesRep'] == True and user['isActive'] == True and user['country'] == "EGYPT"):
                    sales_reps.append(user)
                    if user['_id']['$oid'] not in supervisors:
                        supervisors[user['_id']['$oid']] = {
                            'id': user['supervisor_id']['$oid']}

# ========== ----- ========== End ========== ----- ========== #


In [53]:
# ========== ----- ========== Get Sales Rep's Customers ========== ----- ========== #

users_ids = list(supervisors.keys())
reps_customers = {} # map between reps & customers

for company in companies:
    if (company['_id']['$oid']) == company_id:
        for customer in company['customers']:
            for user in customer['users']:
                if user['$oid'] in users_ids:
                    if user['$oid'] in reps_customers:
                        reps_customers[user['$oid']].append(
                            customer['customer_id']['$oid'])
                    else:
                        reps_customers[user['$oid']] = [
                            customer['customer_id']['$oid']]


# ========== ----- ========== End ========== ----- ========== #


In [88]:
# ========== ----- ========== Create Dataset ========== ----- ========== #

start_dates = [datetime.datetime(2022, 11, 1), datetime.datetime(2022, 12, 1), datetime.datetime(
    2023, 1, 1), datetime.datetime(2023, 2, 1), datetime.datetime(2023, 3, 1), datetime.datetime(2023, 4, 1)]

end_dates = [datetime.datetime(2022, 12, 1), datetime.datetime(2023, 1, 1), datetime.datetime(
    2023, 2, 1), datetime.datetime(2023, 3, 1), datetime.datetime(2023, 4, 1), datetime.datetime(2023, 5, 1)]


reps_tragetFreq = {}
for key, value in zip(list(reps_customers.keys()), list(reps_customers.values())):
    reps_tragetFreq[key] = sum(getCustomerTargetFrequency(value))

dataset = []

for start_date, end_date in zip(start_dates, end_dates):

    reps_visits = getUserVisits(list(supervisors.keys()), start_date, end_date)
    for sales_rep in sales_reps:
        row = []
        row.append(sales_rep['name'])
        row.append(getUserName(supervisors[sales_rep['_id']['$oid']]['id']))
        row.append(start_date)
        row.append(len(reps_customers[sales_rep['_id']['$oid']]))
        row.append(reps_tragetFreq[sales_rep['_id']['$oid']])

        if sales_rep['_id']['$oid'] in reps_visits:
            row.append(reps_visits[sales_rep['_id']['$oid']])
            temp_visits = reps_visits[sales_rep['_id']['$oid']]
        else:
            row.append(0)
            temp_visits = 0

        if (temp_visits /
            reps_tragetFreq[sales_rep['_id']['$oid']])*1 > .75:
                row.append(1)
        else:
                row.append(0)
                
        dataset.append(row)

# ========== ----- ========== End ========== ----- ========== #


In [96]:
# ========== ----- ========== Bayesian Model ========== ----- ========== #

train_data = pd.DataFrame(dataset, columns=["Rep", "Supervisor", "Month",
                                            "Customers", "Target_Frequency",  "Visits", "Coverage"])

train_data = train_data[train_data['Visits'] != 0]

from scipy.stats import chi2_contingency
# create a contingency table of the Coverage and Supervisor columns
contingency_table = pd.crosstab(
    train_data['Supervisor'], train_data['Coverage'])

# perform the chi-squared test of independence
stat, pval, dof, expected_values = chi2_contingency(contingency_table)

# print the results
print("Chi-Squared Statistic: ", stat)
print("p-value: ", pval)
print("Degrees of Freedom: ", dof)
print("Expected Values: ")
print(expected_values)

# A p-value is a measure of the evidence against the null hypothesis in a statistical test.
# In this case, it's the result from the chi-squared test of independence that was performed to assess whether the
#  Coverage and Supervisor columns are independent or not. The p-value is the probability of observing a test statistic
#  at least as extreme as the one computed from the sample data, assuming that the null hypothesis is true.
#  In general, if the p-value is less than the chosen significance level (often 0.05), then we reject the null hypothesis
#  and conclude that there is evidence for an association between the two variables. Conversely, if the p-value is greater
#  than the significance level, we fail to reject the null hypothesis and conclude that there is no evidence for an
#  association. In your case, the p-value is 0.0004629592186297742, which is much smaller than 0.05, suggesting strong
#  evidence against the null hypothesis of no association. Therefore, you can reject the null hypothesis and conclude
#  that there is a significant association between the Coverage and Supervisor columns in your dataset.

# ========== ----- ========== End ========== ----- ========== #

Chi-Squared Statistic:  33.70972743202966
p-value:  4.58524965488053e-05
Degrees of Freedom:  8
Expected Values: 
[[ 7.88018433 10.11981567]
 [13.13364055 16.86635945]
 [ 7.88018433 10.11981567]
 [ 7.88018433 10.11981567]
 [10.50691244 13.49308756]
 [13.13364055 16.86635945]
 [12.69585253 16.30414747]
 [10.50691244 13.49308756]
 [11.38248848 14.61751152]]


In [99]:
# ========== ----- ========== Supervisor Performance ========== ----- ========== #

supervisor_performance = {}

for data in dataset:
    if data[1] in supervisor_performance:
        supervisor_performance[data[1]].append(data[6])
    else:
        supervisor_performance[data[1]] = [data[6]]

for name in list(supervisor_performance.keys()):
    print(name, " performance: ", (sum(
        supervisor_performance[name]) / len(supervisor_performance[name]))*100)

# ========== ----- ========== End ========== ----- ========== #


Eslam Nasr  performance:  58.333333333333336
Ahmed Abou ElFotoh  performance:  76.66666666666667
Nehad Elgenedy  performance:  46.666666666666664
Hazem Zidan  performance:  63.33333333333333
Ali Morshed  performance:  27.77777777777778
Ahmed Adel  performance:  77.77777777777779
Abdelrahman  Mohamed  performance:  55.55555555555556
Yousab Andrawis  performance:  16.666666666666664
Tamer Mahmoud  performance:  75.0
