## Description
This script processes two CSV files, one containing information on new dental offices and one containing information on referrals made by those offices. It calculates the number of new offices and referrals made by territory and by rep, and outputs the results to an Excel workbook.

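## How to Use
1. Ensure that the two input CSV files (`oldnewoffices.csv` and `ReferralWithFinancialReport (15).csv`) are in the same directory as the script.
2. Install the required Python packages: pandas and xlsxwriter (the script passes `engine='xlsxwriter'` to `pd.ExcelWriter`, so openpyxl alone is not sufficient).
3. Run the script.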
## Output
The script writes a single Excel workbook, `output.xlsx`, to the same directory as the script. The workbook contains the following sheets:

- "Offices by Territory": Contains the number of new offices and referrals made by territory.
- "List of Individual Offices": Contains a detailed list of all new offices, including their dentist ID, practice name, number of referrals, and territory.
- "New Offices &Referrals by Rep": Contains the number of new offices and referrals made by rep.

In [None]:
pip install xlsxwriter

In [19]:
import pandas as pd
from datetime import date

# Read in the old offices file (presumably the offices already listed in the
# previous report -- it is used later to decide which offices are new)
old_offices = pd.read_csv('oldnewoffices.csv')

# Read in the recent report file
recent_report = pd.read_csv('ReferralWithFinancialReport (15).csv')

# The raw column names (textBox2, textBox46, ...) are opaque, so map them to
# readable names. A single shared mapping keeps both frames consistent, and
# every source column is listed exactly once.
COLUMN_RENAMES = {
    'textBox2': '#ref',
    'textBox46': 'Territory',
    'textBox45': 'AccountType',
    'textBox461': 'DentistID',
    'sourceDataTextBox': 'PracticeName',
    'textBox462': 'FullAddress',
}

# DataFrame.rename ignores mapping keys that are not present in a frame, so
# the same mapping is safe to apply to both files.
recent_report.rename(columns=COLUMN_RENAMES, inplace=True)
old_offices.rename(columns=COLUMN_RENAMES, inplace=True)

# Split the address into its components
def split_address(df):
    """Split ``df['FullAddress']`` into Address, City, State and Zip columns.

    The address is split on commas and the first four pieces are written, in
    order, to the new columns ``Address``, ``City``, ``State`` and ``Zip``.
    Surrounding whitespace is stripped from every piece, so
    ``"1 Main St, Springfield"`` yields the city ``"Springfield"`` rather than
    ``" Springfield"``. Missing pieces, and values that are not strings
    (e.g. NaN for an empty cell), produce empty strings. Pieces beyond the
    fourth are ignored.

    Args:
        df: DataFrame with a ``FullAddress`` column. It is modified in place.

    Returns:
        None. The four columns are added to (or overwritten on) ``df``.
    """
    target_columns = ('Address', 'City', 'State', 'Zip')

    def _components(value):
        # Split each address only once and pad to four entries so every
        # target column always has a value to take.
        pieces = [piece.strip() for piece in value.split(',')] if isinstance(value, str) else []
        padded = pieces + [''] * len(target_columns)
        return padded[:len(target_columns)]

    split_rows = [_components(value) for value in df['FullAddress']]
    for position, column in enumerate(target_columns):
        # Assigning a plain list is positional, so the frame's index does not matter.
        df[column] = [row[position] for row in split_rows]

# Add Address / City / State / Zip columns to the recent report
split_address(recent_report)

# Parse the effectiveDateDataTextBox1 column once and derive every calendar
# part from that single parsed series instead of re-parsing it per column
effective_dates = pd.to_datetime(recent_report['effectiveDateDataTextBox1'])
recent_report['Year'] = effective_dates.dt.year
recent_report['Day'] = effective_dates.dt.day
recent_report['Month'] = effective_dates.dt.month


In [31]:
# Offices in the recent report: keep only rows whose account type is
# 'Rep Account', reduce them to the office columns and drop exact duplicates
rep_account_rows = recent_report['AccountType'] == 'Rep Account'
office_columns = ['DentistID', 'Territory', 'PracticeName', 'Address', 'City', 'State', 'Zip', '#ref']
recent_new_offices = recent_report.loc[rep_account_rows, office_columns].drop_duplicates()

# Offices from the old file, reduced the same way
previous_columns = ['DentistID', 'Territory', '#ref', 'PracticeName']
old_new_offices = old_offices.loc[:, previous_columns].drop_duplicates()

# Referral totals per territory across all rep-account offices
total_referrals_by_territory = (
    recent_new_offices.groupby(['Territory'])['#ref'].sum().reset_index()
)

# An office is new when its DentistID does not appear in the old file
already_listed = recent_new_offices['DentistID'].isin(old_new_offices['DentistID'])
new_offices_since_last_report = recent_new_offices[~already_listed]

# Every row of the recent report that belongs to one of the new offices
from_new_office = recent_report['DentistID'].isin(new_offices_since_last_report['DentistID'])
referrals_for_new_offices = recent_report[from_new_office]

# Per territory: summed referrals and number of distinct practice names among
# the new offices, with the columns given report-friendly labels
new_offices_and_referrals = (
    new_offices_since_last_report
    .groupby(['Territory'])
    .agg({'#ref': 'sum', 'PracticeName': 'nunique'})
    .rename(columns={'#ref': '#Referrals from New Offices', 'PracticeName': '# of New Offices'})
)

# Show the territory summary
print("\nNew Offices and Referrals by Territory:")
print(new_offices_and_referrals)

# Create an empty list to store the new offices' details
new_offices_list = []

# Define the employee-territory dictionary: rep name -> territories they cover.
# Every entry must be written as key: value; writing 'Gina', [...] with a comma
# is a SyntaxError inside a dict literal.
employee_territory = {
    'Kim': ['Illinois', 'Michigan', 'Ohio', 'Indiana', 'Kentucky', 'Wisconsin', 'Minnesota', 'Iowa', 'Missouri'],
    'Jennifer': ['Pennsylvania - Philly'],
    'Jeana': ['New Jersey'],
    'Gina': ['Gina Bogardus', 'Illinois', 'Florida'],
}

# Invert the dictionary once into territory -> rep so each office is a single
# lookup. setdefault keeps the first rep that lists a territory, so 'Illinois'
# (listed for both Kim and Gina) is credited to Kim -- the same rep a
# first-match scan over employee_territory would pick.
rep_by_territory = {}
for employee, territories in employee_territory.items():
    for covered_territory in territories:
        rep_by_territory.setdefault(covered_territory, employee)

# Credit every new office to its rep.
# new_offices_since_last_report can hold several rows for one DentistID (rows
# that differ only in '#ref' or address survive drop_duplicates). Visiting each
# dentist once prevents counting that office several times and adding its full
# referral total once per row.
new_offices_by_rep = {}
offices_to_credit = new_offices_since_last_report.drop_duplicates(subset='DentistID')
for idx, row in offices_to_credit.iterrows():
    dentist_id = row['DentistID']
    practice_name = row['PracticeName']
    territory = row['Territory']

    # Offices in a territory nobody covers are not credited to any rep
    employee = rep_by_territory.get(territory)
    if employee is None:
        continue

    # Total referrals over all of this dentist's rows in the recent report
    num_referrals = recent_new_offices.loc[recent_new_offices['DentistID'] == dentist_id, '#ref'].sum()

    # First office for this rep creates their entry with empty lists
    rep_entry = new_offices_by_rep.setdefault(employee, {'New Offices': [], 'Referrals': []})
    rep_entry['New Offices'].append((dentist_id, practice_name, territory))
    rep_entry['Referrals'].append(num_referrals)

# Total number of new offices and referrals per rep, keyed by rep name
new_offices_and_referrals_by_rep = {
    employee: {
        'New Offices': len(offices_and_referrals['New Offices']),
        'Referrals': sum(offices_and_referrals['Referrals']),
    }
    for employee, offices_and_referrals in new_offices_by_rep.items()
}

# Build the summary table from one record per rep. Passing the columns
# explicitly keeps the frame well-formed (Rep / New Offices / Referrals) even
# when no new office matched any rep; transposing an empty dict-of-dicts frame
# would have no such columns and selecting them would raise KeyError.
new_offices_and_referrals_by_rep_df = pd.DataFrame(
    [{'Rep': employee, **totals} for employee, totals in new_offices_and_referrals_by_rep.items()],
    columns=['Rep', 'New Offices', 'Referrals'],
)

# Print the per-rep summary without the positional index
print('\nNew Offices and Referrals by Rep:')
print(new_offices_and_referrals_by_rep_df.to_string(index=False))



New Offices and Referrals by Territory:
            #Referrals from New Offices  # of New Offices
Territory                                                
New Jersey                            1                 1

New Offices and Referrals by Rep:
  Rep  New Offices  Referrals
Jeana            1          1


In [33]:
new_offices_since_last_report

Unnamed: 0,DentistID,Territory,PracticeName,Address,City,State,Zip,#ref
242,103169,New Jersey,DENTAL HEALTH ASSOCIATES EDISON,1907 OAK TREE ROAD SUITE # 204,EDISON,NJ,8820,1


In [37]:
# Write the three result tables into one workbook using the XlsxWriter engine.
# The context manager saves and closes the file when the block exits, also on
# error; ExcelWriter.save() is not available in pandas 2.x.
with pd.ExcelWriter('output.xlsx', engine='xlsxwriter') as writer:
    # Sheet 1: new offices and referrals by territory (Territory is the index, so keep it)
    new_offices_and_referrals.to_excel(writer, sheet_name='Offices by Territory', index=True)

    # Sheet 2: one row per individual new office
    new_offices_since_last_report.to_excel(writer, sheet_name='List of Individual Offices', index=False)

    # Sheet 3: new offices and referrals by rep. Export the per-rep summary
    # (Rep / New Offices / Referrals) rather than a per-practice grouping whose
    # practice-name column was labelled '# of New Offices'.
    new_offices_and_referrals_by_rep_df.to_excel(writer, sheet_name='New Offices &Referrals by Rep', index=False)
