## Advising Foot Traffic Data

This notebook processes the foot traffic data from each advising location. From this data, we can clearly see how many students are calling, emailing, or walking into the office throughout the year.

In [None]:
import pandas as pd
import numpy as np
import re
import sklearn

In [None]:
######################################################################
#### BEFORE YOU DOWNLOAD THE CSV'S, YOU NEED TO CONVERT ALL OF THE ###
#### 'DATE' COLUMNS TO MM/DD/YY. THIS WILL ALLOW THE TO_DATETIME() ###
#### METHOD TO BE PROPERLY APPLIED ###################################
######################################################################

#Load one sign-in sheet per advising site. The paths are relative, so the
#CSVs must sit in the notebook's working directory.
#NOTE(review): BOM is read with pandas' default encoding while the other
#three sheets are read as 'windows-1254' -- confirm this is intentional.

boa = pd.read_csv("BOA Foot Traffic.csv", encoding = 'windows-1254')
boe = pd.read_csv("BOE Foot Traffic.csv", encoding = 'windows-1254')
bsc = pd.read_csv("BSC Foot Traffic.csv", encoding = 'windows-1254')
bom = pd.read_csv("BOM Foot Traffic.csv")

In [None]:
def modify_df(df, location):
    """Normalise one site's sign-in sheet into the shared column layout.

    The input frame is never modified; all work happens on a copy, so it is
    safe to pass a filtered slice of a larger frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sign-in sheet. After upper-casing the headers and replacing
        newlines with spaces it must contain every column listed in
        ``cols`` below, except 'TIME RANGE', 'LOCATION' and (optionally)
        'ATHLETE', which are created here. A 'TIME IN' header is renamed
        to 'TIME OF CONTACT'.
    location : str
        Site code written into every row of the 'LOCATION' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly the 23 columns of ``cols``, in that order.

    Raises
    ------
    KeyError
        If a required column is missing.
    ValueError
        If '#' holds values that cannot be converted to int (e.g. NaN).
    """
    #Copy first so the caller's frame keeps its original headers and no
    #chained-assignment warnings are raised when a slice is passed in.
    df = df.copy()

    #Modify headers: upper-case and flatten multi-line header cells
    df.columns = [name.upper().replace('\n', ' ') for name in df.columns]

    #Rename 'TIME IN' column
    df = df.rename(columns = {'TIME IN' : 'TIME OF CONTACT'})

    #Create 'LOCATION' and 'TIME RANGE' columns ('TIME RANGE' starts blank)
    df['LOCATION'] = location
    df['TIME RANGE'] = ''

    #Convert # from float to int
    df['#'] = df['#'].astype(int)

    #If df does not have 'ATHLETE' already present, insert empty
    #'ATHLETE' column
    if 'ATHLETE' not in df.columns:
        df['ATHLETE'] = ''

    #Reorganize columns
    cols = ['DATE', '#', 'NAME', 'TIME OF CONTACT', 'TIME RANGE', 'LOCATION',
            'APPT', 'DISTANCE', 'CURRENT STUDENT', 'NEW STUDENT', 'RETURNING STUDENT', 
            'HIGH SCHOOL', 'WORKFORCE', 'VETERAN', 'ENROLL','ADD/DROP', 'QUESTIONS', 
            'MAJOR CHANGE', 'DEGREE CHECK', 'SUSPENSION', 'ATHLETE', 'ADVISOR SIGN', 'ADV TIME']
    df = df[cols].copy()

    #In the checkbox-style columns ('APPT' through 'ATHLETE') keep only the
    #recognised marks; NaN and anything else becomes an empty string.
    keep = ('x', 'X', 'Phone', 'In Person', 'Zoom', 'Email', 'Central Adv')
    for name in cols[6:21]:
        df[name] = [value if value in keep else '' for value in df[name]]

    #Return modified df
    return df

In [None]:
#Run modify_df on each site's sign-in sheet, tagging every row with its
#site code. The cleaned frames are stored under new names (boa2, ...).
boa2 = modify_df(boa, "BOA")
boe2 = modify_df(boe, "BOE")
bsc2 = modify_df(bsc, "BSC")
bom2 = modify_df(bom, "BOM")

In [None]:
#Combine the sign in sheets into a single list.
fin_ls = [boa2, boe2, bsc2, bom2]

#Then use the concat() method to combine each of the sign-in sheets to a single dataframe.
fin_mashup = pd.concat(fin_ls).reset_index(drop = True)

#Convert 'DATE' column to datetime object.
#NOTE: errors = 'coerce' is currently switched ON below, so any date pandas
#cannot read is silently turned into 'NaT' instead of raising an error. That
#hides badly formatted dates; the better fix is to go back and correct the
#dates in the source sheet. Remove errors = 'coerce' if you want pandas to
#stop and report the offending dates instead.

fin_mashup['DATE'] = pd.to_datetime(fin_mashup['DATE'], errors = 'coerce')

#Create dictionary for month (month number -> abbreviation)
month_dict = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6:'Jun', \
              7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}

#Comprehension for Month. Rows whose date is NaT have no matching key, so
#.get() gives None for them.
mon = [month_dict.get(i) for i in list(fin_mashup['DATE'].dt.month)]

#Create dictionary for days of the week (0 = Monday, as returned by .dt.weekday)
day_of_week = {0: 'Mon', 1:'Tues', 2:'Wed', 3:'Thur', 4:'Fri', 5:'Sat', 6:'Sun'}

#Comprehension for Days
dow = [day_of_week.get(i) for i in list(fin_mashup['DATE'].dt.weekday)]

#Combine new columns with old fin_mashup df
fin_mashup['MONTH'], fin_mashup['DAY'] = mon, dow

#Export mashup to csv. to_csv() returns None when given a file path, so
#'mashup' holds None, not the data.
mashup = fin_mashup.to_csv('mashup.csv', index = False)

In [None]:
#The program below creates a new dataframe, aggregated by whatever column you insert.
def create_dataframe(df, column):
    """Count how often each value of `column` occurs in `df`.

    Returns a frame with three columns: `column` (the distinct values),
    'Count' (rows per value, largest first) and '% Contribution' (each
    count as a percentage of the total, rounded to two decimals).
    """
    counts = df.groupby(column)[column].count()
    summary = counts.to_frame(name = 'Count')
    summary = summary.sort_values('Count', ascending = False).reset_index()
    share = summary['Count'] / summary['Count'].sum() * 100
    summary['% Contribution'] = share.round(2)
    return summary

**Number Seen by Each Advisor**

In [None]:
#Sign-ins per advisor signature, largest first, with each advisor's share of the total.
create_dataframe(fin_mashup, "ADVISOR SIGN")

**Distance Students Seen and in What Modality**

In [None]:
distance = create_dataframe(fin_mashup, "DISTANCE")

#Keep the three remote labels as they are; every other value (including
#blanks) is relabelled 'Appt or In-Person'. Counts are not re-aggregated.
ls = [mode if mode in ['Phone', 'Email', 'Central Adv'] else 'Appt or In-Person'
      for mode in distance['DISTANCE']]

distance['DISTANCE'] = ls

distance

**Appointment Type**

In [None]:
appt = create_dataframe(fin_mashup, 'APPT')

#Scheduled modalities keep their label; anything else counts as a walk-in.
ap = [kind if kind in ['Phone', 'Zoom', 'In Person'] else 'Walk In'
      for kind in appt['APPT']]

appt['APPT'] = ap

appt

**Data by Site: How Many Students Assisted by Location**

In [None]:
#NOTE(review): by default a notebook only shows the value of a cell's last
#expression, so this first result is presumably never displayed -- confirm.
create_dataframe(fin_mashup, "LOCATION")

#Restrict to the BSC and BOM sites; reset_index() keeps the old row labels
#in an 'index' column, which is what gets counted below.
mask1 = fin_mashup['LOCATION'].isin(['BSC', 'BOM'])
appt_types = fin_mashup[mask1].reset_index()

#Number of sign-ins per 'DISTANCE' value at those two sites.
appt_types.groupby('DISTANCE')['index'].count()\
          .reset_index(name = 'Num Of Students')

**Day of Week Students Arrive**

In [None]:
#Count sign-ins per weekday, then put the rows back into calendar order
#(create_dataframe returns them ordered by count).
dow = create_dataframe(fin_mashup, 'DAY')
day_of_week = {name: pos for pos, name in
               enumerate(['Mon', 'Tues', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun'])}
filt = [day_of_week.get(name) for name in dow['DAY']]
dow['FILTER'] = filt
dow = dow.sort_values('FILTER').drop(columns = 'FILTER').reset_index(drop = True)


## The section below is the new code for the new dashboard setup

In [None]:
#Start setting up the dashboard with the new sign-in sheet setup. (1.26.22)

dash = pd.read_csv('Advising Foot Traffic Aug 25 2021 - Present.csv')

#Change the '#' column to be the range of 1 to the end of the df. This
#running number becomes the second half of each row's ID.
dash['#'] = range(1,len(dash)+1)

#Edit out all of the NaNs in the df. Again, an aesthetic thing for me.
#Each location's rows are copied before being handed to modify_df so the
#cleaning never operates on a view of dash.
ls = []
for i in dash['LOCATION'].unique():
    temp = dash[dash['LOCATION'] == i].copy()
    ls.append(modify_df(temp, i))

#Put the locations back together. Rows are now grouped by location, so the
#index is rebuilt to keep row labels and row positions in step.
dash = pd.concat(ls).reset_index(drop = True)

#Begin to make the individual id's for each student's sign in.
#First, convert 'DATE' to a datetime object.
dash['DATE'] = pd.to_datetime(dash['DATE'])

#Now, to create the numerical day representation for the date as counted
#from 1/1/1900, we subtract the origin date from each sign-in date.
elapsed = dash['DATE'] - pd.to_datetime('1/1/1900')

#To complete our id, we join the day count and the '#' value as text.
#The two columns are walked together (zip), so every id is built from,
#and stored on, the same row.
ids = [str(delta.days) + str(num) for delta, num in zip(elapsed, dash['#'])]

#Create ID column with the new ids
dash['ID'] = ids

In [None]:
#Parse 'TIME OF CONTACT' into datetime objects. Every time in the csv has to
#be written in a form pandas can read; filtering the column in a spreadsheet
#and scrolling to the bottom is a quick way to spot the odd entries that need
#fixing. Blank times usually come from group enrollments (typically around
#40 students) where the advisor skipped the times; copy the nearest time into
#those cells in the csv, because any row still without a time is dropped here.

dash['TIME OF CONTACT'] = pd.to_datetime(dash['TIME OF CONTACT'])

dash = dash.dropna(subset = ['TIME OF CONTACT']).reset_index(drop = True)

In [None]:
#Fill 'TIME RANGE' with the one-hour bucket each contact falls in,
#written on the 24-hour clock (e.g. '14:00-14:59').

times = [f"{stamp.hour}:00-{stamp.hour}:59" for stamp in dash['TIME OF CONTACT']]

#With 'TIME RANGE' in place the dataset is complete and can be reshaped
#for the dashboard.

dash['TIME RANGE'] = times

## Below are the final steps to convert the data to be ready to convert to dashboards

In [None]:
#Set up the data to convert to a dashboard

def dashboard_setup(data, columns, col_name):
    """Collapse several x-marked columns into one label column.

    For every row of `data`, the result holds the name of the first column
    in `columns` (in the order given) whose cell is 'x' or 'X', or '' when
    none of them is marked.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every column named in `columns`.
    columns : list of str
        Checkbox-style columns to consolidate, in priority order.
    col_name : str
        Name of the single column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        One column named `col_name`, one row per row of `data` in the same
        order, with a fresh 0..n-1 index.
    """
    #Start with every row unlabelled, then let each column claim the rows
    #it marks. A row already claimed by an earlier column keeps that label,
    #so rows with several marks resolve the same way on every run.
    labels = [''] * len(data)
    for name in columns:
        for pos, mark in enumerate(data[name]):
            if labels[pos] == '' and (mark == 'x' or mark == 'X'):
                labels[pos] = name

    return pd.DataFrame({col_name: labels})

In [None]:
#Setup 'Type of Student' column: each row gets the name of the student-type
#column that was marked with an x/X, or '' when none was marked.
#'HIGH SCHOOL' is not in this list.
stype = dashboard_setup(dash, ['CURRENT STUDENT', 'NEW STUDENT',
                               'RETURNING STUDENT', 
                               'WORKFORCE', 'VETERAN', 'ATHLETE'], 'Type of Student')

In [None]:
#Setup 'Reason for Visit' column: each row gets the name of the reason
#column that was marked with an x/X, or '' when none was marked.
reason = dashboard_setup(dash, ['ENROLL', 'ADD/DROP', 'QUESTIONS', 
                                'MAJOR CHANGE', 'DEGREE CHECK', 
                                'SUSPENSION'], 'Reason for Visit')

In [None]:
#Add new columns to the dataframe
#NOTE(review): stype and reason come back indexed 0..n-1, so this presumably
#relies on dash having been re-indexed 0..n-1 beforehand -- confirm.
dash['TYPE OF STUDENT'], dash['REASON FOR VISIT'] = stype, reason

In [None]:
#'HIGH SCHOOL' gets its own label column rather than going through
#dashboard_setup. Student workers, advising staff, and the front office
#manager are not consistent: some mark a student only as high school, others
#mark "current" or "new" *and* high school. Feeding that into the program
#above produced duplicate indices, so to keep both attributes trackable the
#x/X marks are turned into the text 'HIGH SCHOOL' here and blanks stay ''.

dash['MOD HIGH SCHOOL'] = ['HIGH SCHOOL' if mark in ('x', 'X') else ''
                           for mark in dash['HIGH SCHOOL']]


In [None]:
#Same month/day labelling as was done for the mashup, now for dash.

#Month number (1-12) -> abbreviation
month_dict = dict(zip(range(1, 13),
                      ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))

#Weekday number (0 = Monday) -> abbreviation
day_of_week = dict(enumerate(['Mon', 'Tues', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']))

#Look up the label for every row's date
mon = [month_dict.get(number) for number in dash['DATE'].dt.month]
dow = [day_of_week.get(number) for number in dash['DATE'].dt.weekday]

#Store both as new columns on dash
dash['MONTH'] = mon
dash['DAY'] = dow

In [None]:
%%capture --no-display 

#Take the dashboard columns as an independent copy, so the column
#assignments below write to final_dash itself rather than to a slice of dash.
final_dash = dash[[ 'ID', 'DATE', 'NAME', 'TIME RANGE', 'MONTH', 'DAY', 'LOCATION',
                    'APPT', 'DISTANCE', 'TYPE OF STUDENT', 'REASON FOR VISIT', 
                    'MOD HIGH SCHOOL', 'ADVISOR SIGN', 'ADV TIME']].copy()

#The final setup was to create a column that helps us filter advisors by 
#the employment type (i.e. 'EMP TYPE'). Keys are advisor initials in upper
#case; the empty string covers rows with no signature.
sign = {'KL':'28hr', 'KZ':'28hr', 'CJ':'40hr', 'SLB':'28hr','SV':'28hr',
         'BG':'40hr', 'AKP':'28hr', 'JW':'40hr', 'AS':'40hr', 'SP':'40hr',
         'SB':'20hr', 'DS':'Boss', 'AP':'40hr', 'SS': 'Not Sure', 'MZ':'Adm',
         'JD':'Adm', 'MP':'Adm', 'BM':'40hr', 'CS':'Not Sure', 'RM':'40hr',
         'SH':'40hr', 'KLA':'28hr', 'GR':'Adj', 'DR':'40hr', 'TB':'28hr', 
         'JEC':'28hr', 'KB':'28hr', 'KA':'28hr', '':'N/A'}

#Since there are times the cell is left blank, pandas records that as a float
#variable, which throws an error when doing a comprehension. Therefore, the 
#code below deals with that problem by filling all of the NaNs with a blank
#string before upper-casing the initials.
final_dash['ADVISOR SIGN'] = final_dash['ADVISOR SIGN'].fillna('')

s = [i.upper() for i in final_dash['ADVISOR SIGN']]

final_dash['ADVISOR SIGN'] = s

#Link dictionary we created to the advisor signatures and create new 
#columns with this new attribute. Initials missing from the dictionary
#get None.
s2 = [sign.get(i) for i in s]

final_dash['Emp Type'] = s2

In [None]:
#Display the finished dashboard table for a visual check before exporting.
final_dash


In [None]:
#Export dashboard to csv. to_csv() returns None when given a file path, so
#'dashboard' holds None, not the data.
dashboard = final_dash.to_csv('Dashboard Setup.csv', index = False)

## Below is the code for the old dashboard setup

In [None]:
#NOTE(review): every statement in this cell reads a frame called 'df' with
#columns such as 'Month' and 'Week of Enrollment'. No such frame is created
#in the cells shown above -- presumably it came from the old workflow; confirm
#before running.
#
#The hand-typed 'num' / 'a' lists below are sort keys assigned row by row.
#create_dataframe() returns rows ordered by count, so these lists only match
#the data they were written for; their lengths must also equal the number of
#rows or the assignment fails.

#Students per month

month = create_dataframe(df, 'Month')
num = [4, 8, 12, 2, 1, 7, 6, 3, 5, 11, 10, 9]
month['num'] = num
month = month.sort_values('num', axis = 0).reset_index(drop = True)[['Month', 'Count', '% Contribution']]

#Students seen per week over the last year, sorted from WK 1 to WK 4b. New year is indicated with the week 
#number followed by the letter 'b'.

wks = pd.DataFrame(df.groupby('Week of Enrollment')['Week of Enrollment'].count()).rename(columns = {'Week of Enrollment':'Num Per Week'}).reset_index()
#Second space-separated token of the week label; not used again in this cell.
wks['Week Num'] = wks['Week of Enrollment'].str.split(" ").str[1]
a = [0, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 52, 1, 19, 20, 21, 22, 23, 24, 25, \
     26, 27, 28, 53, 2, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 54, 3, 39, 40, 41, 42, \
     43, 44, 45, 46, 47, 48, 55, 4, 49, 50, 51, 5, 6, 7, 8]
wks['num'] = a
wks = wks.sort_values('num', axis = 0).reset_index(drop = True)
wks = wks[['Week of Enrollment', 'Num Per Week']]

#Day of Week 

dofw = create_dataframe(df, 'Day of WK')
num = [5, 1, 4, 6, 7, 2, 3]
dofw['num'] = num
dofw = dofw.sort_values('num', axis = 0).reset_index(drop = True)[['Day of WK', 'Count', '% Contribution']]

#Students Seen by Time of Day

tm = create_dataframe(df, 'Time Range')
num = [3, 4, 5, 6, 7, 8, 9, 10, 11, 1, 2]
tm['num'] = num
tm = tm.sort_values('num', axis = 0).reset_index(drop = True)
tm = tm[['Time Range', 'Count', '% Contribution']]

#Student assisted by method of contact.

meth_of_cont = create_dataframe(df, 'Contact Method')

#Reason for visit

reason = create_dataframe(df, 'Reason for Visit')

In [None]:
#This function requires that the first column is the "Location," then it is set up to create a pivot 
#table that has multiple other potential columns under "columns" parameter. Or maybe another way to 
#look at it is that columns[0] is set to be the index of the pivot table, no matter how you look at 
#it.

def loc_and_col(df, columns):
    """Cross-tabulate row counts of `df` by the given columns.

    `columns` must be a list. Its first entry becomes the row index of the
    result and the remaining entries become the column headers. Each cell
    holds the number of rows of `df` with that combination; combinations
    that never occur are shown as ''.
    """
    assert isinstance(columns, list)
    #One row per observed combination, with its size
    counts = df.groupby(columns).size().reset_index(name = 'Num of Students')
    #Spread everything after the first key across the columns
    pivoted = counts.pivot_table(values = 'Num of Students',
                                 index = columns[0],
                                 columns = columns[1:])
    return pivoted.fillna('')

#Students per location broken out by month, and by reason for visit.
#NOTE(review): 'df' is not created in the cells shown above -- confirm where
#it comes from before running.
by_month = loc_and_col(df, columns = ['Location', 'Month'])

by_reason = loc_and_col(df, columns = ['Location', 'Reason for Visit'])

## Dashboard Setup

The code below sets up the data for the dashboard. It cleans and organizes the sign-in data so that the output is a usable dataframe.

In [None]:
#NOTE(review): setup_bsc / setup_boe / setup_boa / setup_bom / setup_dist and
#the 'dist' frame are not defined in the cells shown above -- presumably they
#belong to the old workflow; confirm before running. Running this cell also
#overwrites the boa2/boe2/bsc2/bom2 frames created earlier with modify_df.
bsc2 = setup_bsc(bsc)
boe2 = setup_boe(boe)
boa2 = setup_boa(boa)
bom2 = setup_bom(bom)
dist2 = setup_dist(dist)

#All five cleaned frames gathered into one list.
ls_fin = [boa2, boe2, bsc2, bom2, dist2]

In [None]:
#First, one of the things you need to make sure of is that all of the dates are 
#formatted into the '%m/%d' format.  That way, the code below works. So this process
#happens after you run the code above, open up the 'mashup.csv' and format the date,
#and resave. Then run the code below.

rev_mash = pd.read_csv('mashup.csv')

#To build the ids we need the number of days between each sign-in date and
#an origin date. First, create a column with the origin date and convert it
#to a datetime object.

rev_mash['ORIGIN_DATE'] = '1/1/1900'
rev_mash['ORIGIN_DATE'] = pd.to_datetime(rev_mash['ORIGIN_DATE'])

#Then, create a list of the locations that we will cycle through.

loc = list(rev_mash['LOCATION'].unique())

#For each location, take an independent copy of its rows (so the edits below
#do not write into a slice of rev_mash), parse 'DATE' as '%m/%d', and move
#every date from the default year 1900 to 2021. The per-location frames are
#collected in 'ls'.

ls = []
for i in loc:  
    temp = rev_mash[rev_mash['LOCATION'] == i].copy()
    temp['DATE'] = pd.to_datetime(temp['DATE'], format='%m/%d')
    temp['DATE'] = [j.replace(year = 2021) for j in temp['DATE']]
    ls.append(temp)

#With the list of dfs, we use the pd.concat() to draw them all back together.
#The index is rebuilt so that row labels and row positions agree.

newdf = pd.concat(ls).reset_index(drop = True)

#Days elapsed between the 1/1/1900 origin and each 2021 date.

newdf['NEW_DATE'] = newdf['DATE'] - newdf['ORIGIN_DATE']

#As part of the ids, the '#' column is replaced by a list of continuous integers.

newdf['#'] = list(range(1, len(newdf)+1))

#Now for the final touch: the unique id for every student who 'walked' into
#Butler either virtually or physically. It is the day count (the .days of
#the timedelta in 'NEW_DATE') followed by the row's '#', both converted to
#strings; without the conversion the two integers would simply be added.
#The two columns are walked together (zip), so every id is built from, and
#stored on, the same row.

ids = [str(delta.days) + str(num) for delta, num in zip(newdf['NEW_DATE'], newdf['#'])]
newdf['ID'] = ids
newdf = newdf.drop(['ORIGIN_DATE', 'NEW_DATE'], axis = 1)

#Now, parse the contact times so ranges can be created for the 'TIME RANGE'
#column. Rows without a time are dropped.

newdf['TIME OF CONTACT'] = pd.to_datetime(newdf['TIME OF CONTACT'])

newdf = newdf[newdf['TIME OF CONTACT'].notna()].reset_index(drop = True)

#If for some reason I ever want to figure out how to take a datetime object and
#isolate the hour and minute and format it into a string '%h:%m', below is how I 
#figured out how to do it before realizing it was a waste of my time for 
#what I needed to do for this dataset.

#times = {}
#for i, j in enumerate(newdf['TIME OF CONTACT']):
#    times[i] = j.time()

#times2 = []
#for i in range(len(times)):
#    if list(times.values())[i].minute < 10:
#        times2.append(str(list(times.values())[i].hour) + ":" + '0' + str(list(times.values())[i].minute))
#    else:
#        times2.append(str(list(times.values())[i].hour) + ":" + str(list(times.values())[i].minute))

#Below is the way to create the time ranges. Very simple.
#Each contact is labelled with its hour on the 12-hour clock, e.g. 8 ->
#'8:00-8:59' and 13 -> '1:00-1:59'. Every hour of the day gets a label
#(0 -> '12:00-12:59', 20 -> '8:00-8:59', ...), so 'times' always has
#exactly one entry per row of newdf.

times = []
for i in newdf['TIME OF CONTACT']:
    hour12 = i.hour % 12 or 12
    times.append(f"{hour12}:00-{hour12}:59")

#Add the times object as the column of 'TIME RANGE'. Now I have the complete dataset that I can
#start altering into a usable dataset for a dashboard.

newdf['TIME RANGE'] = times

In [None]:
#Keep only the columns used by the old dashboard, in this order. Every
#name listed must already exist in newdf or the selection raises a KeyError.
newdf = newdf[['ID', 'DATE', '#', 'NAME', 'TIME OF CONTACT', 'TIME RANGE', 'LOCATION',
               'INTEROFFICE', 'INSTITUTIONAL', 'COMMITTEE', 'PHONE', 'EMAIL', 'TEXT',
               'VIRTUAL F2F', 'REN', 'GUEST STUDENT', 'ADVISING@BUTLERCC', 'AVISO',
               'OTHER', 'CONTINUING', 'NEW STUDENT', 'VETERAN', 'HIGH SCHOOL', 'SUSPENSION', 
               'WIA/TAA', 'ENROLLMENT', 'DEGREE CHECK', 'ADD/DROP', 'QUESTIONS', 
               'MAJOR CHANGE', 'FOLLOW UP', 'APPOINTMENT', 'ATHLETE',"INT'L", 
               'PAYMENT', 'BASE PASS', 'ADVISOR SIGN', 'ADV TIME']]

#Create function that will allow us to quickly go through each column
#and consolidate them into aggregated groups.

def dashboard_setup(df, columns, col_name):
    """Collapse several x-marked columns of `df` into one label column.

    For every row of `df`, the result holds the name of the first column
    in `columns` (in the order given) whose cell is 'x' or 'X', or '' when
    none of them is marked. Only the `df` argument is read; the function
    does not depend on any notebook-level frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in `columns`.
    columns : list of str
        Checkbox-style columns to consolidate, in priority order.
    col_name : str
        Name of the single column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        One column named `col_name`, one row per row of `df` in the same
        order, with a fresh 0..n-1 index.
    """
    #Every row starts unlabelled; each column then claims the rows it marks.
    #A row already claimed by an earlier column keeps that label.
    labels = [''] * len(df)
    for j in columns:
        for pos, mark in enumerate(df[j]):
            if labels[pos] == '' and (mark == 'x' or mark == 'X'):
                labels[pos] = j

    return pd.DataFrame({col_name: labels})

#Use function we just created to consolidate each of the columns into 
#single columns.

contact = dashboard_setup(newdf, ['PHONE', 'EMAIL', 'TEXT', 'VIRTUAL F2F'], 'CONTACT METHOD')
bu_contacts = dashboard_setup(newdf, ['INTEROFFICE', 'INSTITUTIONAL', 'COMMITTEE'], 'BUTLER CONTACTS')
source = dashboard_setup(newdf, ['REN', 'GUEST STUDENT', 'ADVISING@BUTLERCC', 'AVISO','OTHER'], 'SOURCE')
stype = dashboard_setup(newdf, ['CONTINUING', 'NEW STUDENT', 'VETERAN', 'HIGH SCHOOL', 'SUSPENSION', 'WIA/TAA', 'ATHLETE', "INT'L"], 'TYPE OF STUDENT')
reason = dashboard_setup(newdf, ['ENROLLMENT', 'DEGREE CHECK', 'ADD/DROP', 'QUESTIONS', 'MAJOR CHANGE', 'PAYMENT', 'BASE PASS'], 'REASON FOR VISIT')

#Join some of the columns from 'newdf' with the consolidated columns in a
#new df, 'dash_df'. The selection is copied so the assignments below write
#to dash_df itself rather than to a slice of newdf.

dash_df = newdf[['ID', 'DATE', 'NAME', 'TIME RANGE', 'LOCATION', 'ADVISOR SIGN']].copy()
dash_df['CONTACT METHOD'] = contact
dash_df['BUTLER CONTACTS'] = bu_contacts
dash_df['SOURCE'] = source
dash_df['TYPE OF STUDENT'] = stype
dash_df['REASON FOR VISIT'] = reason

In [None]:
#Calendar fields taken from the 'DATE' datetimes: the year, and the
#first three letters of the month name.

dash_df['YEAR'] = dash_df['DATE'].dt.year
dash_df['MONTH'] = dash_df['DATE'].dt.month_name().str[:3]

#Weekday number (0 = Monday) -> one/two-letter code, applied to every row.

dofw = dict(zip(range(7), ['M', 'T', 'W', 'R', 'F', 'S', 'SU']))
days = [dofw.get(code) for code in dash_df['DATE'].dt.dayofweek]
dash_df['DAY OF WK'] = days

#Week of enrollment, taken here as the ISO calendar week number. I am
#considering changing this from what I've been doing for the last year or
#so to something that aligns with industry standards.

dash_df['WEEK OF ENROLLMENT'] = dash_df['DATE'].dt.isocalendar().week

#Final column creation: a two-way flag derived from 'BUTLER CONTACTS' so we
#can easily filter by 'Butler Contact' or 'Student Contact'. A blank means
#no Butler contact type was marked for the row.

butler_con_mod = ['Student Contact' if kind == '' else 'Butler Contact'
                  for kind in dash_df['BUTLER CONTACTS']]

dash_df['BUTLER CONTACTS MOD'] = butler_con_mod

#Final dash_df, columns in dashboard order.

final_order = ['ID', 'DATE', 'YEAR', 'MONTH', 'DAY OF WK', 
               'WEEK OF ENROLLMENT', 'NAME', 'TIME RANGE', 'LOCATION',
               'CONTACT METHOD', 'BUTLER CONTACTS', 'BUTLER CONTACTS MOD', 
               'SOURCE', 'TYPE OF STUDENT', 'REASON FOR VISIT', 
               'ADVISOR SIGN']
dash_df = dash_df[final_order]

dash_df