In [None]:
"""
The purpose of this script is to automate SLA tasks. This includes creating folders for each month and establishment,
creating emails and letters from templates, and more. 


Process:
1) Copy list of items from online agenda into text doc as shown below
2) I will likely want to quickly remove certain text from this, such as section descriptions like 'New Liquor License Applications,' or other info that will not go into the final outputs like the description of the license type. 
3) The most important thing will likely be keeping each entry to one line in the input file as I will likely use line breaks to parse each item. 
4) If this doesn't work, another option might be the period (.) followed by blank spaces. 
5) DBA format seems to be in parentheses after the name. 
6) I will have to manually add lawyers names.
7) Agenda number is the number in the list itself. 

----------------------------------------------------------------------------
1 - Open and read lines of text file
    - https://www.geeksforgeeks.org/reading-writing-text-files-python/
2 - Iterate through each line and build a dataframe / table from the relevant contents
3 - Keep only the rows that begin with a digit. These should be the agenda items.
4 - split at period and get first item, which is the numeric agenda item number. 
5 - split again and get the business name
6 - split again and get the address


TO DO 
Create Main Address substring 
Create Final string for files / folder names

"""

In [3]:
import pandas as pd
import datetime as dt
from datetime import datetime
import openpyxl
import os

In [4]:
# Location of the text file that holds the agenda items copied from the online agenda.
# NOTE(review): this is an absolute, machine-specific Windows path; it has to be edited
# before the notebook can run on another computer.
agenda_pull = r"C:\Users\MN03\Desktop\Calvin Docs\SLA\Automation Work\Example_Agenda_Pull.txt"

In [5]:
# Check that the agenda text file exists at the configured path.
os.path.isfile(agenda_pull)

True

In [6]:
# Open the agenda text file for reading.
# 'r' = read-only access mode. The file is only ever read, so write access ('r+') is not
# requested; asking for it would make the open fail on a read-only file.
agenda = open(agenda_pull, 'r')

In [7]:
# readlines() returns a list in which each element is one line of text, which in this
# case is a single establishment.
# The handle is closed as soon as the lines are in memory so the file is not left open
# (and locked on Windows) for the rest of the session.
try:
    contents = agenda.readlines()
finally:
    agenda.close()

In [10]:
# Show the second line read from the file (index 1) as a quick sanity check.
print(contents[1])

2.    Hwa Yuan Szechuan, 42-44 E B'way (op/alt: add additional floors/method of operation: add karaoke)



In [88]:
# Build out a DataFrame from the text file: start with an empty frame that has a
# single 'line' column.
# (An earlier draft declared agenda_number / business_name / dba / address up front.)
df = pd.DataFrame(data=None, columns=['line'])

In [89]:
# One row per line of the agenda pull, held in a single 'line' column.

agenda_df = pd.DataFrame(data=contents, columns=['line'])

In [90]:
# First character of the string in the first row.
# The column is selected by label and the row by position. A bare integer key on the row
# Series (agenda_df.loc[0][0]) relies on a positional fallback that pandas has deprecated.
agenda_df['line'].iloc[0][0]

'I'

In [91]:
# Flag agenda items: 'entry_row' is True for every line whose first character is a digit
# (the agenda number). Lines that do not start with a digit are removed in a later step.
agenda_df['entry_row'] = agenda_df['line'].str.get(0).str.isdigit()

In [92]:
# Keep only the agenda-item rows (those whose line starts with a digit).
# entry_row is boolean, so indexing with it drops every False row.
# .copy() makes the result an independent frame, so the column assignments in the
# following cells do not raise SettingWithCopyWarning.

agenda_df = agenda_df[agenda_df.entry_row].copy()

In [93]:
# The agenda number is the text before the first period of each line.
agenda_df['agenda_number'] = agenda_df['line'].str.split(".").str.get(0)

In [94]:
# List the column names currently in agenda_df.
agenda_df.columns

Index(['line', 'entry_row', 'agenda_number'], dtype='object')

In [95]:
# Display agenda_df to inspect the extracted agenda numbers.
agenda_df

Unnamed: 0,line,entry_row,agenda_number
1,"2. Hwa Yuan Szechuan, 42-44 E B'way (op/alt...",True,2
2,"3. Bridgeview Hotel, 50 Bowery \n",True,3
4,"4. Forsythia (JDS Restaurant LLC), 9 Stanto...",True,4
5,"5. Roberta's (Avenue A Pizza LLC), 15 Ave A...",True,5
6,"6. Lucky Lindon, 21 Essex St (op)\n",True,6
7,"7. Avenue C Hospitality LLC, 102 Ave C (op)\n",True,7
8,"8. Bar and Events 14th Street LLC, 124 E 14...",True,8
9,"9. Lucky Star (Parkside 3 NYC LLC), 135 Div...",True,9
10,"10. Moneygoround Inc, 235 Eldridge St (op)\n",True,10
11,"11. Sally Can Wait LLC, 252 Broome St (op)\n",True,11


In [96]:
# Create a column with all agenda item info except for the agenda item number.
# n=1 splits only at the first period (the one that ends the agenda number), so a period
# later in the line (e.g. "St." or "Inc.") no longer truncates the entry.
agenda_df['agenda_info_no_number'] = agenda_df.loc[:, 'line'].str.split(pat=".", n=1).str[1]

In [97]:
# Display agenda_df to inspect the new agenda_info_no_number column.
agenda_df

Unnamed: 0,line,entry_row,agenda_number,agenda_info_no_number
1,"2. Hwa Yuan Szechuan, 42-44 E B'way (op/alt...",True,2,"Hwa Yuan Szechuan, 42-44 E B'way (op/alt: ..."
2,"3. Bridgeview Hotel, 50 Bowery \n",True,3,"Bridgeview Hotel, 50 Bowery \n"
4,"4. Forsythia (JDS Restaurant LLC), 9 Stanto...",True,4,"Forsythia (JDS Restaurant LLC), 9 Stanton ..."
5,"5. Roberta's (Avenue A Pizza LLC), 15 Ave A...",True,5,"Roberta's (Avenue A Pizza LLC), 15 Ave A (..."
6,"6. Lucky Lindon, 21 Essex St (op)\n",True,6,"Lucky Lindon, 21 Essex St (op)\n"
7,"7. Avenue C Hospitality LLC, 102 Ave C (op)\n",True,7,"Avenue C Hospitality LLC, 102 Ave C (op)\n"
8,"8. Bar and Events 14th Street LLC, 124 E 14...",True,8,"Bar and Events 14th Street LLC, 124 E 14th..."
9,"9. Lucky Star (Parkside 3 NYC LLC), 135 Div...",True,9,"Lucky Star (Parkside 3 NYC LLC), 135 Divis..."
10,"10. Moneygoround Inc, 235 Eldridge St (op)\n",True,10,"Moneygoround Inc, 235 Eldridge St (op)\n"
11,"11. Sally Can Wait LLC, 252 Broome St (op)\n",True,11,"Sally Can Wait LLC, 252 Broome St (op)\n"


In [98]:
# Split each entry at its first comma. The partition yields three columns:
#   0: business name
#   1: the comma itself
#   2: address, plus license type and notes
# Columns 0 and 2 are stored; the separator column is discarded.

name_and_rest = agenda_df['agenda_info_no_number'].str.partition(sep=",", expand=True)
agenda_df['b_name'] = name_and_rest[0]
agenda_df['address'] = name_and_rest[2]


In [99]:
# This creates a column showing the text in the first set of parentheses of the address.
# A second set is not important because it will always be notes on the liquor licence,
# which aren't needed for this exercise.
# The pattern is a raw string so that "\(" reaches the regex engine as written instead of
# being an invalid string escape; expand=False returns a single Series for the one group.
agenda_df['address_sup'] = agenda_df['address'].str.extract(r'\(([^)]+)', expand=False)

In [100]:
# The first set of parentheses contains either an address supplement, such as 'basement',
# or information about the liquor license.
# Flag the entries that mention the license codes 'op' or 'wb' (missing values are flagged
# too) so that only genuine address supplements remain after the next step.
# Word boundaries (\b) make the codes match only as whole words, so a supplement such as
# 'rooftop' or 'shop' is not mistaken for the 'op' license code.

agenda_df['address_sup3'] = agenda_df['address_sup'].str.contains(r'\b(?:op|wb)\b', na=True)

In [153]:
# Display agenda_df.
# NOTE(review): this cell's execution count (153) is higher than that of the cells below
# it, and its saved output already contains prim_address, b_tradename and b_llc_name,
# which are only created further down - the cell was run out of order.
agenda_df

Unnamed: 0,line,entry_row,agenda_number,agenda_info_no_number,b_name,address,address_sup,address_sup3,prim_address,b_tradename,b_llc_name
1,"2. Hwa Yuan Szechuan, 42-44 E B'way (op/alt...",True,2,"Hwa Yuan Szechuan, 42-44 E B'way (op/alt: ...",Hwa Yuan Szechuan,42-44 E B'way (op/alt: add additional floors/...,,True,42-44 E B'way,Hwa Yuan Szechuan,
2,"3. Bridgeview Hotel, 50 Bowery \n",True,3,"Bridgeview Hotel, 50 Bowery \n",Bridgeview Hotel,50 Bowery \n,,True,50 Bowery,Bridgeview Hotel,
4,"4. Forsythia (JDS Restaurant LLC), 9 Stanto...",True,4,"Forsythia (JDS Restaurant LLC), 9 Stanton ...",Forsythia (JDS Restaurant LLC),9 Stanton St (upgrade to op)\n,,True,9 Stanton St,Forsythia,JDS Restaurant LLC
5,"5. Roberta's (Avenue A Pizza LLC), 15 Ave A...",True,5,"Roberta's (Avenue A Pizza LLC), 15 Ave A (...",Roberta's (Avenue A Pizza LLC),15 Ave A (op)\n,,True,15 Ave A,Roberta's,Avenue A Pizza LLC
6,"6. Lucky Lindon, 21 Essex St (op)\n",True,6,"Lucky Lindon, 21 Essex St (op)\n",Lucky Lindon,21 Essex St (op)\n,,True,21 Essex St,Lucky Lindon,
7,"7. Avenue C Hospitality LLC, 102 Ave C (op)\n",True,7,"Avenue C Hospitality LLC, 102 Ave C (op)\n",Avenue C Hospitality LLC,102 Ave C (op)\n,,True,102 Ave C,Avenue C Hospitality LLC,
8,"8. Bar and Events 14th Street LLC, 124 E 14...",True,8,"Bar and Events 14th Street LLC, 124 E 14th...",Bar and Events 14th Street LLC,124 E 14th St (op)\n,,True,124 E 14th St,Bar and Events 14th Street LLC,
9,"9. Lucky Star (Parkside 3 NYC LLC), 135 Div...",True,9,"Lucky Star (Parkside 3 NYC LLC), 135 Divis...",Lucky Star (Parkside 3 NYC LLC),135 Division St (wb)\n,,True,135 Division St,Lucky Star,Parkside 3 NYC LLC
10,"10. Moneygoround Inc, 235 Eldridge St (op)\n",True,10,"Moneygoround Inc, 235 Eldridge St (op)\n",Moneygoround Inc,235 Eldridge St (op)\n,,True,235 Eldridge St,Moneygoround Inc,
11,"11. Sally Can Wait LLC, 252 Broome St (op)\n",True,11,"Sally Can Wait LLC, 252 Broome St (op)\n",Sally Can Wait LLC,252 Broome St (op)\n,,True,252 Broome St,Sally Can Wait LLC,


In [102]:
# Blank out (empty string) every address supplement that the address_sup3 flag marks as
# license information ('op' / 'wb') rather than a real supplement.
agenda_df['address_sup'] = agenda_df['address_sup'].mask(cond=agenda_df['address_sup3'], other="")

In [103]:
# The primary address is everything to the left of the first opening parenthesis
# in the address column.

agenda_df['prim_address'] = agenda_df['address'].str.partition("(")[0]

In [104]:
# Remove '\n' characters from each primary address, then strip the surrounding spaces.
# Without the strip the value keeps the space that followed the comma (and the one before
# the parenthesis), which ends up in the folder names as ' 21 Essex St  - Lucky Lindon'.
agenda_df['prim_address'] = (
    agenda_df['prim_address']
    .str.replace('\n', '', regex=False)
    .str.strip()
)

In [143]:
# The business trade name is everything to the left of the first opening parenthesis
# in the business name.
agenda_df['b_tradename'] = agenda_df['b_name'].str.partition("(")[0]

In [152]:
# Text inside the first set of parentheses of the business name (in the sample agenda
# this is the LLC name, e.g. 'JDS Restaurant LLC'). Names without parentheses get an
# empty string instead of NaN.
# The pattern is a raw string so that "\(" reaches the regex engine as written instead of
# being an invalid string escape; expand=False returns a single Series for the one group.
agenda_df['b_llc_name'] = (
    agenda_df['b_name']
    .str.extract(r'\(([^)]+)', expand=False)
    .fillna('')
)

In [62]:
# Remove leading whitespace from each agenda line (left disabled: as written, the assignment below would replace agenda_df with a Series).
# agenda_df = agenda_df['line'].str.lstrip()

In [110]:
# Current month (number and name) and year. These are used to name the top level folder.
# The clock is read once so the three values always describe the same instant; separate
# datetime.now() calls could straddle a month or year boundary and disagree.
now = datetime.now()
month_name = now.strftime("%B")
month_num = str(now.month)
year = str(now.year)

In [120]:
# Name of the top level directory: '<month number>-<month name> <year> SLA'.

top_level_dir = "{}-{} {} SLA".format(month_num, month_name, year)
print(top_level_dir)

8-August 2021 SLA


In [141]:
# Create the top level output directory: <Desktop>/SLA_OUTPUT/<top_level_dir>.
# A folder for each establishment is created inside it by a later cell.
#
# Finding the desktop path:
# https://stackoverflow.com/questions/34275782/how-to-get-desktop-location
#
# os.makedirs (rather than os.mkdir) creates the missing intermediate folders:
# https://stackoverflow.com/questions/39730688/os-mkdir-error-system-cannot-find-the-specified-path/39731195

desktop = os.path.expanduser("~/Desktop")
top_folder = 'SLA_OUTPUT'

filepath = os.path.join(desktop, top_folder, top_level_dir)

# exist_ok=True lets this cell be re-run; without it os.makedirs raises FileExistsError
# as soon as the folder already exists.
os.makedirs(filepath, exist_ok=True)
print(filepath)



C:\Users\MN03/Desktop\SLA_OUTPUT\8-August 2021 SLA
 <class 'str'>


In [158]:
# List the column names in agenda_df after the parsing steps.
agenda_df.columns

Index(['line', 'entry_row', 'agenda_number', 'agenda_info_no_number', 'b_name',
       'address', 'address_sup', 'address_sup3', 'prim_address', 'b_tradename',
       'b_llc_name'],
      dtype='object')

In [190]:
# Trim whitespace from both ends of the trade name. Consider doing this for other columns.
agenda_df['b_tradename'] = agenda_df['b_tradename'].str.strip()


In [212]:
# Create one folder per establishment inside the month's top level folder.
# The folder name is the primary address, a dash, then the trade name if it exists,
# else the LLC name. These follow this pattern: '45 Avenue B - Lamias Fish Market'
#
# Characters that Windows does not allow in file or folder names are dropped, so a name
# containing e.g. '/' or ':' cannot fail or create an unintended nested folder.
illegal_chars = str.maketrans('', '', '<>:"/\\|?*')

for index, row in agenda_df.iterrows():
    est_name = row.b_tradename if row.b_tradename != '' else row.b_llc_name
    # strip() keeps stray spaces around the address (and at the ends of the finished
    # name) out of the folder name.
    est_filepath = (row.prim_address.strip() + ' - ' + est_name).translate(illegal_chars).strip()

    fin_filepath = os.path.join(desktop, top_folder, top_level_dir, est_filepath)
    # exist_ok=True lets the cell be re-run without raising FileExistsError for folders
    # that were already created.
    os.makedirs(fin_filepath, exist_ok=True)
        
        

In [None]:
"""
TO DO
 - Create code to remove illegal characters from strings
 - Start scripting work to input values into template letters
 - Create output for excel, which can serve a number of functions:
     - a place to input additional info like lawyers name
     - track other SLA items
 - Create similar script that takes as an input an excel file instead of a text file.
 - Refactor the script into a function whose only input is the text file.

"""