# Update GRIT articles on the website from Stan's spreadsheet

Prerequisites: 

Download content_grit.json from the website.

Download content_grit_category.json from the website.

In [1]:
import pandas as pd
import re
import json
import os
from datetime import datetime
from calendar import month_name

# Lowercase English month names in calendar order; position 0 is January.
_MONTH_SLUG_NAMES = (
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
)

# Lookup from a month number given as a string ('1'..'12', no zero padding)
# to the lowercase month name used when building issue slugs.
month_mapping = {
    str(number): name
    for number, name in enumerate(_MONTH_SLUG_NAMES, start=1)
}

## Read the exported JSON

In [2]:
# Load the two website exports: the list of GRIT issues and the list of
# article categories. JSON is read as UTF-8 explicitly so the result does not
# depend on the platform's default text encoding.
with open('content_grit.json', mode='r', encoding='utf-8') as file:
    all_issues = json.load(file)

with open('content_grit_category.json', mode='r', encoding='utf-8') as file:
    categories = json.load(file)

In [3]:
# Two-way lookup between category ids and category titles.
# The title -> id direction is derived by inverting the id -> title mapping.
categories_id_to_name = dict(
    (category['id'], category['title']) for category in categories
)
categories_name_to_id = dict(
    zip(categories_id_to_name.values(), categories_id_to_name.keys())
)

In [4]:
# Display the title -> id lookup to check which category titles are available.
categories_name_to_id

{'Architectural Control Committee': 2,
 'ANN MTG / REPORTS': 3,
 'ART': 5,
 'BERNCO': 6,
 'BERNCO(NOISE)': 7,
 'BERNCO(ROADS)': 8,
 'COMMUNITY': 9,
 'COVENANTS GENERAL': 10,
 'DOGS/PETS': 11,
 'DRONES': 12,
 'ELENA GALLEGOS': 13,
 'ENVIRONMENT': 14,
 'FIRE': 15,
 'FIRE (GOATS)': 16,
 'FLORA/PLANTS/TREES': 17,
 'GEOLOGY': 18,
 'GRIT': 19,
 'HISTORY/ARCHAEOLOGY': 20,
 'HUNTING': 21,
 'INVASIVE PLANTS/TREES': 22,
 'LANDSCAPING': 23,
 'OUTDOOR LIGHTS': 24,
 'PROPERTY MAINT': 25,
 'RADON': 26,
 'RENTALS': 27,
 'RVs/BOATS': 28,
 'SAFETY/SECURITY': 29,
 'SEPTIC SYSTEMS / GRAY WATER': 30,
 'SHHA': 31,
 'SOLAR': 32,
 'TRASH BINS': 33,
 'TRASH GENERAL': 34,
 'WILDLIFE(BATS)': 35,
 'WILDLIFE(BEARS)': 36,
 'WILDLIFE(BIRDS)': 37,
 'WILDLIFE(COYOTES)': 38,
 'WILDLIFE(DEER)': 39,
 'WILDLIFE(GENERAL)': 40,
 'WILDLIFE(INSECTS/SPIDERS)': 41,
 'WILDLIFE(RODENTS)': 42,
 'WILDLIFE(SNAKES/REPTILES)': 43,
 'WILDLIFE(WILDCATS)': 44}

In [5]:
# Take the first exported issue as a sample and display it to inspect the
# fields of an issue record.
issue = all_issues[0]
issue

{'id': 352,
 'title': 'October, 2024 GRIT',
 'slug': 'october-2024-grit',
 'is_enabled': 1,
 'published_at': None,
 'expired_at': None,
 'content_group': None,
 'grit_date': '2024-10-01 20:26:42',
 'file': '/grit/2024/Oct 2024 Color GRIT.pdf',
 'image': ['/grit/2024/oct_2024_thumbnail.png'],
 'content': '<p>In the October 2024 issue of the GRIT, the SHHA President issued a call for new Board members, as well as a reminder that leaving food outside – whether in trash or containers – was a danger to bears, as they would likely have to be trapped and euthanized. The latter message was also mirrored in an article about Sandia Heights sightings of a mother bear and her three cubs. October’s monthly human-interest feature centered on resident Mary Cleo Fernandez. Author Claudia Mitchell chronicled Mary Cleo’s exceptional support for Casa Angelica, a home for individuals with profound developmental disabilities. In addition, there was a stirring memorial to Wade Jackson, by authors Katrina Sa

In [6]:
# Import the new entries from Stan, saved as CSV.
# Every column is converted to text so that Month can be used as a key into
# month_mapping and Year can be concatenated into the slug.
df = pd.read_csv('Articles_pre2017.csv')
df = df.astype('str')

# Translate the month numbers to month names for the whole column at once.
# Series.map leaves NaN where a value has no entry in month_mapping.
slug_month = df['Month'].map(month_mapping)
unknown_months = sorted(df.loc[slug_month.isna(), 'Month'].unique())
if unknown_months:
    # Report every offending value instead of failing on the first bad row.
    raise KeyError(f"Month values not found in month_mapping: {unknown_months}")

# Slug format: "<month name>-<year>-grit".
df['slug'] = slug_month + '-' + df['Year'] + '-grit'

In [14]:
# Display whatever record `issue` currently refers to.
issue

{'id': 352,
 'title': 'October, 2024 GRIT',
 'slug': 'october-2024-grit',
 'is_enabled': 1,
 'published_at': None,
 'expired_at': None,
 'content_group': None,
 'grit_date': '2024-10-01 20:26:42',
 'file': '/grit/2024/Oct 2024 Color GRIT.pdf',
 'image': ['/grit/2024/oct_2024_thumbnail.png'],
 'content': '<p>In the October 2024 issue of the GRIT, the SHHA President issued a call for new Board members, as well as a reminder that leaving food outside – whether in trash or containers – was a danger to bears, as they would likely have to be trapped and euthanized. The latter message was also mirrored in an article about Sandia Heights sightings of a mother bear and her three cubs. October’s monthly human-interest feature centered on resident Mary Cleo Fernandez. Author Claudia Mitchell chronicled Mary Cleo’s exceptional support for Casa Angelica, a home for individuals with profound developmental disabilities. In addition, there was a stirring memorial to Wade Jackson, by authors Katrina Sa

In [28]:
# Build the update payload (id, slug, article list) for every issue that has
# rows in the spreadsheet, then write it to updated_articles.json.

# Index the exported issues by slug once instead of rescanning all_issues for
# every slug. setdefault keeps the first issue when a slug occurs more than
# once, which matches taking element [0] of a filtered list.
issues_by_slug = {}
for exported_issue in all_issues:
    issues_by_slug.setdefault(exported_issue['slug'], exported_issue)

# Validate before building anything, so bad spreadsheet values are reported
# together and by name rather than as a bare IndexError/KeyError mid-loop.
missing_slugs = [slug for slug in df['slug'].unique() if slug not in issues_by_slug]
unknown_topics = [topic for topic in df['TOPIC'].unique() if topic not in categories_name_to_id]
if missing_slugs or unknown_topics:
    raise LookupError(
        "Cannot build article updates; "
        f"slugs with no exported issue: {missing_slugs}; "
        f"topics with no category: {unknown_topics}"
    )

# One timestamp for the whole batch so every article written by this run agrees.
created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

issues_to_update = []
# sort=False keeps the issues in order of first appearance in the spreadsheet.
for slug, df_issue in df.groupby('slug', sort=False):
    issue = issues_by_slug[slug]
    print('Updating: ' + issue['title'])

    new_article_list = [
        {
            'title': row['Title'],
            'author': row['First Author'],
            'created_at': created_at,
            # The category is written as a one-element list holding the id as a string.
            'category': [str(categories_name_to_id[row['TOPIC']])],
        }
        for _, row in df_issue.iterrows()
    ]

    # Only these fields are included in the update JSON that's uploaded to the website.
    issues_to_update.append({
        'id': issue['id'],
        'slug': slug,
        'articles': new_article_list,
    })

# Save the list of issue updates as a JSON file
with open('updated_articles.json', 'w') as json_file:
    json.dump(issues_to_update, json_file)

Updating: January 2015 GRIT
Updating: February 2015 GRIT
Updating: March 2015 GRIT
Updating: April 2015 GRIT
Updating: May 2015 GRIT
Updating: June 2015 GRIT
Updating: July 2015 GRIT
Updating: August 2015 GRIT
Updating: September 2015 GRIT
Updating: October 2015 GRIT
Updating: November 2015 GRIT
Updating: December 2015 GRIT
Updating: January 2016 GRIT
Updating: February 2016 GRIT
Updating: March 2016 GRIT
Updating: April 2016 GRIT
Updating: May 2016 GRIT
Updating: June 2016 GRIT
Updating: July 2016 GRIT
Updating: August 2016 GRIT
Updating: September 2016 GRIT
Updating: October 2016 GRIT
Updating: November 2016 GRIT
Updating: December 2016 GRIT


In [27]:
# Display the assembled update payload to review it.
issues_to_update

[{'id': 118,
  'slug': 'january-2015-grit',
  'articles': [{'title': 'SHHAAnnual Meeting announcements',
    'author': 'SHHA',
    'created_at': '2024-10-12 08:51:06',
    'category': ['3']},
   {'title': 'Covenant Violation',
    'author': 'Dick Wavrik',
    'created_at': '2024-10-12 08:51:06',
    'category': ['10']},
   {'title': 'Bernalillo County Ordinance Regarding Animals',
    'author': 'E&S',
    'created_at': '2024-10-12 08:51:06',
    'category': ['11']},
   {'title': 'Light Pollution',
    'author': 'Dick Wavrik',
    'created_at': '2024-10-12 08:51:06',
    'category': ['24']},
   {'title': 'SHHAHOA Complaince',
    'author': 'SHHA',
    'created_at': '2024-10-12 08:51:06',
    'category': ['31']},
   {'title': 'Trash and Recycle Bins',
    'author': 'Dick Wavrik',
    'created_at': '2024-10-12 08:51:06',
    'category': ['33']}]},
 {'id': 117,
  'slug': 'february-2015-grit',
  'articles': [{'title': 'SHHAAnnual Meeting announcements',
    'author': 'SHHA',
    'created_at

In [17]:
# Print the slug of every exported issue.
# This cell only inspects data: it uses its own loop variable and does not
# reassign `issues_to_update`, so running it cannot discard a payload that
# another cell has built or change what `issue` refers to.
for exported_issue in all_issues:
    print(exported_issue['slug'])

october-2024-grit
september-2024-grit
august-2024-grit
july-2024-grit
june-2024-grit
may-2024-grit
april-2024-grit
march-2024-grit
february-2024-grit
january-2024-grit
december-2023-grit
november-2023-grit
2023-october-grit
september-2023-grit
august-2023-grit-revised
august-2023-july-grit
july-2023-grit
june-2023-grit
may-2023-grit
april-2023-grit
grit-03-2023-2
grit-02-2023
grit-01-2023
grit-12-2022
grit-11-2022
grit-10-2022
grit-09-2022
grit-08-2022
grit-07-2022
grit-06-2022
grit-05-2022
grit-04-2022
grit-03-2022
grit-02-2022
grit-01-2022
grit-12-2021
grit-11-2021
grit-10-2021
grit-09-2021
grit-08-2021
grit-07-2021
grit-06-2021
grit-05-2021
grit-04-2021
grit-02-2021
grit-02-2021-2
grit-01-2021
grit-12-2020
grit-11-2020
grit-10-2020
grit-09-2020
grit-08-2020
grit-07-2020
grit-06-2020
grit-05-2020
grit-04-2020
grit-03-2020
grit-02-2020
grit-01-2020
grit-12-2019
grit-11-2019
grit-10-2019
grit-09-2019
grit-08-2019
grit-07-2019
grit-06-2019
grit-05-2019
grit-04-2019
grit-03-2019
grit-02-

In [None]:

# Walk the spreadsheet rows; for each one rebuild the slug of its issue, print
# it, and reset the article list of every exported issue carrying that slug.
for i, row in df.iterrows():
    slug = '-'.join((month_mapping[row['Month']], row['Year'], 'grit'))
    print(slug)

    for issue in all_issues:
        if issue['slug'] != slug:
            continue
        issue['articles'] = []
    # TODO: fill in the article list; this cell currently only empties it.

In [None]:
# Build a minimal update record (id + image path) for every entry in
# current_data and collect the records in updated_data.
# NOTE(review): `current_data` is not assigned in any cell visible in this
# notebook — presumably the exported issue list; confirm before running on a
# fresh kernel.

# Absolute, machine-specific folder. Within this cell it is only referenced by
# the disabled block further down.
text_folder = '/Users/heidi/Documents/SHHA/GRIT/GRIT_archive_AI_summaries/'

updated_data = []
for item in current_data:
    pdf = item['file']
    # year: the text after the last '-' in the file path, up to the first '_'.
    # month: the first two characters of what follows that '_'.
    # NOTE(review): presumably paths end in "-<year>_<month>.pdf" — confirm.
    # Within this cell, only the disabled block below uses year.
    year = item['file'].split('-')[-1].split('_')[0] 
    month = item['file'].split('-')[-1].split('_')[1][0:2]
    
    # Create a new item holding only the fields to be changed
    new_item = {}
    new_item['id'] = item['id']
    
    # Image path: the file path with its last four characters replaced by
    # '.png' (assumes a four-character extension such as '.pdf' — TODO confirm)
    new_item['image'] = pdf[:-4] + '.png'

    # Edit text path
    # The triple-quoted block below is a bare string expression, i.e. disabled
    # code: nothing inside it is executed.
    '''
    text_file = os.path.basename(item['file'])[:-4]+'_summary.txt'  
    text_file_path = os.path.join(text_folder,year,text_file)
    if os.path.exists(text_file_path):
        text = open(text_file_path).read()
        item['content'] = text
    '''
    updated_data.append(new_item)

In [None]:
# Write updated_data as JSON to a file whose name carries the current local
# time formatted as yymmddHHMM.
filename = 'content_grit_' + datetime.now().strftime('%y%m%d%H%M') + '.json'
with open(filename, 'w') as f:
    json.dump(updated_data, f)

In [None]:
# Debug display: show the value `month` was left with by whichever cell last assigned it.
month

In [None]:
# Debug display of `text_file`.
# NOTE(review): in the visible cells `text_file` is only assigned inside a
# triple-quoted (disabled) block, so this looks like it raises NameError on a
# fresh kernel — confirm.
text_file

In [None]:
# Debug display of `text`.
# NOTE(review): in the visible cells `text` is only assigned inside a
# triple-quoted (disabled) block, so this looks like it raises NameError on a
# fresh kernel — confirm.
text

## Parse each entry and create an updated JSON

In [None]:
# Point every existing entry at the standard file location
# /grit/<year>/SHHA-GRIT-<year>_<month>.pdf and record which year+month
# combinations are already present.
updated_data = []
existing_YM_entries = []
for item in current_data:
    year, month = extract_year_month(item['slug'])
    # The entry is modified in place; the same object is then collected.
    item['file'] = ''.join(['/grit/', year, '/SHHA-GRIT-', year, '_', month, '.pdf'])
    existing_YM_entries.append(year + month)
    updated_data.append(item)

In [None]:
#with open('updated_data.json', 'w') as json_file:
#    json.dump(updated_data, json_file)

In [None]:
# Turn the updated records into a table that keeps only the id, title, slug,
# grit_date and file fields, under the headers ID / Title / Slug /
# Enabled Date / File, in that column order.
df = pd.DataFrame(updated_data).rename(
    columns={
        'id': 'ID',
        'title': 'Title',
        'slug': 'Slug',
        'grit_date': 'Enabled Date',
        'file': 'File',
    }
)[['ID', 'Title', 'Slug', 'Enabled Date', 'File']]

#df.to_csv('updated_data.csv')

In [None]:
# Write the current table to updated_data.csv. The DataFrame index is written
# as the first column because index=False is not passed.
df.to_csv('updated_data.csv')

## Add in new data

In [None]:
# Now, add data that doesn't yet exist

# Find the issues that exist as PDFs under ./GRIT_archive but whose year+month
# key is not already in existing_YM_entries.
# Parsing: take the text after the last '-' in the file name, drop everything
# from the first '.', and split the rest on '_' into (year, month).
# NOTE(review): presumably names look like SHHA-GRIT-2016_05.pdf — confirm.
new_YM_entries = []
for root, dirs, files in os.walk('./GRIT_archive'):
    for pdf_name in files:
        if not pdf_name.endswith('pdf'):
            continue
        year, month = pdf_name.split('-')[-1].split('.')[0].split('_')
        if year + month not in existing_YM_entries:
            new_YM_entries.append((int(year), int(month)))

# Newest first. Sorting the integer pairs orders by numeric value, so the
# result does not depend on months being zero-padded in the file names.
new_YM_entries.sort(reverse=True)

# Highest id already in use (0 when there are no existing entries). New
# entries are numbered from the next id up: the offset starts at 1 so the
# first new entry is not given an id that an existing entry already has.
starting_id = max((int(item['id']) for item in current_data), default=0)

# One timestamp for the whole batch.
published_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

data_new = []
for offset, (year, month) in enumerate(new_YM_entries, start=1):
    month_name_str = month_name[month]
    grit_date = f"{year}-{month:02d}-01 00:00:00"
    file_path = f"/grit/{year}/SHHA-GRIT-{year}_{month:02d}.pdf"

    data_new.append({
        'id': starting_id + offset,
        'title': f"{month_name_str} {year} GRIT",
        'slug': f"{month_name_str.lower()}-{year}-grit",
        'is_enabled': 1,
        'published_at': published_at,
        'expired_at': None,
        'content_group': None,
        'grit_date': grit_date,
        'file': file_path,
    })

In [None]:
# Turn the new entries into a table that keeps only the id, title, slug,
# grit_date and file fields, under the headers ID / Title / Slug /
# Enabled Date / File, then write it to data_new.csv.
df = pd.DataFrame(data_new).rename(
    columns={
        'id': 'ID',
        'title': 'Title',
        'slug': 'Slug',
        'grit_date': 'Enabled Date',
        'file': 'File',
    }
)[['ID', 'Title', 'Slug', 'Enabled Date', 'File']]

df.to_csv('data_new.csv')

# Finally, fix the URL issue

In [None]:
# Load the combined table whose File paths need correcting; this replaces
# whatever `df` held before.
df = pd.read_csv('content_grit_combined_badURL.csv')

In [None]:
def _drop_first_char(value):
    """Return value without its first character if it is a str, else value unchanged."""
    if isinstance(value, str):
        return value[1:]
    return value


# Remove the first character of every string in the File column; cells that
# are not strings pass through untouched. The whole table is then displayed.
# NOTE(review): the character is removed unconditionally, so running this
# cell a second time strips another character.
df['File'] = df['File'].apply(_drop_first_char)
df

In [None]:
# Write the corrected table to content_fixed_filepaths.csv. The DataFrame
# index is written as the first column because index=False is not passed.
df.to_csv('content_fixed_filepaths.csv')