### This notebook consists of code for creating the HTML files for the website each time the data is updated.

##### Set-up

In [13]:

import json
import os
from pathlib import Path
import glob
import shutil
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from jinja2.exceptions import UndefinedError

In [None]:
def delete_ds_store(path):
    """Remove every ``.DS_Store`` file found at or below ``path``.

    The directory tree is searched recursively with ``glob`` and each
    removed file is reported on stdout.
    """
    pattern = os.path.join(path, '**', '.DS_Store')
    for stale_file in glob.glob(pattern, recursive=True):
        os.remove(stale_file)
        print(f"Deleted: {stale_file}")
        

# Remove .DS_Store files from the notebook's current working directory tree.
# NOTE(review): the data folders used by the following cells are addressed
# through "..", i.e. outside this tree — confirm they do not need the same cleanup.
current_directory = os.getcwd()


delete_ds_store(current_directory)


In [None]:
# Copying people directories

def copy_folders(source, destination):
    """Recursively copy the directory ``source`` to ``destination``.

    An already existing ``destination`` is updated in place (files from
    ``source`` overwrite files of the same name) instead of aborting the
    copy, so the notebook can be re-run each time the data is updated.
    Files that exist only in ``destination`` are left untouched.

    Failures are reported on stdout and never raised, so one broken folder
    does not stop the remaining copies.

    Args:
        source: Path of the directory to copy.
        destination: Path the directory tree is copied to.
    """
    try:
        # dirs_exist_ok=True: without it copytree raises FileExistsError as
        # soon as the destination is present, i.e. on every run but the first.
        shutil.copytree(source, destination, dirs_exist_ok=True)
        print("Folders copied successfully!")
    except shutil.Error as e:
        # shutil.Error subclasses OSError, so it has to be handled first.
        print(f"Error copying folders: {e}")
    except OSError as e:
        print(f"Error creating destination directory: {e}")

# Where the member folders are read from and where the selected ones are copied to.
source_dir = "../temp/group-data/members"
destination_dir = "../group-data/members"

# Walk the member folders and copy those whose basic_info.json leaves the
# "dti" display flag switched on.
for folder_name in os.listdir(source_dir):
    folder_path = os.path.join(source_dir, folder_name)
    if not os.path.isdir(folder_path):
        continue
    json_path = os.path.join(folder_path, "jsons", "basic_info.json")
    if not os.path.exists(json_path):
        continue
    with open(json_path) as f:
        data = json.load(f)
        display_info = data.get("display", {})
        # A missing "dti" entry defaults to True, so such members are copied too.
        if not display_info.get("dti", True):
            continue
        destination_folder = os.path.join(destination_dir, folder_name)
        copy_folders(folder_path, destination_folder)

In [None]:
# Copying common article content
# Folder the website data is read from, and the folder it is copied into.
source_folder = '../temp/group-data/website_data'
destination_folder = '../group-data/website_data'

def copy_json_files(source_folder, destination_folder):
    """Copy the article JSON files meant for the 'dti' platform, plus their images.

    Every ``*.json`` file in ``<source_folder>/content`` whose ``platforms``
    entry contains ``'dti'`` is copied to ``<destination_folder>/content``;
    its cover image and content images are then copied below
    ``<destination_folder>/website_files``.  Both destination folders are
    created up front.  Files that are not valid JSON are reported on stdout
    and skipped.
    """
    content_src = os.path.join(source_folder, 'content')
    content_dst = os.path.join(destination_folder, 'content')
    website_files_dst = os.path.join(destination_folder, 'website_files')

    for needed_dir in (content_dst, website_files_dst):
        os.makedirs(needed_dir, exist_ok=True)

    for filename in os.listdir(content_src):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(content_src, filename)
        with open(file_path, 'r') as file:
            try:
                json_data = json.load(file)
                platforms = json_data.get('platforms')
                if not platforms or 'dti' not in platforms:
                    continue
                shutil.copy(file_path, os.path.join(content_dst, filename))
                print(f"Copied file: {filename} from '{content_src}' to '{content_dst}'")

                process_cover_image(json_data, source_folder, website_files_dst)
                process_content_images(json_data, source_folder, website_files_dst)
            except json.JSONDecodeError:
                print(f"Error decoding JSON in file: {file_path}")

def process_cover_image(json_data, source_folder, destination_website_files_folder):
    """Copy an article's cover image into the website files folder.

    ``json_data['cover_image']`` is read as a path relative to
    ``source_folder``.  When the entry is present and the file exists, it is
    copied to ``<destination_website_files_folder>/images/article_content/``
    under its base name; otherwise nothing happens.
    """
    cover_image_path = json_data.get('cover_image')
    if not cover_image_path:
        return

    source_image_path = os.path.join(source_folder, cover_image_path)
    if not os.path.exists(source_image_path):
        return

    destination_image_path = os.path.join(
        destination_website_files_folder,
        'images',
        'article_content',
        os.path.basename(cover_image_path),
    )
    os.makedirs(os.path.dirname(destination_image_path), exist_ok=True)
    shutil.copy(source_image_path, destination_image_path)
    print(f"Copied file: {cover_image_path} from '{source_folder}' to '{destination_image_path}'")

def process_content_images(json_data, source_folder, destination_website_files_folder):
    """Copy the images referenced in an article's ``content`` mapping.

    Every entry of ``json_data['content']`` whose key starts with ``'img'``
    is read as a path relative to ``source_folder``.  Existing files are
    copied to ``<destination_website_files_folder>/images/article_content/``
    under their base name; empty values and missing files are ignored.
    """
    content = json_data.get('content')
    if not content:
        return

    for key, image_path in content.items():
        if not key.startswith('img') or not image_path:
            continue
        source_image_path = os.path.join(source_folder, image_path)
        if not os.path.exists(source_image_path):
            continue
        destination_image_path = os.path.join(
            destination_website_files_folder,
            'images',
            'article_content',
            os.path.basename(image_path),
        )
        os.makedirs(os.path.dirname(destination_image_path), exist_ok=True)
        shutil.copy(source_image_path, destination_image_path)
        print(f"Copied file: {image_path} from '{source_folder}' to '{destination_image_path}'")

# Run the copy with the two folders defined at the top of this cell.
copy_json_files(source_folder, destination_folder)


In [None]:
#Function for creating proper html file names
def page_link(a):
    """Return ``a`` with every space replaced by an underscore.

    Values that contain no space are returned unchanged.
    """
    return a.replace(' ', '_') if ' ' in a else a

In [14]:
# Reading info.json
# Collects every "<member>/info.json" found directly below MEMBERS_DIR into one DataFrame.
# NOTE(review): this cell looks in "../../group-data/members" for "<member>/info.json",
# whereas the other cells of this notebook use "../group-data/members" and
# "<member>/jsons/basic_info.json" — confirm which layout is current.
MEMBERS_DIR = Path("../../group-data/members")
# One dict per member, in the order the glob yields the files.
info_json_list = []
for single_info_file_path in MEMBERS_DIR.glob("*/info.json"):
    with open(single_info_file_path, 'r') as f_info:
        member_data = json.load(f_info)
    info_json_list.append(member_data)
info_json_df = pd.DataFrame(info_json_list)
info_json_df

Unnamed: 0,first_name,last_name,image_path,cover_image_path,id,introduction,nick_name
0,Sofia,Biriouk,media/images/sofia.jpg,media/images/cover.jpg,sofia_biriouk,,
1,Vicente,Amado Olivo,media/images/ESD_headshot.jpg,media/images/cover.jpg,vicente_amado,I am a graduate student at Michigan State Univ...,
2,Kevin,Cawley,media/images/KevinCawleyFall2022.jpg,media/images/cover.jpg,kevin_cawley,,
3,Yuki,Matsumura,media/images/yuki_face.png,media/images/cover.jpg,yuki_matsumura,"Hello, I am a first-year physics graduate stud...",
4,Wolfgang,Kerzendorf,media/images/wolfgang.jpg,media/images/cover.jpg,wolfgang_kerzendorf,I am an astrophysicist deeply intrigued by nuc...,
5,Sona,Chitchyan,media/images/sona_photo.jpg,media/images/cover.jpg,sona_chitchyan,Sona is a Master's student in astrophysics pri...,
6,Richard,Dow,media/images/richard.jpg,media/images/cover.jpg,richard_dow,"Hi, my name is Tripp, and I'm a computer scien...",Tripp


In [32]:
# Reading experiences.json
# Builds one DataFrame row per experience entry of every member, tagged with the member id.
# NOTE(review): this cell searches for "experiences.json" while MemberData.experience()
# loads "experience.json" — confirm which file name is current.
experiences_json_list = []
for single_member_exp_file_path in MEMBERS_DIR.rglob("experiences.json"):
    with open(single_member_exp_file_path, "r") as f_exp:
        member_exp_data = json.load(f_exp)
    # info.json is read from the parent of the directory holding experiences.json.
    info_json_file_path = single_member_exp_file_path.parent.parent/"info.json"
    with open(info_json_file_path, "r") as file_info:
        member_info_data = json.load(file_info)
    mem_id = member_info_data["id"]
    # Each experience entry is annotated in place with the owning member's id.
    for single_exp in member_exp_data:
        single_exp["id"] = mem_id
        experiences_json_list.append(single_exp)
# Keep only the listed columns, in this order.
experiences_json_df = pd.DataFrame(experiences_json_list)[
    [
        "id",
        "role",
        "start_date",
        "end_date",
        "institution",
        "group",
    ]
]
experiences_json_df

Unnamed: 0,id,role,start_date,end_date,institution,group
0,sofia_biriouk,STEAM Learning and Education Intern,2023-01-01,2023-12-31,Holland Public Museum,
1,sofia_biriouk,Undergraduate Learning Assistant for Intro to ...,2023-01-01,2023-12-31,New York University,
2,sofia_biriouk,Professorial Assistant,2021-01-01,2023-12-31,Michigan State University,
3,kevin_cawley,Software Intern,2022-01-01,2023-12-31,Northrop Grumman Corporation,
4,kevin_cawley,Professorial Assistant,2020-01-01,2021-12-31,Michigan State University,
5,wolfgang_kerzendorf,Principal Investigator,2013-01-01,,TARDIS,
6,wolfgang_kerzendorf,Assistant Professor,2019-08-15,,Michigan State University,
7,wolfgang_kerzendorf,Senior Research Associate,2019-01-01,2019-12-31,New York University,
8,wolfgang_kerzendorf,ESO Fellow,2014-10-01,2018-12-31,European Southern Observatory,
9,wolfgang_kerzendorf,Postdoctoral Fellow,2011-10-01,2014-09-30,University of Toronto,


In [None]:
class MemberData:
    """Access to the JSON files kept for a single group member.

    The member's ``jsons`` directory is located once, at construction time,
    by matching ``member_id`` against the ``"id"`` field of every
    ``<member>/jsons/basic_info.json`` below ``members_dir``.  Each public
    method then loads one JSON file from that directory.
    """

    def __init__(self, member_id, members_dir="../group-data/members/"):
        """Locate the ``jsons`` directory of ``member_id``.

        Args:
            member_id: Value expected in the ``"id"`` field of the member's
                ``basic_info.json``.
            members_dir: Directory containing one sub-directory per member.
                Defaults to the location this class previously hard-coded.
        """
        self.member_id = member_id
        self.members_dir = members_dir
        self.jsons_path = self._find_jsons_path(member_id)

    def _find_jsons_path(self, member_id):
        """Return the ``jsons`` directory whose ``basic_info.json`` has this id.

        When no member matches, the text ``"Couldn't find <member_id>"`` is
        returned instead of a path (kept for backward compatibility); loading
        any file from such an instance then fails with ``FileNotFoundError``
        whose message names the missing member.
        """
        members_dir = self.members_dir

        for dir_name in os.listdir(members_dir):
            dir_path = os.path.join(members_dir, dir_name)
            if not os.path.isdir(dir_path):
                continue
            jsons_dir = os.path.join(dir_path, "jsons")
            basic_info_file = os.path.join(jsons_dir, "basic_info.json")
            if not os.path.isfile(basic_info_file):
                continue
            with open(basic_info_file, "r", encoding="utf-8") as f:
                basic_info = json.load(f)
            if basic_info.get("id") == member_id:
                return jsons_dir
        # The f prefix was missing here, so the literal "{member_id}" was returned.
        return f"Couldn't find {member_id}"

    def _load_json(self, json_file):
        """Load and return the parsed content of ``json_file`` from the member's ``jsons`` directory.

        Raises:
            FileNotFoundError: If the file (or the member) does not exist.
        """
        file_path = os.path.join(self.jsons_path, json_file)
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data

    def _check_display(self, data):
        """Return True when ``data``, or anything nested inside it, is flagged for display.

        For a dict holding a ``"display"`` entry: a boolean value is returned
        as is, and a dict value counts as True when its ``"kg"`` entry equals
        True.  Otherwise the values of the dict (or the items of a list) are
        searched recursively.  Any other kind of value yields False.
        """
        if isinstance(data, dict):
            if "display" in data:
                display = data["display"]
                if isinstance(display, bool):
                    return display
                elif isinstance(display, dict):
                    # .get(): a display dict without a "kg" entry used to raise KeyError.
                    # NOTE(review): the rest of this notebook keys on "dti", not "kg" —
                    # confirm which platform flag is meant here.
                    if display.get("kg") == True:
                        return True
            for value in data.values():
                if self._check_display(value):
                    return True
        elif isinstance(data, list):
            for item in data:
                if self._check_display(item):
                    return True
        return False

    def awards(self):
        """Return the parsed content of ``awards.json``."""
        return self._load_json("awards.json")

    def basic_info(self):
        """Return the parsed ``basic_info.json``.

        The keys ``start_date`` / ``end_date``, when present, are renamed to
        ``group_start_date`` / ``group_end_date``.
        """
        data = self._load_json("basic_info.json")
        if "start_date" in data:
            data["group_start_date"] = data.pop("start_date")
        if "end_date" in data:
            data["group_end_date"] = data.pop("end_date")
        return data

    def education(self):
        """Return the parsed content of ``education.json``."""
        return self._load_json("education.json")

    def experience(self):
        """Return the parsed content of ``experience.json``."""
        return self._load_json("experience.json")

    def outreach(self):
        """Return the parsed content of ``outreach.json``."""
        return self._load_json("outreach.json")

    def documents(self):
        """Return the parsed content of ``docs.json``."""
        return self._load_json("docs.json")

    def publications(self):
        """Return the parsed content of ``publications.json``."""
        return self._load_json("publications.json")

    def projects(self):
        """Return the parsed content of ``projects.json``."""
        return self._load_json("projects.json")

    def social_links(self):
        """Return the parsed content of ``social_links.json``."""
        return self._load_json("social_links.json")

    def website_media(self):
        """Return the parsed content of ``website_media.json``."""
        return self._load_json("website_media.json")


In [None]:

# Manual check: load and display the basic info of one hard-coded member id.
member=MemberData("harshul_gupta")
basic=member.basic_info()
basic


In [None]:
class ContentData:
    """Access to the JSON file of a single article.

    The file is located once, at construction time, by matching
    ``article_id`` against the ``"article_id"`` field of the ``*.json``
    files in ``content_dir``.
    """

    def __init__(self, article_id, content_dir="../group-data/website_data/content/"):
        """Locate the JSON file of ``article_id``.

        Args:
            article_id: Value expected in the ``"article_id"`` field.
            content_dir: Directory holding the article JSON files.  Defaults
                to the location this class previously hard-coded.

        Raises:
            ValueError: If no file in ``content_dir`` carries this article id.
        """
        self.article_id = article_id
        self.content_dir = content_dir
        self.json_path = self._find_jsons_path(article_id)

    def _find_jsons_path(self, article_id):
        """Return the path of the JSON file whose ``"article_id"`` matches.

        Only ``*.json`` files are inspected, and files that cannot be parsed
        are reported on stdout and skipped, so a stray non-JSON file in the
        folder (for instance ``.DS_Store``) no longer aborts the lookup.
        Files are visited in sorted order to make the result deterministic.

        Raises:
            ValueError: If no file matches ``article_id``.
        """
        for file_name in sorted(os.listdir(self.content_dir)):
            if not file_name.endswith(".json"):
                continue
            file_path = os.path.join(self.content_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content_data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                print(f"Error decoding JSON in file: {file_path}")
                continue
            # Only JSON objects can carry an "article_id" field.
            if isinstance(content_data, dict) and content_data.get("article_id") == article_id:
                return file_path

        raise ValueError(f"Article ID '{article_id}' not found.")

    def load_json(self):
        """Return the parsed content of the article's JSON file."""
        with open(self.json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data

In [None]:
# Create the Jinja2 environment that loads templates from 'templates/', with the
# loop-controls extension enabled. page_link, MemberData and ContentData are
# registered as globals so that every template can use them.
# NOTE(review): no `autoescape` argument is passed, so escaping is left at the
# Jinja2 default — confirm the templates handle escaping where it matters.
environment = Environment(loader=FileSystemLoader('templates/'),extensions=['jinja2.ext.loopcontrols'])
environment.globals['page_link'] = page_link
environment.globals['MemberData'] = MemberData
environment.globals['ContentData'] = ContentData

In [None]:
# Ids and categories of the members shown on this site (people_id_data) and of
# everybody else found in the members directory (non_mem_data).
people_id_data = {"id": [], "category": []}
non_mem_data = {"id": [], "category": []}
members_directory = '../group-data/members/'

for member_dir in os.listdir(members_directory):
    member_path = os.path.join(members_directory, member_dir)
    jsons_directory = os.path.join(member_path, 'jsons')
    if not os.path.isdir(jsons_directory):
        continue
    for json_file in os.listdir(jsons_directory):
        # Matches every file name ending in 'info.json', basic_info.json included.
        if not json_file.endswith('info.json'):
            continue
        json_path = os.path.join(jsons_directory, json_file)
        with open(json_path, 'r') as file:
            info = json.load(file)
        # Shown only when a truthy "display" entry holds a truthy "dti" flag.
        shown_on_dti = info.get('display') and info['display'].get('dti')
        bucket = people_id_data if shown_on_dti else non_mem_data
        bucket['id'].append(info.get('id'))
        bucket['category'].append(info.get('category'))

people_df = pd.DataFrame(people_id_data)
non_mem_df = pd.DataFrame(non_mem_data)

sorting_order = [
    "Faculty",
    "Postdoctoral Researchers",
    "Graduate Students",
    "Undergraduate Students",
    "Researchers",
    "Research Software Engineers",
]

# Order the people by category rank, then by id. Categories missing from
# sorting_order get rank -1 and therefore come first.
people_df = (
    people_df
    .assign(sorting_order=people_df['category'].apply(lambda x: sorting_order.index(x) if x in sorting_order else -1))
    .sort_values(['sorting_order', 'id'], ascending=[True, True])
    .drop('sorting_order', axis=1)
)

In [None]:
# Display the entries that did not pass the display/"dti" check above.
non_mem_df

In [None]:
# Collect id, category, date and tags of every article JSON that has a truthy "display" entry.
content_id_data = {"article_id": [], "category": [], "date": [], "tags": []}
content_directory = '../group-data/website_data/content'

for json_file in os.listdir(content_directory):
    if json_file.endswith('.json'):
        json_path = os.path.join(content_directory, json_file)
        with open(json_path, 'r') as file:
            info = json.load(file)
            if info.get('display'):
                # Missing keys are stored as None (dict.get default).
                content_id_data['article_id'].append(info.get('article_id'))
                content_id_data['category'].append(info.get('category'))
                content_id_data['date'].append(info.get('article_date'))
                content_id_data['tags'].append(info.get('tags'))

content_df = pd.DataFrame(content_id_data)
# Article dates are parsed as month-day-year.
content_df['date'] = pd.to_datetime(content_df['date'], format='%m-%d-%Y')
# Within every category, order the articles from newest to oldest.
# NOTE(review): groupby presumably drops rows whose category is missing — confirm that is intended.
content_df = content_df.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)

In [None]:
# Keep the articles having at least one tag that contains "research".
# Articles whose "tags" entry is missing (None) or not a collection are left
# out instead of raising TypeError and aborting the whole build.
research_content_unsorted = content_df[
    content_df['tags'].apply(
        lambda x: isinstance(x, (list, tuple, set)) and any('research' in tag for tag in x)
    )
]
# Within every category, order the research articles from newest to oldest.
research_content = research_content_unsorted.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)

In [None]:
# Display the full table of displayable articles for a visual check.
content_df

In [None]:
# Keep the articles having at least one tag that contains "news".
# Articles whose "tags" entry is missing (None) or not a collection are left
# out instead of raising TypeError and aborting the whole build.
news_content_unsorted = content_df[
    content_df['tags'].apply(
        lambda x: isinstance(x, (list, tuple, set)) and any('news' in tag for tag in x)
    )
]
# Newest news first.
news_content = news_content_unsorted.sort_values(by="date", ascending=False)

In [None]:
# Newest article of every category, ordered from newest to oldest.
# Sorting by date and keeping the first row per category replaces the former
# row-by-row loop, which relied on the private DataFrame._append and raised
# KeyError on 'date' when there was no article at all. The stable sort keeps
# content_df's order among articles sharing the same date.
latest_content_df = (
    content_df
    .sort_values('date', ascending=False, kind='stable')
    .drop_duplicates(subset='category', keep='first')
    .reset_index(drop=True)
)

In [None]:
# Load the page-level JSON files into `data`, keyed by their base name.
json_files = ['general', 'homepage', 'research', 'support', 'contact']
data = {}

for json_file in json_files:
    json_file_path = f"../group-data/website_data/{json_file}.json"
    try:
        with open(json_file_path) as json_var:
            data[json_file] = json.load(json_var)
    except (FileNotFoundError, json.JSONDecodeError) as error:
        # Still best-effort, but say so: a silently skipped file only showed
        # up later as a KeyError on data[...] in the rendering cells.
        print(f"Could not load '{json_file_path}': {error}")

##### Homepage

In [None]:
# Load the homepage template through the shared Jinja2 environment.
homepage_template = environment.get_template('homepage.html.j2')

In [None]:
# Render the homepage from the general and homepage JSON data plus the newest article of each category.
# NOTE(review): `encoding='utf-8'` is handed to render() like the other keyword
# arguments, i.e. presumably as a template variable rather than an output
# encoding — confirm the template actually uses it.
homepage_content = homepage_template.render(general=data['general'], homepage=data['homepage'], recent_content=latest_content_df.to_dict(orient='records'), encoding='utf-8')


In [None]:
# Save the rendered homepage as the site's index.html.
with open('../deepthought-initiative.github.io/index.html', mode='w', encoding='utf-8') as homepage_file:
    homepage_file.write(homepage_content)

##### People Page

In [None]:
# Load the template of the people overview page.
people_template = environment.get_template("people.html.j2")

In [None]:
# Render the people page; the template receives the ordered ids of the displayed members.
people_content = people_template.render(general=data["general"], 
                                        members=people_df['id'])

In [None]:
# Save the rendered people overview page.
with open("../deepthought-initiative.github.io/People.html", mode="w", encoding="utf-8") as people_file:
    people_file.write(people_content)

##### Individual People Page

In [None]:
# Load the template used for each member's own page.
ind_person_template = environment.get_template("individual_person.html.j2")

In [None]:
# Render and save one page per displayed member.
for person in people_df['id']:
    member_folder = f"../deepthought-initiative.github.io/members/{person}"
    # Create the member's output folder when it is missing, as the other
    # per-item page loops of this notebook do; without it open() fails for
    # every member whose folder does not exist yet.
    os.makedirs(member_folder, exist_ok=True)
    filename = f"{member_folder}/{person}.html"
    ind_person_content = ind_person_template.render(general=data["general"],
                                                    member_id=person,
                                                    content=content_df.to_dict(orient='records'))
    with open(filename, mode="w", encoding="utf-8") as page:
        page.write(ind_person_content)

##### Research Page

In [None]:
# Load the template of the research landing page.
research_template = environment.get_template("research.html.j2")

In [None]:
# Render the research landing page from the research-tagged articles.
main_page_research_content = research_template.render(general=data["general"],
                                            content=research_content)

In [None]:
# Save the rendered research landing page.
with open("../deepthought-initiative.github.io/Research.html", mode="w", encoding="utf-8") as research_file:
    research_file.write(main_page_research_content)

In [None]:
# Load the template of the per-category research front pages.
sub_research_template = environment.get_template("sub_research_frontpage.html.j2")

In [None]:
# One front page per article category other than "News".
non_news_categories = content_df.loc[content_df.category != "News", "category"].unique()

for category in non_news_categories:
    sub_research_content = sub_research_template.render(
        general=data["general"],
        research_general=data["research"],
        content=research_content,
        category=category,
    )
    # The category folder (filled by the individual research pages) and the
    # category front page share the same base path.
    folder_path = f"../deepthought-initiative.github.io/sub_research/{page_link(category.lower())}"
    os.makedirs(folder_path, exist_ok=True)
    with open(f"{folder_path}.html", mode="w", encoding="utf-8") as sub_research:
        sub_research.write(sub_research_content)

##### Individual Research Page

In [None]:
# Load the template used for each individual research article page.
template_no_twitter = environment.get_template("research_page_no_twitter.html.j2")

In [None]:
# Render and save one page per research article, grouped in a folder per category.
for ind_research_keys, ind_research_values in research_content.iterrows():
    # Articles whose category mentions "news" are not rendered here.
    if "news" in ind_research_values.category.lower():
        continue
    ind_research_content = template_no_twitter.render(
        general=data["general"],
        member_ids=people_df['id'],
        nonmem_ids=non_mem_df['id'],
        content=ind_research_values,
    )
    category_slug = page_link(ind_research_values.category.lower())
    folder_path = f"../deepthought-initiative.github.io/sub_research/{category_slug}"
    os.makedirs(folder_path, exist_ok=True)
    article_slug = page_link(ind_research_values.article_id.lower())
    with open(f"{folder_path}/{article_slug}.html", mode="w", encoding="utf-8") as ind_research_page:
        ind_research_page.write(ind_research_content)


##### News Page

In [None]:
# Load the template of the news overview page.
news_template = environment.get_template("news.html.j2")

In [None]:
# Render the news overview from the news-tagged articles (newest first); the
# ids of displayed and non-displayed members are passed along as well.
news_page_content = news_template.render(general=data["general"],
                                         content=news_content,
                                         member_ids=people_df['id'],
                                         nonmem_ids = non_mem_df['id'],
                                         category="News")

In [None]:
# Save the rendered news overview page.
with open("../deepthought-initiative.github.io/News.html", mode="w", encoding="utf-8") as news_file:
    news_file.write(news_page_content)

##### Individual News Pages

In [None]:
# Load the template used for each individual news page; the variant named
# "twitter" below is currently disabled.
news_template_no_twitter = environment.get_template("news_page_no_twitter.html.j2")
#news_template_twitter = environment.get_template("news_page_twitter.html.j2")

In [None]:
# Folder receiving one HTML page per news article. It is the same for every
# article, so it is created once instead of on each iteration.
folder_path = "../deepthought-initiative.github.io/news/"
os.makedirs(folder_path, exist_ok=True)

for ind_news_keys, ind_news_values in news_content.iterrows():
    ind_news_content = news_template_no_twitter.render(general=data["general"],
                                                       member_ids=people_df['id'],
                                                       nonmem_ids=non_mem_df['id'],
                                                       content=ind_news_values)
    # folder_path already ends with "/", so no extra separator is added
    # (the path used to contain "news//").
    news_page_path = f"{folder_path}{page_link(ind_news_values.article_id.lower())}.html"
    with open(news_page_path, mode="w", encoding="utf-8") as ind_news_page:
        ind_news_page.write(ind_news_content)

##### Support Page

In [None]:
# Load the template of the support page.
support_template = environment.get_template('support.html.j2')

In [None]:
# Render the support page from the general and support JSON data.
support_content = support_template.render(general=data["general"], support=data["support"])

In [None]:
# Save the rendered support page.
with open('../deepthought-initiative.github.io/Support.html', mode='w', encoding='utf-8') as support_file:
    support_file.write(support_content)

##### Contact

In [None]:
# Load the template of the contact page.
contact_template = environment.get_template('contact.html.j2')

In [None]:
# Render the contact page from the general and contact JSON data.
contact_content = contact_template.render(general=data["general"], contact=data["contact"])

In [None]:
# Save the rendered contact page.
with open('../deepthought-initiative.github.io/Contact.html', mode='w', encoding='utf-8') as contact_file:
    contact_file.write(contact_content)