### This notebook consist of code for creating the html files for the website each time data is updated.

##### Set-up

In [1026]:
import json
import os
from pathlib import Path
import glob
import shutil
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from jinja2.exceptions import UndefinedError

In [1027]:
def delete_ds_store(path):

    ds_store_files = glob.glob(os.path.join(path, '**', '.DS_Store'), recursive=True)
    

    for file_path in ds_store_files:
        os.remove(file_path)
        print(f"Deleted: {file_path}")
        

current_directory = os.getcwd()


delete_ds_store(current_directory)


In [1028]:
# # Copying people directories

# def copy_folders(source, destination):
#     try:
#         shutil.copytree(source, destination)
#         print("Folders copied successfully!")
#     except shutil.Error as e:
#         print(f"Error copying folders: {e}")
#     except OSError as e:
#         print(f"Error creating destination directory: {e}")

# source_dir = "../temp/group-data/members"
# destination_dir = "../group-data/members"

# for folder_name in os.listdir(source_dir):
#     folder_path = os.path.join(source_dir, folder_name)
#     if os.path.isdir(folder_path):
#         json_path = os.path.join(folder_path, "jsons", "basic_info.json")
#         if os.path.exists(json_path):
#             with open(json_path) as f:
#                 data = json.load(f)
#                 display_info = data.get("display", {})
#                 if display_info.get("dti", True):
#                     destination_folder = os.path.join(destination_dir, folder_name)
#                     copy_folders(folder_path, destination_folder)

In [1029]:
# # Copying common article content
# source_folder = '../temp/group-data/website_data'
# destination_folder = '../group-data/website_data'

# def copy_json_files(source_folder, destination_folder):
#     source_content_folder = os.path.join(source_folder, 'content')
#     destination_content_folder = os.path.join(destination_folder, 'content')
#     destination_website_files_folder = os.path.join(destination_folder, 'website_files')

#     os.makedirs(destination_content_folder, exist_ok=True)
#     os.makedirs(destination_website_files_folder, exist_ok=True)

#     for filename in os.listdir(source_content_folder):
#         if filename.endswith('.json'):
#             file_path = os.path.join(source_content_folder, filename)
#             with open(file_path, 'r') as file:
#                 try:
#                     json_data = json.load(file)
#                     platforms = json_data.get('platforms')
#                     if platforms and 'dti' in platforms:
#                         destination_path = os.path.join(destination_content_folder, filename)
#                         shutil.copy(file_path, destination_path)
#                         print(f"Copied file: {filename} from '{source_content_folder}' to '{destination_content_folder}'")

#                         process_cover_image(json_data, source_folder, destination_website_files_folder)
#                         process_content_images(json_data, source_folder, destination_website_files_folder)

#                 except json.JSONDecodeError:
#                     print(f"Error decoding JSON in file: {file_path}")

# def process_cover_image(json_data, source_folder, destination_website_files_folder):
#     cover_image_path = json_data.get('cover_image')
#     if cover_image_path:
#         source_image_path = os.path.join(source_folder, cover_image_path)
#         if os.path.exists(source_image_path):
#             destination_image_path = os.path.join(destination_website_files_folder, 'images', 'article_content', os.path.basename(cover_image_path))
#             os.makedirs(os.path.dirname(destination_image_path), exist_ok=True)
#             shutil.copy(source_image_path, destination_image_path)
#             print(f"Copied file: {cover_image_path} from '{source_folder}' to '{destination_image_path}'")

# def process_content_images(json_data, source_folder, destination_website_files_folder):
#     content = json_data.get('content')
#     if content:
#         for key, value in content.items():
#             if key.startswith('img'):
#                 image_path = value
#                 if image_path:
#                     source_image_path = os.path.join(source_folder, image_path)
#                     if os.path.exists(source_image_path):
#                         destination_image_path = os.path.join(destination_website_files_folder, 'images', 'article_content', os.path.basename(image_path))
#                         os.makedirs(os.path.dirname(destination_image_path), exist_ok=True)
#                         shutil.copy(source_image_path, destination_image_path)
#                         print(f"Copied file: {image_path} from '{source_folder}' to '{destination_image_path}'")

# copy_json_files(source_folder, destination_folder)


In [1030]:
def page_link(a):
    """Return the HTML file name after replacing blank spaces(" ") with underscores("-")"""
    return a.replace(" ", "_") if " " in a else a

In [1031]:
GROUP_DATA_DIR = Path("../../group-data")
TEMPLATE_DIR_PATH = GROUP_DATA_DIR.parent / "dtiwebsite_generator" / "templates"
environment = Environment(
    loader=FileSystemLoader(TEMPLATE_DIR_PATH),
    extensions=["jinja2.ext.loopcontrols", "jinja2.ext.do"],
)
environment.globals["page_link"] = page_link

In [1032]:
WEBSITE_DATA_PATH = GROUP_DATA_DIR / "website_data/"
JSON_FILES_TO_LOAD = [
    "general",
    "homepage",
    "research",
    "support",
    "contact",
]

In [1033]:
HOSTING_PATH = GROUP_DATA_DIR.parent / "deepthought-initiative.github.io"
def create_page(template, html, **kwargs):
    """
    Create an HTML page using a Jinja2 template and save it to a specified path.

    Parameters:
    ----------
    template : str
        The filename of the Jinja2 template to be used.
    html : str
        The filename of the HTML file to be generated.
    **kwargs : dict
        Additional keyword arguments to be passed to the Jinja2 template for rendering.

    Returns:
    -------
    None

    """
    page_template = environment.get_template(template)
    template_level = html.count("/")
    page_html_path = HOSTING_PATH / html
    page_content = page_template.render(TEMPLATE_LEVEL=template_level, **kwargs)
    with open(page_html_path, mode="w", encoding="utf-8") as page:
        page.write(page_content)

In [1034]:
def loading_general_json_files(file_to_load):
    """
    Load data from JSON files specified in a list of file names.

    Parameters:
    ----------
    json_data_list : list of str
        A list of file names (without extension) to load as JSON.

    Returns:
    -------
    dict
        A dictionary where keys are file names and values are the corresponding JSON data.

    Raises:
    ------
    FileNotFoundError:
        If a specified file does not exist.
    json.JSONDecodeError:
        If there's an issue decoding the JSON content from a file.

    """
    loaded_data = {}
    file_matches = WEBSITE_DATA_PATH/ f"{file_to_load}.json"

    if file_matches:
        try:
            with open(file_matches, "r") as json_file:
                loaded_data = json.load(json_file)
        except json.JSONDecodeError:
            print(f"Error decoding JSON in '{file_matches}'.")
    else:
        print(f"File '{file_to_load}.json' not found.")

    return loaded_data

In [1035]:
CONTENT_DIR_PATH = WEBSITE_DATA_PATH / "content"
article_content_list = []

for content_file_name in CONTENT_DIR_PATH.iterdir():
    with open(content_file_name, 'r') as fcontent:
        article_content = json.load(fcontent)
    article_content_list.append(article_content)
article_content_df = pd.DataFrame(article_content_list)
news_content_df = article_content_df[
    (article_content_df["category"] == "News")
    | (
        article_content_df["tags"].apply(
            lambda x: "news" in x if isinstance(x, list) else False
        )
    )
].sort_values(by=["date"], ascending=[False])
news_content_df

Unnamed: 0,title,author_id,article_id,display,category,date,tags,platforms,short_description,long_description,cover_image,content,people_involved_ids,links,twitter
3,European Space Agency Research Internship,vicente_amado,ESA_internship_vicente,True,News,06-15-2023,"[news, research, Internship]","[kg, dti]",Unleashing the Power of Data Science: Internsh...,,website_files/images/article_content/esa_inter...,{'para1': 'The Deepthought Initiative (DTI) co...,"[vicente_amado, wolfgang_kerzendorf]",{},
2,Peer Review Under Review - Workshop at Europea...,vicente_amado,prur_conference,True,News,02-12-2023,"[news, research]",,DeepThought Initiative and collaborators organ...,,website_files/images/article_content/img_PRUR.png,{'para1': 'Wolfgang Kerzendorf and collaborato...,"[vicente_amado, wolfgang_kerzendorf]",{'NASA ADS': 'https://ui.adsabs.harvard.edu/ab...,


In [1036]:
research_content_df = article_content_df[article_content_df["category"] != "News"].sort_values(
    by=["category", "date"], ascending=[True, False]
)
research_content_df

Unnamed: 0,title,author_id,article_id,display,category,date,tags,platforms,short_description,long_description,cover_image,content,people_involved_ids,links,twitter
1,MIDSURE 2022,bea_lu,midsure22_poster_bea,True,Computational Metascience,07-22-2022,[research],"[kg, dti]",Poster presentation at the Mid-Michigan Sympos...,,website_files/images/article_content/bea_midsu...,{'para1': 'Abstract: Interdisciplinary scienti...,"[bea_lu, vicente_amado, wolfgang_kerzendorf]",{},
0,MSU UURAF 2021,vicente_amado,uuraf21_poster_vicente,True,Computational Metascience,04-19-2021,[research],"[kg, dti]",Poster presentation for MSU's University Under...,,website_files/images/article_content/MAST_Post...,{'para1': 'Abstract: The modern scientific com...,"[vicente_amado, wolfgang_kerzendorf, jack_o_br...",{},


In [1037]:
# Reading info.json
MEMBERS_DIR = Path("../../group-data/members")
info_json_list = []
for single_info_file_path in MEMBERS_DIR.glob("*/info.json"):
    with open(single_info_file_path, 'r') as f_info:
        member_data = json.load(f_info)
    info_json_list.append(member_data)
info_json_df = pd.DataFrame(info_json_list)
info_json_df.set_index("id", inplace=True)
info_json_df["full_name"] = info_json_df.apply(
    lambda row: (
        row["nick_name"] + " " + row["last_name"]
        if pd.notna(row["nick_name"])
        else row["first_name"] + " " + row["last_name"]
    ),
    axis=1,
)
info_json_df

Unnamed: 0_level_0,first_name,last_name,image_path,cover_image_path,introduction,nick_name,full_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
sofia_biriouk,Sofia,Biriouk,media/images/sofia.jpg,media/images/cover.jpg,,,Sofia Biriouk
vicente_amado,Vicente,Amado Olivo,media/images/ESD_headshot.jpg,media/images/cover.jpg,I am a graduate student at Michigan State Univ...,,Vicente Amado Olivo
kevin_cawley,Kevin,Cawley,media/images/KevinCawleyFall2022.jpg,media/images/cover.jpg,,,Kevin Cawley
yuki_matsumura,Yuki,Matsumura,media/images/yuki_face.png,media/images/cover.jpg,"Hello, I am a first-year physics graduate stud...",,Yuki Matsumura
wolfgang_kerzendorf,Wolfgang,Kerzendorf,media/images/wolfgang.jpg,media/images/cover.jpg,I am an astrophysicist deeply intrigued by nuc...,,Wolfgang Kerzendorf
sona_chitchyan,Sona,Chitchyan,media/images/sona_photo.jpg,media/images/cover.jpg,Sona is a Master's student in astrophysics pri...,,Sona Chitchyan
bea_lu,Bea,Lu,media/images/bea_lu.jpg,media/images/cover.jpg,"Hi, my name is Bea and I am currently a studen...",,Bea Lu
richard_dow,Richard,Dow,media/images/richard.jpg,media/images/cover.jpg,"Hi, my name is Tripp, and I'm a computer scien...",Tripp,Tripp Dow


In [1038]:
general = loading_general_json_files("general")
homepage = loading_general_json_files("homepage")
contact = loading_general_json_files("contact")
research = loading_general_json_files("research")
support = loading_general_json_files("support")
create_page(
    "homepage.html.j2",
    "index.html",
    general=general,
    homepage=homepage,
    recent_content=[],
)
create_page("support.html.j2", "Support.html", general=general, support=support)
create_page(
    "news.html.j2",
    "News.html",
    general=general,
    content=news_content_df,
    category="news",
    member_data=info_json_df.to_dict("index"),
)
create_page(
    "research.html.j2",
    "Research.html",
    general=general,
    content=research_content_df,
    current_members=info_json_df.to_dict("index"),
    research=research,
)
create_page(
    "people.html.j2",
    "People.html",
    general=general,
    current_members=info_json_df.to_dict("index"),
)
create_page("contact.html.j2", "Contact.html", general=general, contact=contact)

In [1039]:
for ind_news_keys, ind_news_values in news_content_df.iterrows():
    folder_path = (
        Path(HOSTING_PATH) / "news" / page_link(ind_news_values.article_id.lower())
    )
    create_page(
        "news_page_no_twitter.html.j2",
        f"news/{page_link(ind_news_values.article_id.lower())}.html",
        general=general,
        content=ind_news_values,
        member_data=info_json_df.to_dict("index"),
        category="News",
    )

In [1040]:
folder_path = Path(HOSTING_PATH) / "sub_research"
folder_path.mkdir(parents=True, exist_ok=True)

for category in article_content_df.loc[article_content_df.category != "News", "category"].unique():
    create_page(
        "sub_research_frontpage.html.j2",
        f"sub_research/{page_link(category.lower())}.html",
        general=general,
        research=research,
        content=research_content_df,
        category=category,
        current_members=info_json_df.to_dict("index"),
    )

In [1041]:
for ind_research_keys, ind_research_values in research_content_df.iterrows():
    folder_path = (
        Path(HOSTING_PATH)
        / "sub_research"
        / page_link(ind_research_values.category.lower())
    )
    folder_path.mkdir(parents=True, exist_ok=True)
    create_page(
        "research_page_no_twitter.html.j2",
        f"sub_research/{page_link(ind_research_values.category.lower())}/{page_link(ind_research_values.article_id.lower())}.html",
        general=general,
        content=ind_research_values,
        member_data=info_json_df.to_dict("index"),
        article_id=ind_research_values["article_id"],
    )

In [1042]:
# Function for reading member data files
def read_member_data_jsons(file_to_read):
    data_list = []
    for single_member_file_path in MEMBERS_DIR.rglob(f"{file_to_read}"):
        with open(single_member_file_path, "r") as fname:
            member_data = json.load(fname)
        info_json_file_path = single_member_file_path.parent.parent / "info.json"
        with open(info_json_file_path, "r") as file_info:
            member_info_data = json.load(file_info)
        mem_id = member_info_data["id"]
        for single_entry in member_data:
            single_entry["id"] = mem_id
            data_list.append(single_entry)
    member_data_df = pd.DataFrame(data_list)
    member_data_df.set_index("id", inplace=True)
    return member_data_df

In [1043]:
exp_df = read_member_data_jsons("experiences.json")[
    [
        "role",
        "start_date",
        "end_date",
        "institution",
        "group",
    ]
]

exp_df

Unnamed: 0_level_0,role,start_date,end_date,institution,group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sofia_biriouk,STEAM Learning and Education Intern,2023-01-01,2023-12-31,Holland Public Museum,
sofia_biriouk,Undergraduate Learning Assistant for Intro to ...,2023-01-01,2023-12-31,New York University,
sofia_biriouk,Professorial Assistant,2021-01-01,2023-12-31,Michigan State University,
kevin_cawley,Software Intern,2022-01-01,2023-12-31,Northrop Grumman Corporation,
kevin_cawley,Professorial Assistant,2020-01-01,2021-12-31,Michigan State University,
wolfgang_kerzendorf,Principal Investigator,2013-01-01,,TARDIS,
wolfgang_kerzendorf,Assistant Professor,2019-08-15,,Michigan State University,
wolfgang_kerzendorf,Senior Research Associate,2019-01-01,2019-12-31,New York University,
wolfgang_kerzendorf,ESO Fellow,2014-10-01,2018-12-31,European Southern Observatory,
wolfgang_kerzendorf,Postdoctoral Fellow,2011-10-01,2014-09-30,University of Toronto,


In [1044]:
edu_df = read_member_data_jsons("education.json")[
    ["start_date", "end_date", "institution", "degree"]
]
edu_df

Unnamed: 0_level_0,start_date,end_date,institution,degree
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
sofia_biriouk,2021-01-01,,Michigan State University,Bachelors
vicente_amado,2021-01-01,,Michigan State University,PhD
vicente_amado,2017-01-01,2021-12-31,Michigan State University,Bachelors
kevin_cawley,2020-01-01,2024-12-31,Michigan State University,Bachelors
yuki_matsumura,2022-01-01,,Michigan State University,PhD
yuki_matsumura,2020-01-01,2022-12-31,California Polytechnic State University,Bachelors
wolfgang_kerzendorf,2007-02-01,2011-12-18,Australian National University,PhD
bea_lu,2021-09-01,,Michigan State University,Bachelors
richard_dow,2021-01-01,,University of Minnesota,Bachelors


In [1045]:
projects_df = read_member_data_jsons("projects.json")
projects_df

Unnamed: 0_level_0,project_title,start_date,end_date,description
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
sofia_biriouk,SNR0509-675 Center Investigation,2022-01-01,2023-12-31,No Surviving SN Ia Companion In SNR 0509-67.5:...
sofia_biriouk,Cassiopeia A Progenitor Search,2021-01-01,2022-12-31,A search for the progenitor of Cass A type IIB...
vicente_amado,Development Of A Global Registry For Peer Revi...,,,
kevin_cawley,Tardis Cuda,2021-01-01,2022-12-31,Creating a Numba Cuda version of the formal in...
kevin_cawley,Tardis Website,2021-01-01,2022-12-31,Creating an official platform for TARDIS
yuki_matsumura,Type IIP Supernovae As Cosmological Distance P...,,,
wolfgang_kerzendorf,Supernovae & Computational Metaresearch,,,
sona_chitchyan,Enhanced Type IIP Cosmology With Statistics An...,,,
bea_lu,Natural Language Processing,2021-01-01,2023-12-31,Text classification of astronomical publicatio...
bea_lu,Kerzendorf Group Website,2022-10-31,2023-12-31,Developing this website with jinja2 and GitHub...


In [1046]:
awards_df = read_member_data_jsons("awards.json")
awards_df

Unnamed: 0_level_0,award_name,date,description
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
sofia_biriouk,Natural Science Dean's List,2021-01-01,4.0 Grade Point Average
yuki_matsumura,NatSci Great IDEA Fellowship,2023-01-01,"Focusing on inclusion, diversity, equity, and ..."


In [1050]:
#Creating an instance of the Environment class that looks for templates. Page_link is set to the global variable so that it can be accessed by all templates
environment = Environment(loader=FileSystemLoader('templates/'),extensions=['jinja2.ext.loopcontrols'])
environment.globals['page_link'] = page_link
# environment.globals['MemberData'] = MemberData
# environment.globals['ContentData'] = ContentData

##### Individual People Page

In [1065]:
# ind_person_template = environment.get_template("individual_person.html.j2")

In [1066]:
# for person in people_df['id']:
#             filename = f"../deepthought-initiative.github.io/members/{ person }/{ person }.html"
#             ind_person_content = ind_person_template.render(general=data["general"], 
#                                                             member_id=person, 
#                                                             content=content_df.to_dict(orient='records'))
#             with open(filename, mode="w", encoding="utf-8") as page:
#                 page.write(ind_person_content)