In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from datetime import datetime

# Column order of the frame returned by articles_data (also used when no rows are found).
_ARTICLE_COLUMNS = [
    'Title',
    'Image_link',
    'Content',
    'Published Date',
    'Category',
    'Category Link',
    'Date Fetched',
]


def _tag_text(tag, strip=False):
    """Return ``tag.text`` (optionally stripped), or None when ``tag`` is None."""
    if tag is None:
        return None
    return tag.text.strip() if strip else tag.text


def _tag_attr(tag, attr):
    """Return attribute ``attr`` of ``tag``, or None when the tag or the attribute is absent."""
    if tag is None:
        return None
    return tag.get(attr)


def _child(tag, name):
    """Return ``tag.<name>`` (the child lookup used as ``.img``/``.span``/``.a``), or None if ``tag`` is None."""
    if tag is None:
        return None
    return getattr(tag, name)


def _parse_article(content, date_fetched):
    """Build one output row from a ``stm_single_post`` element.

    Returns None when the element has no ``text_block clearfix`` div, so the
    caller can skip it. Any other element that is missing yields None for the
    corresponding field instead of raising.
    """
    body = content.find('div', class_='text_block clearfix')
    if body is None:
        return None

    thumbnail = content.find('div', class_='post_thumbnail')
    date_item = content.find('li', class_='post_date h6')
    # Looked up once and reused for both the category name and its link.
    category_item = content.find('li', class_='post_cat h6')

    return {
        'Title': _tag_text(content.find('h1', class_='h2 post_title'), strip=True),
        'Image_link': _tag_attr(_child(thumbnail, 'img'), 'src'),
        # Every paragraph is followed by a newline, including the last one.
        'Content': ''.join(paragraph.text + '\n' for paragraph in body.find_all('p')),
        'Published Date': _tag_text(_child(date_item, 'span')),
        'Category': _tag_text(_child(category_item, 'span')),
        'Category Link': _tag_attr(_child(category_item, 'a'), 'href'),
        'Date Fetched': date_fetched,
    }


def articles_data(url, timeout=30):
    """Download an article page and return its posts as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled server cannot hang
        the notebook. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        One row per ``div.stm_single_post`` element that contains a
        ``text_block clearfix`` div, with the columns 'Title', 'Image_link',
        'Content', 'Published Date', 'Category', 'Category Link' and
        'Date Fetched'. Fields whose element is missing from the page are
        None. The columns are present even when no rows are found.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status rather than parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Date only (YYYY-MM-DD), shared by every row produced by this call.
    date_fetched = datetime.now().strftime("%Y-%m-%d")

    data_list = []
    for content in soup.find_all('div', class_='stm_single_post'):
        row = _parse_article(content, date_fetched)
        if row is not None:
            data_list.append(row)

    return pd.DataFrame(data_list, columns=_ARTICLE_COLUMNS)

def posts_data(url='https://globaldevelopmentaldelay.com.au/blog/', timeout=30):
    """Download a blog listing page and return its post titles and links.

    Parameters
    ----------
    url : str, optional
        Listing page to fetch. Defaults to the blog address that was
        previously hard-coded, so ``posts_data()`` behaves as before.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled server cannot hang
        the notebook. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Columns 'Index' (1-based position among the rows kept), 'Title' and
        'Link', one row per listing card that has a
        ``post_list_item_title h3`` anchor with an ``href``. The columns are
        present even when no cards are found.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status rather than parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    cards = soup.find_all('div', class_='col-md-4 col-sm-6 col-xs-12 plugin_style')

    data_list = []
    for card in cards:
        # One lookup serves both the title text and the link.
        title_link = card.find('a', class_='post_list_item_title h3')
        if title_link is None or not title_link.get('href'):
            # A card without a usable title link cannot be scraped further; skip it.
            continue

        data_list.append({
            # Numbered over the rows kept, so Index stays consecutive from 1.
            'Index': len(data_list) + 1,
            'Title': title_link.text,
            'Link': title_link.get('href'),
        })

    return pd.DataFrame(data_list, columns=['Index', 'Title', 'Link'])

# Fetch the blog listing: one row per post with its Index, Title and Link.
posts_df = posts_data()

# Scrape every linked article and tag its rows with the Index of the listing
# row it came from. DataFrame.get returns the fallback when the listing frame
# has no such column (an empty listing), where posts_df['Link'] would raise
# KeyError.
article_frames = [
    articles_data(link).assign(Index=post_index)
    for post_index, link in zip(posts_df.get('Index', []), posts_df.get('Link', []))
]

if article_frames:
    # Build the indexed frame in one chained expression instead of mutating in place.
    articles_df = pd.concat(article_frames, ignore_index=True).set_index("Index")
else:
    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame that still carries the "Index" index name.
    articles_df = pd.DataFrame(index=pd.Index([], name="Index"))

# Print the DataFrames
print("Posts Data:")
print(posts_df)

print("\nArticles Data:")
print(articles_df)

Posts Data:
   Index                                              Title  \
0      1             Starting School: A parent’s experience   
1      2  The Importance of a Holistic Approach to Inter...   
2      3  How can I get the outcomes I want for my child...   
3      4  Global Developmental Delay – A Sibling’s Persp...   
4      5  Our child has global developmental delay! What...   

                                                Link  
0  https://globaldevelopmentaldelay.com.au/starti...  
1  https://globaldevelopmentaldelay.com.au/the-im...  
2  https://globaldevelopmentaldelay.com.au/how-ca...  
3  https://globaldevelopmentaldelay.com.au/global...  
4  https://globaldevelopmentaldelay.com.au/our-ch...  

Articles Data:
                                                   Title  \
Index                                                      
1                 Starting School: A parent’s experience   
2      The Importance of a Holistic Approach to Inter...   
3      How can I get th

In [2]:
# Write the scraped articles to a CSV file in the working directory.
# index=False leaves the frame's index out of the file.
# NOTE(review): the previous cell moves "Index" into the index, so that column
# is not written to the CSV - confirm this is intended.
articles_df.to_csv(path_or_buf='GlobalDevelopmentDelay.csv', index=False)