# This notebook scrapes scheme pages from the myScheme website, filtered by agriculture and environment.


## Try to scrape single-website content

In [59]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Single-page trial: fetch one scheme page, pull out its header fields, the
# 'sources' link and the list content of every identified 'pt-10' section,
# then write the result as a one-row CSV.
URL = "https://www.myscheme.gov.in/schemes/bjrcy"
# Alternative test page: "https://www.myscheme.gov.in/schemes/e-nam"
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.3'}

# requests waits indefinitely unless a timeout is given, so bound the wait.
response = requests.get(URL, headers=HEADERS, timeout=10)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the outer div using its class attributes
    outer_div = soup.find('div', class_='w-full flex flex-row justify-between items-start mb-5')

    if outer_div:
        # Extract state and scheme_name from within the outer div
        state_elem = outer_div.find('h2', class_='text-darkIndigo-900 text-sm')
        scheme_name_elem = outer_div.find('h1', class_='font-bold text-green-600 text-xl sm:text-2xl mt-1')

        # Fall back to "N/A" when an element is missing
        state = state_elem.text.strip() if state_elem else "N/A"
        scheme_name = scheme_name_elem.text.strip() if scheme_name_elem else "N/A"

        # Extract the 'sources' link. The anchor may be absent (find returns
        # None) or lack an href, so guard both instead of indexing directly.
        sources_elem = soup.find('a', class_='flex flex-row items-center py-4 justify-start hover:underline underline-offset-2')
        if sources_elem and sources_elem.has_attr('href'):
            sources_link = sources_elem['href']
        else:
            sources_link = "N/A"

        # Start the record with the URL, state, scheme_name and sources link
        data = {'url': URL, 'state': state, 'scheme_name': scheme_name, 'sources': sources_link}

        # Each div with class 'pt-10' that carries an id contributes one field
        for div in soup.find_all('div', class_='pt-10'):
            div_id = div.get('id')
            if not div_id:
                continue

            # Drop the <div class="mb-2"> child so its text is not captured
            excluded_content = div.find('div', class_='mb-2')
            if excluded_content:
                excluded_content.extract()

            # Only the first <ol> in the section is read; sections without
            # an <ol> produce no field.
            ol_content = div.find('ol')
            if ol_content:
                data[div_id] = ' '.join(li.text.strip() for li in ol_content.find_all('li'))

        # Convert the data dictionary to a one-row DataFrame
        df = pd.DataFrame([data])

        # Save the data to a CSV file
        df.to_csv("scraped_data.csv", index=False)
        print("Data saved to 'scraped_data.csv'.")

    else:
        print("Couldn't find the outer div element.")

else:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")


Data saved to 'scraped_data.csv'.


In [60]:
df

Unnamed: 0,url,state,scheme_name,sources
0,https://www.myscheme.gov.in/schemes/bjrcy,Ministry Of Social Justice and Empowerment,Babu Jagjivan Ram Chhatrawas Yojna,https://socialjustice.gov.in/writereaddata/Upl...


In [63]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_url_details(URL, timeout=10):
    """Scrape the header fields and text sections of one scheme page.

    Args:
        URL: Address of the scheme page to fetch.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without it requests would wait indefinitely.

    Returns:
        A dict that always contains 'url'. When the page header block is
        found it also contains 'state' and 'scheme_name', 'sources' when the
        link is present, and one entry per section div found on the page.
        Returns None when the response status is not 200 or when any
        exception is raised while fetching or parsing (the reason is printed).
    """
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.3'}
    # ids of the divs whose text is collected, one output key per id
    sections = ['details', 'benefits', 'eligibility', 'exclusions', 'applicationProcess', 'documentsRequired', 'faq']
    data = {'url': URL}
    try:
        response = requests.get(URL, headers=HEADERS, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve the webpage for {URL}. Status code: {response.status_code}")
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        outer_div = soup.find('div', class_='w-full flex flex-row justify-between items-start mb-5')

        # Without the header block nothing else is extracted; the caller
        # still gets the dict holding just the URL.
        if not outer_div:
            return data

        state_elem = outer_div.find('h2', class_='text-darkIndigo-900 text-sm')
        scheme_name_elem = outer_div.find('h1', class_='font-bold text-green-600 text-xl sm:text-2xl mt-1')
        data['state'] = state_elem.text.strip() if state_elem else "N/A"
        data['scheme_name'] = scheme_name_elem.text.strip() if scheme_name_elem else "N/A"

        # Extract the 'sources' link; the key is omitted when it is missing
        sources_elem = soup.find('a', class_='flex flex-row items-center py-4 justify-start hover:underline underline-offset-2')
        if sources_elem and sources_elem.has_attr('href'):
            data['sources'] = sources_elem['href']

        # Join all text fragments of each section div with single spaces
        for section in sections:
            section_div = soup.find('div', id=section)
            if section_div:
                data[section] = ' '.join(section_div.stripped_strings)

        return data
    except Exception as e:
        # Best-effort scraping: report the failure and let the caller go on
        print(f"An error occurred while processing {URL}: {e}")
        return None

# Try the function on a single page and store the result as a one-row CSV.
URL = "https://www.myscheme.gov.in/schemes/e-nam"
# Alternative test page: "https://www.myscheme.gov.in/schemes/bjrcy"
data = scrape_url_details(URL)
if data is None:
    # scrape_url_details returns None on failure (it prints the reason);
    # there is nothing meaningful to save, so do not report success.
    print(f"No data scraped for {URL}; 'scraped_data.csv' was not written.")
else:
    df = pd.DataFrame([data])
    df.to_csv("scraped_data.csv", index=False)
    print("Data saved to 'scraped_data.csv'.")


Data saved to 'scraped_data.csv'.


In [64]:
df

Unnamed: 0,url,state,scheme_name,sources,details,benefits,eligibility,applicationProcess,documentsRequired
0,https://www.myscheme.gov.in/schemes/e-nam,Ministry Of Agriculture and Farmers Welfare,National Agriculture Market,https://enam.gov.in/web/docs/namguidelines.pdf,Details e-National Agriculture Market (e-NAM) ...,Benefits Benefits of trading on e-NAM: Transpa...,Eligibility Eligibility criteria for availing ...,Application Process Online Registration Guidel...,Documents Required No Documents required for t...


## Get all the relevant links (schemes)

Extract each scheme's link and keywords.

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.common.exceptions import ElementClickInterceptedException
import time

page_first = "https://www.myscheme.gov.in/search"

def click_next_page(driver, current_page):
    """Click the pagination entry for the page after ``current_page``.

    Args:
        driver: WebDriver showing the search results.
        current_page: Number of the page currently displayed.

    Returns:
        True once a click on the next page number has been issued, False
        when the entry could not be found or clicked (a message is printed
        in either case).
    """
    next_page_num = current_page + 1
    # The pagination entry is an <li> whose text is exactly the page number
    page_locator = (By.XPATH, f"//li[text()='{next_page_num}']")

    try:
        page_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(page_locator)
        )

        # Bring the entry into the viewport, then pause briefly so any
        # overlay has a chance to go away
        driver.execute_script("arguments[0].scrollIntoView();", page_button)
        time.sleep(2)

        # Native click first; fall back to a JavaScript click when another
        # element intercepts it
        try:
            page_button.click()
        except ElementClickInterceptedException:
            driver.execute_script("arguments[0].click();", page_button)
    except (NoSuchElementException, TimeoutException):
        print(f"Failed to navigate to page {next_page_num}")  # Debugging Output
        return False
    except ElementClickInterceptedException:
        print(f"Element was obscured when trying to navigate to page {next_page_num}")
        return False
    else:
        print(f"Clicked to navigate to page {next_page_num}")  # Debugging Output
        return True

def extract_content_from_current_page(driver):
    """Collect the scheme URL and keyword labels of every card on the page.

    Args:
        driver: WebDriver currently showing a page of search results.

    Returns:
        A dict mapping each card's link (the href of its ``h2 a`` element)
        to the list of keyword texts found inside that card.
    """
    card_selector = 'div.mx-auto.rounded-xl.shadow-md'
    keyword_selector = 'div.text-gray-800.bg-gray-100.rounded'

    wait = WebDriverWait(driver, 10)
    # Only the number of cards is taken from this first lookup
    card_count = len(
        wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, card_selector)))
    )

    url_keywords_map = {}
    for position in range(card_count):
        # Look the card up again by position on every pass instead of
        # reusing the elements from the first lookup (presumably to avoid
        # stale element references -- TODO confirm)
        card = driver.find_elements(By.CSS_SELECTOR, card_selector)[position]
        url = card.find_element(By.CSS_SELECTOR, 'h2 a').get_attribute('href')

        # Wait until keyword labels exist somewhere on the page, then read
        # the ones that belong to this card
        wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, keyword_selector)))
        labels = card.find_elements(By.CSS_SELECTOR, keyword_selector)
        url_keywords_map[url] = [label.text for label in labels]

    print(f"Extracted {len(url_keywords_map)} items from the page.")
    return url_keywords_map


def main():
    """Walk through every search result page and collect its cards.

    Opens a Chrome WebDriver on ``page_first``, extracts the first page and
    then keeps clicking the next page number until ``click_next_page``
    reports failure.

    Returns:
        A list with one dict per visited page, each mapping scheme URL to
        its keyword list (as produced by extract_content_from_current_page).
    """
    # Initialize WebDriver
    driver = webdriver.Chrome()
    all_content = []

    try:
        driver.get(page_first)

        current_page = 1
        all_content.append(extract_content_from_current_page(driver))

        # Try navigating using pagination numbers
        while click_next_page(driver, current_page):
            current_page += 1
            # Wait until one of the expected elements of the new page is present
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div.mx-auto.rounded-xl.shadow-md')))
            all_content.append(extract_content_from_current_page(driver))
    finally:
        # Always end the browser session, also when a step above raises.
        # quit() shuts down the whole driver session, whereas close() only
        # closes the current window.
        driver.quit()

    for content in all_content:
        print(content)

    return all_content  # Return the collected content

if __name__ == "__main__":
    # Keep the result in a module-level name so later cells can use it
    collected_data = main()

In [None]:
import json

# Flatten the per-page {url: keywords} dicts into one list of
# {"URL": ..., "Keywords": ...} records
formatted_data = []
for page_map in collected_data:
    formatted_data.extend(
        {"URL": url, "Keywords": keywords} for url, keywords in page_map.items()
    )

# Serialize the records as pretty-printed JSON and show the result
json_data = json.dumps(formatted_data, indent=4)
print(json_data)


In [65]:
# One row per scheme URL with its keyword list, across all scraped pages
flattened_data = [
    {"url": link, "keywords": tags}
    for page_map in collected_data
    for link, tags in page_map.items()
]

keywords_df = pd.DataFrame(flattened_data)

In [66]:
keywords_df

Unnamed: 0,url,keywords
0,https://www.myscheme.gov.in/schemes/bjrcy,"[Hostel, Student]"
1,https://www.myscheme.gov.in/schemes/onorc,"[Migrant Workers, Ration Card]"
2,https://www.myscheme.gov.in/schemes/nos-sc,"[Degree, International Education, PhD, Post-Gr..."
3,https://www.myscheme.gov.in/schemes/kvsy,"[Financial Assistance, Girl, Kanya, Marriage, ..."
4,https://www.myscheme.gov.in/schemes/uky,"[Deprivation Of Liberty, Domestic Violence, Me..."
...,...,...
1123,https://www.myscheme.gov.in/schemes/tkgthe,"[Craftsman, Grant, Handicrafts, OBC, Self-empl..."
1124,https://www.myscheme.gov.in/schemes/mnssy,"[Empowerment, Financial Assistance, Girl Educa..."
1125,https://www.myscheme.gov.in/schemes/apy,"[Bank Account Holders, Pension, Unorganized Wo..."
1126,https://www.myscheme.gov.in/schemes/sisnhe,"[Employment, Enterprises, Interest Subvention,..."


## Extract page content from all url links

Original code (to be archived): the code below does not extract some of the page content.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.3'}

def scrape_url(URL):
    """Scrape header fields and list sections of one scheme page.

    NOTE: only the <li> items of the first <ol> in each 'pt-10' section are
    captured, so sections without an <ol> are left out of the result.

    Args:
        URL: Address of the scheme page to fetch.

    Returns:
        A dict with 'url', 'state', 'scheme_name', 'sources' and one entry
        per captured section. An empty dict when the request fails, the
        status is not 200, or the page header block is not found (the
        reason is printed).
    """
    data = {}

    # Bound the wait and keep a single unreachable page from aborting a
    # whole crawl; requests waits indefinitely when no timeout is given.
    try:
        response = requests.get(URL, headers=HEADERS, timeout=10)
    except requests.RequestException as exc:
        print(f"Request failed for {URL}: {exc}")
        return data

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage {URL}. Status code: {response.status_code}")
        return data

    soup = BeautifulSoup(response.content, 'html.parser')
    outer_div = soup.find('div', class_='w-full flex flex-row justify-between items-start mb-5')
    if not outer_div:
        print(f"Couldn't find the outer div element for {URL}.")
        return data

    state_elem = outer_div.find('h2', class_='text-darkIndigo-900 text-sm')
    scheme_name_elem = outer_div.find('h1', class_='font-bold text-green-600 text-xl sm:text-2xl mt-1')

    state = state_elem.text.strip() if state_elem else "N/A"
    scheme_name = scheme_name_elem.text.strip() if scheme_name_elem else "N/A"

    # The anchor may be missing or may carry no href; both yield 'N/A'
    sources_elem = soup.find('a', class_='flex flex-row items-center py-4 justify-start hover:underline underline-offset-2')
    sources_link = sources_elem.get('href', 'N/A') if sources_elem else 'N/A'

    data = {'url': URL, 'state': state, 'scheme_name': scheme_name, 'sources': sources_link}

    for div in soup.find_all('div', class_='pt-10'):
        div_id = div.get('id')
        if not div_id:
            continue

        # Remove the <div class="mb-2"> child so its text is not captured
        excluded_content = div.find('div', class_='mb-2')
        if excluded_content:
            excluded_content.extract()

        ol_content = div.find('ol')
        if ol_content:
            data[div_id] = ' '.join(li.text.strip() for li in ol_content.find_all('li'))

    return data

# Collect data from all URLs
# Scrape every URL found on the search pages; each entry of collected_data
# is a dict whose keys are the scheme URLs
all_results = [
    scrape_url(url)
    for data_dict in collected_data
    for url in data_dict
]

# Tabulate the results and store them as JSON Lines (one record per line)
df = pd.DataFrame(all_results)
df.to_json("scraped_data.json", orient="records", lines=True)
print("Data saved to 'scraped_data.json'.")



Data saved to 'scraped_data.json'.


In [None]:
# Find URLs that contain keywords like: agri, farm, ...

In [73]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.3'}


def scrape_url(URL):
    """Scrape header fields and list sections of one scheme page.

    NOTE(review): the collection loop in this cell calls
    scrape_url_details rather than this function -- confirm which one is
    intended.

    Args:
        URL: Address of the scheme page to fetch.

    Returns:
        A dict that always contains 'url'. When the page header block is
        found it also holds 'state', 'scheme_name', 'sources' and one entry
        per 'pt-10' section that has an id and contains an <ol>. Request
        failures and non-200 responses are printed and leave only 'url'.
    """
    data = {'url': URL}

    try:
        response = requests.get(URL, headers=HEADERS, timeout=10)  # Add a timeout for the request
    except requests.Timeout:
        print(f"Timeout occurred for {URL}.")
        return data
    except requests.RequestException as exc:
        # Connection errors and the like must not abort the whole crawl
        print(f"Request failed for {URL}: {exc}")
        return data

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage {URL}. Status code: {response.status_code}")
        return data

    soup = BeautifulSoup(response.content, 'html.parser')
    outer_div = soup.find('div', class_='w-full flex flex-row justify-between items-start mb-5')
    if not outer_div:
        print(f"Couldn't find the outer div element for {URL}.")
        return data

    state_elem = outer_div.find('h2', class_='text-darkIndigo-900 text-sm')
    scheme_name_elem = outer_div.find('h1', class_='font-bold text-green-600 text-xl sm:text-2xl mt-1')

    # The anchor may be missing or may carry no href; both yield 'N/A'
    sources_elem = soup.find('a', class_='flex flex-row items-center py-4 justify-start hover:underline underline-offset-2')
    sources_link = sources_elem.get('href', 'N/A') if sources_elem else 'N/A'

    data.update({
        'state': state_elem.text.strip() if state_elem else "N/A",
        'scheme_name': scheme_name_elem.text.strip() if scheme_name_elem else "N/A",
        'sources': sources_link,
    })

    for div in soup.find_all('div', class_='pt-10'):
        div_id = div.get('id')
        if not div_id:
            continue

        # Remove the <div class="mb-2"> child so its text is not captured
        excluded_content = div.find('div', class_='mb-2')
        if excluded_content:
            excluded_content.extract()

        # Only the items of the first <ol> in the section are kept
        ol_content = div.find('ol')
        if ol_content:
            data[div_id] = ' '.join(li.text.strip() for li in ol_content.find_all('li'))

    return data


# Collect data from all URLs
# Scrape the detail page of every scheme URL gathered from the search pages
all_results = []

for page_map in collected_data:
    # Each page dict is keyed by scheme URL; the keyword values are not needed here
    all_results.extend(scrape_url_details(url) for url in page_map)

# One DataFrame row per scraped URL
df = pd.DataFrame(all_results)



In [74]:
len(df)

1128

In [86]:
df

Unnamed: 0,url,state,scheme_name,sources,details,benefits,eligibility,exclusions,applicationProcess,documentsRequired
0,https://www.myscheme.gov.in/schemes/bjrcy,Ministry Of Social Justice and Empowerment,Babu Jagjivan Ram Chhatrawas Yojna,https://socialjustice.gov.in/writereaddata/Upl...,Details Background: This scheme for the constr...,Benefits FUNDING PATTERN ﻿ For Girls Hostels: ...,Eligibility Eligibility for the Hostels: 1. Wh...,Exclusions NA,Application Process Offline The hard copy of t...,Documents Required List of Documents to be sub...
1,https://www.myscheme.gov.in/schemes/onorc,"Ministry Of Consumer Affairs, Food And Public ...",One Nation One Ration Card,https://nfsa.gov.in/,Details The ONORC scheme is being implemented ...,Benefits This system allows all NFSA beneficia...,Eligibility One Nation One Ration Card for pro...,,Application Process Offline Interested person ...,Documents Required Ration Card Aadhar Card (if...
2,https://www.myscheme.gov.in/schemes/nos-sc,Ministry Of Social Justice and Empowerment,National Overseas Scholarship For Scheduled Ca...,https://nosmsje.gov.in/docs/nosGuidelines2022-...,Details A scholarship scheme by Ministry of So...,Benefits Annual Maintenance Allowance: For the...,Eligibility The applicant must be a student be...,"Exclusions Bachelors Level courses , in any di...",Application Process Online Step 1: The candida...,Documents Required During the Application Stag...
3,https://www.myscheme.gov.in/schemes/kvsy,Uttar Pradesh,Kanya Vivah Sahayta Yojana,https://upbocw.in/pdf/schemes/kanya_vivah_2022...,Details Details The primary objective of this ...,Benefits Benefits In case of fulfilment of all...,Eligibility Eligibility After the registration...,,Application Process Online Application Form Wi...,Documents Required List of required documents ...
4,https://www.myscheme.gov.in/schemes/uky,Madhya Pradesh,Usha Kiran Yojana,https://cmhelpline.mp.gov.in/Schmedetail.aspx?...,Details Violence against women and their child...,Benefits Providing protection and support agai...,Eligibility All women of all age groups. All c...,,Application Process Offline Applicants have to...,Documents Required Passport-size photograph. A...
...,...,...,...,...,...,...,...,...,...,...
1123,https://www.myscheme.gov.in/schemes/tkgthe,Kerala,Tool Kit Grant for Traditional Handicrafts Exp...,https://bcdd.kerala.gov.in/en/schemes/developm...,Details Introduction: A sizeable population of...,"Benefits 1.\tThe training costs, stipend (if f...",Eligibility 1.\tThe applicant should be a perm...,,Application Process Offline Step 01: The appli...,Documents Required 1.\tCaste Certificate 2.\tI...
1124,https://www.myscheme.gov.in/schemes/mnssy,Chhattisgarh,Mukhyamantri Noni Sashaktikaran Sahayata Yojana,https://cglabour.nic.in/BOCW/BOCADHISUCHNA/196...,"Details In 2022, Chhattisgarh State Government...","Benefits Financial Benefits: ₹ 20,000/- only f...",Eligibility The girl's father or mother or bot...,,Application Process Online Application Process...,Documents Required Labor registration card. Da...
1125,https://www.myscheme.gov.in/schemes/apy,Ministry Of Finance,Atal Pension Yojana,https://npscra.nsdl.co.in/nsdl/scheme-details/...,Details Atal Pension Yojana (APY) is an old ag...,Benefits Upon exit on attaining 60 years The s...,Eligibility Age of joining and contribution pe...,Exclusions Tax Payers will not be eligible to ...,Application Process Online Offline Process 1 ﻿...,Documents Required Aadhaar Card Active Bank/Po...
1126,https://www.myscheme.gov.in/schemes/sisnhe,Kerala,Scheme for Interest Subvention to Nano Househo...,https://industry.kerala.gov.in/index.php/schem...,Details The scheme “Scheme for Interest Subven...,Benefits 1. The unit will be eligible for inte...,Eligibility A unit/enterprise which satisfies ...,Exclusions Manufacturing units which are eligi...,Application Process Online Offline Step 01: Ap...,Documents Required 1.\tCopy of identity proof ...


### Data cleaning for embedding

In [93]:
# Copy of the scraped table with the keyword lists added as a new column;
# the values are matched by row position, not by URL
results = df.assign(keywords=keywords_df["keywords"].values)


In [94]:
results

Unnamed: 0,url,state,scheme_name,sources,details,benefits,eligibility,exclusions,applicationProcess,documentsRequired,keywords
0,https://www.myscheme.gov.in/schemes/bjrcy,Ministry Of Social Justice and Empowerment,Babu Jagjivan Ram Chhatrawas Yojna,https://socialjustice.gov.in/writereaddata/Upl...,Details Background: This scheme for the constr...,Benefits FUNDING PATTERN ﻿ For Girls Hostels: ...,Eligibility Eligibility for the Hostels: 1. Wh...,Exclusions NA,Application Process Offline The hard copy of t...,Documents Required List of Documents to be sub...,"[Hostel, Student]"
1,https://www.myscheme.gov.in/schemes/onorc,"Ministry Of Consumer Affairs, Food And Public ...",One Nation One Ration Card,https://nfsa.gov.in/,Details The ONORC scheme is being implemented ...,Benefits This system allows all NFSA beneficia...,Eligibility One Nation One Ration Card for pro...,,Application Process Offline Interested person ...,Documents Required Ration Card Aadhar Card (if...,"[Migrant Workers, Ration Card]"
2,https://www.myscheme.gov.in/schemes/nos-sc,Ministry Of Social Justice and Empowerment,National Overseas Scholarship For Scheduled Ca...,https://nosmsje.gov.in/docs/nosGuidelines2022-...,Details A scholarship scheme by Ministry of So...,Benefits Annual Maintenance Allowance: For the...,Eligibility The applicant must be a student be...,"Exclusions Bachelors Level courses , in any di...",Application Process Online Step 1: The candida...,Documents Required During the Application Stag...,"[Degree, International Education, PhD, Post-Gr..."
3,https://www.myscheme.gov.in/schemes/kvsy,Uttar Pradesh,Kanya Vivah Sahayta Yojana,https://upbocw.in/pdf/schemes/kanya_vivah_2022...,Details Details The primary objective of this ...,Benefits Benefits In case of fulfilment of all...,Eligibility Eligibility After the registration...,,Application Process Online Application Form Wi...,Documents Required List of required documents ...,"[Financial Assistance, Girl, Kanya, Marriage, ..."
4,https://www.myscheme.gov.in/schemes/uky,Madhya Pradesh,Usha Kiran Yojana,https://cmhelpline.mp.gov.in/Schmedetail.aspx?...,Details Violence against women and their child...,Benefits Providing protection and support agai...,Eligibility All women of all age groups. All c...,,Application Process Offline Applicants have to...,Documents Required Passport-size photograph. A...,"[Deprivation Of Liberty, Domestic Violence, Me..."
...,...,...,...,...,...,...,...,...,...,...,...
1123,https://www.myscheme.gov.in/schemes/tkgthe,Kerala,Tool Kit Grant for Traditional Handicrafts Exp...,https://bcdd.kerala.gov.in/en/schemes/developm...,Details Introduction: A sizeable population of...,"Benefits 1.\tThe training costs, stipend (if f...",Eligibility 1.\tThe applicant should be a perm...,,Application Process Offline Step 01: The appli...,Documents Required 1.\tCaste Certificate 2.\tI...,"[Craftsman, Grant, Handicrafts, OBC, Self-empl..."
1124,https://www.myscheme.gov.in/schemes/mnssy,Chhattisgarh,Mukhyamantri Noni Sashaktikaran Sahayata Yojana,https://cglabour.nic.in/BOCW/BOCADHISUCHNA/196...,"Details In 2022, Chhattisgarh State Government...","Benefits Financial Benefits: ₹ 20,000/- only f...",Eligibility The girl's father or mother or bot...,,Application Process Online Application Process...,Documents Required Labor registration card. Da...,"[Empowerment, Financial Assistance, Girl Educa..."
1125,https://www.myscheme.gov.in/schemes/apy,Ministry Of Finance,Atal Pension Yojana,https://npscra.nsdl.co.in/nsdl/scheme-details/...,Details Atal Pension Yojana (APY) is an old ag...,Benefits Upon exit on attaining 60 years The s...,Eligibility Age of joining and contribution pe...,Exclusions Tax Payers will not be eligible to ...,Application Process Online Offline Process 1 ﻿...,Documents Required Aadhaar Card Active Bank/Po...,"[Bank Account Holders, Pension, Unorganized Wo..."
1126,https://www.myscheme.gov.in/schemes/sisnhe,Kerala,Scheme for Interest Subvention to Nano Househo...,https://industry.kerala.gov.in/index.php/schem...,Details The scheme “Scheme for Interest Subven...,Benefits 1. The unit will be eligible for inte...,Eligibility A unit/enterprise which satisfies ...,Exclusions Manufacturing units which are eligi...,Application Process Online Offline Step 01: Ap...,Documents Required 1.\tCopy of identity proof ...,"[Employment, Enterprises, Interest Subvention,..."


In [95]:
# Share of rows (in percent) whose 'details' text is missing or empty
details_column = results["details"]
is_blank = details_column.isna() | details_column.eq("")
empty_percentage = is_blank.sum() / len(results) * 100
empty_percentage

0.0

In [96]:
import json
import math


def _nan_to_none(value):
    """Return None for a float NaN and every other value unchanged."""
    return None if isinstance(value, float) and math.isnan(value) else value


# Convert the DataFrame to a list of per-row dictionaries. Cells that are
# missing for a row come back as NaN, which json.dump would write as the bare
# token NaN (not valid JSON); map them to None so they are written as null.
data_dict = [
    {column: _nan_to_none(value) for column, value in row.items()}
    for row in results.to_dict(orient='records')
]

# Write the records to a pretty-printed JSON file
with open('results.json', 'w', encoding='utf-8') as f:
    json.dump(data_dict, f, indent=4)
