In [3]:
import html
import json

# read the input file as UTF-8 text and decode it as JSON
with open('url_meta_tags.json', 'r', encoding='utf-8') as source:
    data = json.loads(source.read())

# function to get specific meta tag content
def extract_meta_tag(meta_tags, tag_name):
    """Return the content of the first meta tag whose name is *tag_name*.

    Args:
        meta_tags: Iterable of dicts describing meta tags; the 'name' and
            'content' keys are read. ``None`` is treated as an empty list.
        tag_name: Value of the 'name' key to look for.

    Returns:
        The matching tag's 'content' value, or '' when no tag matches or the
        matching tag has no content.
    """
    for tag in meta_tags or ():
        # Tags without a 'name' key simply do not match, so use .get()
        # instead of indexing to avoid a KeyError on them.
        if tag.get('name') == tag_name:
            content = tag.get('content')
            # Normalise a missing/None content to the same '' fallback that
            # is returned when the tag is absent.
            return '' if content is None else content
    return ''

# output field -> name of the meta tag that supplies its value
tag_names_by_field = {
    'title': 'twitter:title',
    'description': 'description',
    'keywords': 'keywords',
}

# build one flat record per entry of the loaded JSON data
parsed_results = []
for entry in data:
    record = {'url': entry['url']}

    # an entry whose 'meta_tags' value is None is treated as having no tags
    tags = entry['meta_tags']
    if tags is None:
        tags = []

    for field, tag_name in tag_names_by_field.items():
        record[field] = extract_meta_tag(tags, tag_name)

    parsed_results.append(record)

# print every record as labelled lines followed by a blank separator line
# each pair is (label shown in the output, key in the parsed record)
report_fields = (
    ('URL', 'url'),
    ('Title', 'title'),
    ('Description', 'description'),
    ('Keywords', 'keywords'),
)
for result in parsed_results:
    for label, key in report_fields:
        print(f"{label}: {result[key]}")
    print("")

# save the parsed results to JSON
with open('meta_tags_parsed.json', 'w', encoding='utf-8') as json_file:
    # The file is written as UTF-8, so keep non-ASCII text as-is instead of
    # letting json.dump turn every such character into a \uXXXX escape.
    json.dump(parsed_results, json_file, indent=4, ensure_ascii=False)

print("Parsed data saved to meta_tags_parsed.json")

# save the parsed results to HTML
# The cell values originate from external pages, so each one is HTML-escaped
# before being placed in the table; otherwise a '<' or '&' in a title or
# description would break the markup or inject tags into the report.
html_columns = ('url', 'title', 'description', 'keywords')

html_rows = [
    "<tr>"
    + "".join(
        f"<td>{html.escape(str(result[column]))}</td>" for column in html_columns
    )
    + "</tr>"
    for result in parsed_results
]

# Assemble the document with a single join rather than repeated '+='.
# The charset declaration matches the encoding the file is written with.
html_content = "".join([
    "<html><head><meta charset='utf-8'><title>Parsed Meta Tags</title></head><body><table border='1'>",
    "<tr><th>URL</th><th>Title</th><th>Description</th><th>Keywords</th></tr>",
    *html_rows,
    "</table></body></html>",
])

with open('meta_tags_parsed.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html_content)

print("Parsed data saved to meta_tags_parsed.html")


URL: https://www.haberturk.com/images/common/manifest/180x180.png
Title: 
Description: 
Keywords: 

URL: https://www.haberturk.com/images/common/favicon/32x32.png
Title: 
Description: 
Keywords: 

URL: https://www.haberturk.com/images/common/favicon/16x16.png
Title: 
Description: 
Keywords: 

URL: https://www.haberturk.com/images/common/favicon/favicon.ico?v=001
Title: 
Description: 
Keywords: 

URL: https://www.haberturk.com/manifest.webmanifest
Title: 
Description: 
Keywords: 

URL: https://im.haberturk.com
Title: 
Description: 
Keywords: 

URL: https://adsp.haberturk.com
Title: 
Description: 
Keywords: 

URL: https://im.hthayat.com
Title: 
Description: 
Keywords: 

URL: https://geoim.bloomberght.com
Title: 
Description: 
Keywords: 

URL: https://im.showtv.com.tr
Title: 
Description: 
Keywords: 

URL: https://mo.ciner.com.tr
Title: 
Description: 
Keywords: 

URL: https://static.criteo.net
Title: 
Description: 
Keywords: 

URL: https://pagead2.googlesyndication.com
Title: 
Description