In [1]:
import pandas as pd
import json

# Input locations: the media-bias ratings table (CSV) and the filtered
# search results (JSON).
media_bias_path = 'media-bias/media-bias.csv'
json_path = 'output/search_results_new_filtered.json'

# Read the ratings table into a DataFrame, then parse the JSON document.
media_bias_df = pd.read_csv(filepath_or_buffer=media_bias_path)
with open(json_path, mode='r') as evidence_file:
    evidence_data = json.loads(evidence_file.read())


def extract_base_domain(link):
    """Return the host part of a URL with one leading ``www.`` removed.

    Args:
        link: URL string, e.g. ``"https://www.example.com/page"``.

    Returns:
        The third ``/``-separated component of ``link`` (the host for a
        ``scheme://host/path`` URL, including any sub-domain or port) without
        a leading ``"www."``; ``None`` when ``link`` contains no ``"://"``.
    """
    if "://" in link:
        domain = link.split("/")[2]
        # str.lstrip("www.") would strip any run of the *characters* 'w' and
        # '.', mangling hosts such as "www.wsj.com" -> "sj.com" and
        # "web.archive.org" -> "eb.archive.org". Drop the literal prefix only.
        if domain.startswith("www."):
            domain = domain[len("www."):]
        return domain
    return None

# Update JSON evidence by refining and matching base domain.
#
# For every evidence entry, the link's base domain is looked up in the 'Group'
# column of the media-bias table. The first row whose 'Group' text contains the
# domain supplies the two rating fields; otherwise both are set to None.
# NOTE(review): this is a substring match, so a short domain such as "t.co"
# also matches a longer group like "washingtonpost.com", and a sub-domain such
# as "edition.cnn.com" does not match "cnn.com" -- confirm this is intended.
group_column = media_bias_df['Group']
ratings_by_domain = {}  # domain -> (credibility, bias); each domain is scanned once

for claim_id, claim_details in evidence_data.items():
    for evidence in claim_details["evidence"]:
        refined_domain = extract_base_domain(evidence["link"])
        if refined_domain and refined_domain not in ratings_by_domain:
            # regex=False: match the domain literally ('.' must not act as a
            # wildcard), so the row that is picked is a true substring match.
            # na=False: rows with a missing 'Group' never match instead of
            # producing NaN in the mask.
            mask = group_column.str.contains(refined_domain, regex=False, na=False)
            if mask.any():
                matched_row = media_bias_df[mask].iloc[0]
                # A missing rating becomes None: json.dump would otherwise
                # write it as the non-standard NaN token, and a NaN read back
                # from the file is not None.
                ratings_by_domain[refined_domain] = tuple(
                    None if pd.isna(value) else value
                    for value in (
                        matched_row["MBFC Credibility Rating"],
                        matched_row["Bias Rating"],
                    )
                )
            else:
                ratings_by_domain[refined_domain] = (None, None)

        # Links without a usable domain are never cached and fall back to None.
        credibility, bias = ratings_by_domain.get(refined_domain, (None, None))
        evidence["MBFC Credibility Rating"] = credibility
        evidence["Bias Rating"] = bias

# Save the refined JSON file
refined_json_path = 'output/search_results_mbfc_new.json'
with open(refined_json_path, 'w') as f:
    json.dump(evidence_data, f, indent=4)

In [42]:
def average_links_with_ratings(data):
    """Average, per entry, the number of evidence items carrying both ratings.

    Args:
        data: mapping whose values are dicts; only values that contain an
            "evidence" key (a list of dicts) are counted.

    Returns:
        Total number of evidence items whose "MBFC Credibility Rating" and
        "Bias Rating" are both not None, divided by the number of counted
        entries; 0 when no entry has an "evidence" key.
    """
    rated_per_entry = [
        sum(
            1
            for item in entry["evidence"]
            if not (
                item.get("MBFC Credibility Rating") is None
                or item.get("Bias Rating") is None
            )
        )
        for entry in data.values()
        if "evidence" in entry
    ]

    if not rated_per_entry:
        return 0
    return sum(rated_per_entry) / len(rated_per_entry)

# Reload rated evidence from disk for the statistics computed below.
# NOTE(review): the first cell saves to 'output/search_results_mbfc_new.json',
# while this reads 'output/search_results_mbfc.json' -- confirm which file is
# meant to be analysed.
json_path = 'output/search_results_mbfc.json'
with open(json_path, mode='r') as rated_file:
    evidence_data = json.loads(rated_file.read())


def average_high_credibility_links(data):
    """Average, per entry, the number of evidence items rated high credibility.

    Args:
        data: mapping whose values are dicts; only values that contain an
            "evidence" key (a list of dicts) are counted.

    Returns:
        Total number of evidence items whose "MBFC Credibility Rating" equals
        "HIGH CREDIBILITY", divided by the number of counted entries; 0 when
        no entry has an "evidence" key.
    """
    high_count = 0
    entry_count = 0

    for entry in data.values():
        if "evidence" not in entry:
            continue
        # Each True from the comparison adds one to the running total.
        high_count += sum(
            item.get("MBFC Credibility Rating") == "HIGH CREDIBILITY"
            for item in entry["evidence"]
        )
        entry_count += 1

    if entry_count > 0:
        return high_count / entry_count
    return 0


# Compute both per-ID averages over the loaded evidence, then report them
average = average_links_with_ratings(evidence_data)
high_cred_avg = average_high_credibility_links(evidence_data)
print(
    f"Average number of links with ratings per ID: {average}",
    f"Average number of links with high ratings per ID: {high_cred_avg}",
    sep="\n",
)


Average number of links with ratings per ID: 10.217723453017571
Average number of links with high ratings per ID: 8.115355233002292


### Evidence links filtered by similarity score

In [11]:
# Load the latest search results (a mapping keyed by claim id)
with open('output/search_results_latest.json', 'r') as results_file:
    data = json.load(results_file)

# Per claim: total number of evidence links, and the number whose
# similarity_score is at least 0.75.
result = []

for claim_id, claim_data in data.items():
    evidence_items = claim_data['evidence']
    evidence_links = [item['link'] for item in evidence_items]
    evidence_links_similar = [
        item['link'] for item in evidence_items if item['similarity_score'] >= 0.75
    ]
    claim_summary = {
        "claimid": claim_id,
        "evidence_links_count": len(evidence_links),
        "evidence_links_similar_count": len(evidence_links_similar)
    }
    result.append(claim_summary)

# Persist the per-claim counts
with open('output/evidence_links_filtered_by_similarity_score.json', 'w') as counts_file:
    json.dump(result, counts_file, indent=4)



In [14]:
import pandas as pd
import json

# Load the Snopes results table
file_path = 'output/snopes_results_latest.csv'
data = pd.read_csv(file_path)

# Keep rows whose Impact Score is 8 or higher (the comparison is inclusive)
high_impact_mask = data['Impact Score'].ge(8)
filtered_data = data[high_impact_mask]

# Build one JSON record per remaining row
result = [
    {
        "claim": row['claim'],
        "impact score": row['Impact Score'],
        "impact justification": row['Impact Justification']
    }
    for _, row in filtered_data.iterrows()
]
print(len(result))

# Write the records to a JSON file
output_file_path = 'output/high_impact_claims.json'
with open(output_file_path, 'w') as json_file:
    json.dump(result, json_file, indent=4)

# Last expression: displayed as the cell's output
output_file_path


228


'output/high_impact_claims.json'