# Data collation

In [9]:
import pandas as pd
import numpy as np

# Load the raw dataset. The cells below read its "Category", "Date",
# "Descriptor" and "Country" columns.
df = pd.read_csv("Witchcraft_and_the_Press_Data_CSV.csv")


def number_to_category(num):
    """Translate a numeric category code into its text label.

    Args:
        num: Category code; the known codes are 1 through 7.

    Returns:
        The label for a known code, otherwise ``np.nan``.
    """
    labels_by_code = {
        1: "Violence",
        2: "Traditional Medicine/Healing",
        3: "Politics/Law",
        4: "Economics",
        5: "Religion",
        6: "Education",
        7: "Society/other",
    }
    if num in labels_by_code:
        return labels_by_code[num]
    # Anything outside the table is reported as missing.
    return np.nan


def get_year(date):
    """Extract the calendar year from a slash-separated date string.

    The year is taken from the text after the last "/". Two-digit years are
    expanded with a pivot at 60; a value of 100 or more is treated as an
    already complete year and returned unchanged.

    Args:
        date: Date string whose last "/"-separated field is the year
            (for example "1/15/60" or "1/15/1960"), or a missing value.

    Returns:
        The year as an int, or ``np.nan`` when ``date`` is missing.

    Raises:
        ValueError: If the last field is not an integer.
    """
    if pd.isnull(date):
        return np.nan
    year = int(date.split("/")[-1])
    if year >= 100:
        # Already a full year; adding a century offset would yield e.g. 3860.
        return year
    # Two-digit pivot: 60-99 -> 1960-1999, 00-59 -> 2000-2059.
    century = 1900 if year >= 60 else 2000
    return century + year


def get_country(country):
    """Expand a three-letter country code into the country's name.

    Args:
        country: One of "TAN", "KEN" or "UGA"; anything else is unknown.

    Returns:
        The full country name, or ``np.nan`` for an unrecognised value.
    """
    known = (
        ("TAN", "Tanzania"),
        ("KEN", "Kenya"),
        ("UGA", "Uganda"),
    )
    for code, name in known:
        if country == code:
            return name
    return np.nan


# Derive the four analysis columns from the raw ones, keep only those four,
# and drop every row in which any of them is missing.
df = (
    df.assign(
        category=df["Category"].map(number_to_category),
        year=df["Date"].map(get_year),
        summary=df["Descriptor"],
        country=df["Country"].map(get_country),
    )[["summary", "country", "year", "category"]]
    .dropna()
    .reset_index(drop=True)
)

df

Unnamed: 0,summary,country,year,category
0,"A European geologist, age 22, was killed by Go...",Tanzania,1960.0,Violence
1,Police have arrested 56 administrators of Mau ...,Kenya,1960.0,Violence
2,"A man in Paidha, Uganda, ran amok with a mache...",Uganda,1960.0,Violence
3,"A tribal chief in Papati, Uganda, was jailed f...",Uganda,1960.0,Violence
4,"Two women from Rufiji District, Tanzania, conf...",Tanzania,1961.0,Violence
...,...,...,...,...
507,Many soccer experts attribute the Ugandan nati...,Uganda,1999.0,Society/other
508,"A mob in Bamunaanika, Uganda lynched an old ma...",Uganda,2003.0,Society/other
509,A woman lost custody of her daughter because s...,Kenya,2009.0,Society/other
510,Two children were rescued from being starved b...,Uganda,2009.0,Society/other


# Google Gemini Pro for question answering

In [10]:
import google.generativeai as genai


# The API key is read from a local text file, which keeps the key itself out
# of the notebook source.
with open("GOOGLE_API_KEY.txt") as key_file:
    raw_key = key_file.read()
GOOGLE_API_KEY = raw_key.strip()

# Register the key with the SDK, then build the model handle that
# prompt_gemini() calls.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel("models/gemini-1.0-pro")

In [11]:
# (column name, question) pairs. Each question is sent to the model once per
# summary and the reply is stored under the paired column name, so the column
# names define the layout of answers.csv.
questions = [
    (
        "region",
        "In what region did this event occur?",
    ),
    (
        "injuries",
        "Were there injuries due to witchcraft-related violence?",
    ),
    (
        "death",
        "Was there death due to witchcraft-related violence?",
    ),
    (
        "property_damage",
        "Was there property damage due to witchcraft-related violence?",
    ),
    (
        "property_ownership",
        "If there was property damage, who owned the property? Answer with 'individual', 'community', or 'government'.",
    ),
    (
        "perpetrators",
        "Did anyone commit witchcraft-related violence?",
    ),
    (
        "perpetrators_direction",
        # 'toward' = violence against the accused; 'by' = violence by the accused.
        "If anyone committed witchcraft-related violence, was the violence directed toward people accused of witchcraft or was the violence perpetrated by people accused of witchcraft? Answer with 'toward' or 'by'.",
    ),
    (
        "perpetrators_group_size",
        # The 'small' and 'large' ranges must not overlap at 10.
        "If anyone committed witchcraft-related violence, what was the size of the group? Answer with 'individual' if the group includes 1 individual, 'small' if the group includes 2-10 individuals, and 'large' if the group includes more than 10 individuals.",
    ),
    (
        "perpetrator_sex",
        "If anyone committed witchcraft-related violence, what was the predominant sex of the group or the individual? Answer with 'male', 'female', or 'mixed'.",
    ),
    (
        "victims",
        "Did anyone fall victim of witchcraft-related violence?",
    ),
    (
        "victims_group_size",
        # The 'small' and 'large' ranges must not overlap at 10.
        "If anyone fell victim to witchcraft-related violence, what was the size of the group? Answer with 'individual' if the group includes 1 individual, 'small' if the group includes 2-10 individuals, and 'large' if the group includes more than 10 individuals.",
    ),
    (
        "victims_sex",
        "If anyone fell victim to witchcraft-related violence, what was the predominant sex of the group or the individual? Answer with 'male', 'female', or 'mixed'.",
    ),
    (
        "police_involvement",
        "Did the police or a similar force take action?",
    ),
    (
        "police_success",
        "If the police or a similar force took action, was the action successful?",
    ),
    (
        "judicial_success",
        "If judicial hearing occurred, was it successful? This could include finding someone guilty, sentencing, etc.",
    ),
]

In [12]:
# Only the first N_SAMPLE_ROWS rows are kept, which caps how many model calls
# the answer-collection loop makes (one per row per question).
# NOTE: this rebinds `df`, so re-running this cell can only shrink it.
N_SAMPLE_ROWS = 2
df = df.head(N_SAMPLE_ROWS)
df

Unnamed: 0,summary,country,year,category
0,"A European geologist, age 22, was killed by Go...",Tanzania,1960.0,Violence
1,Police have arrested 56 administrators of Mau ...,Kenya,1960.0,Violence


In [13]:
def prompt_gemini(summary, question):
    """Ask the module-level ``model`` one question about one report summary.

    The summary and the question are embedded in a fixed instruction
    template that asks for a one-word answer, or 'N/A' when the question
    cannot be answered with confidence.

    Args:
        summary: Text of the news-report summary.
        question: Question to ask about that summary.

    Returns:
        The ``text`` of the first part of the model's response.

    NOTE(review): ``parts[0]`` is indexed unconditionally; presumably this
    raises when a response comes back without any parts -- confirm against
    the SDK before running over the full dataset.
    """
    pad = " " * 8  # indentation carried by every line of the template
    instructions = (
        "You are going to read the summary of a news report related to witchcraft. "
        "Then, you will read a question about the report. "
        "Answer the question given the information in the summary. "
        "Your answer must only be one word. "
        "If you cannot answer the question or if there is any doubt in your answer, type 'N/A'."
    )
    template_lines = [
        "",
        pad + instructions,
        pad,
        pad + f"SUMMARY OF NEWS REPORT: {summary}",
        pad * 2,
        pad + f"QUESTION: {question}",
        pad,
        pad + "YOUR ANSWER: ",
    ]
    reply = model.generate_content("\n".join(template_lines))
    return reply.parts[0].text


# NOTE(review): this looks like a leftover trial call with placeholder
# strings. It issues one real model request each time the cell runs, and `x`
# is not read by any cell visible here -- consider removing it.
x = prompt_gemini("sdf", "sdfs")

In [14]:
from collections import defaultdict
from tqdm import tqdm
import time

# Build one record per summary, keyed by question column. Every model call is
# followed by a one-second pause.
records = []
for _, report in tqdm(df.iterrows(), total=len(df)):
    record = {}
    for column, question in questions:
        record[column] = prompt_gemini(report["summary"], question)
        time.sleep(1)
    records.append(record)

# The table is written to disk only after every row has been processed.
answers = pd.DataFrame(records)
answers.to_csv("answers.csv", index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Display the collected answers.
# NOTE(review): the saved output of this cell lists 5 rows, although the
# earlier `df.head(...)` cell keeps only 2 rows of `df` -- presumably stale
# output from a previous run; re-run the notebook to refresh it.
answers

Unnamed: 0,region,injuries,death,property_damage,property_ownership,perpetrators,perpetrators_direction,perpetrators_group_size,perpetrator_sex,victims,victims_group_size,victims_sex,police_involvement,police_success,judicial_success
0,Dodoma,,Yes,,,Yes,toward,,,Yes,individual,male,,,
1,Kenya,,,,,,,,,,,,Yes,Yes,
2,Uganda,Yes,Yes,,,Yes,toward,individual,male,Yes,individual,female,,,
3,Uganda,,,,,,,,,,,,,Yes,Yes
4,Tanzania,,Yes,,,Yes,by,individual,female,Yes,individual,male,,Yes,Yes


In [None]:
# Print every summary in full, one per line; the table display above
# abbreviates them with "...".
for _, text in df["summary"].items():
    print(text)

A European geologist, age 22, was killed by Gogo people in a remote region of Dodoma District, Tanzania; the incident was believed to be related to local witchcraft beliefs when geologist found prospecting in a burial ground.
Police have arrested 56 administrators of Mau Mau type oaths in Kenya. Oathing is partially based on fear of witchcraft if oaths are violated. 
A man in Paidha, Uganda, ran amok with a machete, killing a woman and seriously injuring two others. He was responding to worries that a witch had killed one of his relatives.
A tribal chief in Papati, Uganda, was jailed for 18 months after he was caught practicing medicine without a license and for possessing poisons. 
Two women from Rufiji District, Tanzania, confess to ritual witchcraft murder of 4-year-old boy and sentenced to death by court. Cannibalism reported.
