In [1]:
import os
# Work from the parent of the notebook's folder (presumably the project root -
# the relative 'data/...' paths and the `src` package used below rely on it).
# The starting directory is recorded the first time this cell runs, so
# re-running the cell lands in the same place instead of climbing one more
# level on every execution.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [2]:
import pandas as pd
from time import strptime
import numpy as np

In [55]:
# Load the victim list from the '名單' sheet of the processed workbook.
df_date = pd.read_excel('data/processed/victim_list_01252023.xlsx', sheet_name='名單')

# victim name -> value of the 'time' column (the issue date used below).
# Built column-wise instead of materialising every row with `.iloc[i]`;
# if a victim name occurs more than once, the last row wins (as before).
victim2date = dict(zip(df_date['victim'], df_date['time']))

victim2date['Debbie Schlussel']

Timestamp('2017-04-21 00:00:00')

In [56]:
# predator name -> list of that predator's victims, in row order of df_date.
# Iterates the two columns in parallel rather than rebuilding a row Series
# with `.iloc[i]` for every lookup.
predator2victim = {}
for predator, victim in zip(df_date['predator'], df_date['victim']):
    predator2victim.setdefault(predator, []).append(victim)

predator2victim['Sean Hannity']

['Debbie Schlussel']

In [57]:
# victim name -> predator name. Built column-wise; for a victim name that
# appears on several rows the last row wins, exactly as with the row loop.
victim2predator = dict(zip(df_date['victim'], df_date['predator']))

victim2predator['Debbie Schlussel']

'Sean Hannity'

In [96]:
# For every victim, split the victims of the same predator by issue date:
#   ancestors - issue date strictly earlier than this victim's
#   affected  - everyone else (same date or later), excluding the victim
victim2ancestor = {}
victim2affected = {}
for victim, predator in victim2predator.items():
    issue_date = victim2date[victim]
    same_predator = predator2victim[predator]
    victim2ancestor[victim] = [
        other for other in same_predator
        if victim2date[other] < issue_date
    ]
    # `not (a < b)` rather than `a >= b`: mirrors the original else-branch,
    # so entries whose comparison is False for any reason end up here.
    victim2affected[victim] = [
        other for other in same_predator
        if not (victim2date[other] < issue_date) and other != victim
    ]


def print_case(label, name):
    """Print the name, predator and issue date of one victim under `label`."""
    print(f"{label}'s name: {name}")
    print(f"{label}'s predator: {victim2predator[name]}")
    print(f"{label} issue at: {victim2date[name]}\n")


# Show at most this many ancestors / affected victims for the sample.
PREVIEW_LIMIT = 2

sample = 'Liliana Segura'
ancestors = victim2ancestor[sample]
affecteds = victim2affected[sample]

print_case('sample', sample)

# Slicing keeps the preview safe for samples with fewer than PREVIEW_LIMIT
# entries, where fixed [0]/[1] indexing would raise IndexError.
for position, name in enumerate(ancestors[:PREVIEW_LIMIT], start=1):
    print_case(f'ancestor{position}', name)

for position, name in enumerate(affecteds[:PREVIEW_LIMIT], start=1):
    print_case(f'affected{position}', name)

sample's name: Liliana Segura
sample's predator: Don Hazen
sample issue at: 2017-12-22 00:00:00

ancestor1's name: Laura Gottesdiener
ancestor1's predator: Don Hazen
ancestor1 issue at: 2017-12-21 00:00:00

ancestor2's name: Mac McClelland
ancestor2's predator: Don Hazen
ancestor2 issue at: 2017-12-21 00:00:00

affected1's name: Kristen Gwynne
affected1's predator: Don Hazen
affected1 issue at: 2017-12-22 00:00:00

affected2's name: Tana Ganeva
affected2's predator: Don Hazen
affected2 issue at: 2017-12-22 00:00:00



In [78]:
df_a = pd.read_csv('data/raw/ait.csv')

# Headers that identify a row; every other header is parsed as a month label.
# 'victim_id ' is spelled with a trailing space, matching how the column is
# accessed elsewhere in this notebook.
ID_COLUMNS = ('predator', 'predator_id', 'victim', 'victim_id ')


def month_label_to_timestamp(label):
    """Convert a month column header into the Timestamp of that month's start.

    Characters 0-2 are parsed as an abbreviated month name and characters 3-4
    as the last two digits of a 20xx year.
    NOTE(review): assumes headers look like 'Jan15' - confirm against ait.csv.
    """
    month_number = strptime(label[:3], "%b").tm_mon
    two_digit_year = label[3:5]
    return pd.to_datetime(f'20{two_digit_year}-{month_number}', format='%Y-%m')


interval = [
    month_label_to_timestamp(column)
    for column in df_a.columns
    if column not in ID_COLUMNS
]
interval[0]

Timestamp('2015-01-01 00:00:00')

In [118]:
# victim name -> 'victim_id ' minus one. These values are used further down
# as row indices into the adjustment matrices.
# Built column-wise instead of one `.iloc[i]` row lookup per field.
victim2id = {
    victim: victim_id - 1
    for victim, victim_id in zip(df_a['victim'], df_a['victim_id '])
}

# Same structure as victim2ancestor / victim2affected, with every name
# replaced by its id from victim2id.
victim2ancestor_id = {
    victim: [victim2id[name] for name in ancestor_names]
    for victim, ancestor_names in victim2ancestor.items()
}
# Iterates victim2affected itself (the original walked victim2ancestor's keys,
# which only worked because both dicts share the same key set).
victim2affected_id = {
    victim: [victim2id[name] for name in affected_names]
    for victim, affected_names in victim2affected.items()
}

victim2ancestor_id['Liliana Segura']

[92, 93]

In [142]:
# Build three (victims x months) indicator matrices, one row per key of
# victim2date (in dict order) and one column per entry of `interval`:
#   s_adjust          - 1 where the month start is strictly after the victim's
#                       issue date; later incremented with the contributions
#                       of ancestors and affected victims (see below)
#   s_adjust_ancestor - same, but the issue month itself also counts as 1
#   s_adjust_affected - snapshot of s_adjust before any contribution is added
#
# NOTE(review): rows are positioned by enumeration order of victim2date, while
# the ancestor/affected ids come from df_a's 'victim_id ' - 1. This presumes
# both orderings coincide - confirm against the input files.
n_victims = len(victim2date)
n_months = len(interval)

s_adjust = np.zeros((n_victims, n_months))
s_adjust_ancestor = np.zeros((n_victims, n_months))

for row, issue_date in enumerate(victim2date.values()):
    for col, month_start in enumerate(interval):
        if month_start > issue_date:
            s_adjust[row, col] = 1
            s_adjust_ancestor[row, col] = 1
        elif (month_start.year == issue_date.year
              and month_start.month == issue_date.month):
            # Issue month itself: counts only in the ancestor variant.
            s_adjust_ancestor[row, col] = 1

# Freeze the per-victim indicator before rows start accumulating, so that
# contributions from affected victims are not double counted.
s_adjust_affected = np.copy(s_adjust)

for row, victim in enumerate(victim2date):
    # Loop variables are deliberately not called `id`: at cell level that name
    # would stay bound in the kernel and shadow the builtin for later cells.
    for ancestor_row in victim2ancestor_id[victim]:
        s_adjust[row] += s_adjust_ancestor[ancestor_row]
    for affected_row in victim2affected_id[victim]:
        s_adjust[row] += s_adjust_affected[affected_row]

In [143]:
# Identifier columns copied from the raw table, followed by one column per
# month holding the corresponding column of s_adjust.
# NOTE(review): 'predaotr_id' looks like a misspelling of 'predator_id'; it is
# kept as is because the adjust_ait() output below shows the same header.
id_columns = {
    'predator': df_a['predator'],
    'predaotr_id': df_a['predator_id'],
    'victim': df_a['victim'],
    'victim_id': df_a['victim_id '],
}
df_a_adjust = pd.DataFrame(id_columns)

for col, month_start in enumerate(interval):
    # Column `col` of the matrix: one value per victim row.
    df_a_adjust[month_start] = s_adjust[:, col]

df_a_adjust

Unnamed: 0,predator,predaotr_id,victim,victim_id,2015-01-01 00:00:00,2015-02-01 00:00:00,2015-03-01 00:00:00,2015-04-01 00:00:00,2015-05-01 00:00:00,2015-06-01 00:00:00,...,2017-10-01 00:00:00,2017-11-01 00:00:00,2017-12-01 00:00:00,2018-01-01 00:00:00,2018-02-01 00:00:00,2018-03-01 00:00:00,2018-04-01 00:00:00,2018-05-01 00:00:00,2018-06-01 00:00:00,2018-07-01 00:00:00
0,Sean Hannity,1,Debbie Schlussel,1,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,Eric Bolling,2,Caroline Heldman,2,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Charles Payne,3,Scottie Nell Hughes,3,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,Harry Knowles,4,Jasmine Baker,4,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
4,Lockhart Steele,5,Eden Rohatensky,5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,Francisco Ayala,84,Michelle Herrera,191,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
191,"Roland G. Fryer, Jr.",85,Naomi R. Shatz,192,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
192,George Tyndall,86,Dana Loewy,193,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
193,George Tyndall,86,Heil,194,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0


In [3]:
# Project helper, called here with its default arguments. The table it
# displays matches, in the rows shown, the df_a_adjust table built step by
# step above; its implementation lives in src/data/utils and is not visible
# in this notebook.
# NOTE(review): this import sits mid-notebook and needs the working directory
# to be the folder that contains `src` (see the chdir cell at the top).
from src.data.utils import adjust_ait
adjust_ait()

Unnamed: 0,predator,predaotr_id,victim,victim_id,2015-01-01 00:00:00,2015-02-01 00:00:00,2015-03-01 00:00:00,2015-04-01 00:00:00,2015-05-01 00:00:00,2015-06-01 00:00:00,...,2017-10-01 00:00:00,2017-11-01 00:00:00,2017-12-01 00:00:00,2018-01-01 00:00:00,2018-02-01 00:00:00,2018-03-01 00:00:00,2018-04-01 00:00:00,2018-05-01 00:00:00,2018-06-01 00:00:00,2018-07-01 00:00:00
0,Sean Hannity,1,Debbie Schlussel,1,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,Eric Bolling,2,Caroline Heldman,2,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Charles Payne,3,Scottie Nell Hughes,3,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,Harry Knowles,4,Jasmine Baker,4,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
4,Lockhart Steele,5,Eden Rohatensky,5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,Francisco Ayala,84,Michelle Herrera,191,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
191,"Roland G. Fryer, Jr.",85,Naomi R. Shatz,192,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
192,George Tyndall,86,Dana Loewy,193,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
193,George Tyndall,86,Heil,194,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0


In [5]:
# Run the same helper against a different workbook via `input_data_path`.
# NOTE(review): presumably this file has the same sheet/column layout as the
# victim list loaded earlier - verify; the helper's default path is not
# visible from this notebook.
adjust_ait(input_data_path = 'data/processed/adjust_victim_list_01252023.xlsx')