In [49]:
import json

# Load the filtered checkpoint produced upstream. The encoding is passed
# explicitly so decoding does not depend on the platform's locale default
# (JSON files are expected to be UTF-8).
with open('../checkpoint/filtered_results.json', 'r', encoding='utf-8') as file:
    results = json.load(file)

In [52]:
def check_if_int(number):
    """Return True if ``int(number)`` succeeds, otherwise False.

    Args:
        number: Any value; typically a str or number parsed from JSON.

    Returns:
        bool: True when ``int(number)`` does not raise. False when it raises
        ValueError (e.g. ``'abc'``, ``'1.5'``, NaN), TypeError (e.g. ``None``
        or a list) or OverflowError (e.g. infinity).
    """
    try:
        int(number)
    except (ValueError, TypeError, OverflowError):
        # TypeError covers None / containers (a JSON null would otherwise
        # crash the caller); OverflowError covers float infinity.
        return False
    return True

def check_not_watch_time(number):
    """Return True when the string form of ``number`` is shorter than 3 characters.

    The length is measured on ``str(number)``, so a sign or a decimal point
    counts as a character (``-5`` passes, ``5.0`` does not).
    Presumably this rejects clock-style readings such as ``1430``; verify
    against the upstream extraction step.

    Args:
        number: Value whose ``str()`` length is tested.

    Returns:
        bool: ``len(str(number)) < 3``.
    """
    return len(str(number)) < 3

def check_order_valid(query_order):
    """Map an ordering phrase to ``'before'`` or ``'after'``.

    Matching is a case-sensitive substring test; ``'before'`` is tried first,
    so text containing both words yields ``'before'``.

    Args:
        query_order: Text (or other container supporting ``in``) to inspect.

    Returns:
        str | bool: The matched keyword, or False when neither is present.
    """
    for keyword in ('before', 'after'):
        if keyword in query_order:
            return keyword
    return False

def check_unit_valid(query_unit):
    """Map a free-text time unit to its canonical singular name.

    Keywords are tested as case-sensitive substrings in a fixed priority
    order (hour, minute, day, month, week, year); the first hit wins, so
    e.g. ``'weekday'`` resolves to ``'day'``.

    Args:
        query_unit: Text (or other container supporting ``in``) to inspect.

    Returns:
        str | bool: The canonical unit name, or False when no keyword matches.
    """
    priority = ('hour', 'minute', 'day', 'month', 'week', 'year')
    return next((unit for unit in priority if unit in query_unit), False)

def transform_to_hr(query_unit, query_num, query_order):
    """Convert a quantity in some time unit to signed hours.

    Args:
        query_unit: Text containing one of 'hour', 'minute', 'day', 'month',
            'week' or 'year' (tested as substrings, in that order).
        query_num: Quantity; must be accepted by ``int()``. If ``int(query_num)``
            is below 1, the quantity is replaced by 1.
        query_order: Text; if it contains 'before' the result is negative,
            otherwise positive.

    Returns:
        float | None: Hours rounded to one decimal place, using 30 days per
        month and 365 days per year. None when no unit keyword matches.
        Because of the one-decimal rounding, very small minute counts
        (e.g. 2 minutes) come out as 0.0.
    """
    amount = 1 if int(query_num) < 1 else query_num
    sign = -1 if 'before' in query_order else 1

    # (keyword, hours-from-value) pairs, tried in order; first match wins.
    conversions = (
        ('hour', lambda value: value),
        ('minute', lambda value: value / 60),
        ('day', lambda value: value * 24),
        ('month', lambda value: value * 24 * 30),
        ('week', lambda value: value * 24 * 7),
        ('year', lambda value: value * 24 * 365),
    )
    for keyword, to_hours in conversions:
        if keyword in query_unit:
            return round(sign * to_hours(float(amount)), 1)
    return None
    
def check_category_and_transform(query_category):
    """Map a free-text category to its canonical label.

    Each rule is a group of keywords tested as case-sensitive substrings
    (the keywords are lowercase); the first rule with a hit wins.

    The generic ``'other'`` rule is deliberately evaluated last. As a
    substring it also matches words such as "mother", "brother" or
    "another", so testing it earlier would shadow the more specific
    categories that follow.

    Args:
        query_category: Text (or other container supporting ``in``) to inspect.

    Returns:
        str | bool: The canonical category label, or False when no rule matches.
    """
    rules = (
        (('death of victim',), 'Death of victim'),
        (('school',), 'School problem'),
        (('job',), 'Job problem'),
        (('mental', 'depressed'), 'Depressed mood or mental health'),
        (('suicide attempt',), 'History of suicide attempt'),
        (('love message',), 'Sent love messages'),
        (('relationship problem with partner',), 'Relationship problem with partner'),
        (('financial problem',), 'Financial problem'),
        (('drug',), 'Drug'),
        (('alcohol',), 'Alcohol'),
        (('weapon',), 'Weapon'),
        (('thought of suicide',), 'Thought of suicide'),
        (('argument with family',), 'Argument with family'),
        (('death of friend or family',), 'Death of friend or family'),
        # Catch-all: must stay last (see docstring).
        (('other',), 'Other'),
    )
    for keywords, label in rules:
        if any(keyword in query_category for keyword in keywords):
            return label
    return False

In [53]:
# Build `final_results`: for every uid keep only the entries whose timing
# passes all validators, with the timing converted to signed hours and the
# category replaced by its canonical label. uids with no surviving entry are
# omitted.
#
# Normalised values are written into NEW dicts/lists instead of back into
# `results`. Mutating `results` in place made this cell non-idempotent: on a
# re-run the already-converted float hours (e.g. 5.0 -> "5.0", 3 characters)
# fail check_not_watch_time and every entry is silently dropped.
final_results = {}

for uid, values in results.items():
    kept_sentences = []
    kept_timing_words = []
    kept_categories = []
    kept_timings = []

    exact_timings = values['exact_timing']
    useful_sentences = values['useful_sentence']
    timing_words = values['original_timing_word']
    category_ls = values['category']

    for idx, exact_timing in enumerate(exact_timings):
        query_num = exact_timing['number']
        if not check_if_int(query_num) or not check_not_watch_time(query_num):
            continue

        raw_unit = str(exact_timing['unit']).lower()
        raw_order = str(exact_timing['before_or_after']).lower()
        raw_category = str(category_ls[idx]).lower()

        # Each validator returns the canonical value or False; run each once.
        unit = check_unit_valid(raw_unit)
        if not unit:
            continue
        order = check_order_valid(raw_order)
        if not order:
            continue
        category = check_category_and_transform(raw_category)
        if not category:
            continue

        # Copy the entry, overriding the normalised fields; existing keys keep
        # their original position so the serialised layout is unchanged.
        normalised_timing = {
            **exact_timing,
            'before_or_after': order,
            'number': transform_to_hr(unit, query_num, order),
            'unit': 'hour',
        }

        kept_sentences.append(useful_sentences[idx])
        kept_timing_words.append(timing_words[idx])
        kept_categories.append(category)
        kept_timings.append(normalised_timing)

    if kept_sentences:
        final_results[uid] = {
            "useful_sentence": kept_sentences,
            "original_timing_word": kept_timing_words,
            "category": kept_categories,
            "exact_timing": kept_timings
        }

In [None]:
# Persist the filtered results as pretty-printed JSON (4-space indent) and
# report where they were written.
output_file = "../output/final_results.json"
with open(output_file, mode="w") as json_file:
    json.dump(obj=final_results, fp=json_file, indent=4)

print(f"Data successfully saved to {output_file}")

In [56]:
import pandas as pd

# Flatten final_results into one row per (uid, category, timing) triple.
rows = [
    {
        'uid': uid,
        'category': category,
        'exact_timing': timing['number']
    }
    for uid, details in final_results.items()
    for category, timing in zip(details['category'], details['exact_timing'])
]

# Create DataFrame. Columns are named explicitly so that an empty `rows`
# still yields a frame with the expected columns; without them sort_values
# raises KeyError when nothing survived filtering.
df = pd.DataFrame(rows, columns=['uid', 'category', 'exact_timing'])
df = df.sort_values(by=['uid', 'exact_timing']).reset_index(drop=True)

In [58]:
# Export the sorted table as CSV, leaving out the positional index column.
df.to_csv(path_or_buf='../output/final_output.csv', index=False)