In [5]:
import json
import pandas as pd
from tqdm.notebook import tqdm
from functools import partial
from sqlalchemy import MetaData, Table, create_engine
from sqlalchemy.sql import text

from config import db_connection_string
from utils import tx_count_between_accounts, tx_count_between_grants
from iou_calculator import IoUCalculator


In [2]:
# Shared SQLAlchemy engine built from the project's configured connection
# string; it is handed to the tx-count helpers in the analysis cells below.
engine = create_engine(db_connection_string)

In [3]:
# Load the raw contribution records. The encoding is pinned to UTF-8 so that
# decoding the JSON file does not depend on the platform's locale default.
with open('raw_data/contributions.json', 'r', encoding='utf-8') as file:
    contributions = json.load(file)

# Keep only records flagged as successful and drop everything for grant '86'.
# NOTE(review): the reason grant '86' is excluded is not recorded in this
# notebook - confirm and document it.
contributions = [x for x in contributions if x['grant']!='86' and x['success']]

In [4]:
def generate_suspicious_ratio_df(pairs, objects, iou_threshold_list, min_contribution_count_list):
    """Tabulate suspicious-member counts for every threshold combination.

    For each (iou_threshold, min_contribution_count) combination a cell of the
    form ``"<eligible>,<flagged>,<flagged_with_tx>"`` is produced, where

    * ``eligible`` is the number of values in ``objects`` whose
      ``'contributions'`` entry has at least ``min_contribution_count`` items;
    * ``flagged`` is the number of distinct members (``pair[0]`` / ``pair[1]``)
      over all pairs with ``pair[2] >= iou_threshold`` and
      ``pair[3] >= min_contribution_count``;
    * ``flagged_with_tx`` is the same count restricted to pairs that also have
      ``pair[4] >= 1`` (presumably the transaction count between the two
      members - confirm against the code that builds ``pairs``).

    Args:
        pairs: Re-iterable collection of indexable records with at least five
            positions, used as described above.
        objects: Mapping whose values are subscriptable with ``'contributions'``.
        iou_threshold_list: Thresholds that become the rows of the result.
        min_contribution_count_list: Thresholds that become the columns.

    Returns:
        A ``pandas.DataFrame`` pivoted so that the index is ``iou_threshold``,
        the columns are ``min_contribution_count`` and every cell is the
        comma-joined count string.
    """
    def _members_of(keep):
        # Distinct first/second members over every pair accepted by `keep`.
        members = set()
        for pair in pairs:
            if keep(pair):
                members.update((pair[0], pair[1]))
        return members

    rows = []
    for iou_threshold in iou_threshold_list:
        for min_contribution_count in min_contribution_count_list:
            def passes_thresholds(pair):
                return pair[2] >= iou_threshold and pair[3] >= min_contribution_count

            eligible = [
                obj for obj in objects.values()
                if len(obj['contributions']) >= min_contribution_count
            ]
            flagged = _members_of(passes_thresholds)
            flagged_with_tx = _members_of(
                lambda pair: passes_thresholds(pair) and pair[4] >= 1
            )

            cell = ','.join(
                str(count)
                for count in (len(eligible), len(flagged), len(flagged_with_tx))
            )
            rows.append([iou_threshold, min_contribution_count, cell])

    table = pd.DataFrame(
        rows,
        columns=['iou_threshold', 'min_contribution_count', 'suspicious_account_ratio'],
    )
    return table.pivot(
        index='iou_threshold',
        columns='min_contribution_count',
        values='suspicious_account_ratio',
    )


## Account suspicious ratio (grant)

In [5]:
# Sweep grid for the table: IoU cut-offs 0.5 .. 1.0 in steps of 0.1 (rows) and
# the minimum-contribution cut-offs (columns).
iou_thresholds = [round(step * 0.1, 1) for step in range(5, 11)]
min_contribution_counts = [1, 3, 5, 10, 15, 20]

# The first key function picks a contribution's contributor profile, the second
# its grant; the last callable forwards two accounts plus the shared engine to
# tx_count_between_accounts.
# NOTE(review): how IoUCalculator uses these callables, and the meaning of the
# (0.5, 1) arguments below, is defined in iou_calculator - confirm there.
calculator = IoUCalculator(
    contributions,
    lambda x: x['subscription']['contributor_profile'],
    lambda x: x['grant'],
    lambda x, y: tx_count_between_accounts(x, y, engine),
)
pairs = calculator.compute_suspicious_pairs(0.5, 1)
generate_suspicious_ratio_df(pairs, calculator.objects, iou_thresholds, min_contribution_counts)

min_contribution_count,1,3,5,10,15,20
iou_threshold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.5,20655167542018,1093768801356,867150871197,48242526655,31451590512,23071180329
0.6,20655142901536,1093751191025,86714034904,48241948533,31451175426,2307853260
0.7,20655126041285,109374173826,86713382740,48241506433,3145878363,2307605206
0.8,20655119631085,109373532626,86712928562,48241268355,3145709299,2307472170
0.9,2065511300926,109372869467,86712334411,4824986291,3145538241,2307341128
1.0,2065510957851,109372526392,86711991336,4824770228,3145403185,230723388


## Account suspicious ratio (grant + date)

In [6]:
# Sweep grid for the table: IoU cut-offs 0.5 .. 1.0 in steps of 0.1 (rows) and
# the minimum-contribution cut-offs (columns).
iou_thresholds = [round(step * 0.1, 1) for step in range(5, 11)]
min_contribution_counts = [1, 3, 5, 10, 15, 20]

# The first key function picks a contribution's contributor profile; the second
# pairs its grant with the first 10 characters of 'created_on' (presumably the
# YYYY-MM-DD date part - confirm the timestamp format). The last callable
# forwards two accounts plus the shared engine to tx_count_between_accounts.
# NOTE(review): how IoUCalculator uses these callables, and the meaning of the
# (0.5, 1) arguments below, is defined in iou_calculator - confirm there.
calculator = IoUCalculator(
    contributions,
    lambda x: x['subscription']['contributor_profile'],
    lambda x: (x['grant'], x['created_on'][:10]),
    lambda x, y: tx_count_between_accounts(x, y, engine),
)
pairs = calculator.compute_suspicious_pairs(0.5, 1)
generate_suspicious_ratio_df(pairs, calculator.objects, iou_thresholds, min_contribution_counts)


min_contribution_count,1,3,5,10,15,20
iou_threshold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.5,20655119321037,112104369753,88663566662,50021633372,3351922273,2497659164
0.6,206559496828,112103590605,88663040538,50021305318,3351715238,2497486135
0.7,206558845712,112103168504,88662695450,50021089271,3351582213,2497375114
0.8,206558487624,112102810416,88662388369,5002952234,3351494187,249730795
0.9,206558028527,112102351319,88661971281,5002780193,3351392149,249723265
1.0,206557782494,112102105286,88661725248,5002645170,3351318131,249717455


## Grant suspicious ratio (account)

In [6]:
# Sweep grid for the table: IoU cut-offs 0.5 .. 1.0 in steps of 0.1 (rows) and
# the minimum-contribution cut-offs (columns).
iou_thresholds = [round(step * 0.1, 1) for step in range(5, 11)]
min_contribution_counts = [1, 3, 5, 10, 15, 20]

# The first key function picks a contribution's grant, the second its
# contributor profile; the last callable forwards two grants plus the shared
# engine to tx_count_between_grants.
# NOTE(review): how IoUCalculator uses these callables, and the meaning of the
# (0.5, 1) arguments below, is defined in iou_calculator - confirm there.
calculator = IoUCalculator(
    contributions,
    lambda x: x['grant'],
    lambda x: x['subscription']['contributor_profile'],
    lambda x, y: tx_count_between_grants(x, y, engine),
)
pairs = calculator.compute_suspicious_pairs(0.5, 1)
generate_suspicious_ratio_df(pairs, calculator.objects, iou_thresholds, min_contribution_counts)


min_contribution_count,1,3,5,10,15,20
iou_threshold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.5,1433551105,118735075,98817544,7285722,616319,541289
0.6,143338448,118720022,9889216,7282613,61684,54164
0.7,143327815,11871337,988505,728125,61622,54122
0.8,143323010,1187852,988250,72820,61600,54100
0.9,14332038,1187580,988120,72800,61600,54100
1.0,14332038,1187580,988120,72800,61600,54100


## Grant suspicious ratio (account + date)

In [7]:
# Sweep grid for the table: IoU cut-offs 0.5 .. 1.0 in steps of 0.1 (rows) and
# the minimum-contribution cut-offs (columns).
iou_thresholds = [round(step * 0.1, 1) for step in range(5, 11)]
min_contribution_counts = [1, 3, 5, 10, 15, 20]

# The first key function picks a contribution's grant; the second pairs its
# contributor profile with the first 10 characters of 'created_on' (presumably
# the YYYY-MM-DD date part - confirm the timestamp format). The last callable
# forwards two grants plus the shared engine to tx_count_between_grants.
# NOTE(review): how IoUCalculator uses these callables, and the meaning of the
# (0.5, 1) arguments below, is defined in iou_calculator - confirm there.
calculator = IoUCalculator(
    contributions,
    lambda x: x['grant'],
    lambda x: (x['subscription']['contributor_profile'], x['created_on'][:10]),
    lambda x, y: tx_count_between_grants(x, y, engine),
)
pairs = calculator.compute_suspicious_pairs(0.5, 1)
generate_suspicious_ratio_df(pairs, calculator.objects, iou_thresholds, min_contribution_counts)


min_contribution_count,1,3,5,10,15,20
iou_threshold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.5,143349671,124437565,104720031,7765419,665319,591226
0.6,143334342,124425236,1047957,776165,66550,59100
0.7,143328129,124419523,1047493,776103,66520,59100
0.8,143323310,12441474,1047350,77640,66500,59100
0.9,14331878,12441012,1047150,77600,66500,59100
1.0,14331878,12441012,1047150,77600,66500,59100
