In [1]:
from temporal_networkx import TemporalDiGraph
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import math
from datetime import datetime, timedelta
from copy import deepcopy

In [2]:
temporal_graph = TemporalDiGraph(pd.read_csv('./soc-sign-bitcoinotc.csv', header=None))

# Calculating Fairness and Goodness Scores for Each Rolling Month

The following code generates DiGraph of the trust network for each rolling month, i.e. from the first month of the data set to each month covered by the data set, and then saves the results in separate CSV files.

In [11]:
user_fairness_by_month = {}
user_goodness_by_month = {}
user_average_ratings_by_month = {}
user_bad_ratings_count_by_month = {}
user_good_ratings_count_by_month = {}
sep = '---------------------------------------------------------------------------'
first_month = None

for month in temporal_graph.get_all_months():
    # Loop through all months
    if first_month is None:
        first_month = str(month)


    # Generate a DiGraph from the first months till the current month
    print(f'Calculating fairness and goodness for the period between {str(first_month)} and {str(month)}')
    G = temporal_graph.get_DiGraph(str(first_month), str(month), run_REV2=True)
    nodes = G.nodes(data=True)
    edges = G.edges()
    print(f'This period contains {len(nodes)} nodes and {len(edges)} edges')
    
    # Loop through all nodes
    for node in nodes:
        user_id = node[0]

        # Fairness and goodness scores were calculated by REV2
        # when the graph was generated
        user_fairness = node[1]['fairness']
        user_goodness = node[1]['goodness']
        user_average_ratings = 0.0
        user_bad_ratings_count = 0
        user_good_ratings_count = 0
        
        in_edges = G.in_edges(user_id, data=True)

        # Loop through the in-edges of this node to
        # calculate average ratings and count good/bad ratings
        for edge in in_edges:
            user_average_ratings = user_average_ratings + edge[2]['weight']

            if edge[2]['weight'] > 0.0:
                user_good_ratings_count = user_good_ratings_count + 1
            elif edge[2]['weight'] < 0.0:
                user_bad_ratings_count = user_bad_ratings_count + 1

        if len(in_edges) > 0:
            user_average_ratings = user_average_ratings / len(in_edges)

        # Save the results into dictionaries
        if user_id not in user_fairness_by_month:
            user_fairness_by_month[user_id] = {}

        if user_id not in user_goodness_by_month: 
            user_goodness_by_month[user_id] = {}

        if user_id not in user_average_ratings_by_month:
            user_average_ratings_by_month[user_id] = {}

        if user_id not in user_bad_ratings_count_by_month:
            user_bad_ratings_count_by_month[user_id] = {}

        if user_id not in user_good_ratings_count_by_month:
            user_good_ratings_count_by_month[user_id] = {}

        user_fairness_by_month[user_id][str(month)] = user_fairness
        user_goodness_by_month[user_id][str(month)] = user_goodness
        user_average_ratings_by_month[user_id][str(month)] = user_average_ratings
        user_bad_ratings_count_by_month[user_id][str(month)] = user_bad_ratings_count
        user_good_ratings_count_by_month[user_id][str(month)] = user_good_ratings_count

    print(sep)
    #bad_users_by_month[str(month)] = [n for n in G.nodes(data=True) if n[1]['goodness'] < 0]


Calculating fairness and goodness for the period between 2010-11 and 2010-11
This period contains 26 nodes and 60 edges
---------------------------------------------------------------------------
Calculating fairness and goodness for the period between 2010-11 and 2010-12
This period contains 55 nodes and 142 edges
---------------------------------------------------------------------------
Calculating fairness and goodness for the period between 2010-11 and 2011-01
This period contains 84 nodes and 241 edges
---------------------------------------------------------------------------
Calculating fairness and goodness for the period between 2010-11 and 2011-02
This period contains 137 nodes and 486 edges
---------------------------------------------------------------------------
Calculating fairness and goodness for the period between 2010-11 and 2011-03
This period contains 186 nodes and 680 edges
---------------------------------------------------------------------------
Calculating fa

In [23]:
fairness_df = pd.DataFrame.from_dict(user_fairness_by_month, orient='index')
goodness_df = pd.DataFrame.from_dict(user_goodness_by_month, orient='index')
average_ratings_df = pd.DataFrame.from_dict(user_average_ratings_by_month, orient='index')
bad_ratings_df = pd.DataFrame.from_dict(user_bad_ratings_count_by_month, orient='index')
good_ratings_df = pd.DataFrame.from_dict(user_good_ratings_count_by_month, orient='index')

In [25]:
fairness_df.to_csv('./user_fairness.csv')
goodness_df.to_csv('./user_goodness.csv')
average_ratings_df.to_csv('./user_average_ratings.csv')
bad_ratings_df.to_csv('./user_bad_ratings.csv')
good_ratings_df.to_csv('./user_good_ratings.csv')

In [3]:
fairness_df = pd.read_csv('./user_fairness.csv', index_col=0)
goodness_df = pd.read_csv('./user_goodness.csv', index_col=0)
average_ratings_df = pd.read_csv('./user_average_ratings.csv', index_col=0)
bad_ratings_df = pd.read_csv('./user_bad_ratings.csv', index_col=0)
good_ratings_df = pd.read_csv('./user_good_ratings.csv', index_col=0)

# Inspecting 'Bad' Users

After examining the scores, 2011-05 is the first month in the dataset with more than 10 'bad' users in terms of the REV2 goodness score.

The variable ```month``` can be changed to inspect other months covered by the data set.

In [4]:
month = '2011-05'
month_df = goodness_df[[month]].dropna()
month_df = month_df.rename(columns={month: 'goodness'})

print(f"""
In the month {month}, there are {len(month_df)} users:
Number of users with goodness score > 0: {len(month_df[month_df['goodness'] > 0])}
Number of users with goodness score = 0: {len(month_df[month_df['goodness'] == 0])}
Number of users with goodness score < 0: {len(month_df[month_df['goodness'] < 0])}
""")



In the month 2011-05, there are 721 users:
Number of users with goodness score > 0: 680
Number of users with goodness score = 0: 30
Number of users with goodness score < 0: 11



The following blocks inspect bad users (with a goodness score lower than 0.0) in the particular month, along with their average ratings in the month

In [5]:
goodness_threshold = 0

In [6]:
print(f"""
{len(month_df[month_df['goodness'] < goodness_threshold])} users with goodness score < {goodness_threshold} in the month {month}, and their average ratings:

{month_df[month_df['goodness'] < goodness_threshold].join(average_ratings_df[[month]]).rename(columns={month:'average_ratings'})}
""")


11 users with goodness score < 0 in the month 2011-05, and their average ratings:

     goodness  average_ratings
179 -0.233185        -0.428571
310 -1.000000        -5.142857
315 -1.000000        -4.500000
410 -1.000000        -6.000000
467 -1.000000        -3.750000
463 -0.239263        -1.000000
472 -1.000000        -3.733333
512 -1.000000        -1.750000
594 -1.000000        -2.250000
672 -1.000000        -4.666667
766 -1.000000       -10.000000



In [7]:
month_dt = datetime.strptime(month, '%Y-%m')
next_month_dt = (month_dt + timedelta(days=31)).replace(day=1)
next_month = next_month_dt.strftime('%Y-%m')

In [8]:
user_to_be_banned = 766
print(f"""
In the month {month}, user {user_to_be_banned} had a goodness score of {goodness_df.loc[user_to_be_banned][month]} and an average rating of {average_ratings_df.loc[user_to_be_banned][month]}.

In the next month, {next_month}, the user had a goodness score of {goodness_df.loc[user_to_be_banned][next_month]} and an average rating of {average_ratings_df.loc[user_to_be_banned][next_month]}.
""")


In the month 2011-05, user 766 had a goodness score of -1.0 and an average rating of -10.0.

In the next month, 2011-06, the user had a goodness score of -1.0 and an average rating of -10.0.



# Simulation of Banning Bad Users

This method simulates the effect of banning a specific user, by retrieving the in-edges that will be removed in a counter-factual future after the user was banned.

The method will return a dictionary of the counts of in-edges of different ratings if the user would have received ratings, or a boolean value of False if in reality the user did not receive further ratings, meaning banning the user would not have changed the future.

This method would also be used in a later section where banning bad users on a monthly basis is simulated.

In [9]:
def ban_user(tG: TemporalDiGraph, user_to_be_banned: int, month: str, verbose: bool):
    num_in_edges_dropped = 0
    num_out_edges_dropped = 0
    in_edges_dropped = {}
    out_edges_dropped = {}

    for m in tG.get_all_months():
        if str(m) <= month:
            continue

        month_df = tG.get_df(str(m))
        user_in_edges = month_df[month_df['target'] == user_to_be_banned]
        user_out_edges = month_df[month_df['source'] == user_to_be_banned]

        num_in_edges_dropped = num_in_edges_dropped + len(user_in_edges)
        num_out_edges_dropped  = num_out_edges_dropped + len(user_out_edges)

        if num_in_edges_dropped + num_out_edges_dropped > 0:

            if verbose:

                if len(user_in_edges) > 0:
                    print(f"The following in-edges will be dropped in the month {m}:")
                    print(user_in_edges)
                    print('\n')

                if len(user_out_edges) > 0:
                    print(f"The following out-edges will be dropped in the month {m}:")
                    print(user_out_edges)
                    print('\n')

            for rating in user_in_edges['rating'].to_list():
                if rating not in in_edges_dropped:
                    in_edges_dropped[rating] = 1
                else:
                    in_edges_dropped[rating] = in_edges_dropped[rating] + 1

            for rating in user_out_edges['rating'].to_list():
                if rating not in out_edges_dropped:
                    out_edges_dropped[rating] = 1
                else:
                    out_edges_dropped[rating] = out_edges_dropped[rating] + 1

    if num_in_edges_dropped + num_out_edges_dropped == 0:
        # The future was not changed
        return False, False
    else:
        # The future was changed
        if verbose:
            if num_in_edges_dropped > 0:
                print('After banning this user, the following in-edges to this user would be prevented:')
                for k in sorted(in_edges_dropped):
                    print(f'{k}: {in_edges_dropped[k]} edges')

            if num_out_edges_dropped > 0:
                print('After banning this user, the following out-edges from this user would be prevented:')
                for k in sorted(out_edges_dropped):
                    print(f'{k}: {out_edges_dropped[k]} edges')
    
    return in_edges_dropped, out_edges_dropped

The following sections simulates how banning "bad users" in a given month would change the future in a counter-factual world. 

In [86]:
month = '2011-06'
month_df = goodness_df[[month]].dropna()
month_df = month_df.rename(columns={month: 'goodness'})

bad_users = month_df[month_df['goodness'] < goodness_threshold].index.to_list()

In [87]:
sep = '---------------------------------------------------------------------------'
all_in_edges_dropped = {}
all_out_edges_dropped = {}
user_no_future_ratings = []
for bad_user in bad_users:
    print(f'''
Simulating banning the user {bad_user}
The user had a goodness score of {goodness_df.loc[bad_user][month]} in {month}
    ''')
    in_edges_dropped, out_edges_dropped = ban_user(temporal_graph, bad_user, month, verbose=True)
    
    if in_edges_dropped or out_edges_dropped:

        for k, v in in_edges_dropped.items():
            if k not in all_in_edges_dropped:
                all_in_edges_dropped[k] = v
            else:
                all_in_edges_dropped[k] = all_in_edges_dropped[k] + v

        for k, v in out_edges_dropped.items():
            if k not in all_out_edges_dropped:
                all_out_edges_dropped[k] = v
            else:
                all_out_edges_dropped[k] = all_out_edges_dropped[k] + v
    else:
        user_no_future_ratings.append(bad_user)
    print(sep)

print(f'''
The following {len(user_no_future_ratings)} users did not receive ratings after the month {month}
{', '.join([str(x) for x in user_no_future_ratings])}
''')


Simulating banning the user 179
The user had a goodness score of -0.2072038132260703 in 2011-06
    
---------------------------------------------------------------------------

Simulating banning the user 310
The user had a goodness score of -1.0 in 2011-06
    
---------------------------------------------------------------------------

Simulating banning the user 315
The user had a goodness score of -1.0 in 2011-06
    
---------------------------------------------------------------------------

Simulating banning the user 410
The user had a goodness score of -1.0 in 2011-06
    
---------------------------------------------------------------------------

Simulating banning the user 467
The user had a goodness score of -1.0 in 2011-06
    
---------------------------------------------------------------------------

Simulating banning the user 463
The user had a goodness score of -0.2559907854115311 in 2011-06
    
--------------------------------------------------------------------

After banning all bad users in the given month, the counter-factual future would have been changed as the following.

In [88]:
print(f"""
After banning users with goodness score < {goodness_threshold} identified in the month {month}, the following in-edges would be prevented:
""")
for k in sorted(all_in_edges_dropped):
    print(f'{k}: {all_in_edges_dropped[k]} edges')

print('\nAnd the following out-edges would be prevented:')
for k in sorted(all_out_edges_dropped):
    print(f'{k}: {all_out_edges_dropped[k]} edges')


After banning users with goodness score < 0 identified in the month 2011-06, the following in-edges would be prevented:

-10: 6 edges
-1: 1 edges
1: 3 edges
2: 1 edges
4: 1 edges
10: 1 edges

And the following out-edges would be prevented:
-10: 2 edges
1: 3 edges
2: 2 edges
10: 1 edges


# Simulating Banning Bad Users At the End of Each Month

## Banning Based on Goodness Scores

This section simulates how the future would have been changed if "bad users" are banned on a monthly basis according to their goodness scores.

The effect of banning bad users are retained in each iteration. If a user is banned after a particular month, any in-edges to that user would be removed when goodness scores are calculated in subsequent iterations. Also, their out-edges would also be dropped to simulate a complete ban from the platform.

In [82]:
goodness_threshold = 0.0
users_banned = [-1, -2]
first_month = None
users_band_by_months = {}
all_in_edges_dropped = {}
all_out_edges_dropped = {}
user_no_future_ratings = {}
sep = '---------------------------------------------------------------------------'

for month in temporal_graph.get_all_months():
    month = str(month)
    month_dt = datetime.strptime(month, '%Y-%m')
    next_month_dt = (month_dt + timedelta(days=31)).replace(day=1)
    next_month = next_month_dt.strftime('%Y-%m')

    if month not in all_in_edges_dropped:
        all_in_edges_dropped[month] = {}
        for i in range(-10, 11):
            all_in_edges_dropped[month][i] = 0
    if month not in all_out_edges_dropped:
        all_out_edges_dropped[month] = {} 
        for i in range(-10, 11):
            all_out_edges_dropped[month][i] = 0
    if month not in user_no_future_ratings:
        user_no_future_ratings[month] = []
    if month not in users_band_by_months:
        users_band_by_months[month] = deepcopy(users_banned)

    if first_month is None:
        first_month = month

    print(f'Simulating for the month {month}')

    # Create a new directed graph for simulation in each iteration
    simulation_graph = deepcopy(temporal_graph)#, first_month, str(m)) 

    # Remove users that were already banned from simulation_graph
    # if len(users_banned) > 0:
    #     simulation_graph.df = simulation_graph.df.drop(
    #         simulation_graph.df.query(f"target in ({','.join([str(x) for x in users_banned])})").index
    #     )
    for k in users_band_by_months:
        if k < month:
            # Only drop the in-edges from the current month
            simulation_graph.df = simulation_graph.df.drop(
                simulation_graph.df[simulation_graph.df.index > k]
                .query(f"target in ({','.join([str(x) for x in users_band_by_months[k]])})").index
            )
            simulation_graph.df = simulation_graph.df.drop(
                simulation_graph.df[simulation_graph.df.index > k]
                .query(f"source in ({','.join([str(x) for x in users_band_by_months[k]])})").index
            )
    
    G = simulation_graph.get_DiGraph(first_month, month, run_REV2=True)

    # Find out what users to ban in each month
    bad_users = [ n[0] for n in G.nodes(data=True) if n[1]['goodness'] < goodness_threshold ] 
    users_band_by_months[month] = users_band_by_months[month] + bad_users
    #users_banned = users_banned + bad_users

    # Check number of ratings that will be prevented in subsequent months
    for bad_user in bad_users:
        in_edges_dropped, out_edges_dropped = ban_user(temporal_graph, bad_user, month, verbose=False)

        if in_edges_dropped or out_edges_dropped:
            if len(in_edges_dropped) > 0:
                for k, v in in_edges_dropped.items():
                    if k not in all_in_edges_dropped[month]:
                        all_in_edges_dropped[month][k] = v
                    else:
                        all_in_edges_dropped[month][k] = all_in_edges_dropped[month][k] + v
            if len(out_edges_dropped) > 0:
                for k, v in out_edges_dropped.items():
                    if k not in all_out_edges_dropped[month]:
                        all_out_edges_dropped[month][k] = v
                    else:
                        all_out_edges_dropped[month][k] = all_out_edges_dropped[month][k] + v
        else:
            if month in user_no_future_ratings:
                user_no_future_ratings[month].append(bad_user)
            else:
                user_no_future_ratings[month] = [bad_user]

    print('Effects of banning users in this month:')
    print(f"{len(bad_users)} bad users were banned")

    future_changed = False

    if len(user_no_future_ratings[month]) > 0:
        print(f'{len(user_no_future_ratings[month])} banned users did not have any future transactions')

    if len(all_in_edges_dropped[month]) > 0:
        future_changed = True
        print('The following future in-ratings would have been prevented:')
        for k in sorted(all_in_edges_dropped[month]):
            print(f'{k}: {all_in_edges_dropped[month][k]} edges')

    if len(all_out_edges_dropped[month]) > 0:
        future_changed = True
        print('\nThe following future out-ratings would have been prevented:')
        for k in sorted(all_out_edges_dropped[month]):
            print(f'{k}: {all_out_edges_dropped[month][k]} edges')
            
    if not future_changed:
        print('Simulation of this month would not have affected the future')

    print(sep)


Simulating for the month 2010-11
Effects of banning users in this month:
0 bad users were banned
The following future in-ratings would have been prevented:
-10: 0 edges
-9: 0 edges
-8: 0 edges
-7: 0 edges
-6: 0 edges
-5: 0 edges
-4: 0 edges
-3: 0 edges
-2: 0 edges
-1: 0 edges
0: 0 edges
1: 0 edges
2: 0 edges
3: 0 edges
4: 0 edges
5: 0 edges
6: 0 edges
7: 0 edges
8: 0 edges
9: 0 edges
10: 0 edges

The following future out-ratings would have been prevented:
-10: 0 edges
-9: 0 edges
-8: 0 edges
-7: 0 edges
-6: 0 edges
-5: 0 edges
-4: 0 edges
-3: 0 edges
-2: 0 edges
-1: 0 edges
0: 0 edges
1: 0 edges
2: 0 edges
3: 0 edges
4: 0 edges
5: 0 edges
6: 0 edges
7: 0 edges
8: 0 edges
9: 0 edges
10: 0 edges
---------------------------------------------------------------------------
Simulating for the month 2010-12
Effects of banning users in this month:
0 bad users were banned
The following future in-ratings would have been prevented:
-10: 0 edges
-9: 0 edges
-8: 0 edges
-7: 0 edges
-6: 0 edges
-5: 

In [85]:
all_in_edges_dropped_rev2 = pd.DataFrame.from_dict(all_in_edges_dropped, orient='index')
all_out_edges_dropped_rev2 = pd.DataFrame.from_dict(all_out_edges_dropped, orient='index')

In [86]:
all_in_edges_dropped_rev2.to_csv('all_in_edges_dropped_rev2.csv')
all_out_edges_dropped_rev2.to_csv('all_out_edges_dropped_rev2.csv')

Summarising the effects of banning users based on goodness scores

In [58]:
all_in_edges_dropped_summary = {}
all_out_edges_dropped_summary = {}

for k in all_in_edges_dropped:
    if k not in all_in_edges_dropped_summary:
        all_in_edges_dropped_summary[k] = {
            'bad': 0,
            'good': 0
        }
    
    for score, count in all_in_edges_dropped[k].items():
        if score < 0:
            all_in_edges_dropped_summary[k]['bad'] = all_in_edges_dropped_summary[k]['bad'] + count
        else:
            all_in_edges_dropped_summary[k]['good'] = all_in_edges_dropped_summary[k]['good'] + count

for k in all_out_edges_dropped:
    if k not in all_out_edges_dropped_summary:
        all_out_edges_dropped_summary[k] = {
            'bad': 0,
            'good': 0
        }
    
    for score, count in all_out_edges_dropped[k].items():
        if score < 0:
            all_out_edges_dropped_summary[k]['bad'] = all_out_edges_dropped_summary[k]['bad'] + count
        else:
            all_out_edges_dropped_summary[k]['good'] = all_out_edges_dropped_summary[k]['good'] + count

In [67]:
ban_summary_rev2 = pd.DataFrame.from_dict(all_in_edges_dropped_summary, orient='index')

## Banning Based on Average Rating

In [87]:
average_rating_threshold = -1
users_banned = [-1, -2]
first_month = None
users_banned_by_months = {}
all_in_edges_dropped = {}
all_out_edges_dropped = {}
user_no_future_ratings = {}
sep = '---------------------------------------------------------------------------'

for month in temporal_graph.get_all_months():
    month = str(month)
    month_dt = datetime.strptime(month, '%Y-%m')
    next_month_dt = (month_dt + timedelta(days=31)).replace(day=1)
    next_month = next_month_dt.strftime('%Y-%m')

    if month not in all_in_edges_dropped:
        all_in_edges_dropped[month] = {}
        for i in range(-10, 11):
            all_in_edges_dropped[month][i] = 0
    if month not in all_out_edges_dropped:
        all_out_edges_dropped[month] = {} 
        for i in range(-10, 11):
            all_out_edges_dropped[month][i] = 0
    if month not in user_no_future_ratings:
        user_no_future_ratings[month] = []
    if month not in users_band_by_months:
        users_banned_by_months[month] = deepcopy(users_banned)

    if first_month is None:
        first_month = month

    print(f'Simulating for the month {month}')

    # Create a new directed graph for simulation in each iteration
    simulation_graph = deepcopy(temporal_graph)#, first_month, str(m)) 

    # Remove users that were already banned from simulation_graph
    for k in users_banned_by_months:
        if k < month:
            # Only drop the in-edges from the current month
            simulation_graph.df = simulation_graph.df.drop(
                simulation_graph.df[simulation_graph.df.index > k]
                .query(f"target in ({','.join([str(x) for x in users_banned_by_months[k]])})").index
            )
            simulation_graph.df = simulation_graph.df.drop(
                simulation_graph.df[simulation_graph.df.index > k]
                .query(f"source in ({','.join([str(x) for x in users_banned_by_months[k]])})").index
            )
    
    # In this scenario, we don't need to use goodness score, hence
    # not running REV2 algorithm
    G = simulation_graph.get_DiGraph(first_month, month, run_REV2=False)

    # Find out what users to ban in each month
    bad_users = []
    for node in G.nodes():
        # Getting all in-edges of this node to calculate the average ratings
        in_edges = G.in_edges(node, data=True)
        user_rating_count = 0
        user_total_ratings = 0
        user_average_rating = None
        
        for in_edge in in_edges:
            user_total_ratings = user_total_ratings + in_edge[2]['weight']
            user_rating_count = user_rating_count + 1
        
        if user_rating_count > 0:
            user_average_rating = user_total_ratings / user_rating_count
        
        if user_average_rating is not None and user_average_rating < average_rating_threshold:
            bad_users.append(node)

    #bad_users = [ n[0] for n in G.nodes(data=True) if n[1]['goodness'] < goodness_threshold ] 
    users_band_by_months[month] = users_band_by_months[month] + bad_users
    #users_banned = users_banned + bad_users

    # Check number of ratings that will be prevented in subsequent months
    for bad_user in bad_users:
        in_edges_dropped, out_edges_dropped = ban_user(temporal_graph, bad_user, month, verbose=False)

        if in_edges_dropped or out_edges_dropped:
            if len(in_edges_dropped) > 0:
                for k, v in in_edges_dropped.items():
                    if k not in all_in_edges_dropped[month]:
                        all_in_edges_dropped[month][k] = v
                    else:
                        all_in_edges_dropped[month][k] = all_in_edges_dropped[month][k] + v
            if len(out_edges_dropped) > 0:
                for k, v in out_edges_dropped.items():
                    if k not in all_out_edges_dropped[month]:
                        all_out_edges_dropped[month][k] = v
                    else:
                        all_out_edges_dropped[month][k] = all_out_edges_dropped[month][k] + v
        else:
            if month in user_no_future_ratings:
                user_no_future_ratings[month].append(bad_user)
            else:
                user_no_future_ratings[month] = [bad_user]

    print('Effects of banning users in this month:')
    print(f"{len(bad_users)} bad users were banned")

    future_changed = False

    if len(user_no_future_ratings[month]) > 0:
        print(f'{len(user_no_future_ratings[month])} banned users did not have any future transactions')

    if len(all_in_edges_dropped[month]) > 0:
        future_changed = True
        print('The following future in-ratings would have been prevented:')
        for k in sorted(all_in_edges_dropped[month]):
            print(f'{k}: {all_in_edges_dropped[month][k]} edges')

    if len(all_out_edges_dropped[month]) > 0:
        future_changed = True
        print('\nThe following future out-ratings would have been prevented:')
        for k in sorted(all_out_edges_dropped[month]):
            print(f'{k}: {all_out_edges_dropped[month][k]} edges')
            
    if not future_changed:
        print('Simulation of this month would not have affected the future')

    print(sep)


Simulating for the month 2010-11
Effects of banning users in this month:
0 bad users were banned
The following future in-ratings would have been prevented:
-10: 0 edges
-9: 0 edges
-8: 0 edges
-7: 0 edges
-6: 0 edges
-5: 0 edges
-4: 0 edges
-3: 0 edges
-2: 0 edges
-1: 0 edges
0: 0 edges
1: 0 edges
2: 0 edges
3: 0 edges
4: 0 edges
5: 0 edges
6: 0 edges
7: 0 edges
8: 0 edges
9: 0 edges
10: 0 edges

The following future out-ratings would have been prevented:
-10: 0 edges
-9: 0 edges
-8: 0 edges
-7: 0 edges
-6: 0 edges
-5: 0 edges
-4: 0 edges
-3: 0 edges
-2: 0 edges
-1: 0 edges
0: 0 edges
1: 0 edges
2: 0 edges
3: 0 edges
4: 0 edges
5: 0 edges
6: 0 edges
7: 0 edges
8: 0 edges
9: 0 edges
10: 0 edges
---------------------------------------------------------------------------
Simulating for the month 2010-12
Effects of banning users in this month:
0 bad users were banned
The following future in-ratings would have been prevented:
-10: 0 edges
-9: 0 edges
-8: 0 edges
-7: 0 edges
-6: 0 edges
-5: 

In [88]:
all_in_edges_dropped_average_ratings = pd.DataFrame.from_dict(all_in_edges_dropped, orient='index')
all_out_edges_dropped_average_ratings = pd.DataFrame.from_dict(all_out_edges_dropped, orient='index')

In [89]:
all_in_edges_dropped_average_ratings.to_csv('all_in_edges_dropped_average_ratings.csv')
all_out_edges_dropped_average_ratings.to_csv('all_out_edges_dropped_average_ratings.csv')

In [71]:
all_in_edges_dropped_summary = {}
all_out_edges_dropped_summary = {}

for k in all_in_edges_dropped:
    if k not in all_in_edges_dropped_summary:
        all_in_edges_dropped_summary[k] = {
            'bad': 0,
            'good': 0
        }
    
    for score, count in all_in_edges_dropped[k].items():
        if score < 0:
            all_in_edges_dropped_summary[k]['bad'] = all_in_edges_dropped_summary[k]['bad'] + count
        else:
            all_in_edges_dropped_summary[k]['good'] = all_in_edges_dropped_summary[k]['good'] + count

for k in all_out_edges_dropped:
    if k not in all_out_edges_dropped_summary:
        all_out_edges_dropped_summary[k] = {
            'bad': 0,
            'good': 0
        }
    
    for score, count in all_out_edges_dropped[k].items():
        if score < 0:
            all_out_edges_dropped_summary[k]['bad'] = all_out_edges_dropped_summary[k]['bad'] + count
        else:
            all_out_edges_dropped_summary[k]['good'] = all_out_edges_dropped_summary[k]['good'] + count

In [72]:
ban_summary_avgrating = pd.DataFrame.from_dict(all_in_edges_dropped_summary, orient='index')

In [78]:
total_ratings_stopped = sum(ban_summary_rev2.sum())
bad_ratings_stopped = ban_summary_rev2.sum()['bad']
good_ratings_stopped = ban_summary_rev2.sum()['good']
print(f"""
Using REV2 goodness score to ban users, it would have prevented {bad_ratings_stopped} bad ratings ({round(bad_ratings_stopped / total_ratings_stopped, 2)*100}%) and {good_ratings_stopped} good ratings ({round(good_ratings_stopped / total_ratings_stopped, 2)*100}%).
""")


Using REV2 goodness score to ban users, it would have prevented 501 bad ratings (77.0%) and 148 good ratings (23.0%).



In [79]:
total_ratings_stopped = sum(ban_summary_avgrating.sum())
bad_ratings_stopped = ban_summary_avgrating.sum()['bad']
good_ratings_stopped = ban_summary_avgrating.sum()['good']
print(f"""
Using average ratings to ban users, it would have prevented {bad_ratings_stopped} bad ratings ({round(bad_ratings_stopped / total_ratings_stopped, 2)*100}%) and {good_ratings_stopped} good ratings ({round(good_ratings_stopped / total_ratings_stopped, 2)*100}%).
""")


Using average ratings to ban users, it would have prevented 2147 bad ratings (57.99999999999999%) and 1568 good ratings (42.0%).



In [80]:
ban_summary_rev2

Unnamed: 0,bad,good
2010-11,0,0
2010-12,0,0
2011-01,0,0
2011-02,0,0
2011-03,0,0
...,...,...
2015-09,0,0
2015-10,0,0
2015-11,0,0
2015-12,0,0
