In [None]:
# Path to the JSON parameters file. The next cell opens it with json.load and
# also hands the same path to importer.load_multiple_periods.
# NOTE(review): the empty default is presumably overridden by whatever tool
# executes this notebook (parameter injection) — confirm.
parameters_path = ""

In [None]:
from rad import importer
import rad.praise.analysis as pr_analysis

import pandas as pd
import json
from IPython.display import Markdown as md


# Read the report configuration from the JSON parameters file.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale-dependent default encoding.
with open(parameters_path, "r", encoding="utf-8") as read_file:
    params = json.load(read_file)

# Every cross-period option lives under the same nested section; look it up once.
cross_period_settings = params["report_settings"]["cross_period_settings"]
CATEG_KEYWORDS = cross_period_settings["category_keywords"]
NUMBER_OF_PERIODS = cross_period_settings["number_of_periods"]
STEP_SIZE = cross_period_settings["step_size"]


# The project importer returns a pair, unpacked here; only the "praise" entry
# of the second element is used in this cell.
(rwdObj, rwdDist) = importer.load_multiple_periods(parameters_path)


# Build the two combined tables from the "praise" entry of the loaded data.
allrounds_distribution_data = pr_analysis.cross_period_helpers.combine_distribution_table(rwdDist["praise"])


allrounds_df = pr_analysis.cross_period_helpers.combine_quantification_data(rwdDist["praise"])


# Cross-Period Analysis Report
This report aims to offer a perspective on the activity inside the praise system over several rounds.

In [None]:
# Render the configured span and block size as a Markdown sentence.
report_span_sentence = (
    f"This report will cover <b>{NUMBER_OF_PERIODS}</b> weeks, "
    f"divided into blocks of <b>{STEP_SIZE}</b> weeks each."
)
md(report_span_sentence)


# General Statistics
The full range will be subdivided into the following periods:

In [None]:
# Latest value found in the DATE column, rounded up to a day boundary.
end_date = allrounds_df["DATE"].max().ceil("D")

# Period boundaries are generated backwards from that end date.
dates = pd.date_range(
    end=end_date,
    periods=NUMBER_OF_PERIODS,
    freq=STEP_SIZE,
)

print(dates)

In [None]:
#round_stats = pr_analysis.cross_period_round_stats.run(allrounds_df)


## Praise Involvement

### How much praise? 
This graph shows the trend of total number of praise instances across time.

In [None]:
# Plot "total_praise" against "period_start_time".
pr_analysis.cross_period_round_stats.printGraph(
    allrounds_df,
    _config={"x": "period_start_time", "y": "total_praise"},
)


### How many people give and receive praise?
By counting the unique IDs of praise givers and receivers, we can visualize how participation changes across time. In the figure, the blue line represents the number of praise receivers and the red line the number of givers.

In [None]:
# Plot receiver and giver counts together against "period_start_time".
pr_analysis.cross_period_round_stats.printGraph(
    allrounds_df,
    _config={
        "x": "period_start_time",
        "y": ["total_praise_receivers", "total_praise_givers"],
    },
)


## Quantifier Involvement
Showing how many quantifiers are involved in each round.

In [None]:
# Plot "total_quantifiers" against "period_start_time".
pr_analysis.cross_period_round_stats.printGraph(
    allrounds_df,
    _config={"x": "period_start_time", "y": "total_quantifiers"},
)

### Quantifier trend

### average score displacement: tendency to under/over-estimate?

In [None]:
# This cell sits under the "average score displacement" heading, so it must
# plot the displacement metric (it previously plotted "pearson_coef").
pr_analysis.cross_period_quantifier_stats.printGraph(
    allrounds_df,
    _config={"y": "av_score_displacement"},
)

### Average score correlation coefficient: how much do I agree with other people?

In [None]:
# This cell sits under the "average score correlation coefficient" heading, so
# it must plot the correlation metric (it previously plotted
# "av_score_displacement").
pr_analysis.cross_period_quantifier_stats.printGraph(
    allrounds_df,
    _config={"y": "pearson_coef"},
)

# System Health Evaluation

## Number of new TEC members involved in praise (either giving or receiving)
Counting the round-by-round change of unique IDs being either praise giver or praise receiver.

The blue line represents new IDs in this round, the red line represents IDs that are absent in this round but were present in the last round. The green line shows the net difference, with above 0 meaning more people joined praise than people left and below 0 meaning the opposite.

In [None]:
# Plot new users, departed users and their net difference against "period_start_time".
pr_analysis.cross_period_round_stats.printGraph(
    allrounds_df,
    _config={
        "x": "period_start_time",
        "y": ["round_user_new", "round_user_left", "round_net_user_diff"],
    },
)


## Distribution Equality

### Nakamoto Coefficient

The Nakamoto Coefficient is defined as the smallest number of accounts that together control at least 50% of the resource. Although its significance relates to the prospect of a 51% attack on a network, which may not be relevant in our context, we can still use it as an intuitive measure of how many individuals received the majority of rewards.

Bigger coefficient means more distributed (i.e. needs more people to pass 50%), smaller means more concentrated power. The number should always be an integer.

In [None]:
# TODO: DOESNT FIT DIRECTLY INTO TIME-BASED ROUND STATS, REVIEW LATER
# NOTE(review): everything below is disabled code stored in two bare
# triple-quoted string literals, so none of it runs. Because the second string
# is the last expression of the cell, a notebook front end will normally echo
# it as the cell output.
# Before re-enabling: the code refers to np, px, round_stats,
# allrounds_finaldist and roundname_list, none of which are defined in the
# cells visible here, and nakamoto_coeff uses a bare `except:` that turns any
# failure into -1.
""" 
def nakamoto_coeff(x, key):
    value_sum= x[key].sum()
    x['PERCENTAGE'] = x[key] / value_sum
    sorted_x = x.sort_values(by='PERCENTAGE', ascending=False)
    tot_sum = np.array(sorted_x['PERCENTAGE'].cumsum())
    try:
        winner = np.array([k for k in range(len(tot_sum))
                          if tot_sum[k] > 0.5]).min() + 1
    except:
        winner = -1
    return winner
def nakamoto_coeff_ratio(x, key):
    winner = nakamoto_coeff(x, key)
    ratio = winner / len(x)
    return ratio """

""" round_stats['nakamoto']  = [nakamoto_coeff(allrounds_finaldist[round_name],'PRAISE REWARDS') for round_name in roundname_list]
round_stats['nakamoto_ratio']= [nakamoto_coeff_ratio(allrounds_finaldist[round_name],'PRAISE REWARDS') for round_name in roundname_list]
px.line(round_stats,x='period_start_time',y='nakamoto',markers=True,title='Minimum number of people receiving 50% of total rewards')

px.line(round_stats,x='period_start_time',y='nakamoto_ratio',markers=True,title='Ratio of people accumulating 50% of total rewards in relation to total number of receivers in that round') """


# Categorizing praise based on the praise reason

## The average, min, and max score of each category


In [None]:
# Category description in "summary-table" mode; the return value is kept in `res`.
res = pr_analysis.cross_period_category_analysis.printDescription(
    allrounds_df,
    CATEG_KEYWORDS,
    _config={"mode": "summary-table", "num": "2"},
)


In [None]:
# Category graph in "avg-stats" mode.
pr_analysis.cross_period_category_analysis.printGraph(
    allrounds_df,
    CATEG_KEYWORDS,
    _config={"mode": "avg-stats", "num": "3"},
)



## Top 3 highest scored praise in each category
A convenient way to check if the categorization keywords are reasonable.

In [None]:
# Category description in "top-scored" mode with "num" set to "3".
pr_analysis.cross_period_category_analysis.printDescription(
    allrounds_df,
    CATEG_KEYWORDS,
    _config={"mode": "top-scored", "num": "3"},
)

## trend across time

In [None]:
# Category graph in "trend" mode with "num" on the y axis.
pr_analysis.cross_period_category_analysis.printGraph(
    allrounds_df,
    CATEG_KEYWORDS,
    _config={"mode": "trend", "y": "num"},
)

In [None]:
# Category graph in "trend" mode with "_avg_score" on the y axis.
pr_analysis.cross_period_category_analysis.printGraph(
    allrounds_df,
    CATEG_KEYWORDS,
    _config={"mode": "trend", "y": "_avg_score"},
)