Skip to content

Commit

Permalink
the two series of the tokens owned now run in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
robertour committed Jan 19, 2019
1 parent 5435380 commit 3a3e194
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 12 deletions.
Expand Up @@ -304,8 +304,8 @@
"outputs": [],
"source": [
"from visualization.owned_listener import OwnedListener\n",
"owned = calculator.all_actions\n",
"listener = OwnedListener(owned, editor_inputname)\n",
"all_actions = calculator.all_actions\n",
"listener = OwnedListener(all_actions, editor_inputname)\n",
"\n",
"traces = ['Tokens Owned', 'Tokens Owned (%)']\n",
"\n",
Expand All @@ -315,9 +315,24 @@
"from ipywidgets.widgets import Dropdown\n",
"\n",
"interact(listener.listen,\n",
" _range = get_date_slider_from_datetime(owned['rev_time']),\n",
" _range = get_date_slider_from_datetime(all_actions['rev_time']),\n",
" granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Monthly'),\n",
" trace=Dropdown(options=traces, value='Tokens Owned (%)'))\n"
" trace=Dropdown(options=traces, value='Tokens Owned (%)'))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = all_actions\n",
"df = df.sort_values(['token_id', 'rev_time'], ascending=True).set_index('token_id')\n",
"last_action = df.groupby('token_id').last()\n",
"surv = last_action[last_action['action'] != 'out']\n",
"#sum(surv['o_editor'] == editor_inputname)\n",
"surv\n"
]
},
{
Expand Down Expand Up @@ -354,7 +369,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
51 changes: 51 additions & 0 deletions develop.py
@@ -0,0 +1,51 @@
"""Development scratch script for the token-ownership visualization.

Loads one article from the English Wikipedia, builds the per-editor
aggregates and conflict scores, renders the 'Tokens Owned (%)' trace for a
hard-coded editor and then stops in an ipdb session so the intermediate
objects can be inspected interactively.
"""
from external.wikipedia import WikipediaAPI, WikipediaDV
from metrics.conflict import ConflictManager
from visualization.owned_listener import OwnedListener
from wikiwho_wrapper import WikiWho

# --- Article lookup -------------------------------------------------------
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
the_page = wikipedia_dv.get_page('The Camp of the Saints')

# --- Aggregated actions per editor ----------------------------------------
wikiwho = WikiWho(lng='en')
agg_actions = wikiwho.dv.edit_persistence(the_page.page_id)

# Resolve the editor ids found in the aggregates; the 'userid' column is
# renamed so that it can be used as the merge key below.
editor_ids = agg_actions['editor_id'].unique()
editors = wikipedia_dv.get_editors(editor_ids)
editors = editors.rename(columns={'userid': 'editor_id'})

# Merge the names of the editors into the aggregate actions dataframe and
# expose them as an 'editor' column at position 3.
editor_names = editors[['editor_id', 'name']]
agg_actions = agg_actions.merge(editor_names, on='editor_id')
agg_actions.insert(3, 'editor', agg_actions['name'])
agg_actions = agg_actions.drop(columns=['name'])
agg_actions['editor'] = agg_actions['editor'].fillna("Unregistered")

# --- Conflict calculation ---------------------------------------------------
all_content = wikiwho.dv.all_content(the_page['page_id'])
revisions = wikiwho.dv.rev_ids_of_article(the_page['page_id'])

calculator = ConflictManager(all_content, revisions)
calculator.calculate()
editors_conflicts = calculator.get_conflict_score_per_editor()

# The conflict scores are joined on their index, so the editor ids are cast
# to strings before merging.
editors['editor_id'] = editors['editor_id'].astype(str)
if len(editors_conflicts) > 0:
    named_editors = editors[['editor_id', 'name']]
    editors_conflicts = named_editors.merge(
        editors_conflicts, right_index=True, left_on='editor_id'
    ).set_index('editor_id')

# --- Tokens-owned plot for one hard-coded editor id -------------------------
owned = calculator.all_actions
listener = OwnedListener(owned, '28921814')

# Plot over the full date span covered by the actions.
rev_dates = owned['rev_time'].dt.date
listener.listen(
    _range=(rev_dates.min(), rev_dates.max()),
    granularity='Monthly',
    trace='Tokens Owned (%)')

import ipdb; ipdb.set_trace()  # breakpoint b86e2bcc //
54 changes: 47 additions & 7 deletions visualization/owned_listener.py
@@ -1,27 +1,67 @@
import pandas as pd
import plotly
from plotly import graph_objs

import datetime

class OwnedListener():

def __init__(self, df, editor):
self.df = df.sort_values(['token_id', 'rev_time'], ascending=True).set_index('token_id')
self.editor = editor
self.days = days = pd.Series(df.loc[df['o_editor'] == editor, 'rev_time'].dt.to_period('D').unique()).sort_values(ascending=False)

days = self.days.dt.to_timestamp('D') + pd.DateOffset(1)

_all = []
_abs = []
df = self.df
for rev_time in days:
df = df[df['rev_time'] <= rev_time]
last_action = df.groupby('token_id').last()
surv = last_action[last_action['action'] != 'out']
_abs.append(sum(surv['o_editor'] == self.editor))
_all.append(len(surv))

self.summ = pd.DataFrame({
'day': days,
'abs': _abs,
'all': _all
})
self.summ['res'] = 100 * self.summ['abs'] / self.summ['all']

self.df_plotted = None

def listen(self, _range, granularity, trace):
df = self.df

df = df[(df.rev_time.dt.date >= _range[0]) &
(df.rev_time.dt.date <= _range[1])]

self.doi = df.loc[df['editor'] == self.editor, 'rev_time'].dt.to_period(
granularity[0]).dt.to_timestamp(granularity[0]).sort_values(ascending=False).unique()
(df.rev_time.dt.date <= _range[1] + datetime.timedelta(days=1))]

self.doi = pd.Series(self.days.dt.to_timestamp(granularity[0]).unique()) + pd.DateOffset(1)
self.traces = []
self.is_norm_scale = True
df = self.__add_trace(df, trace, 'rgba(0, 0, 0, 1)')

if trace == 'Tokens Owned':
self.is_norm_scale = False
_df = self.summ
_df['time'] = _df['day'].dt.to_period(granularity[0]).dt.to_timestamp(granularity[0])
_df = _df[~_df.duplicated(subset='time', keep='first')]
_y = _df['abs']

elif trace == 'Tokens Owned (%)':
_df = self.summ
_df['time'] = _df['day'].dt.to_period(granularity[0]).dt.to_timestamp(granularity[0])
_df = _df[~_df.duplicated(subset='time', keep='first')]
_y = _df['res']

self.traces.append(
graph_objs.Scatter(
x=_df['time'], y=_y,
name=trace,
marker=dict(color='rgba(255, 0, 0, .5)'))
)

self.__add_trace(df, trace, 'rgba(0,0,255, .5)')

_range = None
if self.is_norm_scale:
Expand Down Expand Up @@ -61,7 +101,7 @@ def __add_trace(self, df, trace, color):

self.traces.append(
graph_objs.Scatter(
x=pd.Series(self.doi), y=_y,
x=self.doi, y=_y,
name=trace,
marker=dict(color=color))
)
Expand Down

0 comments on commit 3a3e194

Please sign in to comment.