In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell runs, so edits to library code on disk are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
from pici.pici import Pici
from pici.communities.oem import OEMCommunityFactory
from pici.communities.osm import OSMCommunityFactory
from pici.communities.preciousplastic import PPCommunityFactory
import pandas as pd

from pici.labelling import InnovationLabels

import plotly.express as ex

Note: to be able to use all crisp methods, you need to install some additional packages:  {'wurlitzer', 'graph_tool', 'karateclub', 'leidenalg', 'infomap'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'karateclub', 'ASLPAw'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'wurlitzer', 'leidenalg', 'infomap'}


In [3]:
# Route pandas' .plot() calls through plotly instead of the default backend.
pd.set_option("plotting.backend", "plotly")

In [4]:
# Display name -> community factory class handed to Pici.
community_factories = {
    'OpenEnergyMonitor': OEMCommunityFactory,
    'OpenStreetMap': OSMCommunityFactory,
    'PreciousPlastic': PPCommunityFactory,
}

# Label set read directly from a spreadsheet.
spreadsheet_labels = InnovationLabels(pd.read_excel("../test_labels.xlsx"))

# Label set built from a LimeSurvey export; labellers named "Test"/"test"
# are passed as drop_labellers (presumably excluded from the result -- confirm).
limesurvey_labels = InnovationLabels().from_limesurvey(
    pd.read_excel("../test_ls_labels.xlsx"),
    drop_labellers=["Test", "test"],
)

pici = Pici(
    communities=community_factories,
    labels=[spreadsheet_labels, limesurvey_labels],
    cache_dir='../../../cache',
    start='2017-01-01',
    end='2019-01-01',
    # A `cache_nrows=1000` argument was left disabled at this position.
)

In [5]:
# Keep a short handle on the first label set held by the Pici instance
# (presumably the one built from ../test_labels.xlsx -- confirm ordering).
label_sets = pici.labels.labels
l0 = label_sets[0]

In [6]:
# Correlation matrix across the label columns of the first label set.
label_corr = l0.stats.label_correlation()
label_corr

Unnamed: 0,label_idea,label_evaluation,label_implementation,label_modification,label_improvement,label_any_activity,label_has_potential
label_idea,1.0,0.595919,0.326779,0.298365,0.162463,0.696467,0.546226
label_evaluation,0.595919,1.0,0.455191,0.394749,0.264871,0.738665,0.494644
label_implementation,0.326779,0.455191,1.0,0.380155,0.343803,0.398075,0.279047
label_modification,0.298365,0.394749,0.380155,1.0,0.564156,0.53957,0.261059
label_improvement,0.162463,0.264871,0.343803,0.564156,1.0,0.384941,0.19829
label_any_activity,0.696467,0.738665,0.398075,0.53957,0.384941,1.0,0.566328
label_has_potential,0.546226,0.494644,0.279047,0.261059,0.19829,0.566328,1.0


In [7]:
# Plot of the label correlations (presumably the same matrix as the
# label_correlation() table -- confirm against the stats implementation).
label_corr_plot = l0.stats.plot_label_correlation()
label_corr_plot

In [8]:
# Per-label frequencies; with normalize=True the values are fractions
# rather than raw counts.
label_shares = l0.stats.label_counts(normalize=True)
label_shares

label_idea              0.442396
label_evaluation        0.471582
label_implementation    0.205837
label_modification      0.322581
label_improvement       0.195084
label_any_activity      0.620584
label_has_potential     0.344086
dtype: float64

In [9]:
# Bar chart of the normalised per-label frequencies. A title is set so the
# figure can be understood without reading the cell that produced it.
label_shares = l0.stats.label_counts(normalize=True)
label_shares.plot(kind='bar', title="Relative frequency of each label")

In [10]:
# Per-labeller label frequencies: one row per labeller, one column per label,
# expressed as fractions because normalize=True.
per_labeller_shares = l0.stats.label_counts_by_labeller(normalize=True)
per_labeller_shares

Unnamed: 0_level_0,label_idea,label_evaluation,label_implementation,label_modification,label_improvement,label_any_activity,label_has_potential
labeller,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Jonathan1.732,0.344444,0.255556,0.088889,0.066667,0.044444,0.366667,0.333333
Larilu,0.262069,0.241379,0.089655,0.041379,0.02069,0.282759,0.262069
anna+philipp,0.590164,0.442623,0.229508,0.295082,0.229508,0.688525,0.491803
elisagleu@gmail.com,0.266667,0.52,0.346667,0.213333,0.24,0.613333,0.426667
mr.kiborg468@mail.ru,0.2,0.0,0.2,0.2,0.0,0.6,0.6
palubdiana@gmail.com,0.914286,0.971429,0.742857,0.685714,0.285714,0.971429,0.0
petrol39blackberry0,0.24,0.92,0.24,0.32,0.04,0.92,0.56
xwegner_lgh@outlook.de,0.576744,0.586047,0.186047,0.609302,0.35814,0.846512,0.35814


In [11]:
# Horizontal bar chart of the per-labeller label tallies. normalize=False is
# used here (presumably raw counts rather than fractions -- confirm), so the
# chart also reflects how much each labeller contributed.
# A title is set so the figure stands alone when the notebook is skimmed.
per_labeller_counts = l0.stats.label_counts_by_labeller(normalize=False)
per_labeller_counts.plot(kind='barh', title="Label counts per labeller")

In [12]:
# Build the topics summary report; later cells read its `labelled_results`.
reports = pici.reports
report = reports.topics_summary()

In [13]:
# Per-topic indicator columns alongside the label columns; topics without
# labels show empty label cells.
labelled_results = report.labelled_results
labelled_results

Unnamed: 0_level_0,number of contributors,delay first second post,number of posts,community_name,delay first last post,labeller,url,label_idea,label_evaluation,label_implementation,label_modification,label_improvement,label_potential,label_any_activity,label_has_potential
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1-click-power-measurement-of-any-device-in-your-house,2,0,3,OpenEnergyMonitor,0,,,,,,,,,,
10000-kwh-ceiling-in-emoncms,4,0,11,OpenEnergyMonitor,3,,,,,,,,,,
100a-0-5v-dc-output-ct-interesting,2,0,2,OpenEnergyMonitor,0,,,,,,,,,,
120-volt-service-in-mexico,3,0,3,OpenEnergyMonitor,1,,,,,,,,,,
2-phase-whole-house-and-pv-monitoring-from-brazil,4,0,11,OpenEnergyMonitor,2,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
hello-all-you-do-goood-simon-from-thailand,3,187,4,PreciousPlastic,187,,,,,,,,,,
machines-built-in-brazil,2,224,2,PreciousPlastic,224,,,,,,,,,,
hello-anyone-from-brasil,4,257,4,PreciousPlastic,288,,,,,,,,,,
motor-for-shredder,1,0,1,PreciousPlastic,0,,,,,,,,,,


In [28]:
# Mean of each per-topic indicator within every `label_potential` group.
indicator_columns = [
    'number of contributors',
    'delay first second post',
    'number of posts',
    'delay first last post',
]
topics_by_potential = report.labelled_results.groupby(by="label_potential")
topics_by_potential[indicator_columns].mean()

Unnamed: 0_level_0,number of contributors,delay first second post,number of posts,delay first last post
label_potential,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,3.357143,12.47619,4.708333,43.761905
1,4.614679,12.100917,12.137615,95.981651
2,6.321429,14.607143,12.392857,215.928571


In [15]:
# Scatter of thread size against the any-activity label. A title is set so
# the figure stands alone when the notebook is skimmed.
ex.scatter(
    report.labelled_results,
    x='number of posts',
    y='label_any_activity',
    title="Number of posts vs. any-activity label",
)

In [24]:
# Keep only rows where both the post count and the has-potential label are
# present, then compare the post-count distributions per label value.
# histnorm='percent' shows percentages instead of raw counts.
# A title is set so the figure stands alone when the notebook is skimmed.
posts_and_potential = report.labelled_results[
    ['number of posts', "label_has_potential"]
].dropna()
ex.histogram(
    posts_and_potential,
    x='number of posts',
    color="label_has_potential",
    histnorm='percent',
    title="Distribution of number of posts by has-potential label (percent)",
)