In [None]:
# This notebook is meant to be run in Google Colab

In [None]:
# Install the toolkit in order to access the dataset
# pip install delitoolkit

In [None]:
# Import necessary package
import delitoolkit
from delitoolkit.delidata import DeliData
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the DeliData corpus; `corpus` maps each group id to that group's list of messages
delidata_corpus = DeliData()
# Iterating the mapping yields its keys, i.e. every group id in the corpus
groups = list(delidata_corpus.corpus)

In [None]:
def getUserCount(string):
  """Return the number of participants named in an INITIAL message text.

  Only the part of `string` before the first '&&' is inspected; it is read as
  a comma-separated list of names. Per the original author's note, that list
  also contains a SYSTEM entry, which is not counted as a participant.

  Args:
    string: text of the INITIAL message.

  Returns:
    Number of comma-separated entries before the first '&&', minus one.
  """
  name_list, _, _ = string.partition('&&')
  # N entries are separated by N - 1 commas, so the comma count already
  # leaves out one entry (the SYSTEM one).
  return name_list.count(',')

In [None]:
# Classes that process the dataset and store information related to each conversation
class GroupDetails:
  """Plain record holding the statistics gathered for one conversation.

  Attributes:
    number_of_conversations: size of the conversation as supplied by the caller.
    number_of_participants: number of participants in the conversation.
    message_per_user: mapping of message origin to its message count.
    submission_count_per_user: mapping of origin to its number of submissions.
    submission_per_user: mapping of origin to its stored submission text.
    consensus_: agreement score supplied by the caller.
    idle_users: number of idle participants supplied by the caller.
  """

  def __init__(self, number_of_conversations, number_of_participants,
               message_per_user, submission_count_per_user,
               submission_per_user, consensus_, idle_users):
    # Sizes and head counts
    self.number_of_conversations = number_of_conversations
    self.number_of_participants = number_of_participants
    # Per-origin tallies
    self.message_per_user = message_per_user
    self.submission_count_per_user = submission_count_per_user
    self.submission_per_user = submission_per_user
    # Derived scores
    self.consensus_ = consensus_
    self.idle_users = idle_users

  def __str__(self):
    """Render every attribute as `name=value`, one per line, in constructor order."""
    field_names = (
        "number_of_conversations",
        "number_of_participants",
        "message_per_user",
        "submission_count_per_user",
        "submission_per_user",
        "consensus_",
        "idle_users",
    )
    rendered = ", \n".join(
        f"{name}={getattr(self, name)}" for name in field_names
    )
    return f"GroupDetails({rendered})"

class GroupStorage:
    """Computes and stores per-conversation statistics, keyed by group id.

    Attributes:
        storage_: dict mapping every processed group id to its GroupDetails.
    """

    def __init__(self):
        self.storage_ = {}

    @staticmethod
    def _collectStats(conversation):
        """Tally one conversation's chat messages and submissions per origin.

        Args:
            conversation: iterable of message dicts, each providing the keys
                'message_type', 'origin' and 'original_text'.

        Returns:
            Tuple (message_count, submit_count, user_submission, num_user):
            chat messages per origin, submissions per origin, the most recent
            submitted text per origin, and the participant count read from the
            INITIAL message (0 when the conversation has none).
        """
        message_count = {}
        submit_count = {}
        user_submission = {}
        num_user = 0
        for message in conversation:
            m_type = message['message_type']
            origin_ = message['origin']
            original_text = message['original_text']
            if m_type == "INITIAL":
                num_user = getUserCount(original_text)
            elif m_type == "MESSAGE":
                message_count[origin_] = message_count.get(origin_, 0) + 1
            else:
                # Every other message type is treated as a submission; a later
                # submission from the same origin overwrites the earlier one.
                submit_count[origin_] = submit_count.get(origin_, 0) + 1
                user_submission[origin_] = original_text
        return message_count, submit_count, user_submission, num_user

    @staticmethod
    def _majorityShare(user_submission, num_user):
        """Return the fraction of participants behind the most common answer.

        Args:
            user_submission: mapping of origin to its latest submitted text.
            num_user: number of participants in the conversation.

        Returns:
            (size of the largest group of identical submissions) / num_user,
            or 0.0 when there are no submissions or no known participants, so
            a conversation without a usable INITIAL message cannot trigger a
            ZeroDivisionError.
        """
        if not user_submission or num_user <= 0:
            return 0.0
        votes = {}
        for answer in user_submission.values():
            votes[answer] = votes.get(answer, 0) + 1
        return max(votes.values()) / num_user

    def addToStorage(self, group_id, corpus=None):
        """Compute the statistics of one conversation and store them.

        The result is saved as a GroupDetails under `self.storage_[group_id]`.
        When the conversation has no submission at all, "ZERO" and the group id
        are printed and the consensus is stored as 0.

        Args:
            group_id: key of the conversation inside the corpus.
            corpus: optional mapping of group id to a list of message dicts.
                Defaults to the notebook-level `delidata_corpus.corpus`.

        Raises:
            KeyError: if `group_id` is not in the corpus, or a message lacks
                one of the expected keys.
        """
        if corpus is None:
            corpus = delidata_corpus.corpus
        conversation = corpus[group_id]

        message_count, submit_count, user_submission, num_user = (
            self._collectStats(conversation)
        )

        if len(user_submission) == 0:
            print("ZERO", group_id)
            consensus = 0
        else:
            consensus = self._majorityShare(user_submission, num_user)

        # Participants who never sent a chat message. Clamped at zero because
        # more origins may have chatted than the INITIAL message announced.
        idle_user_count = max(0, num_user - len(message_count))

        self.storage_[group_id] = GroupDetails(
            # One entry is left out of the length -- presumably the INITIAL
            # message; TODO confirm against the corpus format.
            len(conversation) - 1,
            num_user,
            message_count,
            submit_count,
            user_submission,
            consensus,
            idle_user_count,
        )

class Group:
  """A named category of conversations together with their statistics.

  Attributes:
    array_: iterable of group ids that belong to this category.
    group_name: category key ("A", "B", "C", "#2", "#3", "#4" or "#5") used to
      pick the plot titles; any other value is shown as-is.
    group_storage: GroupStorage holding one GroupDetails per processed id.
  """

  # Plot labels for the known category keys.
  _CATEGORY_LABELS = {
      "A": "A, [5-20) number of deliberation",
      "B": "B, [20-40) number of deliberation",
      "C": "C, 40+ number of deliberation",
      "#2": "Number of users 2",
      "#3": "Number of users 3",
      "#4": "Number of users 4",
      "#5": "Number of users 5",
  }

  def __init__(self, array, group_name):
    self.array_ = array
    self.group_name = group_name
    self.group_storage = GroupStorage()

  def _categoryLabel(self):
    """Return the plot label of this category, or the raw name if it is unknown."""
    return self._CATEGORY_LABELS.get(self.group_name, str(self.group_name))

  def computeMessageStat(self):
    """Compute and store the statistics of every conversation in `array_`."""
    for group_id in self.array_:
      self.group_storage.addToStorage(group_id)

  def drawBoxPlot(self):
    """Show a box plot of the conversation lengths stored for this category."""
    lengths = [
        details.number_of_conversations
        for details in self.group_storage.storage_.values()
    ]
    plt.boxplot(lengths)
    plt.title('Number of Deliberations for category ' + self._categoryLabel())
    plt.ylabel('No of deliberations')
    plt.show()

  def drawBarPlot(self, max_bars=21):
    """Show majority agreement (bars) against conversation length (line).

    Conversations are numbered 1, 2, ... in storage order along the x-axis.
    The id of every stored conversation longer than 90 is printed, whether or
    not it ends up in the plot.

    Args:
      max_bars: maximum number of conversations to plot, counted from the
        first stored one; a non-negative int, or None to plot all of them.
        The default of 21 matches the previously hard-coded limit.
    """
    storage = self.group_storage.storage_

    # Report unusually long conversations before the list is truncated.
    for group_id, details in storage.items():
      if details.number_of_conversations > 90:
        print(group_id)

    shown = list(storage.values())[:max_bars]
    positions = list(range(1, len(shown) + 1))
    majority = [details.consensus_ * 100 for details in shown]
    lengths = [details.number_of_conversations for details in shown]

    fig, ax1 = plt.subplots()

    ax1.bar(positions, majority, color='brown', alpha=0.5, align='center')
    ax1.set_ylabel('Majority Agreement %', color='b')
    ax1.set_xlabel('Conversation Name', color='b')

    # Second y-axis sharing the same x positions.
    ax2 = ax1.twinx()

    # The label gives legend() an entry to show; without it the legend is empty.
    ax2.plot(positions, lengths, color='orange', marker='o',
             label='Number of deliberations')
    ax2.set_ylabel('Number of deliberations', color='b')

    ax2.legend(loc='upper left')

    plt.title('Category ' + self._categoryLabel())

    plt.show()
    