In [27]:
# Importing for system files
import os
import sys
import csv
import pprint

In [2]:
# Loading environmental variables 
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Import System Files
dir_path = os.path.dirname(os.path.realpath('__file__'))
sys.path.append(dir_path + '/lib/')  
from process_work import process_work

In [36]:
# For processing WhatsApp data file
from whatstk import df_from_txt_whatsapp
from whatstk.data import whatsapp_urls

# Date before which chat is not scored
cut_off_date = 20231212

In [37]:
# file_key = '20231213_townhall_chat'
# file_key = '20231213_beerhall_chat'
# file_key = '20231214_agroverse_chat'
# file_key = '20231214_main_dao_website_chat'
file_key = '20240216_townhall_chat'


In [38]:
# Importing WhatsApp data file
df = df_from_txt_whatsapp(filepath=f"./data/{file_key}.txt")
df.head(100)

# Setup work analysis file
analysis_file = open(f"analysis/{file_key}.csv", 'w')
analysis_writer = csv.writer(analysis_file)

header = ['Contributor Name', 'Contribution Made', 'Rubric classification', 'TDGs Issued', 'Status date']
# data = ['Afghanistan', 652090, 'AF', 'AFG']
analysis_writer.writerow(header)

pp = pprint.PrettyPrinter(indent=4)

In [39]:
# For OpenAI client
from openai import OpenAI

# Acquire OpenAI API Key via this URL 
#    https://platform.openai.com/api-keys
# Save Secret in .env file with 
#    OPENAI_API_KEY=YOUR SECRET KEY

openai_client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [40]:
total_rows = df.count()['date']

In [41]:
# Calling OpenAi API to get time taken for work done
def check_tdg_issued(work_description):
    payload = f"We award 100TDG for every hour of contribution made that was explicitly reported in the Action Description. \
                We award 1.666666666666667TDG for every minute of contribution made that was explicitly reported in the Action Description. \
                 Amount of TDG issued should be prorated based on actual time explicitly reported\
                 Time taken indicated in the 'Action Description' may also appear in the following formats as a stand alone sentences\
                     Time Taken : X Hours \
                     Time Taken : Y Minutes \
                     \
                 We award 0.01TDG for each 0.01USD of expenses paid for on behalf by DAO member \
                 There are only two available options for 'Rubric classification' and they are: \
                     'Contributed Fiat Funding' \
                     'Contributed Human Effort' \
                     \
                 Response must be short and concise\
                 Response must be in a single line CSV format. Response must not include headers\
                     Rubric classification (String); TDGs Issued (Integer)\
                     \
                 If Action Description does not make sense return the following values \
                     Unknown; 0 \
                     \
                 For the following 'Action Description' determine Rubric classification and TDGs Issued \
                     \
                 Action Description:\
                 {work_description}."
    
    work_process_completion = openai_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": payload,
            }
        ],
        model="gpt-3.5-turbo",
    )
    return work_process_completion.choices[0].message.content

# response = check_tdg_issued("Gary Teh walked the dog for 1 hour")

In [43]:
format = '%Y%m%d'
for index, row in df.iterrows():
    # Doesn't process record if occurred before a specific date
    if int(row['date'].strftime(format)) < cut_off_date:
        continue
    
    clean_work_description = row['message'].encode('ascii','ignore') 
    response = check_tdg_issued(clean_work_description)
    openai_result = response.split(';')
    
    classification = "Unknown"
    tdg_issue = 0
    
    if len(openai_result) == 2:
        classification = openai_result[0]
        tdg_issue = openai_result[1]

    # header = ['Contributor Name', 'Contribution Made', 'Rubric classification', 'TDGs Issued', 'Status date']
    data = [
        row['username'], 
        row['message'], 
        classification, 
        tdg_issue, 
        row['date'].strftime(format)
    ]
    print("")
    print(f"Processing {index} of {total_rows}: ")
    pp.pprint(response)
    pp.pprint(openai_result)
    pp.pprint(len(openai_result))
    pp.pprint(data)
    
    
    analysis_writer.writerow(data)        


Processing 2761 of 3006: 
'Unknown; 0'
['Unknown', ' 0']
2
['Gary Teh', 'Bump', 'Unknown', ' 0', '20231212']

Processing 2762 of 3006: 
'Unknown; 0'
['Unknown', ' 0']
2
['Gary Teh', 'Bump', 'Unknown', ' 0', '20231212']

Processing 2763 of 3006: 
'Unknown; 0'
['Unknown', ' 0']
2
['Gary Teh', 'Bump', 'Unknown', ' 0', '20231212']

Processing 2764 of 3006: 
'Unknown; 0'
['Unknown', ' 0']
2
['Gary Teh', 'Bump', 'Unknown', ' 0', '20231212']

Processing 2765 of 3006: 
'Unknown; 0'
['Unknown', ' 0']
2
[   'Gary Teh',
    'As I have shared in @120363041505997891@g.us this is a negative '
    'non-actionable statement \n'
    '\n'
    'What is the actual actionable set of rules you are proposing for voting? '
    '\u200e<This message was edited>',
    'Unknown',
    ' 0',
    '20231212']

Processing 2766 of 3006: 
'Unknown; 0'
['Unknown', ' 0']
2
[   'Gary Teh',
    'What does differentiation concretely mean to you \n'
    '\n'
    'The deck is a prop you use to deliver your story \n'
    '\n'


KeyboardInterrupt: 

In [None]:
analysis_file.close()

In [304]:
print(f"We award 100TDG for every hour of contribution made that was explicitly reported in the Action Description. \
                We award 1.666666666666667TDG for every minute of contribution made that was explicitly reported in the Action Description. \
                 Amount of TDG issued should be prorated based on actual time explicitly reported\
                 Time taken indicated in the 'Action Description' may also appear in the following formats as a stand alone sentences\
                     Time Taken : X Hours \
                     Time Taken : Y Minutes \
                     \
                 We award 1TDG for each USD of expenses paid for onbehalf by DAO member \
                 There are only two available options for 'Rubric classification' and they are: \
                     'Contributed Fiat Funding' \
                     'Contributed Human Effort' \
                     \
                 Response must be short and concise\
                 Response must be in a single line CSV format. Response must not include headers\
                     Rubric classification (String); TDGs Issued (Integer)\
                     \
                 If Action Description does not make sense return the following values \
                     Unknown; 0 \
                     \
                 For the following 'Action Description' determine Rubric classification and TDGs Issued \
                     \
                 Action Description:\
                 Text submission by DAO member in WhatsApp channels.")


We award 100TDG for every hour of contribution made that was explicitly reported in the Action Description.                 We award 1.666666666666667TDG for every minute of contribution made that was explicitly reported in the Action Description.                  Amount of TDG issued should be prorated based on actual time explicitly reported                 Time taken indicated in the 'Action Description' may also appear in the following formats as a stand alone sentences                     Time Taken : X Hours                      Time Taken : Y Minutes                                       We award 1TDG for each USD of expenses paid for onbehalf by DAO member                  There are only two available options for 'Rubric classification' and they are:                      'Contributed Fiat Funding'                      'Contributed Human Effort'                                       Response must be short and concise                 Response must be in a single line CSV format. 