In [2]:
# Importing for system files
import os
import sys
import csv
import pprint

In [3]:
# Loading environmental variables 
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Import System Files
dir_path = os.path.dirname(os.path.realpath('__file__'))
sys.path.append(dir_path + '/lib/')  
from process_work import process_work

In [5]:
# For processing WhatsApp data file
from whatstk import df_from_txt_whatsapp
from whatstk.data import whatsapp_urls

# Date before which chat is not scored
cut_off_date = 20240317

In [6]:
# file_key = '20231213_townhall_chat'
# file_key = '20231213_beerhall_chat'
# file_key = '20231214_agroverse_chat'
# file_key = '20231214_main_dao_website_chat'
file_key = '20240603_agroverse_chat'


In [7]:
# Importing WhatsApp data file
df = df_from_txt_whatsapp(filepath=f"./data/{file_key}.txt")
df.head(100)

# Setup work analysis file
analysis_file = open(f"analysis/{file_key}.csv", 'w')
analysis_writer = csv.writer(analysis_file)

header = ['Contributor Name', 'Contribution Made', 'Rubric classification', 'TDGs Issued', 'Status date']
# data = ['Afghanistan', 652090, 'AF', 'AFG']
analysis_writer.writerow(header)

pp = pprint.PrettyPrinter(indent=4)

In [8]:
# For OpenAI client
from openai import OpenAI

# Acquire OpenAI API Key via this URL 
#    https://platform.openai.com/api-keys
# Save Secret in .env file with 
#    OPENAI_API_KEY=YOUR SECRET KEY

openai_client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [9]:
total_rows = df.count()['date']

In [10]:
# Calling OpenAi API to get time taken for work done
def check_tdg_issued(work_description):
    payload = f"We award 100TDG for every hour of contribution made that was explicitly reported in the Action Description. \
                We award 1.65TDG for every minute of contribution made that was explicitly reported in the Action Description. \
                 Amount of TDG issued should be prorated based on actual time explicitly reported\
                 Time taken indicated in the 'Action Description' may also appear in the following formats as a stand alone sentences\
                     Time Taken : X Hours \
                     Time Taken : Y Minutes \
                     \
                 We award 0.01TDG for each 0.01USD of expenses paid for on behalf by DAO member \
                 There are only two available options for 'Rubric classification' and they are: \
                     'Contributed Fiat Funding' \
                     'Contributed Human Effort' \
                     \
                 Response must be short and concise\
                 Response must be in a single line CSV format. Response must not include headers\
                     Rubric classification (String); TDGs Issued (Integer)\
                     \
                 If Action Description does not make sense return the following values \
                     Unknown; 0 \
                     \
                 For the following 'Action Description' determine Rubric classification and TDGs Issued \
                     \
                 Action Description:\
                 {work_description}."
    
    work_process_completion = openai_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": payload,
            }
        ],
        model="gpt-3.5-turbo",
    )
    return work_process_completion.choices[0].message.content

# response = check_tdg_issued("Gary Teh walked the dog for 1 hour")

In [None]:
format = '%Y%m%d'
for index, row in df.iterrows():
    # Doesn't process record if occurred before a specific date
    if int(row['date'].strftime(format)) < cut_off_date:
        continue
    
    clean_work_description = row['message'].encode('ascii','ignore') 
    response = check_tdg_issued(clean_work_description)
    openai_result = response.split(';')
    
    classification = "Unknown"
    tdg_issue = 0
    
    if len(openai_result) == 2:
        classification = openai_result[0]
        tdg_issue = openai_result[1]

    # header = ['Contributor Name', 'Contribution Made', 'Rubric classification', 'TDGs Issued', 'Status date']
    data = [
        row['username'], 
        row['message'], 
        classification, 
        tdg_issue, 
        row['date'].strftime(format)
    ]
    print("")
    print(f"Processing {index} of {total_rows}: ")
    pp.pprint(response)
    pp.pprint(openai_result)
    pp.pprint(len(openai_result))
    pp.pprint(data)
    
    
    analysis_writer.writerow(data)        


Processing 7497 of 8126: 
'Unknown; 0'
['Unknown', ' 0']
2
[   'Gary Teh',
    'Tat person is me \n'
    '\n'
    'That is why I need the listed ordered as such \n'
    '\n'
    'Easy for me to plan my Overland route',
    'Unknown',
    ' 0',
    '20240317']

Processing 7498 of 8126: 
'Contributed Human Effort; 16'
['Contributed Human Effort', ' 16']
2
[   'Gary Teh',
    'I spent 10 minutes replying to Jasmine the TikTok influencer\n\u200e',
    'Contributed Human Effort',
    ' 16',
    '20240317']

Processing 7499 of 8126: 
'Unknown; 0'
['Unknown', ' 0']
2
['Gary Teh', '\u200eimage omitted\n\u200e', 'Unknown', ' 0', '20240317']

Processing 7500 of 8126: 
'Unknown; 0'
['Unknown', ' 0']
2
['Gary Teh', '\u200eimage omitted\n\u200e', 'Unknown', ' 0', '20240317']

Processing 7501 of 8126: 
'Unknown; 0'
['Unknown', ' 0']
2
['Gary Teh', '\u200eimage omitted', 'Unknown', ' 0', '20240317']

Processing 7502 of 8126: 
'Unknown; 0'
['Unknown', ' 0']
2
['Anupa/diya', 'I can just populate it 🤣'

In [None]:
analysis_file.close()

In [304]:
print(f"We award 100TDG for every hour of contribution made that was explicitly reported in the Action Description. \
                We award 1.666666666666667TDG for every minute of contribution made that was explicitly reported in the Action Description. \
                 Amount of TDG issued should be prorated based on actual time explicitly reported\
                 Time taken indicated in the 'Action Description' may also appear in the following formats as a stand alone sentences\
                     Time Taken : X Hours \
                     Time Taken : Y Minutes \
                     \
                 We award 1TDG for each USD of expenses paid for onbehalf by DAO member \
                 There are only two available options for 'Rubric classification' and they are: \
                     'Contributed Fiat Funding' \
                     'Contributed Human Effort' \
                     \
                 Response must be short and concise\
                 Response must be in a single line CSV format. Response must not include headers\
                     Rubric classification (String); TDGs Issued (Integer)\
                     \
                 If Action Description does not make sense return the following values \
                     Unknown; 0 \
                     \
                 For the following 'Action Description' determine Rubric classification and TDGs Issued \
                     \
                 Action Description:\
                 Text submission by DAO member in WhatsApp channels.")


We award 100TDG for every hour of contribution made that was explicitly reported in the Action Description.                 We award 1.666666666666667TDG for every minute of contribution made that was explicitly reported in the Action Description.                  Amount of TDG issued should be prorated based on actual time explicitly reported                 Time taken indicated in the 'Action Description' may also appear in the following formats as a stand alone sentences                     Time Taken : X Hours                      Time Taken : Y Minutes                                       We award 1TDG for each USD of expenses paid for onbehalf by DAO member                  There are only two available options for 'Rubric classification' and they are:                      'Contributed Fiat Funding'                      'Contributed Human Effort'                                       Response must be short and concise                 Response must be in a single line CSV format. 