## Converting dates to a consistent format

In [1]:
import os
import pandas as pd

In [2]:
import sys
sys.path.append('../..')
import openai_data_tools as dt

In [3]:
# Few-shot example pairs handed to the DataProcessor below: 'item' is a raw
# date string and 'target' is the expected YYYY-MM-DD rendering of it.
examples = [
    dict(item='May 5, 1985', target='1985-05-05'),
]

In [4]:
# Load the evaluation set. Every column is read as text, and default NaN
# parsing is switched off so no value is converted to NaN (e.g. a literal
# 'NA' in the file stays the string 'NA').
dates = pd.read_csv(
    'dates.csv',
    dtype=str,
    keep_default_na=False,
)

In [5]:
# Show the loaded frame: 'item' holds the raw date strings, 'target' the expected output.
dates

Unnamed: 0,item,target
0,"Dec 12, 1995",1995-12-12
1,23 March 1974,1974-03-23
2,February 14th 2010,2010-02-14
3,7/9/80,1980-07-09
4,14/19/93,
5,2/15,
6,"13th of April, 2016",2016-04-13
7,"Twenty-fifth of August, 1955",1955-08-25
8,"October 12, nineteen eighty-six",1986-10-12


In [6]:
# Build the processor with the task prompt and the few-shot examples.
# The API key is looked up in the environment rather than written into the
# notebook; os.getenv yields None when OPENAI_API_KEY is not set.
processor = dt.DataProcessor(
    api_key=os.getenv("OPENAI_API_KEY"),
    model='gpt-3.5-turbo',
    instructions=(
        "You will be provided with dates in various formats. "
        "For each date, convert it to the format YYYY-MM-DD. "
        "If it is not a valid date, return 'NA'."
    ),
    examples=examples,
)

In [7]:
# Run every raw date string in the 'item' column through the processor.
# NOTE(review): presumably this issues one model request per item — confirm in dt.DataProcessor.
output = processor.process(dates['item'])

Progress: 100%


In [8]:
# Inspect the values returned by the processor.
output

['1995-12-12',
 '1974-03-23',
 '2010-02-14',
 '1980-07-09',
 'NA',
 'NA',
 '2016-04-13',
 '1955-08-25',
 '1986-10-12']

In [10]:
# Expected answers, shown for side-by-side comparison with the processor output.
# NOTE(review): this cell's execution count (10) is higher than that of the
# scorer cell that follows it (9), so the notebook was run out of order —
# re-run with Restart & Run All before sharing.
dates['target']

0    1995-12-12
1    1974-03-23
2    2010-02-14
3    1980-07-09
4            NA
5            NA
6    2016-04-13
7    1955-08-25
8    1986-10-12
Name: target, dtype: object

In [9]:
# Pair the processor output with the expected targets for scoring.
scorer = dt.Scorer(output, dates['target'])

In [11]:
# Per-item scores — presumably 1 for a match with the target and 0 otherwise; confirm in dt.Scorer.
scorer.score()

[1, 1, 1, 1, 1, 1, 1, 1, 1]

In [12]:
# Aggregate accuracy — presumably the fraction of items scored as correct; confirm in dt.Scorer.
scorer.accuracy()

1.0