# Scripts to download data for AE KB

In [16]:
import json
import requests
import pandas as pd


from tqdm import tqdm
from pathlib import Path

PATH_DATA = Path('../data/askextension_kb')
PATH_DATA.mkdir(parents=True, exist_ok=True)

In [17]:
def download_ask_extention_data(start_year: int = 2006, end_year: int = 2024):
    '''Calls OS ticket API to get all ask extension data'''
    for i in tqdm(range(start_year, end_year), desc='Calling OS Ticket API to download AE data...'):
        start = str(i) 
        end = str(i+1)
        url = f'https://qa.osticket.eduworks.com/api/knowledge/{start}-01-01/{end}-01-01'
        try:
            r = requests.get(url, timeout=40)
            items = r.json()
        except requests.exceptions.Timeout: 
            print(f"Failed to download data for year {start}")
            continue
        
        if items:
            PATH_SAVE = Path.joinpath(PATH_DATA, f'{start_year}.json')
            with open(PATH_SAVE, 'w') as f:
                json.dump(items, f)

def get_ask_extension_data() -> list:
    '''Attempts to load from AE data. Though, will call os ticket API if not available'''
    DATA_FILE_NAMES = sorted(PATH_DATA.iterdir())
    
    if len(DATA_FILE_NAMES) == 0:
        download_ask_extention_data()

    # Combines the data files into one and returns it.
    df = pd.DataFrame()
    print(f'List of files:\n{[data_file.name for data_file in DATA_FILE_NAMES]}')
    for f in DATA_FILE_NAMES:
        df = pd.concat([df, pd.read_json(f)], ignore_index = True, axis = 0)
        return df

In [18]:
get_ask_extension_data()

List of files:
['2012-2013.json', '2014-2015.json', '2016-2017.json', '2018-2019.json', '2020-1.json', '2020-2.json', '2021-1.json', '2021-2.json']


Unnamed: 0,faq-id,title,created,updated,tags,state,county,question,answer,attachments
0,109900,When can I plant blue spruce trees in Colorado...,2012-12-03 15:53:47,2012-12-03 17:47:21,[trees and shrubs],Colorado,El Paso County,I need to plant two blue spruce trees that are...,"{'1': {'response': 'Jerry, you can plant them...",
1,109905,Chinkapin oak trunk damage #109905,2012-12-03 16:05:27,2012-12-20 15:32:48,"[oaks, trees and shrubs, horticulture]",Texas,Burnet County,The base of the trunk of a chinkapin oak in ou...,{'1': {'response': 'Thank you for you question...,[https://osticket.eduworks.com/file.php?key=ma...
2,109912,how would I know if my sheep have Enterotoxemi...,2012-12-03 16:42:05,2012-12-03 21:29:47,"[sheep, health and diseases]",Missouri,Burnet County,how would I know if my sheep have Enterotoxemi...,{'1': {'response': 'The symptoms of overeating...,
3,109918,Maple tree with several problems #109918,2012-12-03 16:59:02,2013-01-07 12:06:57,"[maple trees, trees and shrubs, horticulture]",Texas,Fort Bend County,I have a maple tree (exact species unknown) in...,{'1': {'response': 'Vincent M gave this reply:...,
4,109943,Moss control #109943,2012-12-03 18:39:40,2012-12-11 03:09:08,"[moss, horticulture]",Washington,Thurston County,I put moss killer on my yard but that does not...,{'1': {'response': 'We apologize for the laten...,
...,...,...,...,...,...,...,...,...,...,...
11895,163090,Help us identify wasp(?) #163090,2013-12-30 23:59:38,2014-01-13 14:43:11,"[insect identification, integrated pest manage...",Texas,Atascosa County,We found 3 huge nests on our property - all le...,"{'1': {'response': 'Hello Sandra,Those wasps a...",[https://osticket.eduworks.com/file.php?key=dy...
11896,163091,Thinking about planting s... #163091,2013-12-31 00:27:34,2013-12-31 11:55:23,[],Minnesota,Ramsey County,Thinking about planting some Red Currants in t...,{'1': {'response': 'It's good you're consideri...,
11897,163094,Reference question #163094,2013-12-31 02:12:31,2014-01-03 12:25:07,[],Texas,Bell County,I am reading from one of your listings:https:/...,{'1': {'response': 'I will see what I can find...,
11898,163103,Roof pitch of a greenhouse and glazing #163103,2013-12-31 03:01:19,2013-12-31 20:18:22,"[greenhouse, front page]",Colorado,Larimer County,Is there a suggested pitch of the roof of a gr...,{'1': {'response': 'The standard pitch for a g...,
