In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Widen pandas' display limits so large frames are shown without truncation.
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_columns', 100)

In [3]:
class API:
    """Small client for the metadata endpoints of the apps.bea.gov data API.

    The class fetches the dataset list, each dataset's parameter list and the
    values every parameter accepts, and turns that metadata into GETDATA URLs.
    All fetch methods return the JSON-decoded response as-is; the ``parse*``
    helpers expect the ``{'BEAAPI': {'Results': ...}}`` layout they index into.
    """

    def __init__(self):
        # NOTE(review): the UserID (API key) is embedded in source control;
        # consider loading it from configuration or the environment instead.
        self.base = 'http://apps.bea.gov/api/data?&UserID=F2205008-0F90-4F72-B77E-D7938D6188E6&ResultFormat=JSON'

    def _fetch(self, query):
        """GET ``self.base + query`` and return the JSON-decoded body."""
        return requests.get(self.base + query).json()

    def datasetList(self):
        """Return the decoded GETDATASETLIST response."""
        return self._fetch('&method=GETDATASETLIST')

    def paramList(self, datasetname):
        """Return the decoded GETPARAMETERLIST response for ``datasetname``."""
        return self._fetch('&method=GETPARAMETERLIST&datasetname=' + datasetname)

    def paramValues(self, datasetname, parametername):
        """Return the decoded GETPARAMETERVALUES response for one parameter."""
        return self._fetch(
            '&method=GETPARAMETERVALUES&datasetname=' + datasetname
            + '&parametername=' + parametername
        )

    def parseNames(self, data):
        """Flatten every list under ``Results`` and drop the final element.

        Returns:
            list: all entries found under each ``Results`` key, in order,
            without the last one.
        """
        nameList = []
        for entries in data['BEAAPI']['Results'].values():
            nameList.extend(entries)
        # NOTE(review): the last entry is always discarded; the reason is not
        # visible in this file - confirm it is still wanted.
        return nameList[:-1]

    def parseMethods(self, data):
        """Return the ``ParameterName`` of every entry under ``Results.Parameter``."""
        parameters = data['BEAAPI']['Results']['Parameter']
        methodList = []
        # Positional access is deliberate: when ``Parameter`` is a mapping
        # rather than a list, indexing by position raises KeyError and the
        # entry is skipped, exactly like an entry lacking 'ParameterName'.
        for position in range(len(parameters)):
            try:
                methodList.append(parameters[position]['ParameterName'])
            except KeyError:
                continue
        return methodList

    def parseValues(self, data, which):
        """Return field number ``which`` of every entry under ``Results.ParamValue``.

        Args:
            data: decoded GETPARAMETERVALUES response.
            which: position of the wanted field within each entry's values.

        Raises:
            IndexError: if an entry has fewer than ``which + 1`` fields.
        """
        entries = data['BEAAPI']['Results']['ParamValue']
        valueList = []
        # Positional access is deliberate (see parseMethods): a mapping-shaped
        # ``ParamValue`` raises KeyError per position and contributes nothing.
        for position in range(len(entries)):
            try:
                fields = list(entries[position].values())
                valueList.append(fields[which])
            except KeyError:
                continue
        return valueList

    def createDatalist(self):
        """Query the API for every dataset's parameters and their allowed values.

        Prints both intermediate dictionaries and returns the second one.

        Returns:
            dict: ``{dataset name: {parameter name: [first field of each value]}}``.
        """
        # Fixed: this used the module-level global ``a1`` instead of ``self``,
        # which raised NameError unless that global happened to exist.
        datasets = self.parseNames(self.datasetList())

        params = {}     # dataset name -> list of parameter names
        paramVals = {}  # dataset name -> {parameter name -> list of values}
        for dataset in datasets:
            name = dataset['DatasetName']
            params[name] = self.parseMethods(self.paramList(name))
            paramVals[name] = {}
            for parameter in params[name]:
                paramVals[name][parameter] = self.parseValues(
                    self.paramValues(name, parameter), 0
                )
        print(params)
        print(paramVals)
        return paramVals

    def getDataSettings(self, paramlistdata):
        """Extract per-parameter flags from a GETPARAMETERLIST response.

        Returns:
            dict: ``{parameter name: {'MultipleAccepted': flag, 'AllVal': value}}``
            where ``'AllVal'`` is present only when the entry has a truthy
            ``'AllValue'``.
        """
        paramSettings = {}
        for entry in paramlistdata['BEAAPI']['Results']['Parameter']:
            settings = {'MultipleAccepted': entry['MultipleAcceptedFlag']}
            if entry.get('AllValue'):
                settings['AllVal'] = entry['AllValue']
            paramSettings[entry['ParameterName']] = settings
        return paramSettings

    def createURLS(self, paramVals, paramSettings, dsetName):
        """Build GETDATA URLs for one dataset.

        Parameters flagged ``MultipleAccepted == '1'`` are written once, using
        their 'AllVal' when available and otherwise every known value joined
        by commas. Each remaining (single-valued) parameter is expanded into
        one URL per value.

        Args:
            paramVals: ``{parameter name: [values]}`` for this dataset.
            paramSettings: output of :meth:`getDataSettings`.
            dsetName: dataset name placed in the URL and used as result key.

        Returns:
            dict: ``{dsetName: {value: url}}``, or ``{}`` when the dataset has
            no single-valued parameter.
        """
        multA = []   # parameters accepting several values at once
        only1 = []   # parameters accepting exactly one value
        allVal = {}  # parameter -> token meaning "all values"
        for name, settings in paramSettings.items():
            if settings['MultipleAccepted'] == str(1):
                multA.append(name)
            else:
                only1.append(name)
            if settings.get('AllVal'):
                allVal[name] = settings['AllVal']

        url = self.base + '&method=GETDATA' + '&datasetname=' + dsetName + '&'
        URLs = {}
        for name in multA:
            values = [allVal[name]] if name in allVal else paramVals[name]
            joined = ''.join(value + ',' for value in values)
            url = (url + name + '=' + joined).rstrip(',') + '&'

        # NOTE(review): URLs[dsetName] is reset for every single-valued
        # parameter, so only the values of the LAST one become keys, and the
        # earlier ones stay in the URL pinned to their last listed value.
        # This is the original behaviour; confirm it is what is wanted.
        for name in only1:
            prefix = url + name + '='
            url = prefix
            URLs[dsetName] = {}
            for value in paramVals[name]:
                url = (prefix + value + ',').rstrip(',') + '&'
                URLs[dsetName][value] = url.rstrip('&')
        return URLs

# Build the per-dataset GETDATA URL dictionaries from the live API metadata.
a1 = API()
vals = a1.createDatalist()
urls = []
for x in a1.parseNames(a1.datasetList()):
    dataset_name = x['DatasetName']
    dataset_vals = vals[dataset_name]
    dataset_settings = a1.getDataSettings(a1.paramList(dataset_name))
    urls.append(a1.createURLS(dataset_vals, dataset_settings, dataset_name))
print(urls)
#to fix:
#NIPA ok, NIUnderlyingDetail ok, MNE= create exception for all values (Perhaps with a request evaluation), FixedAssets = ok, Regional no, Others missing. 

{'NIPA': ['Frequency', 'ShowMillions', 'TableID', 'TableName', 'Year'], 'NIUnderlyingDetail': ['Frequency', 'TableID', 'TableName', 'Year'], 'MNE': ['DirectionOfInvestment', 'OwnershipLevel', 'NonbankAffiliatesOnly', 'Classification ', 'Country', 'Industry', 'Year', 'State', 'SeriesID', 'GetFootnotes', 'Investment', 'ParentInvestment'], 'FixedAssets': ['TableName', 'Year'], 'ITA': ['Indicator', 'AreaOrCountry', 'Frequency', 'Year'], 'IIP': ['TypeOfInvestment', 'Component', 'Frequency', 'Year'], 'InputOutput': ['TableID', 'Year'], 'IntlServTrade': ['TypeOfService', 'TradeDirection', 'Affiliation', 'AreaOrCountry', 'Year'], 'GDPbyIndustry': ['Frequency', 'Industry', 'TableID', 'Year'], 'Regional': ['GeoFips', 'LineCode', 'TableName', 'Year'], 'UnderlyingGDPbyIndustry': ['Frequency', 'Industry', 'TableID', 'Year']}
{'NIPA': {'Frequency': ['A', 'Q', 'M'], 'ShowMillions': ['Y', 'N'], 'TableID': ['T10101', 'T10102', 'T10103', 'T10104', 'T10105', 'T10106', 'T10107', 'T10108', 'T10109', 'T1011

[{'NIPA': {'T10101': 'http://apps.bea.gov/api/data?&UserID=F2205008-0F90-4F72-B77E-D7938D6188E6&ResultFormat=JSON&method=GETDATA&datasetname=NIPA&Frequency=A,Q,M&Year=X&ShowMillions=N&TableID=T80400&TableName=T10101', 'T10102': 'http://apps.bea.gov/api/data?&UserID=F2205008-0F90-4F72-B77E-D7938D6188E6&ResultFormat=JSON&method=GETDATA&datasetname=NIPA&Frequency=A,Q,M&Year=X&ShowMillions=N&TableID=T80400&TableName=T10102', 'T10103': 'http://apps.bea.gov/api/data?&UserID=F2205008-0F90-4F72-B77E-D7938D6188E6&ResultFormat=JSON&method=GETDATA&datasetname=NIPA&Frequency=A,Q,M&Year=X&ShowMillions=N&TableID=T80400&TableName=T10103', 'T10104': 'http://apps.bea.gov/api/data?&UserID=F2205008-0F90-4F72-B77E-D7938D6188E6&ResultFormat=JSON&method=GETDATA&datasetname=NIPA&Frequency=A,Q,M&Year=X&ShowMillions=N&TableID=T80400&TableName=T10104', 'T10105': 'http://apps.bea.gov/api/data?&UserID=F2205008-0F90-4F72-B77E-D7938D6188E6&ResultFormat=JSON&method=GETDATA&datasetname=NIPA&Frequency=A,Q,M&Year=X&Sho

In [21]:
from operator import itemgetter
from itertools import groupby

class DataProcessing:
    """Download one GETDATA response and reshape it per series code.

    Note that ``frameData`` and ``frameInfo`` each call ``parseData``, and
    ``parseData`` calls ``dataRetrieval``, so every call performs a new request.
    """

    def __init__(self, url):
        """Remember the GETDATA URL to download."""
        self.address = url

    def dataRetrieval(self):
        """GET ``self.address`` and return the JSON-decoded body."""
        return requests.get(self.address).json()

    def parseData(self):
        """Group the rows under ``BEAAPI.Results.Data`` by series code.

        Rows are grouped in a single pass (the previous version rescanned all
        rows once per code) and codes keep their order of first appearance,
        so the result is reproducible between runs.

        Returns:
            dict: ``{code: {'info': {...}, 'data': [(DataValue, TimePeriod), ...]}}``
            where ``code`` is ``str(SeriesCode)`` and ``'info'`` holds the
            description/metric/unit fields of the last row seen for that code.
        """
        rows = self.dataRetrieval()['BEAAPI']['Results']['Data']
        dvals = {}
        for row in rows:
            code = str(row['SeriesCode'])
            info = {
                'Description': row['LineDescription'],
                'METRIC_NAME': row['METRIC_NAME'],
                'UNIT_MULT': row['UNIT_MULT'],
                'CL UNIT': row['CL_UNIT'],
            }
            entry = dvals.setdefault(code, {'info': info, 'data': []})
            entry['info'] = info  # last row for a code wins, as before
            entry['data'].append((row['DataValue'], row['TimePeriod']))
        return dvals

    def frameData(self):
        """Return a DataFrame with one row per series code and one column per TimePeriod.

        Cell values are the raw ``DataValue`` entries from the response.
        """
        parsed = self.parseData()
        # Each (DataValue, TimePeriod) pair is reversed into a
        # TimePeriod -> DataValue mapping; the transpose puts codes on rows.
        by_code = {code: dict(map(reversed, entry['data'])) for code, entry in parsed.items()}
        return pd.DataFrame(by_code).T

    def frameInfo(self):
        """Return ``{series code: info dict}`` taken from :meth:`parseData`."""
        parsed = self.parseData()
        return {code: parsed[code]['info'] for code in parsed.keys()}

In [None]:
# Download every NIPA table URL and keep its series-by-period frame.
tables = {}
for x, table_url in urls[0]['NIPA'].items():
    dset = DataProcessing(table_url)
    tables[x] = dset.frameData()

In [22]:
# Fetch table T10101 on its own and keep the per-series info dictionaries
# (description, metric name, unit multiplier, unit) keyed by series code.
dset = DataProcessing(urls[0]['NIPA']['T10101'])
tableT1info = dset.frameInfo()

In [1]:
# NOTE(review): this cell has execution count 1 and its recorded output is a
# NameError - it was run in a kernel where `urls` had not been built yet.
print(urls[0]['NIPA'])

NameError: name 'urls' is not defined

In [57]:
# Scratch frame; `table` is not referenced again in the cells visible here.
table = pd.DataFrame()
# Number of series codes collected for the table.
print(len(tableT1info.keys()))

25


In [64]:
# Series codes whose METRIC_NAME does not mention 'Fisher Quantity Index'.
# The previous version repeated this same test once per info field through an
# inner loop whose variable was never used; `if info` keeps its one effect,
# which was to skip series with an empty info dictionary.
notpct = {
    key
    for key, info in tableT1info.items()
    if info and 'Fisher Quantity Index' not in info['METRIC_NAME']
}

print(notpct)

{'A191RP'}


In [None]:
# Keep only the columns of T10101 whose label contains 'Q' (presumably the
# quarterly periods - confirm), put periods on the rows, and drop the first row.
t10101_frame = tables['T10101']
has_q_label = t10101_frame.columns.str.contains('Q')
nipadata = t10101_frame.loc[:, has_q_label].T.iloc[1:, :]

In [None]:
# Dump every downloaded table followed by its name.
tnames = list(tables.keys())
for x, table_name in enumerate(tnames):
    print(tables[table_name])
    print(table_name)

In [None]:
# Write the table names to listnames.csv.
# NOTE(review): DataFrame.to_csv returns None when it is given a path, so
# `listnames` is always None after this line.
listnames = pd.DataFrame(tnames).to_csv('listnames.csv')

In [6]:
def saveTables(data, tname):
    """Pickle the 'Q'-labelled columns of ``data`` to ``<tname>.pkl``.

    Columns whose label contains 'Q' are kept, the frame is transposed so
    those labels become the rows, and the first row is dropped before the
    result is written to the current working directory.

    Args:
        data: DataFrame with string column labels.
        tname: file name stem (string).
    """
    has_q_label = data.columns.str.contains('Q')
    trimmed = data.loc[:, has_q_label].T.iloc[1:, :]
    trimmed.to_pickle(str(tname + '.pkl'))

# Pickle every downloaded table under its own name.
tnames = list(tables.keys())
for x, table_name in enumerate(tnames):
    saveTables(tables[table_name], table_name)
    

In [None]:
# Export the filtered T10101 frame to CSV.
nipadata.to_csv('tablet10101.csv')
# NOTE(review): `spqs` is not defined in any cell visible here - presumably
# left over from an earlier session; confirm where it comes from or remove.
spqs.to_csv('spq.csv')

In [None]:
##NIPA DATA (COULD BE ALL DATA)

def dataGrabber(url):
    """Fetch ``url`` with ``requests.get`` and return the JSON-decoded body."""
    return requests.get(url).json()

def dataParser(data, table):
    """Build a one-column, time-indexed DataFrame for one series code.

    Args:
        data: decoded GETDATA response; rows are read from
            ``data['BEAAPI']['Results']['Data']``.
        table: series code to extract (compared as a string).

    Returns:
        pandas.DataFrame: a single numeric column named
        ``LineDescription + LineNumber`` (taken from the last matching row),
        indexed by the parsed ``TimePeriod`` values. ``df.attrs`` carries
        'metric', 'multiplier', 'cl unit', 'tablename', 'seriescode' and
        'linenumber' from that same row.

    Raises:
        ValueError: if no row carries the requested series code. Previously
            this surfaced as an UnboundLocalError on ``colname``.
    """
    wanted = str(table)
    # Compare stringified codes so the lookup agrees with dataSelector, which
    # collects str(SeriesCode) values.
    matches = [
        row for row in data['BEAAPI']['Results']['Data']
        if str(row['SeriesCode']) == wanted
    ]
    if not matches:
        raise ValueError('no rows found for series code ' + repr(wanted))

    dataVal = [row['DataValue'] for row in matches]
    yearVal = [row['TimePeriod'] for row in matches]

    # Column name and metadata come from the last matching row.
    last = matches[-1]
    linestr = str(last['LineNumber'])
    colname = str(last['LineDescription']) + linestr

    try:
        index = pd.to_datetime(yearVal)
    except ValueError:
        # Fallback for periods such as '2020M01': strip the 'M' and parse the
        # remainder as year + month.
        index = pd.to_datetime([period.replace('M', '') for period in yearVal], format='%Y%m')

    try:
        values = pd.to_numeric(dataVal)
    except ValueError:
        # Values such as '1,234.5' carry thousands separators.
        values = pd.to_numeric([value.replace(',', '') for value in dataVal])

    df = pd.DataFrame({colname: values}, index=index)
    df.attrs['metric'] = str(last['METRIC_NAME'])
    df.attrs['multiplier'] = str(last['UNIT_MULT'])
    df.attrs['cl unit'] = str(last['CL_UNIT'])
    df.attrs['tablename'] = str(last['TableName'])
    df.attrs['seriescode'] = str(last['SeriesCode'])
    df.attrs['linenumber'] = linestr
    return df

def dataSelector(data):
    """Return ``{series code: dataParser(data, code)}`` for every code in ``data``.

    The codes are the distinct ``str(SeriesCode)`` values found under
    ``data['BEAAPI']['Results']['Data']``.
    """
    rows = data['BEAAPI']['Results']['Data']
    codes = {str(row['SeriesCode']) for row in rows}
    return {code: dataParser(data, code) for code in codes}

def dataPlotter(dframe):
    """Line-plot the first column of ``dframe`` against its index.

    Tick labels on both axes are rotated 45 degrees and the plot is titled
    with the name of the plotted column.
    """
    for rotate_ticks in (plt.xticks, plt.yticks):
        rotate_ticks(rotation=45)
    series_name = dframe.columns[0]
    axes = sns.lineplot(data=dframe, y=series_name, x=dframe.index)
    axes.set(title=dframe.columns[0])
    plt.show()

# Download every NIPA table and split it into one frame per series code.
allDfs = {}
for table, table_url in urls[0]['NIPA'].items():
    allDfs[str(table)] = dataSelector(dataGrabber(table_url))

In [None]:
#for x in allDfs.keys():
#    print(x)
# Show the per-series frames stored for table 'T60300C'.
print(allDfs['T60300C'])

In [None]:
# Show the GETDATA URL that was built for table 'T60300C'.
print(urls[0]['NIPA']['T60300C'])

In [None]:
#def dframeCreator(dataname):
#    dframeList = list(allDfs[dataname].keys())
#    df = allDfs[dataname][dframeList[0]]
#    for x in range(len(dframeList[1:])):
#        df = df.join(allDfs[dataname][dframeList[x+1]])
#    print(df.head)
#    return df

# Join the per-series frames of every table into one frame per table.
# NOTE(review): the definition above is commented out and the live
# `dframeCreator` is only defined in a later cell, so this cell fails on a
# fresh kernel run from top to bottom.
dfJoinedTables = {}
for x in allDfs.keys():
    dfJoinedTables[x] = dframeCreator(x)
    print(x)

In [None]:
# List the table names that now have a joined frame.
print(dfJoinedTables.keys())

In [None]:
# NOTE(review): `df1` is first assigned in a later cell; on a fresh kernel
# this cell raises NameError when the notebook is run in order.
display(df1)

In [None]:
# Number of tables that were downloaded into allDfs.
print(len(allDfs.keys()))

In [None]:
def dframeCreator(dataname):
    """Join all per-series frames stored under ``allDfs[dataname]``.

    Starts from the first stored frame and ``DataFrame.join``s every remaining
    one onto it, in the order they are stored.

    Args:
        dataname: key into the module-level ``allDfs`` dictionary.

    Returns:
        The joined DataFrame.
    """
    series_frames = allDfs[dataname]
    series_codes = list(series_frames.keys())
    joined = allDfs[dataname][series_codes[0]]
    for code in series_codes[1:]:
        joined = joined.join(allDfs[dataname][code])
    return joined

# NOTE(review): 'noT60300C' does not look like a table id (other cells use
# 'T60300C') - presumably a placeholder; confirm the intended key.
df1 = dframeCreator('noT60300C')
display(df1.tail())

In [None]:
# Print each (table, series code) pair followed by the row count of its frame.
for x, series_frames in allDfs.items():
    for y, series_frame in series_frames.items():
        print(x, y)
        print(len(series_frame))

In [None]:
from itertools import groupby

# Scratch demo of itertools.groupby on (category, name) pairs that are already
# ordered by category.
things = [
    ("animal", "bear"),
    ("animal", "duck"),
    ("plant", "cactus"),
    ("vehicle", "speed boat"),
    ("vehicle", "school bus"),
]

for key, group in groupby(things, lambda pair: pair[0]):
    print(group)
    print(key)
    for thing in group:
        category, name = thing
        print(category)
        print("A %s is a %s." % (name, key))
    print("")

In [None]:
# groupby() with no iterable raises TypeError, so inspect the class itself.
dir(groupby)