diff --git a/archive.py b/archive.py deleted file mode 100755 index a2a453e..0000000 --- a/archive.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 - -from pyFCC.archive import get_attachment_urls, parse_fccid, load_next, fetch_and_pack#, errorFunction -from pyFCC.fccDB import create_product_table, populate_products -import sys - -if __name__ == '__main__': - # if len(sys.argv) in (2, 3): - # (appid, productid) = parse_fccid(*sys.argv[1:]) - if len(sys.argv) <2: - print("Usage: archive.py ") - sys.exit(1) - for fccid in sys.argv[1:]: - print("Looking up FCC id: %s" % fccid) - ###############call to function here - productData = load_next(fccid) - #appid, productid = parse_fccid(fccid) - #html_doc = lookup_fccid(appid, productid) - #productData = parse_search_results(html_doc) - - for key, value in productData.items(): - for x, row in enumerate(value, 1): - detail_url, ID, low, high = row - print("Fetching result %d" % x) - appid, productid = parse_fccid(ID) - print(appid, productid) - - attachments = get_attachment_urls(detail_url) - dirname = "%s/%s/%d" % (appid, productid, x) - fetch_and_pack(attachments, dirname, detail_url) - - create_product_table() - populate_products(productData) diff --git a/pyFCC/archive.py b/pyFCC/archive.py index 0c3aa10..992a64a 100644 --- a/pyFCC/archive.py +++ b/pyFCC/archive.py @@ -11,8 +11,8 @@ s = requests.Session() -# Perform FCC id search -def lookup_fccid(appid, productid, FromRec = 1): +# Perform FCC ID search +def lookup_fcc_id(app_id, product_id, FromRec = 1): payload = { "application_status" : "", "applicant_name" : "", @@ -51,44 +51,40 @@ def lookup_fccid(appid, productid, FromRec = 1): "calledFromFrame" : "Y", "comments" : "", "show_records" : "100", - #if soup.text = app_len: - productid = appid[app_len:] + productid - appid = appid[:app_len] - return (appid, productid) + if len(app_id) > app_len: + product_id = app_id[app_len:] + product_id + app_id = app_id[:app_len] + return (app_id, product_id) # Parsesearch results page to find "Detail" link def parse_search_results(html, tupIDdict): soup = BeautifulSoup(html, "html.parser") - #print(html.prettify()) rs_tables = soup("table", id="rsTable") if len(rs_tables) != 1: raise Exception("Error, found %d results tables" % len(rs_tables)) - #print("Found %d results" % len(links)) - rows = rs_tables[0].find_all("tr") # get all of the rows in the table - return_value = [] - #tupIDdict = {} + + rows = rs_tables[0].find_all("tr") for row in rows: links = row.find_all("a", href=re.compile("/oetcf/eas/reports/ViewExhibitReport.cfm\?mode=Exhibits")) @@ -96,47 +92,40 @@ def parse_search_results(html, tupIDdict): if len(links) == 0: continue cols = row.find_all("td") - lot = (links[0], cols[11].get_text().strip(), cols[14].get_text().strip(), cols[15].get_text().strip()) - return_value.append(lot) - FullfccID = lot[1] - if FullfccID not in tupIDdict: - tupIDdict[FullfccID] = [] - tupIDdict[FullfccID].append(lot) - print(lot) - - # this line happens in main - #appid, productid = parse_fccid(FullfccID) + ID = cols[11].get_text().strip() + grantee_code, product_code = parse_fcc_id(ID) + + #links[0] = url, cols[11] = full ID, cols[14] = low_freq, cols[15] = high_freq + product_info = { + 'grantee_code': grantee_code, + 'product_code': product_code, + 'url': links[0], + 'ID': cols[11].get_text().strip(), + 'low_freq': cols[14].get_text().strip(), + 'high_freq': cols[15].get_text().strip(), + } + + if ID not in tupIDdict: + tupIDdict[ID] = [] + product_info['version'] = len(tupIDdict[ID]) + 1 + tupIDdict[ID].append(product_info) print("Detail link found") i = soup.find_all("input", value = "Show Next 100 Rows") - #print(i) - - #PrivErr = soup.find_all(href=re.compile('div class="red-content" align="center"')) - - return tupIDdict, len(i)!=0#, len(PrivErr)!=0 -#def errorFunction(PrivErr): - #if(len(PrivErr)!=0): - #print("This information is private and cannot be viewed") - #else: - #print("Public info: function working") + return tupIDdict, len(i)!=0 # Request details page def get_attachment_urls(detail_url): r = s.get(fcc_url + detail_url) soup = BeautifulSoup(r.text, "html.parser") - - - #PrivErr = soup.find_all(href=re.compile('There are no attachments for public review associated with this application')) rs_tables = soup("table", id="rsTable") - #print(PrivErr) + if len(rs_tables) == 0: print("No results available") return [] if len(rs_tables) != 1: raise Exception("Error, found %d results tables" % len(rs_tables)) - #else: - #print("") a_tags = rs_tables[0].find_all("a", href=re.compile("/eas/GetApplicationAttachment.html")) links = [(tag.string, tag['href']) for tag in a_tags] @@ -146,33 +135,30 @@ def get_attachment_urls(detail_url): # Fetch files and pack in to archive -def fetch_and_pack(attachments, dirname, referer): - os.makedirs(dirname) +def fetch_and_pack(attachments, dir_name, referer): + os.makedirs(dir_name) for (name, url) in attachments: print("Fetching %s" % name) r = s.get(fcc_url + url, headers=dict(Referer=referer)) + extension = r.headers['content-type'].split('/')[-1] filename = name + '.' + extension print("Writing %s" % filename) - #print(lot[1] for lot in referer) - ####change folder name here - - with open(dirname + '/' + filename, 'wb') as handle: + with open(dir_name + '/' + filename, 'wb') as handle: for chunk in r.iter_content(): handle.write(chunk) -def load_next(fccid): - appid, productid = parse_fccid(fccid) - html_doc = lookup_fccid(appid, productid) - #print(html_doc) +def load_next(fcc_id): + app_id, product_id = parse_fcc_id(fcc_id) + html_doc = lookup_fcc_id(app_id, product_id) productData = {} - productData, morePages = parse_search_results(html_doc, productData) #error after more + productData, morePages = parse_search_results(html_doc, productData) FromRec = 101 while morePages: - #print("looping") - html_doc = lookup_fccid(appid, productid, FromRec) + html_doc = lookup_fcc_id(app_id, product_id, FromRec) productData, morePages = parse_search_results(html_doc, productData) FromRec += 100 - #print("here") print(len(productData)) + return productData + diff --git a/pyFCC/fccDB.py b/pyFCC/fccDB.py index a6c9a27..1ffa9b4 100644 --- a/pyFCC/fccDB.py +++ b/pyFCC/fccDB.py @@ -1,5 +1,5 @@ import sqlite3 -from pyFCC.archive import parse_fccid +from pyFCC.archive import parse_fcc_id # creates a sqlite database for use with grantee data def create_grantee_table(): @@ -35,33 +35,31 @@ def create_product_table(): high_freq text, low_freq text, version text, - UNIQUE(grantee_code, product_code, version))''') #version doesn't currently have anything + UNIQUE(grantee_code, product_code, version))''') conn.commit() c.close() print("Product table created in FCC.db") # populates an existing database table with grantee data -def populate_grantees(granteeTest): +def populate_grantees(grantee_test): conn = sqlite3.connect('FCC.db') c = conn.cursor() - c.executemany('INSERT INTO grantees VALUES (?,?,?,?,?,?,?,?,?,?)', granteeTest) + c.executemany('INSERT INTO grantees VALUES (?,?,?,?,?,?,?,?,?,?)', grantee_test) conn.commit() c.close() print("Grantee Table populated in FCC.db") # populates an existing database table with product data -def populate_products(productsTest): - productList = [] - for key, value in productsTest.items(): - for version, row in enumerate(value, 1): - detail_url, ID, low, high = row - appid, productid = parse_fccid(ID) - row = (appid, productid, detail_url, high, low, version) - productList.append(row) +def populate_products(product_test): + product_list = [] + for key, value in product_test.items(): + for row in value: + dbValues = (row['grantee_code'], row['product_code'], row['url'], row['low_freq'], row['high_freq'], row['version']) + product_list.append(dbValues) conn = sqlite3.connect('FCC.db') c = conn.cursor() - c.executemany('INSERT OR IGNORE INTO products VALUES (?,?,?,?,?,?)', productList) + c.executemany('INSERT OR IGNORE INTO products VALUES (?,?,?,?,?,?)', product_list) conn.commit() c.close() print("Product Table populated in FCC.db") diff --git a/update_archive.py b/update_archive.py new file mode 100755 index 0000000..ca3f83e --- /dev/null +++ b/update_archive.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +from pyFCC.archive import get_attachment_urls, parse_fcc_id, load_next, fetch_and_pack#, errorFunction +from pyFCC.fccDB import create_product_table, populate_products +import sys + +if __name__ == '__main__': + if len(sys.argv) <2: + print("Usage: archive.py ") + sys.exit(1) + for fcc_id in sys.argv[1:]: + print("Looking up FCC id: %s" % fcc_id) + product_data = load_next(fcc_id) + + for key, value in product_data.items(): + for row in value: + print("Fetching result %d" % row['version']) + attachments = get_attachment_urls(row['url']) + + if attachments: + dir_name = "%s/%s/%d" % (row['grantee_code'], row['product_code'], row['version']) + fetch_and_pack(attachments, dir_name, row['url']) + + create_product_table() + populate_products(product_data) + diff --git a/update_grantees.py b/update_grantees.py index 09858f8..93879cb 100755 --- a/update_grantees.py +++ b/update_grantees.py @@ -15,7 +15,7 @@ try: grantees = parse_grantees() except FileNotFoundError: - print("No local xml file found.") + print("No local xml file found") print("Use '--help' for help") print("Downloading grantee data...") fetch_grantees_xml()