Skip to content

Commit

Permalink
Merge pull request #7 from GravesJake/master
Browse files Browse the repository at this point in the history
Store product data in DB
  • Loading branch information
dominicgs committed Jun 15, 2017
2 parents 3d48972 + 8328647 commit 5051e24
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 109 deletions.
33 changes: 0 additions & 33 deletions archive.py

This file was deleted.

110 changes: 48 additions & 62 deletions pyFCC/archive.py
Expand Up @@ -11,8 +11,8 @@

s = requests.Session()

# Perform FCC id search
def lookup_fccid(appid, productid, FromRec = 1):
# Perform FCC ID search
def lookup_fcc_id(app_id, product_id, FromRec = 1):
payload = {
"application_status" : "",
"applicant_name" : "",
Expand Down Expand Up @@ -51,92 +51,81 @@ def lookup_fccid(appid, productid, FromRec = 1):
"calledFromFrame" : "Y",
"comments" : "",
"show_records" : "100",
#if soup.text = <input class="button-content" name = "next_value" value="show Next 25 Rows"
"grantee_code" : appid,
"product_code" : productid,
"grantee_code" : app_id,
"product_code" : product_id,
"FromRec" : FromRec
}
r = s.post(fcc_url + product_search_url, data=payload)
print("FCC id lookup complete")
print("FCC ID lookup complete")
return r.text

# Format app_id and product_id correctly
def parse_fcc_id(app_id=None, product_id=None):
    """Split an FCC ID into a (grantee code, product code) pair.

    The grantee-code length is chosen from the first character of
    ``app_id``: 3 characters when it is an ASCII letter, 5 when it is a
    digit.  Anything in ``app_id`` beyond that length is moved to the
    front of ``product_id``, so a full FCC ID may be passed as ``app_id``.

    Args:
        app_id: Grantee code, or a full FCC ID string.
        product_id: Product code; ``None`` is treated as ``''``.

    Returns:
        An ``(app_id, product_id)`` tuple, or ``None`` when ``app_id`` is
        ``None``, empty, or starts with a character that is neither an
        ASCII letter nor a digit.
    """
    # An empty string has no first character to inspect; treat it like a
    # missing ID instead of raising IndexError.
    if not app_id:
        return None
    if product_id is None:
        product_id = ''

    first = app_id[0]
    if first in string.ascii_letters:
        app_len = 3
    elif first in string.digits:
        app_len = 5
    else:
        return None

    if len(app_id) > app_len:
        product_id = app_id[app_len:] + product_id
        app_id = app_id[:app_len]
    return (app_id, product_id)

# Matches the href of the "Detail" (exhibit report) link in a results row.
# Raw string so that "\?" reaches the regex engine as an escaped "?".
_DETAIL_LINK_RE = re.compile(r"/oetcf/eas/reports/ViewExhibitReport.cfm\?mode=Exhibits")


# Parse search results page to find "Detail" links
def parse_search_results(html, tupIDdict):
    """Collect product records from one page of FCC search results.

    Every row of the ``rsTable`` table that contains a "Detail" link is
    turned into a dict with the keys ``grantee_code``, ``product_code``,
    ``url``, ``ID``, ``low_freq``, ``high_freq`` and ``version``.  The
    dict is appended to ``tupIDdict[ID]``; ``version`` is the 1-based
    position of the record within that list.

    Args:
        html: HTML text of a search-results page.
        tupIDdict: dict mapping full FCC ID -> list of product dicts.
            It is updated in place, so results from several pages can be
            accumulated in the same dict.

    Returns:
        ``(tupIDdict, more_pages)`` where ``more_pages`` is True when the
        page contains a "Show Next 100 Rows" button.

    Raises:
        Exception: if the page does not contain exactly one ``rsTable``.
    """
    soup = BeautifulSoup(html, "html.parser")
    rs_tables = soup("table", id="rsTable")
    if len(rs_tables) != 1:
        raise Exception("Error, found %d results tables" % len(rs_tables))

    for row in rs_tables[0].find_all("tr"):
        links = [link['href'] for link in row.find_all("a", href=_DETAIL_LINK_RE)]
        if not links:
            # Header or other row without a "Detail" link.
            continue

        cols = row.find_all("td")
        # Column layout used here: 11 = full FCC ID, 14 = lower frequency,
        # 15 = upper frequency.
        fcc_id = cols[11].get_text().strip()

        parsed = parse_fcc_id(fcc_id)
        if parsed is None:
            # parse_fcc_id signals an unusable ID with None; report the row
            # and move on instead of failing on tuple unpacking.
            print("Skipping row with unparseable FCC ID: %r" % fcc_id)
            continue
        grantee_code, product_code = parsed

        product_info = {
            'grantee_code': grantee_code,
            'product_code': product_code,
            'url': links[0],
            'ID': fcc_id,
            'low_freq': cols[14].get_text().strip(),
            'high_freq': cols[15].get_text().strip(),
        }

        versions = tupIDdict.setdefault(fcc_id, [])
        product_info['version'] = len(versions) + 1
        versions.append(product_info)

    # NOTE(review): printed once per page; confirm it was not meant to be
    # printed once per matching row.
    print("Detail link found")
    next_buttons = soup.find_all("input", value="Show Next 100 Rows")

    return tupIDdict, bool(next_buttons)

# Request details page
def get_attachment_urls(detail_url):
r = s.get(fcc_url + detail_url)
soup = BeautifulSoup(r.text, "html.parser")


#PrivErr = soup.find_all(href=re.compile('There are no attachments for public review associated with this application'))
rs_tables = soup("table", id="rsTable")
#print(PrivErr)

if len(rs_tables) == 0:
print("No results available")
return []
if len(rs_tables) != 1:
raise Exception("Error, found %d results tables" % len(rs_tables))
#else:
#print("")

a_tags = rs_tables[0].find_all("a", href=re.compile("/eas/GetApplicationAttachment.html"))
links = [(tag.string, tag['href']) for tag in a_tags]
Expand All @@ -146,33 +135,30 @@ def get_attachment_urls(detail_url):


# Fetch files and pack in to archive
def fetch_and_pack(attachments, dir_name, referer):
    """Download every attachment into the directory ``dir_name``.

    Args:
        attachments: iterable of ``(name, url)`` pairs; ``url`` is
            appended to ``fcc_url`` to build the download address.
        dir_name: directory the files are written to.  It is created if
            missing; an existing directory is reused so an interrupted
            run can be repeated.
        referer: value sent as the ``Referer`` header with each request.
    """
    os.makedirs(dir_name, exist_ok=True)
    for (name, url) in attachments:
        print("Fetching %s" % name)
        r = s.get(fcc_url + url, headers=dict(Referer=referer))

        # The header may carry parameters ("application/pdf; charset=...");
        # only the subtype is wanted as the file extension.
        content_type = r.headers['content-type'].split(';')[0].strip()
        extension = content_type.split('/')[-1]
        # Attachment names come from the remote page; keep path separators
        # out of the file name so the file always lands inside dir_name.
        filename = name.replace('/', '_') + '.' + extension
        print("Writing %s" % filename)

        with open(os.path.join(dir_name, filename), 'wb') as handle:
            # iter_content() defaults to 1-byte chunks; use a real buffer.
            for chunk in r.iter_content(chunk_size=8192):
                handle.write(chunk)

def load_next(fcc_id):
    """Look up an FCC ID and gather the product data from every result page.

    Args:
        fcc_id: full FCC ID string (grantee code followed by product code).

    Returns:
        dict mapping full FCC ID -> list of product dicts, as built by
        ``parse_search_results``.

    Raises:
        ValueError: if ``fcc_id`` cannot be split by ``parse_fcc_id``.
    """
    parsed = parse_fcc_id(fcc_id)
    if parsed is None:
        # Fail with a clear message instead of a TypeError from unpacking None.
        raise ValueError("Invalid FCC ID: %r" % (fcc_id,))
    app_id, product_id = parsed

    product_data = {}
    from_rec = 1  # index of the first record on the requested page
    more_pages = True
    while more_pages:
        html_doc = lookup_fcc_id(app_id, product_id, from_rec)
        product_data, more_pages = parse_search_results(html_doc, product_data)
        # Pages are requested 100 records at a time.
        from_rec += 100
    print(len(product_data))

    return product_data

24 changes: 11 additions & 13 deletions pyFCC/fccDB.py
@@ -1,5 +1,5 @@
import sqlite3
from pyFCC.archive import parse_fccid
from pyFCC.archive import parse_fcc_id

# creates a sqlite database for use with grantee data
def create_grantee_table():
Expand Down Expand Up @@ -35,33 +35,31 @@ def create_product_table():
high_freq text,
low_freq text,
version text,
UNIQUE(grantee_code, product_code, version))''') #version doesn't currently have anything
UNIQUE(grantee_code, product_code, version))''')
conn.commit()
c.close()
print("Product table created in FCC.db")

# populates an existing database table with grantee data
def populate_grantees(grantee_test):
    """Insert grantee rows into the ``grantees`` table of FCC.db.

    Args:
        grantee_test: iterable of 10-item sequences, one per row, given in
            the column order of the ``grantees`` table.
    """
    conn = sqlite3.connect('FCC.db')
    try:
        c = conn.cursor()
        c.executemany('INSERT INTO grantees VALUES (?,?,?,?,?,?,?,?,?,?)', grantee_test)
        conn.commit()
        c.close()
    finally:
        # Closing the cursor does not release the connection; close it
        # explicitly, including when the insert fails.
        conn.close()
    print("Grantee Table populated in FCC.db")

# populates an existing database table with product data
def populate_products(product_test):
    """Insert product records into the ``products`` table of FCC.db.

    Rows that collide with the table's UNIQUE constraint are skipped
    (``INSERT OR IGNORE``).

    Args:
        product_test: dict mapping full FCC ID -> list of product dicts
            with the keys ``grantee_code``, ``product_code``, ``url``,
            ``high_freq``, ``low_freq`` and ``version``.
    """
    # Values are bound by position, so they must follow the column order of
    # the products table, which declares high_freq before low_freq.
    product_list = [
        (row['grantee_code'], row['product_code'], row['url'],
         row['high_freq'], row['low_freq'], row['version'])
        for rows in product_test.values()
        for row in rows
    ]

    conn = sqlite3.connect('FCC.db')
    try:
        c = conn.cursor()
        c.executemany('INSERT OR IGNORE INTO products VALUES (?,?,?,?,?,?)', product_list)
        conn.commit()
        c.close()
    finally:
        # Closing the cursor does not release the connection; close it
        # explicitly, including when the insert fails.
        conn.close()
    print("Product Table populated in FCC.db")
Expand Down
26 changes: 26 additions & 0 deletions update_archive.py
@@ -0,0 +1,26 @@
#!/usr/bin/env python3

from pyFCC.archive import get_attachment_urls, parse_fcc_id, load_next, fetch_and_pack#, errorFunction
from pyFCC.fccDB import create_product_table, populate_products
import sys

if __name__ == '__main__':
    # Each command-line argument is an FCC ID to look up, download and store.
    if len(sys.argv) < 2:
        # Report the name this script was actually started with.
        print("Usage: %s <FCC id> [<FCC id> ...]" % sys.argv[0])
        sys.exit(1)
    for fcc_id in sys.argv[1:]:
        print("Looking up FCC id: %s" % fcc_id)
        product_data = load_next(fcc_id)

        for rows in product_data.values():
            for row in rows:
                print("Fetching result %d" % row['version'])
                attachments = get_attachment_urls(row['url'])

                if attachments:
                    # Files are stored under <grantee>/<product>/<version>.
                    dir_name = "%s/%s/%d" % (row['grantee_code'], row['product_code'], row['version'])
                    fetch_and_pack(attachments, dir_name, row['url'])

        # NOTE(review): stored once per looked-up ID so that every argument's
        # data reaches the DB; confirm create_product_table() is safe to call
        # repeatedly.
        create_product_table()
        populate_products(product_data)

2 changes: 1 addition & 1 deletion update_grantees.py
Expand Up @@ -15,7 +15,7 @@
try:
grantees = parse_grantees()
except FileNotFoundError:
print("No local xml file found.")
print("No local xml file found")
print("Use '--help' for help")
print("Downloading grantee data...")
fetch_grantees_xml()
Expand Down

0 comments on commit 5051e24

Please sign in to comment.