diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..599312a --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +countries_org-scraper \ No newline at end of file diff --git a/.idea/countries_org-scraper.iml b/.idea/countries_org-scraper.iml new file mode 100644 index 0000000..c956989 --- /dev/null +++ b/.idea/countries_org-scraper.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..d821048 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..8662aa9 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..a98155f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 0000000..922003b --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..072c3cc --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,209 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1415308284218 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/scraper.py b/scraper.py index d8a7c35..9947a63 100644 
# Hunk from the scraper.py diff (--- a/scraper.py +++ b/scraper.py @@ -21,3 +21,29 @@),
# reflowed here as plain, runnable Python.
# on Morph for Python (https://github.com/openaustralia/morph-docker-python/blob/master/pip_requirements.txt) and all that matters
# is that your final data is written to an Sqlite database called data.sqlite in the current working directory which
# has at least a table called data.
import scraperwiki
import lxml.html

# Download the country-code listing and parse it into an element tree.
html = scraperwiki.scrape("http://countrycode.org/")
root = lxml.html.fromstring(html)

# Walk every body row of the main table. The 1-based row position is stored
# as 'countryCodeUnique' because several countries can share a dialling code.
for row_number, tr in enumerate(root.cssselect("#main_table_blue tbody tr"), start=1):
    tds = tr.cssselect("td")

    # Rows without the three expected cells (name, ISO codes, dialling code)
    # cannot be parsed; skip them instead of raising IndexError.
    if len(tds) < 3:
        continue

    iso = tds[1].text_content()
    country_code = tds[2].text_content()

    # The ISO cell is read as "<alpha-2> / <alpha-3>"; partition() yields an
    # empty ISO3 rather than an IndexError when the separator is missing.
    iso2, _, iso3 = iso.partition('/')

    data = {
        'name': tds[0].text_content().strip(),
        # NOTE(review): int() raises ValueError if the cell is not a plain
        # integer (e.g. contains a dash or space) -- confirm against the page.
        'countryCode': int(country_code),
        'countryCodeUnique': row_number,
        'ISO2': iso2.strip(),
        'ISO3': iso3.strip(),
    }

    # Persist the row; the header comment above requires the results to end
    # up in a table called "data" inside data.sqlite.
    scraperwiki.sqlite.save(unique_keys=['countryCodeUnique'], data=data)