Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonchanhku committed Oct 16, 2022
1 parent 7b7d71a commit 9b4087f
Showing 1 changed file with 6 additions and 18 deletions.
24 changes: 6 additions & 18 deletions scraper.py
@@ -1,24 +1,12 @@
# This is a template for a Python scraper on morph.io (https://morph.io)
# including some code snippets below that you should find helpful

import scraperwiki
import lxml.html
import pandas as pd
import sqlite3

# Read in a page
#html = scraperwiki.scrape("http://foo.com")

# Find something on the page using css selectors
#root = lxml.html.fromstring(html)
#root.cssselect("div[align='left']")
# Small two-row example table, declared column by column.
df = pd.DataFrame({
    'letter': ['c', 'd'],
    'number': [3, 4],
    'animal': ['cat', 'dog'],
})

# Store one example record via the scraperwiki library, keyed on "name".
scraperwiki.sqlite.save(
    unique_keys=['name'],
    data={
        "name": "susan",
        "occupation": "software developer",
    },
)

# An arbitrary query against the database.
# The column is referenced as a bare identifier: writing it as 'name' would
# make SQLite treat it as a string literal, so the predicate would compare the
# strings 'name' and 'susan' and never match a row.
scraperwiki.sql.select("* from data where name='susan'")

# You don't have to do things with the ScraperWiki and lxml libraries.
# You can use whatever libraries you want: https://morph.io/documentation/python
# All that matters is that your final data is written to an SQLite database
# called "data.sqlite" in the current working directory which has at least a table
# called "data".
# Write the DataFrame to the "data" table of data.sqlite (if_exists='replace'
# drops any existing table of that name first), then release the connection.
conn = sqlite3.connect('data.sqlite')
try:
    df.to_sql('data', conn, if_exists='replace')
    # sqlite3 does not commit implicitly on close(), so commit explicitly
    # to make sure the write is durable before the handle is released.
    conn.commit()
finally:
    # Always close the database handle, even if the write raised.
    conn.close()

0 comments on commit 9b4087f

Please sign in to comment.