In [25]:
%%writefile tracks_db1.py

import sqlite3

#To use the module, you must first create a Connection object that represents the database. The reason this is called a "connection" is that sometimes the database is stored on a separate "database server" from the server on which we are running our application
conn = sqlite3.connect('music.sqlite')
# Once you have a Connection, you can create a Cursor object and call its execute() method to perform SQL commands
cur = conn.cursor()

#  the SQL keywords in uppercase and the parts of the command that we are adding (such as the table and column names) will be shown in lowercase.
# The first SQL command removes the Tracks table from the database if it exists.no undo. every thing gets deleted
cur.execute('DROP TABLE IF EXISTS Tracks')
cur.execute('CREATE TABLE Tracks (title TEXT, plays INTEGER)')
#commit the changes
conn.commit()

#? is a placeholder much like %s or %d in C

cur.execute('INSERT INTO Tracks (title, plays) VALUES (?, ?)',
    ('Thunderstruck', 20))
cur.execute('INSERT INTO Tracks (title, plays) VALUES (?, ?)',
    ('My Way', 15))
conn.commit()

print('Tracks:')
cur.execute('SELECT title, plays FROM Tracks')
for row in cur:
     print(row)

cur.execute('DELETE FROM Tracks WHERE plays > 100')
conn.commit()


conn.close()

Overwriting tracks_db1.py


In [24]:
# %load tracks_db1.py

import sqlite3

#To use the module, you must first create a Connection object that represents the database. The reason this is called a "connection" is that sometimes the database is stored on a separate "database server" from the server on which we are running our application
conn = sqlite3.connect('music.sqlite')
# Once you have a Connection, you can create a Cursor object and call its execute() method to perform SQL commands
cur = conn.cursor()

#  the SQL keywords in uppercase and the parts of the command that we are adding (such as the table and column names) will be shown in lowercase.
# The first SQL command removes the Tracks table from the database if it exists.no undo. every thing gets deleted
cur.execute('DROP TABLE IF EXISTS Tracks')
cur.execute('CREATE TABLE Tracks (title TEXT, plays INTEGER)')
#commit the changes
conn.commit()

#? is a placeholder much like %s or %d in C

cur.execute('INSERT INTO Tracks (title, plays) VALUES (?, ?)',
    ('Thunderstruck', 20))
cur.execute('INSERT INTO Tracks (title, plays) VALUES (?, ?)',
    ('My Way', 15))
conn.commit()

print('Tracks:')
cur.execute('SELECT title, plays FROM Tracks')
for row in cur:
     print(row)

cur.execute('DELETE FROM Tracks WHERE plays > 100')
conn.commit()


conn.close()

Tracks:
('Thunderstruck', 20)
('My Way', 15)


In [28]:
conn = sqlite3.connect('music.sqlite')
cur = conn.cursor()

cur.execute("SELECT * FROM Tracks WHERE title = 'My Way'")
for row in cur:
    print(row)

('My Way', 15)


In [30]:
cur.execute("SELECT title,plays FROM Tracks ORDER BY title")
for row in cur:
    print(row)

('My Way', 15)
('Thunderstruck', 20)


In [40]:
cur.execute("DELETE FROM Tracks WHERE title = 'My Way'")

<sqlite3.Cursor at 0x2746f9b08f0>

In [38]:
cur.execute("INSERT INTO Tracks (title,plays) VALUES (?,?)",('My Way',10))
cur.execute("UPDATE Tracks SET plays = 16 WHERE title = 'My Way'")

<sqlite3.Cursor at 0x2746f9b08f0>

In [41]:
cur.execute("SELECT title,plays FROM Tracks ORDER BY title")
for row in cur:
    print(row)

('My Way', 16)
('Thunderstruck', 20)


## Spidering Twitter using a database


One of the problems of any kind of spidering program is that it needs to be able to be stopped and restarted many times and you do not want to lose the data that you have retrieved so far. You don't want to always restart your data retrieval at the very beginning so we want to store data as we retrieve it so our program can start back up and pick up where it left off.

twurl is a gem from ruby. we use [rython](https://pypi.org/project/rython/) to wrap around that gem and import it into python

In [6]:
# !pip install rython

In [5]:
# import rython
# ctx = rython.RubyContext(requires=["rubygems", "twurl"])
# but rython has syntax errors, oh man!

In [42]:
%%writefile twitter_spidering.py

from urllib.request import urlopen
import urllib.error
import twurl
import json
import sqlite3
import ssl

TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

conn = sqlite3.connect('spider.sqlite')
cur = conn.cursor()

cur.execute('''
            CREATE TABLE IF NOT EXISTS Twitter
            (name TEXT, retrieved INTEGER, friends INTEGER)''')

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

while True:
    acct = input('Enter a Twitter account, or quit: ')
    if (acct == 'quit'): break
    if (len(acct) < 1):
        cur.execute('SELECT name FROM Twitter WHERE retrieved = 0 LIMIT 1')
        try:
            acct = cur.fetchone()[0]
        except:
            print('No unretrieved Twitter accounts found')
            continue

    url = twurl.augment(TWITTER_URL, {'screen_name': acct, 'count': '5'})
    print('Retrieving', url)
    connection = urlopen(url, context=ctx)
    data = connection.read().decode()
    headers = dict(connection.getheaders())

    print('Remaining', headers['x-rate-limit-remaining'])
    js = json.loads(data)
    # Debugging
    # print json.dumps(js, indent=4)

    cur.execute('UPDATE Twitter SET retrieved=1 WHERE name = ?', (acct, ))

    countnew = 0
    countold = 0
    for u in js['users']:
        friend = u['screen_name']
        print(friend)
        cur.execute('SELECT friends FROM Twitter WHERE name = ? LIMIT 1',
                    (friend, ))
        try:
            count = cur.fetchone()[0]
            cur.execute('UPDATE Twitter SET friends = ? WHERE name = ?',
                        (count+1, friend))
            countold = countold + 1
        except:
            cur.execute('''INSERT INTO Twitter (name, retrieved, friends)
                        VALUES (?, 0, 1)''', (friend, ))
            countnew = countnew + 1
    print('New accounts=', countnew, ' revisited=', countold)
    conn.commit()

cur.close()

Writing twitter_spidering.py


In [56]:
# a python wrapper around twitter
# !pip install python-twitter
# gem install twurl

In [60]:
# %load twitter_spidering.py

from urllib.request import urlopen
import urllib.error
import twitter as twurl
import json
import sqlite3
import ssl

TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

conn = sqlite3.connect('spider.sqlite')
cur = conn.cursor()

cur.execute('''
            CREATE TABLE IF NOT EXISTS Twitter
            (name TEXT, retrieved INTEGER, friends INTEGER)''')

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

while True:
    acct = input('Enter a Twitter account, or quit: ')
    if (acct == 'quit'): break
    if (len(acct) < 1):
        cur.execute('SELECT name FROM Twitter WHERE retrieved = 0 LIMIT 1')
        try:
            acct = cur.fetchone()[0]
        except:
            print('No unretrieved Twitter accounts found')
            continue

    url = twurl.augment(TWITTER_URL, {'screen_name': acct, 'count': '5'})
    print('Retrieving', url)
    connection = urlopen(url, context=ctx)
    data = connection.read().decode()
    headers = dict(connection.getheaders())

    print('Remaining', headers['x-rate-limit-remaining'])
    js = json.loads(data)
    # Debugging
    # print json.dumps(js, indent=4)

    cur.execute('UPDATE Twitter SET retrieved=1 WHERE name = ?', (acct, ))

    countnew = 0
    countold = 0
    for u in js['users']:
        friend = u['screen_name']
        print(friend)
        cur.execute('SELECT friends FROM Twitter WHERE name = ? LIMIT 1',
                    (friend, ))
        try:
            count = cur.fetchone()[0]
            cur.execute('UPDATE Twitter SET friends = ? WHERE name = ?',
                        (count+1, friend))
            countold = countold + 1
        except:
            cur.execute('''INSERT INTO Twitter (name, retrieved, friends)
                        VALUES (?, 0, 1)''', (friend, ))
            countnew = countnew + 1
    print('New accounts=', countnew, ' revisited=', countold)
    conn.commit()

cur.close()

Enter a Twitter account, or quit: @sudheernaidu19


AttributeError: module 'twitter' has no attribute 'augment'