In [1]:
import sqlite3
import pandas as pd
import json
import os
import matplotlib.pyplot as plt
from IPython.display import Markdown, display
import calendar


def printmd(string):
    """Render ``string`` as Markdown in the cell output.

    Lets output use Markdown syntax (headings, emphasis, ...) instead of the
    plain text a print statement produces, which makes it more print-ready.
    """
    rendered = Markdown(string)
    display(rendered)

In [2]:
conn = sqlite3.connect("twitterTesla.db")
c = conn.cursor()

# Create the `tweets` table that holds Elon Musk's tweets.
# `created_at` is declared TEXT because the tweet loader stores calendar dates
# (not integers) in it, matching the TEXT `date` column of the `tesla` table.
# NOTE: CREATE TABLE IF NOT EXISTS never alters an existing table, so a
# database file built by an earlier run keeps its old column declaration until
# the file is removed and recreated.

sql_create = """
CREATE TABLE IF NOT EXISTS tweets (
    id_str TEXT PRIMARY KEY,
    created_at TEXT,
    full_text TEXT
);
"""

# remove any data from a previous run
sql_delete = "DELETE FROM tweets"

try:
    c.execute(sql_create)
    c.execute(sql_delete)
    conn.commit()
except sqlite3.Error:
    # Only undo work when something actually failed; a rollback right after a
    # successful commit has nothing to undo.
    conn.rollback()
    raise
finally:
    # Always release the cursor and the database file handle.
    c.close()
    conn.close()

In [3]:
# Open the project database and grab a cursor for the statements below.
conn = sqlite3.connect("twitterTesla.db")
c = conn.cursor()

# Schema of the `tesla` table, which stores the stock prices.
sql_create = """
CREATE TABLE IF NOT EXISTS tesla (
    id INTEGER PRIMARY KEY,
    date TEXT,
    open DECIMAL(6,2),
    close DECIMAL(6,2),
    high DECIMAL(6,2),
    low DECIMAL(6,2)
);
"""

# Clears out rows left behind by a previous run.
sql_delete = "DELETE FROM tesla"

try:
    # Make sure the table exists, empty it, then persist both steps.
    for statement in (sql_create, sql_delete):
        c.execute(statement)
    conn.commit()
finally:
    # Runs on success and on failure: drop anything uncommitted, then release
    # the cursor and the connection.
    conn.rollback()
    c.close()
    conn.close()

In [4]:
# A SQLite db file does not shrink on its own after deletes or drops.
# VACUUM rebuilds the database file and reduces its size.
# You can also just remove the db file and recreate it.
# http://www.sqlitetutorial.net/sqlite-vacuum/
conn = sqlite3.connect("twitterTesla.db")
try:
    # Executed as a statement inside the try block, so the cell no longer
    # echoes the returned Cursor object as its output.
    conn.execute("VACUUM")
finally:
    # Release the file handle; the following cells open their own connections.
    conn.close()

<sqlite3.Cursor at 0x11e9daea0>

In [5]:
# load stock into sqlite table
filepath = 'csv/tslaquotes.csv'

# read csv file, keeping only the columns the `tesla` table has
stock_df = pd.read_csv(filepath, parse_dates=['date'], usecols=['date', 'close', 'open', 'high', 'low'])

# drop any time values
stock_df['date'] = stock_df['date'].dt.date

# Open the connection only once the data is ready, and always close it so the
# database file handle is not left open for the rest of the kernel session.
conn = sqlite3.connect("twitterTesla.db")
try:
    # append the data to the 'tesla' table in 'twitterTesla.db'
    # (append keeps the table definition created earlier; re-running this cell
    # without first clearing the table adds the rows again)
    stock_df.to_sql("tesla", conn, if_exists="append", index=False)
finally:
    conn.close()

In [6]:
# Read the tweets file, which holds one JSON document per line, into a list
# used to build the data frame.
# The encoding is given explicitly so that non-ASCII tweet text does not depend
# on the platform default; blank lines are skipped instead of crashing json.loads.
with open(os.path.join('json/', 'musktweets.json'), encoding='utf-8') as json_file:
    tweets = [json.loads(line) for line in json_file if line.strip()]

# Keep only the three fields that the `tweets` table stores.
tweet_df = pd.DataFrame(tweets, columns=['id_str','created_at','full_text'])

# Parse the timestamps as UTC, then drop the time values so only the calendar
# date remains, matching how the stock quote dates are stored.
tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'], format='%a %b %d %H:%M:%S +0000 %Y', utc=True)
tweet_df['created_at'] = tweet_df['created_at'].dt.date

# Open the connection only once the data is ready, and always close it so the
# database file handle is not left open for the rest of the kernel session.
conn = sqlite3.connect("twitterTesla.db")
try:
    # append the data to the 'tweets' table in 'twitterTesla.db'
    tweet_df.to_sql("tweets", conn, if_exists="append", index=False)
finally:
    conn.close()

In [7]:
conn = sqlite3.connect("twitterTesla.db")
c = conn.cursor()

# Count every row in the `tweets` table.
total_count_sql = """
SELECT count(*) as total_count
FROM tweets
"""

try:
    # count(*) always yields exactly one row; its first column is the total.
    total_count = c.execute(total_count_sql).fetchone()[0]
finally:
    # Release the cursor and connection, as the table-creation cells do.
    c.close()
    conn.close()

printmd(f"### Total Tweets: {total_count:,}")

### Total Tweets: 3,204