In [1]:
import sqlite3
import pandas as pd
import json
import os
import matplotlib.pyplot as plt
from IPython.display import Markdown, display
import calendar


def printmd(string):
    """Show ``string`` in the cell output rendered as Markdown.

    Wrapping the text in ``Markdown`` before handing it to ``display`` lets
    headings, lists and tables in ``string`` render as formatted output
    instead of plain text, which makes the notebook output more print ready.

    Parameters
    ----------
    string : str
        Markdown source to render.
    """
    rendered = Markdown(string)
    display(rendered)

In [2]:
# Make sure the `tweets` table (Elon Musk's tweets) exists and starts out empty.
conn = sqlite3.connect("twitterTesla.db")
c = conn.cursor()

# Schema of the tweets table.
# NOTE(review): created_at is declared INTEGER, but the cell that loads the
# tweets writes calendar dates into it -- confirm the intended column type.
sql_create = """
CREATE TABLE IF NOT EXISTS tweets (
    id_str TEXT PRIMARY KEY,
    created_at INTEGER,
    full_text TEXT
);
"""

# Wipes rows left behind by a previous run of the notebook.
sql_delete = "DELETE FROM tweets"

try:
    # The connection context manager commits when both statements succeed
    # and rolls back if either one raises.
    with conn:
        for statement in (sql_create, sql_delete):
            c.execute(statement)
finally:
    # Release the cursor and the database handle regardless of the outcome.
    c.close()
    conn.close()

In [3]:
# Make sure the `tesla` table (daily stock prices) exists and starts out empty.
conn = sqlite3.connect("twitterTesla.db")
c = conn.cursor()

# Schema of the stock quote table: one row per quote date.
sql_create = """
CREATE TABLE IF NOT EXISTS tesla (
    id INTEGER PRIMARY KEY,
    date TEXT,
    open DECIMAL(6,2),
    close DECIMAL(6,2),
    high DECIMAL(6,2),
    low DECIMAL(6,2)
);
"""

# Wipes rows left behind by a previous run of the notebook.
sql_delete = "DELETE FROM tesla"

try:
    # The connection context manager commits when both statements succeed
    # and rolls back if either one raises.
    with conn:
        for statement in (sql_create, sql_delete):
            c.execute(statement)
finally:
    # Release the cursor and the database handle regardless of the outcome.
    c.close()
    conn.close()

In [4]:
# A SQLite db file will continue to grow in size after any deletes or drops.
# VACUUM rebuilds the database file and reduces its size.
# You can also just remove the db file and recreate it.
# http://www.sqlitetutorial.net/sqlite-vacuum/
conn = sqlite3.connect("twitterTesla.db")
try:
    conn.execute("VACUUM")
finally:
    # Always release the database handle so later cells do not compete with
    # a connection that was left open here.
    conn.close()

<sqlite3.Cursor at 0x11d3e3ea0>

In [5]:
# Load the Tesla stock quotes from CSV into the `tesla` table.
filepath = 'csv/tslaquotes.csv'

# Read only the columns that exist in the `tesla` table; `date` is parsed
# into datetimes so the time component can be dropped below.
stock_df = pd.read_csv(filepath, parse_dates=['date'], usecols=['date', 'close', 'open', 'high', 'low'])

# drop any time values
stock_df['date'] = stock_df['date'].dt.date

# append the data to the 'tesla' table in 'twitterTesla.db'
conn = sqlite3.connect("twitterTesla.db")
try:
    stock_df.to_sql("tesla", conn, if_exists="append", index=False)
    # Commit explicitly so the rows are durable before the handle is closed.
    conn.commit()
finally:
    # The connection used to be left open; close it even if the load fails.
    conn.close()

In [6]:
# Load the tweets (one JSON document per line) into the `tweets` table.
# The file is opened as UTF-8 explicitly because the tweet text contains
# non-ASCII characters and the platform default encoding may differ.
# Blank lines are skipped so stray empty lines do not crash json.loads.
with open(os.path.join('json/', 'musktweets.json'), encoding='utf-8') as json_file:
    tweets = [json.loads(line) for line in json_file if line.strip()]

# Keep only the fields that map onto columns of the `tweets` table.
tweet_df = pd.DataFrame(tweets, columns=['id_str','created_at','full_text'])

# Drop time values and change date format to %Y-%m-%d to match stock quotes
tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'], format='%a %b %d %H:%M:%S +0000 %Y', utc=True)
tweet_df['created_at'] = tweet_df['created_at'].dt.date

# append the data to the 'tweets' table in 'twitterTesla.db'
conn = sqlite3.connect("twitterTesla.db")
try:
    tweet_df.to_sql("tweets", conn, if_exists="append", index=False)
    # Commit explicitly so the rows are durable before the handle is closed.
    conn.commit()
finally:
    # The connection used to be left open; close it even if the load fails.
    conn.close()

In [11]:
# Summarise the loaded data: tweet count, date spans of both tables, tweets
# per month, and tweets joined to the stock quote for a single day.
conn = sqlite3.connect("twitterTesla.db")
c = conn.cursor()


# SQL STATEMENTS
# Number of rows in the tweets table.
total_count_sql = """
SELECT count(*) as total_count
FROM tweets
"""
# Earliest and latest tweet date.
tweet_date_span = """
SELECT MIN(created_at) as min_date, MAX(created_at) as max_date
FROM tweets
"""
# Earliest and latest stock quote date.
stock_date_span = """
SELECT MIN(date) as min_date, MAX(date) as max_date
FROM tesla
"""
# Tweets per calendar month, in chronological order.
month_counts = """
SELECT strftime('%m', created_at) as month, strftime('%Y', created_at) as year, count(*) as month_count
FROM tweets
GROUP BY month, year
ORDER BY year, month
"""
# Tweets and quotes for 2018-08-07, built from two LEFT JOINs glued together
# with UNION ALL. String literals use single quotes: SQLite resolves
# double-quoted tokens as identifiers first and only falls back to treating
# them as strings.
# NOTE(review): both halves require s.date and t.created_at to be non-NULL,
# so each half yields the same matched rows and every row comes back twice.
# Confirm whether an outer join or a plain inner join was intended.
full_outer_join = """
SELECT t.full_text,
        t.created_at,
        s.date,
        s.open,
        s.close
FROM tweets t
LEFT JOIN tesla s ON t.created_at = s.date
WHERE s.date IS NOT NULL AND t.created_at IS NOT NULL AND s.date='2018-08-07'
UNION ALL
SELECT t.full_text,
        t.created_at,
        s.date,
        s.open,
        s.close
FROM tesla s
LEFT JOIN tweets t ON t.created_at = s.date
WHERE s.date IS NOT NULL AND t.created_at IS NOT NULL AND s.date='2018-08-07';
"""
# Quote joined to tweets for 2018-08-07.
# NOTE(review): GROUP BY date together with SELECT * collapses all tweets of
# the day into a single row -- confirm that this is intended.
tweet_and_stock_sql = """
SELECT *
FROM tesla a inner join tweets t ON t.created_at = a.date
WHERE a.date = '2018-08-07'
GROUP BY date
ORDER BY date
"""

# EXECUTE THE QUERIES
try:
    total_count = c.execute(total_count_sql).fetchone()[0]
    twitter_date_span = c.execute(tweet_date_span).fetchone()
    tesla_date_span = c.execute(stock_date_span).fetchone()
    month_count = c.execute(month_counts).fetchall()
    combined = c.execute(tweet_and_stock_sql).fetchall()
    full_outer_join_command = c.execute(full_outer_join).fetchall()
finally:
    # All results are fetched into memory above, so the cursor and the
    # connection can be released before anything is printed.
    c.close()
    conn.close()



# FORMATTED PRINT STATEMENTS
printmd(f"### Total Tweets: {total_count:,}")
printmd(f"""
### Date Span of Tweets
- Earliest Tweet: {twitter_date_span[0]}
- Latest Tweet: {twitter_date_span[1]}
""")
printmd(f"""
### Date Span of Tesla Quotes
- Earliest Quote: {tesla_date_span[0]}
- Latest Quote: {tesla_date_span[1]}
""")
print_records_by_month = """### Tweets by Month
|   Month   |    Year   |  Tweets   |
|-----------|-----------|-----------|
"""
# One Markdown table row per (month, year, count) result row.
print_records_by_month += "".join(
    f"| {row[0]} | {row[1]} |{row[2]:,} |\n" for row in month_count
)

printmd(print_records_by_month)

print(combined[0:5])
print(len(combined))
print("***************************")
print(full_outer_join_command[:5])
print(len(full_outer_join_command))


### Total Tweets: 3,204


### Date Span of Tweets
- Earliest Tweet: 2016-10-10
- Latest Tweet: 2018-09-11



### Date Span of Tesla Quotes
- Earliest Quote: 2015-10-22
- Latest Quote: 2018-11-07


### Tweets by Month
|   Month   |    Year   |  Tweets   |
|-----------|-----------|-----------|
| 10 | 2016 |62 |
| 11 | 2016 |82 |
| 12 | 2016 |50 |
| 01 | 2017 |107 |
| 02 | 2017 |135 |
| 03 | 2017 |117 |
| 04 | 2017 |46 |
| 05 | 2017 |131 |
| 06 | 2017 |214 |
| 07 | 2017 |133 |
| 08 | 2017 |138 |
| 09 | 2017 |55 |
| 10 | 2017 |92 |
| 11 | 2017 |43 |
| 12 | 2017 |102 |
| 01 | 2018 |57 |
| 02 | 2018 |101 |
| 03 | 2018 |122 |
| 04 | 2018 |93 |
| 05 | 2018 |419 |
| 06 | 2018 |399 |
| 07 | 2018 |309 |
| 08 | 2018 |168 |
| 09 | 2018 |29 |


[(55, '2018-08-07', 343.84, 379.57, 387.46, 339.1501, '1026914941004001280', '2018-08-07', 'Investor support is confirmed. Only reason why this is not certain is that it’s contingent on a shareholder vote. https://t.co/bIH4Td5fED')]
1
***************************
[('@EvotoRentals Yes', '2018-08-07', '2018-08-07', 343.84, 379.57), ('@FredericLambert I don’t have a controlling vote now &amp; wouldn’t expect any shareholder to have one if we go private. I won’t be selling in either scenario.', '2018-08-07', '2018-08-07', 343.84, 379.57), ('@FredericLambert No change', '2018-08-07', '2018-08-07', 343.84, 379.57), ('@Gfilche My hope is *all* current investors remain with Tesla even if we’re private. Would create special purpose fund enabling anyone to stay with Tesla. Already do this with Fidelity’s SpaceX investment.', '2018-08-07', '2018-08-07', 343.84, 379.57), ('@LizClaman @Tesla 420', '2018-08-07', '2018-08-07', 343.84, 379.57)]
30
