In [1]:
%%capture
# Install cron and twint (from source), and upgrade fastai.
# Absolute /content paths are used for the clone so this cell does not depend
# on the notebook's current working directory.
!apt install  -qq cron
!pip install -Uqq fastai
# Remove any clone left behind by an earlier, interrupted run; git refuses to
# clone into an existing non-empty directory.
!rm -rf /content/twint
!git clone --depth=1 https://github.com/twintproject/twint.git /content/twint
!cd /content/twint && pip install -qq . -r requirements.txt
# NOTE(review): aiohttp is pinned after twint's requirements are installed,
# presumably for twint compatibility -- confirm the pin is still needed.
!pip install aiohttp==3.7.0
# The checkout is only needed for the installation step.
!rm -rf /content/twint

In [2]:
# Create the directory where the scraped CSVs are stored.
# -p keeps the cell re-runnable; the absolute path matches the output
# location used by scrape_tweets.py (/content/csvs).
!mkdir -p /content/csvs

In [3]:
%%writefile scrape_tweets.py
#!/usr/bin/env python
import twint
import nest_asyncio
from datetime import datetime
import pandas as pd

nest_asyncio.apply()

# Configure the twint search: query "apple", language "en", Limit 10,
# with results stored as CSV.
c = twint.Config()
c.Limit = 10
c.Lang = "en"
c.Store_csv = True
c.Search = "apple"
# Give every run its own file. A compact timestamp (no spaces or colons) keeps
# the name usable unquoted in shell commands such as `head`/`tail`, and the
# names still sort chronologically.
run_stamp = datetime.now().strftime("%Y%m%dT%H%M%S")
c.Output = f"/content/csvs/en_apple-{run_stamp}.csv"
twint.run.Search(c)

In [4]:
# Start the cron service so that installed crontab entries get executed
!service cron start

In [9]:
%%writefile crontab.e
# Run the tweet scraper every minute; append its stdout and stderr to /content/cron.log
* * * * * python3 /content/scrape_tweets.py >> /content/cron.log 2>&1



In [10]:
# Install crontab.e as the current user's crontab
!crontab crontab.e

In [11]:
# List the installed crontab to confirm the entry was registered
!crontab -l

In [12]:
# Wait two minutes so the every-minute cron job has had a chance to run,
# then list the csvs directory to see what was written.
!sleep 120
!ls csvs

In [None]:
# Show the last 10 lines of the scraper's log (stdout and stderr of the cron job)
!tail -n 10 /content/cron.log

# Exercise 20.1

### Train and save your classifier here

Use this section to train an RNN to classify text by sentiment using a dataset of your choice.
We don't particularly care about the predictive performance since accuracy isn't the goal of this lesson, but it should perform better than random chance.

<!-- startquestion -->

In [None]:
# Create datasets/dataloaders

In [None]:
# Train or fine-tune a model

In [None]:
# Save the model

# Exercise 20.2

### Write your inference Python file in the cell below

The cell below writes out a python file that should be executed every minute after tweets are collected.
This script should:

* Load the trained model from Exercise 20.1
* Load the most recent CSV of tweets into a DataFrame
* Make inferences on the tweets in the data from the previous step, storing results in a `"sentiment"` column
* Count the negative and positive tweets for the log
* Write the DataFrame with the additional column back to the original CSV

Once this .py file is complete, we will update our crontab so that this script runs every minute right after tweets are scraped.

<!-- startquestion -->

In [None]:
%%writefile inference.py
# Imports
import pandas as pd
from datetime import datetime

def load_model():
    """Return the trained model used for inference.

    Exercise placeholder: until it is implemented, calling it always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError

def load_most_recent_csv():
    """Return the newest CSV of tweet data for inference, plus its path.

    The caller unpacks the result as ``data, path``.

    Exercise placeholder: until it is implemented, calling it always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError

def add_sentiment_inference_column(data, model):
    """Add a 'sentiment' column to ``data`` using the model's inferences.

    Exercise placeholder: until it is implemented, calling it always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError

def count_neg_pos_tweets(data):
    """Return the number of negative tweets and the number of positive tweets.

    The caller unpacks the result in that order (negative first).

    Exercise placeholder: until it is implemented, calling it always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError

def write_results(data, path):
    """
    Overwrites the CSV file from the dataframe
    that includes the inference results.

    Exercise placeholder: raises ``NotImplementedError`` until implemented,
    like the other stubs in this file, so that an unfinished pipeline fails
    loudly instead of silently skipping the write.
    """
    raise NotImplementedError()

def main():
    """Run one inference pass over the most recently scraped tweets.

    Loads the model and the most recent CSV, adds the "sentiment" column,
    logs how many tweets were negative, and writes the annotated data back
    to the CSV it was loaded from.
    """
    # Load the trained model
    print(f'{datetime.now()}: Loading model...')
    model = load_model()

    # Load the most recent CSV
    print(f'{datetime.now()}: Loading CSV...')
    data, path = load_most_recent_csv()

    # Write predictions to a new column called "sentiment"
    print(f'{datetime.now()}: Making inferences...')
    data_with_inferences = add_sentiment_inference_column(data, model)

    # Print a statement with the number of negative tweets found for our logs
    n_negative_tweets, n_positive_tweets = count_neg_pos_tweets(data_with_inferences)
    # The total is the number of rows that were scored; it was previously
    # referenced without ever being defined, which raised a NameError.
    n_total_tweets = len(data_with_inferences)
    print(f'{datetime.now()}: We recorded {n_negative_tweets} negative tweets out of {n_total_tweets} tweets ')

    # Write the annotated data (not the raw input) back to the original file,
    # so the "sentiment" column is saved even if the helper returned a copy.
    write_results(data_with_inferences, path)

if __name__ == "__main__":
    main()

In [None]:
%%writefile crontab.e
# Every minute: scrape tweets, then run inference only if the scrape succeeded.
# Chaining both commands in one entry makes inference start after the scrape
# has finished; two separate "* * * * *" entries are started at the same time.
* * * * * python3 /content/scrape_tweets.py >> /content/cron.log 2>&1 && python3 /content/inference.py >> /content/inference.log 2>&1



In [None]:
# Install the updated crontab.e as the current user's crontab
!crontab crontab.e

In [None]:
# List the installed crontab to confirm the update was registered
!crontab -l

# Exercise 20.3

In this exercise, you will inspect at least one output CSV to check whether the pipeline is working as expected.
In the cells below, please:

* use the `!head` and `!tail` bash commands to inspect the most recent CSV. Does it contain the `sentiment` column?
* load the most recent CSV with `pandas`. Take a look at a few of the inferences. Did your model perform OK in a qualitative sense? Note that we shouldn't expect perfect performance if the training set wasn't tweets. 

<!-- startquestion -->

In [None]:
# Your work here