## TensorFlow Hub

TensorFlow Hub provides reusable components (modules): TensorFlow graphs that can be reused across other, similar tasks.

[Sentiment Analysis on the IMDB dataset](http://ai.stanford.edu/~amaas/data/sentiment/)

In [6]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

In [5]:
import seaborn

ModuleNotFoundError: No module named 'seaborn'

In [2]:
# constants

n_classes = 2 # Positive, Negative
# presumably the hidden-layer sizes of the classifier -- not used in the
# cells visible here, TODO confirm
hidden_units = [500, 100]
# presumably the optimizer learning rate -- TODO confirm
lr = 1e-4

#### Acquire and preprocess the review data

In [15]:
dir_name = 'imdb_set'

def fetch_data():
    """
        Downloads the required data files

        Fetches the IMDB review archive through
        ``tf.keras.utils.get_file`` (stored under the name ``dir_name``,
        with ``extract=True``).

        Returns:
            The value returned by ``tf.keras.utils.get_file`` -- per the
            Keras documentation, the path of the downloaded file.
    """
    data_path = tf.keras.utils.get_file(
        fname=dir_name,
        origin='http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz',
        extract=True
    )

    # Hand the path back to the caller; without this the function returned
    # None and nothing downstream could locate the downloaded data.
    return data_path

In [None]:
# Download the data
# d_path holds whatever fetch_data() returns; the next cell builds the
# train/test directory paths from it.

d_path = fetch_data()

In [None]:
# Get train and test data frames
# NOTE: load_data is defined in a later cell of this notebook, so that cell
# has to be executed before this one.
# NOTE(review): assumes the reviews sit under <d_path>/<dir_name>/train and
# <d_path>/<dir_name>/test -- confirm against where get_file actually puts
# the extracted archive.

train_frame = load_data(os.path.join(d_path, dir_name, 'train'))
test_frame = load_data(os.path.join(d_path, dir_name, 'test'))

In [12]:
# File naming format: <id>_<sentiment>.txt, e.g. 12_3.txt -> sentiment: 3

def create_frame(dir_path):
    """
        Creates a DataFrame from the loaded files in
        the given directory path

        Only files named ``<digits>_<digits>.txt`` are read; the second
        number is stored as the sentiment. Any other directory entry is
        skipped.

        Args:
            dir_path: local directory containing the review files.

        Returns:
            pandas.DataFrame with a 'description' column (the file
            contents) and a 'sentiment' column (the captured digits,
            kept as a string).
    """
    data = {'description': [],
            'sentiment': []
            }
    # Raw string so the regex escapes are not parsed as (invalid) string escapes
    pattern = re.compile(r'\d+_(\d+)\.txt')

    # sorted() makes the row order independent of the OS listing order
    for file in sorted(os.listdir(dir_path)):
        match = pattern.match(file)
        if match is None:
            # Not a review file -- nothing to extract a sentiment from
            continue

        # os.listdir yields bare names, so resolve them against dir_path.
        # The directory is necessarily local (os.listdir), so the builtin
        # open() with an explicit encoding is sufficient here.
        with open(os.path.join(dir_path, file), encoding='utf-8') as f:
            data['description'].append(f.read())
        data['sentiment'].append(match.group(1))

    return pd.DataFrame.from_dict(data)

In [14]:
def load_data(data_path, random_state=None):
    """
        Gets the positive and negative data samples
        and concatenates them

        Args:
            data_path: directory holding the 'pos' and 'neg'
                sub-directories of review files.
            random_state: optional seed forwarded to ``DataFrame.sample``
                so the shuffle can be reproduced. ``None`` (the default)
                keeps the previous unseeded behaviour.

        Returns:
            A shuffled DataFrame with the columns produced by
            ``create_frame`` plus 'polarity' (1 for rows from 'pos',
            0 for rows from 'neg') and a fresh 0..n-1 index.
    """

    positive_frame = create_frame(os.path.join(data_path, 'pos'))
    neg_frame = create_frame(os.path.join(data_path, 'neg'))

    # Add polarities
    positive_frame['polarity'] = 1
    neg_frame['polarity'] = 0

    frame = pd.concat([positive_frame, neg_frame])

    # Shuffle and insert new numerical index
    shuffled = frame.sample(frac=1, random_state=random_state)
    return shuffled.reset_index(drop=True)