# Natural Language Processing (NLP) Process

This notebook will outline the process of NLP techniques for analysis on text data.

In [7]:
#Import the relevand section of the NLP library
from nltk import word_tokenize
from nltk import sent_tokenize
import nltk
nltk.download('punkt')

from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import TSNE

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

[nltk_data] Downloading package punkt to /Users/Alex/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [8]:
# I will be using the press release of the iPad Air from Apple
# https://www.apple.com/newsroom/2019/03/all-new-ipad-air-and-ipad-mini-deliver-dramatic-power-and-capability/

title = "All-new iPad Air and iPad mini deliver dramatic power and capability"
first_sentence = "Apple today introduced the all-new iPad Air in an ultra-thin 10.5-inch design, offering the latest innovations including Apple Pencil support and high-end performance at a breakthrough price."
second_sentence = "With the A12 Bionic chip with Apple’s Neural Engine, the new iPad Air delivers a 70 percent boost in performance and twice the graphics capability, and the advanced Retina display with True Tone technology is nearly 20 percent larger with over half a million more pixels."
third_sentence = "Apple today also introduced the new 7.9-inch iPad mini, a major upgrade for iPad mini fans who love a compact, ultra-portable design packed with the latest technology."
fourth_sentence = "With the A12 Bionic chip, the new iPad mini is a powerful multi-tasking machine, delivering three times the performance and nine times faster graphics."
fifth_sentence = "The advanced Retina display with True Tone technology and wide color support is 25 percent brighter and has the highest pixel density of any iPad, delivering an immersive visual experience in any setting."
sixth_sentence = "And with Apple Pencil support, the new iPad mini is the perfect take-anywhere notepad for sketching and jotting down thoughts on the go."
seventh_sentence = "The new iPads are available to order starting today and in stores next week."

## Cleaning The Text Data

In [16]:
def clean_sentence(sentence):
    cleaned_sentence = []
    for line in sentence:
        if not '[' in line and  not ']' in line:
            for symbol in ",.?!'-":
                line = line.replace(symbol, '').lower()
            cleaned_sentence.append(line)

    return cleaned_sentence

In [17]:
clean_sentence(first_sentence)

['a',
 'p',
 'p',
 'l',
 'e',
 ' ',
 't',
 'o',
 'd',
 'a',
 'y',
 ' ',
 'i',
 'n',
 't',
 'r',
 'o',
 'd',
 'u',
 'c',
 'e',
 'd',
 ' ',
 't',
 'h',
 'e',
 ' ',
 'a',
 'l',
 'l',
 '',
 'n',
 'e',
 'w',
 ' ',
 'i',
 'p',
 'a',
 'd',
 ' ',
 'a',
 'i',
 'r',
 ' ',
 'i',
 'n',
 ' ',
 'a',
 'n',
 ' ',
 'u',
 'l',
 't',
 'r',
 'a',
 '',
 't',
 'h',
 'i',
 'n',
 ' ',
 '1',
 '0',
 '',
 '5',
 '',
 'i',
 'n',
 'c',
 'h',
 ' ',
 'd',
 'e',
 's',
 'i',
 'g',
 'n',
 '',
 ' ',
 'o',
 'f',
 'f',
 'e',
 'r',
 'i',
 'n',
 'g',
 ' ',
 't',
 'h',
 'e',
 ' ',
 'l',
 'a',
 't',
 'e',
 's',
 't',
 ' ',
 'i',
 'n',
 'n',
 'o',
 'v',
 'a',
 't',
 'i',
 'o',
 'n',
 's',
 ' ',
 'i',
 'n',
 'c',
 'l',
 'u',
 'd',
 'i',
 'n',
 'g',
 ' ',
 'a',
 'p',
 'p',
 'l',
 'e',
 ' ',
 'p',
 'e',
 'n',
 'c',
 'i',
 'l',
 ' ',
 's',
 'u',
 'p',
 'p',
 'o',
 'r',
 't',
 ' ',
 'a',
 'n',
 'd',
 ' ',
 'h',
 'i',
 'g',
 'h',
 '',
 'e',
 'n',
 'd',
 ' ',
 'p',
 'e',
 'r',
 'f',
 'o',
 'r',
 'm',
 'a',
 'n',
 'c',
 'e',
 ' ',
 'a'