In [80]:
import nltk
import os
import pandas as pd
import warnings
import re
warnings.filterwarnings('ignore')

from nltk.probability import FreqDist
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfTransformer

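# One-time setup: run these commands in a Python console to download the NLTK data used below
# import nltk
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('wordnet')  # corpus required by WordNetLemmatizer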

In [81]:
# Locate the scraped-articles CSV relative to the current working directory.
# os.path.join picks the right separator for the host OS instead of a
# hard-coded '/'.
articlePath = os.path.join(os.getcwd(), 'Articles', 'Article_Info.csv')
articles = pd.read_csv(articlePath)

# Preview the first rows to confirm the file loaded as expected.
articles.head()

Unnamed: 0,URL,Title,Author,Date_Published,Time_Published,Article
0,https://www.ibtimes.com/tesla-news-can-model-3...,Tesla News: Can Model 3 Deliveries Keep Rising?,Daniel Sparks,06/29/19,1:11 AM,"[""Next week,\xa0Tesla\xa0(NASDAQ:TSLA) investo..."
1,https://www.ibtimes.com/how-elon-musk-will-cel...,This Is How Elon Musk Will Celebrate His 48th ...,Dawn Geske,06/28/19,12:09 PM,['As Elon Musk celebrates his 48th birthday on...
2,https://www.ibtimes.com/spacex-ceo-elon-musk-m...,SpaceX CEO Elon Musk Mocked For Tweet Mixing U...,Wesley Dockery,06/24/19,1:36 PM,['Tesla (TSLA) and SpaceX CEO Elon Musk was sl...
3,https://www.ibtimes.com/tesla-news-ceo-elon-mu...,Tesla News: CEO Elon Musk Said He Deleted His ...,Wesley Dockery,06/17/19,12:19 PM,['Tesla (TSLA) CEO Elon Musk on Monday morning...
4,https://www.ibtimes.com/where-will-tesla-be-1-...,Where Will Tesla Be In 1 Year?,John Bromels,06/16/19,10:14 PM,['To hear\xa0Tesla\xa0(NASDAQ:TSLA)\xa0CEO Elo...


In [82]:
# Parse the Date_Published strings into datetime64 values, then order the
# rows chronologically (oldest article first).
articles = (
    articles
    .assign(Date_Published=lambda frame: pd.to_datetime(frame['Date_Published']))
    .sort_values(by='Date_Published')
)

In [83]:
# Summarize the frame: index range, per-column dtypes and non-null counts.
articles.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 567 entries, 566 to 0
Data columns (total 6 columns):
URL               567 non-null object
Title             567 non-null object
Author            567 non-null object
Date_Published    567 non-null datetime64[ns]
Time_Published    567 non-null object
Article           567 non-null object
dtypes: datetime64[ns](1), object(5)
memory usage: 31.0+ KB


In [84]:
# Show the first five rows now that the frame is sorted by Date_Published.
articles.head()

Unnamed: 0,URL,Title,Author,Date_Published,Time_Published,Article
566,https://www.ibtimes.com/tesla-raise-185-millio...,"Tesla to raise $185 million from IPO, private ...",IBT Staff Reporter,2010-06-16,2:47 AM,"[""The company said the IPO of 11.1 million sha..."
565,https://www.ibtimes.com/tesla-increases-size-i...,Tesla increases size of IPO by 20 percent,IBT Staff Reporter,2010-06-28,2:56 PM,"[""Electric carmaker Tesla Motors raised the nu..."
564,https://www.ibtimes.com/top-pre-market-nasdaq-...,"Top Pre-Market NASDAQ Gainers (MGIC, TSLA, DEC...",Balasubramanyam Seshan,2010-11-24,1:55 PM,['The top pre-market NASDAQ stock market gaine...
563,https://www.ibtimes.com/top-pre-market-nasdaq-...,"Top Pre-Market NASDAQ Losers (SBAY, ONTY, ASEI...",Balasubramanyam Seshan,2010-12-23,2:03 PM,"[""The top pre-market NASDAQ stock market loser..."
561,https://www.ibtimes.com/top-pre-market-nasdaq-...,"Top Pre-Market NASDAQ Gainers (SNTS, MNKD, LOG...",Balasubramanyam Seshan,2010-12-28,1:23 PM,['The top pre-market NASDAQ stock market gaine...


In [85]:
# Contraction -> expansion table. Built once at definition time instead of on
# every call to replace_contractions.
_CONTRACTIONS = {
    "ain't": "am not",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    "he'll've": "he will have",
    "he's": "he is",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how has",
    "I'd": "I would",
    "I'd've": "I would have",
    "I'll": "I will",
    "I'll've": "I will have",
    "I'm": "I am",
    "I've": "I have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so is",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when has",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where has",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who has",
    "who've": "who have",
    "why's": "why has",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have",
}

# Lower-cased index so lookups succeed regardless of the caller's casing
# (e.g. "i'm" after .lower(), or "Don't" at the start of a sentence).
_CONTRACTIONS_BY_LOWER = {key.lower(): value for key, value in _CONTRACTIONS.items()}


def replace_contractions(word):
    """Expand a single English contraction into its full form.

    Args:
        word: One token, e.g. "can't". Matching ignores letter case.

    Returns:
        The expansion from the contraction table (e.g. "cannot") when the
        token is a known contraction; otherwise ``word`` unchanged.
    """
    # dict.get with a fallback: a plain contractions[word] lookup raises
    # KeyError for every token that is not a contraction.
    return _CONTRACTIONS_BY_LOWER.get(word.lower(), word)

In [86]:
def clean_articles(article):
    """Strip punctuation and other non-word characters from an article.

    Every run of characters that is not a letter, digit or underscore is
    collapsed into a single space, and leading/trailing whitespace is removed.
    No contraction expansion, stemming or lemmatization is performed here.

    Args:
        article: The raw article text as a string.

    Returns:
        The cleaned text as a string, so the caller can store it back.
    """
    # Raw string: '\W' in a normal string literal is an invalid escape
    # sequence and triggers a DeprecationWarning on modern Python.
    return re.sub(r'\W+', ' ', article).strip()

In [87]:
# Smoke-test the cleaner on the first (earliest) article only.
#
# The result is kept in its own variable rather than assigned through
# `articles.iloc[i].Article = ...`: `.iloc[i]` hands back a temporary row copy,
# so that assignment never reaches the DataFrame.
#
# To clean every row, assign the result of
# `articles['Article'].apply(clean_articles)` to a column directly, and only
# when clean_articles returns the cleaned text.
first_article_cleaned = (
    clean_articles(articles['Article'].iloc[0]) if len(articles) else None
)
first_article_cleaned

 The company said the IPO of 11 1 million shares will be priced at 14 to 16 per share Tesla becomes the first U S company to go public in more than half a century since Henry Ford s Ford Motor Co made its share debut in 1956 The IPO represents a landmark in the resurgence of electric car technology that most car makers until recently had dismissed as impractical Immediately after close of the IPO Toyota will purchase 50 million of Tesla shares at the IPO price the company said in a regulatory filing Toyota s move is expected to give the Japanese automaker a chance to repair its dented public image and vault the California start up on to the world stage The David and Goliath handshake highlights a changing dynamic in the auto industry where newcomers such as Tesla and BYD Co of China are challenging established players in the uncharted field of mass produced all electric vehicles Tesla aims to tap Toyota s expertise in mass production while Toyota wants to win back public support hurt b