# Installation Instructions

Run the code below to confirm that all required packages are installed for each lesson and homework.

##### Author: Alex Sherman | alsherman@deloitte.com

### Lesson 1 - Configuration

In [None]:
from IPython.display import Image
from IPython.core.display import display, HTML
import configparser
from configparser import ConfigParser, ExtendedInterpolation

### Lesson 2 - Automation

In [None]:
from IPython.display import Image
from IPython.core.display import display, HTML
from configparser import ConfigParser, ExtendedInterpolation
import requests
from bs4 import BeautifulSoup
from urllib import robotparser

#### Bonus Material Selenium - Web Browser Automation

Installation:
- Download ChromeDriver: https://chromedriver.storage.googleapis.com/index.html?path=2.37/

In [None]:
# Shell instructions, kept as comments so this code cell stays runnable
# under "Restart & Run All" (the bare text was a SyntaxError).
#
# In Git Bash, type the following:
#
#   conda install -c conda-forge selenium
#   conda install -c conda-forge googlemaps

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import json
import googlemaps

#### Scrapy
import scrapy

In [None]:
# Shell instruction, kept as a comment so this code cell stays runnable
# under "Restart & Run All" (the bare text was a SyntaxError).
#
# Run the following from Git Bash:
#
#   scrapy runspider annual_reports_spider.py -o annual_reports.json

### Lesson 2 Homework - Automation

In [None]:
from configparser import ConfigParser, ExtendedInterpolation
import os
import time
import requests
from bs4 import BeautifulSoup
from IPython.core.display import display, HTML
from urllib import robotparser

### Lesson 3 - SQLAlchemy

##### Required Installation
In Git Bash, type the following to install package(s) for the lesson:
- pip install mysqlclient

In [None]:
import os
import pandas as pd
import datetime as dt
import zipfile
from IPython.core.display import display, HTML
from configparser import ConfigParser, ExtendedInterpolation
from sqlalchemy import create_engine
from sqlalchemy import func
from sqlalchemy import Column, Text, Integer
from sqlalchemy.sql import text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.engine import reflection
from sqlalchemy.ext.declarative import declarative_base

# Installation Sources:
# pandas: conda install -c anaconda pandas
# sqlalchemy: conda install -c anaconda sqlalchemy 

### Lesson 3 Homework - SQLAlchemy

In [None]:
import pandas as pd
from configparser import ConfigParser, ExtendedInterpolation
from sqlalchemy import Column, Text, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

### Lesson 4 - Text Extraction

##### Required Installation
In Git Bash, type the following to install package(s) for the lesson:
- conda install -c conda-forge pdfminer.six

In [None]:
import os
import subprocess
from IPython.display import Image
from IPython.core.display import display, HTML
from configparser import ConfigParser, ExtendedInterpolation
import docx
from bs4 import BeautifulSoup
import zipfile
import lxml

# Installation Sources:
# docx: conda install -c conda-forge python-docx 
# lxml: conda install -c anaconda lxml

### Lesson 4 Homework - Text Extraction

In [None]:
import hashlib
import os
import docx
from configparser import ConfigParser, ExtendedInterpolation

### Lesson 5 - AWS Data Processing

In [None]:
import os
import requests
import time
from bs4 import BeautifulSoup
from configparser import ConfigParser, ExtendedInterpolation
import boto3
import time
import PyPDF2


### Lesson 6 - Text Preprocessing

In [None]:
import os
from IPython.core.display import display, HTML
from configparser import ConfigParser, ExtendedInterpolation
import pandas as pd
from sqlalchemy import create_engine

### SpaCy

In [None]:
# Installation instructions, kept as comments so this code cell stays
# runnable under "Restart & Run All" (the bare bullet list was a SyntaxError).
# These are shell commands: run them in a terminal, not in this cell.
#
# Installation:
# - Windows: Download Microsoft Visual C++: http://landinghub.visualstudio.com/visual-cpp-build-tools
# - conda install -c conda-forge spacy
# - python -m spacy download en
#
# If you run into an error, try the following:
# - python -m spacy link en_core_web_sm en
# - SOURCE: https://github.com/explosion/spaCy/issues/950
#
# Optional - install a convolutional neural network model:
# - python -m spacy download en_core_web_lg

In [None]:
import sys
import spacy
from spacy import displacy
from spacy.lang.en.stop_words import STOP_WORDS
from collections import Counter
from spacy.matcher import Matcher
from collections import defaultdict
from spacy.pipeline import Pipe
import seaborn as sns

from numpy import nanmin, nanmax, zeros, NaN
from itertools import takewhile
from spacy.parts_of_speech import CONJ, DET, NOUN, VERB
from spacy.tokens.span import Span as SpacySpan
from IPython.display import Image
# Render the referenced PNG as this cell's output. The path is relative to the
# notebook's working directory; judging by the file name it is presumably a
# screenshot of a spaCy installation error - confirm against the image itself.
Image(
    "../raw_data/images/spacy_installation_error.png",
    width=500,
    height=700,
)

### Lesson 6 Homework - Text Preprocessing

In [None]:
# Installation walkthrough, kept as comments so this code cell stays runnable
# under "Restart & Run All" (the bare prose and shell text was a SyntaxError).
# None of the commands below are meant to be executed from this cell.
#
# SpaCy Installation
# ------------------
# Run the following using Git Bash as an administrator
# (i.e. right-click on the Git Bash logo and select 'Run as Admin'):
#
#   conda install -c conda-forge spacy
#   python -m spacy download en
#
# If you run into an error, try the following:
#
#   python -m spacy link en_core_web_sm en
#
# SOURCE: https://github.com/explosion/spaCy/issues/950
#
# Optional - install a convolutional neural network model:
#
#   python -m spacy download en_core_web_lg
#
# Test the following code from Git Bash (even if the previous step failed).
#
# Start Python:
#
#   python -i
#
# Test if SpaCy was downloaded:
#
#   import spacy
#
# Approach 1: test if the model downloaded:
#
#   nlp = spacy.load('en')
#
# Approach 2: test this if spacy.load('en') failed:
#
#   import en_core_web_sm
#   nlp = en_core_web_sm.load()
#
# Optional - install a convolutional neural network model (~800MB).
# This is the model I will use in class:
#
#   python -m spacy download en_core_web_lg
#
# Exit Python:
#
#   exit()
#
# Optional - install on an AWS EC2 instance
# -----------------------------------------
# Instance: Amazon Linux 2 LTS Candidate 2 AMI (HVM), SSD Volume Type
#
#   #!/bin/bash
#   sudo yum update -y
#   sudo yum groupinstall 'Development Tools' -y
#   sudo easy_install pip
#   sudo yum install python-devel -y
#   sudo pip install spacy
#   sudo python -m spacy download en_core_web_lg

In [None]:
import os
from IPython.core.display import display, HTML
from IPython.display import Image
from configparser import ConfigParser, ExtendedInterpolation
import pandas as pd
from sqlalchemy import create_engine
import sys
import spacy
from spacy import displacy
from spacy.lang.en.stop_words import STOP_WORDS
from collections import defaultdict
from collections import Counter
from itertools import combinations
import matplotlib.pyplot as plt
import networkx as nx
from spacy.matcher import Matcher
from spacy.pipeline import Pipe
%matplotlib inline
import seaborn as sns
from numpy import nanmin, nanmax, zeros, NaN
from itertools import takewhile
from spacy.parts_of_speech import CONJ, DET, NOUN, VERB
from spacy.tokens.span import Span as SpacySpan

### Lesson 7 Homework - Phrase Detection

In [None]:
import spacy
import pandas as pd
from sqlalchemy import create_engine
from spacy.matcher import Matcher
from spacy.matcher import PhraseMatcher
from collections import defaultdict
from spacy.lang.en.stop_words import STOP_WORDS
from IPython.core.display import display, HTML
from configparser import ConfigParser, ExtendedInterpolation
from spacy.lang.en.stop_words import STOP_WORDS
from itertools import combinations
from collections import defaultdict
from gensim.sklearn_api.phrases import PhrasesTransformer
from gensim.models.phrases import Phrases
from gensim.models.phrases import Phraser

### Lesson 8 Homework - Text Vectorization

In [None]:
from IPython.core.display import display, HTML
from IPython.display import Image
from gensim.summarization.bm25 import get_bm25_weights
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize
import seaborn as sns

### Lesson 9 Homework - Object Oriented Python

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
from collections import defaultdict 

