# Bojack Horeseman and Netflix Stock Prices

### Instructions


* **Extraction**

  * Put each CSV into a pandas DataFrame.

* **Transform**

  * Copy only the columns needed into a new DataFrame.

  * Rename columns to fit the tables created in the database.

  * Handle any duplicates. **HINT:** some locations have the same name but each license number is unique.

  * Set index to the previously created primary key.

* **Load**

  * Create a connection to database.

  * Check for a successful connection to the database and confirm that the tables have been created.

  * Append DataFrames to tables. Be sure to use the index set earlier.



In [1]:
import pandas as pd
from sqlalchemy import create_engine
import os

### Read *Bojack Horeseman Season 1-5 Release Date and Ratings* CSV

In [3]:
bojack_csv = os.path.join("..", "Resources", "Bojack_Ratings.csv")
bojack_df = pd.read_csv(bojack_csv)

bojack_df["Release Date"] = bojack_df['AIR MONTH'].map(str) + "/" + bojack_df['AIR DAY'].map(str) + "/" + bojack_df['YEAR'].map(str)

bojack_df.head()

Unnamed: 0,SEASON,EPISODE,EPISODE TIME (NETFLIX),RATING (IMDB),YEAR,AIR MONTH,AIR DAY,Release Date
0,1,1,25.38,7.1,2014,8,22,8/22/2014
1,1,2,25.34,7.7,2014,8,22,8/22/2014
2,1,3,25.59,7.4,2014,8,22,8/22/2014
3,1,4,25.36,7.4,2014,8,22,8/22/2014
4,1,5,25.32,7.4,2014,8,22,8/22/2014


### Read *Netflix* CSV

In [4]:
netflix_csv = os.path.join("..", "Resources", "NFLX_Stock.csv")
netflix_df = pd.read_csv(netflix_csv)

netflix_df.head()

Unnamed: 0,year,date,open,high,low,close,volume
0,2014,1/2/14,52.4014,52.5114,51.5429,51.8314,12325600
1,2014,1/3/14,52.0,52.4956,51.8429,51.8714,10817100
2,2014,1/6/14,51.89,52.0443,50.4757,51.3671,15501500
3,2014,1/7/14,49.6843,49.6986,48.1529,48.5,36167600
4,2014,1/8/14,48.1043,49.4257,48.0743,48.7129,20001100


In [37]:
netflix_df['Month']=[int(d.split('/')[0]) for d in netflix_df.date]
netflix_df['Day']=[int(d.split('/')[1]) for d in netflix_df.date]
netflix_df['Year']=[int(d.split('/')[2]) for d in netflix_df.date]

netflix_df.head()

Unnamed: 0,year,date,open,high,low,close,volume,Month,Day,Year
0,2014,1/2/14,52.4014,52.5114,51.5429,51.8314,12325600,1,2,14
1,2014,1/3/14,52.0,52.4956,51.8429,51.8714,10817100,1,3,14
2,2014,1/6/14,51.89,52.0443,50.4757,51.3671,15501500,1,6,14
3,2014,1/7/14,49.6843,49.6986,48.1529,48.5,36167600,1,7,14
4,2014,1/8/14,48.1043,49.4257,48.0743,48.7129,20001100,1,8,14


### Transform Bojack DataFrame

In [38]:
clean_bojack_df = bojack_df[["SEASON","EPISODE","RATING (IMDB)","YEAR","Release Date"]].copy()

# Rename the column headers
clean_bojack_df = clean_bojack_df.rename(columns={"Season": "SEASON",
                                                    "Episode": "EPISODE",
                                                    "IMDB Rating": "RATING (IMDB)",
                                                    "Year": "YEAR"})
# Display new df
clean_bojack_df.head(100)

Unnamed: 0,SEASON,EPISODE,RATING (IMDB),YEAR,Release Date
0,1,1,7.1,2014,8/22/2014
1,1,2,7.7,2014,8/22/2014
2,1,3,7.4,2014,8/22/2014
3,1,4,7.4,2014,8/22/2014
4,1,5,7.4,2014,8/22/2014
5,1,6,7.9,2014,8/22/2014
6,1,7,8.0,2014,8/22/2014
7,1,8,8.2,2014,8/22/2014
8,1,9,7.9,2014,8/22/2014
9,1,10,7.8,2014,8/22/2014


### Transform Netflix Stock DataFrame

In [41]:
# Create new dataFrame with specific columns
clean_netflix_df = netflix_df[["date","Month", "Day","year","open","high","low","close"]].copy()


# Rename the column headers
clean_netflix_df = clean_netflix_df.rename(columns={"date": "Date",
                                                    "Year": "Year",
                                                    "open": "Opening Price",
                                                    "high": "High Price",
                                                    "low": "Low Price",
                                                    "close": "Closing Price"})
# Display clean Netflix df
clean_netflix_df.head()

Unnamed: 0,Date,Month,Day,year,Opening Price,High Price,Low Price,Closing Price
0,1/2/14,1,2,2014,52.4014,52.5114,51.5429,51.8314
1,1/3/14,1,3,2014,52.0,52.4956,51.8429,51.8714
2,1/6/14,1,6,2014,51.89,52.0443,50.4757,51.3671
3,1/7/14,1,7,2014,49.6843,49.6986,48.1529,48.5
4,1/8/14,1,8,2014,48.1043,49.4257,48.0743,48.7129


### Create database connection

In [42]:
pg_user = 'postgres'
pg_password = 'postgres'
db_name = 'bojack_db'

connection_string = f"{pg_user}:{pg_password}@localhost:5432/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')

### Confirm tables

In [43]:
engine.table_names()

[]

### Load Clean Netflix dataFrame into into bojack_db

In [44]:
clean_netflix_df.to_sql(name='clean_netflix_db', con=engine, if_exists='append', index=True)

### Load Stock Prices from 1 week prior/after S1 release

In [47]:
pd.read_sql_query('select * from clean_netflix_db where year = 2014 and "Month" = 8 and "Day" between 8 and 29', con=engine).head(11)

Unnamed: 0,index,Date,Month,Day,year,Opening Price,High Price,Low Price,Closing Price
0,151,8/8/14,8,8,2014,64.3143,64.4243,63.15,63.6929
1,152,8/11/14,8,11,2014,64.1743,65.3786,64.1014,64.5057
2,153,8/12/14,8,12,2014,64.4786,64.7143,63.3443,63.7729
3,154,8/13/14,8,13,2014,64.0857,64.8879,63.8314,64.5043
4,155,8/14/14,8,14,2014,64.6014,65.0,64.0314,64.41
5,156,8/15/14,8,15,2014,64.4986,66.0,64.0857,65.5843
6,157,8/18/14,8,18,2014,66.0086,67.0714,65.8929,66.5714
7,158,8/19/14,8,19,2014,66.7271,67.2143,66.0614,66.8786
8,159,8/20/14,8,20,2014,66.7143,67.6786,66.363,67.4557
9,160,8/21/14,8,21,2014,67.3257,68.0214,66.8101,67.4357


### Load Stock Prices from 1 week prior/after S2 release

In [50]:
pd.read_sql_query('select * from clean_netflix_db where year = 2015 and "Month" = 7 and "Day" between 10 and 24', con=engine).head(11)

Unnamed: 0,index,Date,Month,Day,year,Opening Price,High Price,Low Price,Closing Price
0,382,7/10/15,7,10,2015,97.5229,98.5028,96.9,97.2286
1,383,7/13/15,7,13,2015,98.0986,102.3086,98.0788,101.0871
2,384,7/14/15,7,14,2015,101.2714,101.6357,99.6529,100.3714
3,385,7/15/15,7,15,2015,99.97,100.75,97.05,98.13
4,386,7/16/15,7,16,2015,111.02,116.49,107.68,115.81
5,387,7/17/15,7,17,2015,117.34,117.88,114.24,114.77
6,388,7/20/15,7,20,2015,114.7,114.7,110.14,110.55
7,389,7/21/15,7,21,2015,110.21,113.71,109.32,112.51
8,390,7/22/15,7,22,2015,112.14,113.88,110.5608,111.5
9,391,7/23/15,7,23,2015,110.91,112.18,109.84,110.1


### Load Stock Prices from 1 week prior/after S3 release

In [51]:
pd.read_sql_query('select * from clean_netflix_db where year = 2016 and "Month" = 7 and "Day" between 15 and 29', con=engine).head(11)

Unnamed: 0,index,Date,Month,Day,year,Opening Price,High Price,Low Price,Closing Price
0,638,7/15/16,7,15,2016,98.52,98.7,97.41,98.39
1,639,7/18/16,7,18,2016,98.43,99.84,97.24,98.81
2,640,7/19/16,7,19,2016,85.43,86.75,84.5,85.84
3,641,7/20/16,7,20,2016,86.67,88.49,85.82,87.91
4,642,7/21/16,7,21,2016,88.3,88.38,85.21,85.99
5,643,7/22/16,7,22,2016,86.48,86.5,85.11,85.89
6,644,7/25/16,7,25,2016,85.73,87.87,85.01,87.66
7,645,7/26/16,7,26,2016,91.03,93.1,90.9,91.41
8,646,7/27/16,7,27,2016,91.5,92.06,90.1,92.04
9,647,7/28/16,7,28,2016,91.92,92.21,90.68,91.65


### Load Stock Prices from 1 week prior/after S4 release

In [52]:
pd.read_sql_query('select * from clean_netflix_db where year = 2017 and "Month" = 9 and "Day" between 1 and 15', con=engine).head(11)

Unnamed: 0,index,Date,Month,Day,year,Opening Price,High Price,Low Price,Closing Price
0,924,9/1/17,9,1,2017,175.55,176.48,173.92,174.74
1,925,9/5/17,9,5,2017,173.4,175.88,172.44,174.52
2,926,9/6/17,9,6,2017,175.25,179.46,173.73,179.25
3,927,9/7/17,9,7,2017,178.8,180.35,177.1,179.0
4,928,9/8/17,9,8,2017,178.45,180.39,176.25,176.42
5,929,9/11/17,9,11,2017,178.1,182.47,178.03,181.74
6,930,9/12/17,9,12,2017,182.55,185.33,180.6435,185.15
7,931,9/13/17,9,13,2017,184.07,184.4995,182.55,183.64
8,932,9/14/17,9,14,2017,183.25,185.2882,182.07,182.63
9,933,9/15/17,9,15,2017,182.73,184.93,181.43,182.35


### Load Stock Prices from 1 week prior/after S5 release

In [53]:
pd.read_sql_query('select * from clean_netflix_db where year = 2017 and "Month" = 9 and "Day" between 7 and 21', con=engine).head(11)

Unnamed: 0,index,Date,Month,Day,year,Opening Price,High Price,Low Price,Closing Price
0,927,9/7/17,9,7,2017,178.8,180.35,177.1,179.0
1,928,9/8/17,9,8,2017,178.45,180.39,176.25,176.42
2,929,9/11/17,9,11,2017,178.1,182.47,178.03,181.74
3,930,9/12/17,9,12,2017,182.55,185.33,180.6435,185.15
4,931,9/13/17,9,13,2017,184.07,184.4995,182.55,183.64
5,932,9/14/17,9,14,2017,183.25,185.2882,182.07,182.63
6,933,9/15/17,9,15,2017,182.73,184.93,181.43,182.35
7,934,9/18/17,9,18,2017,183.61,185.45,182.73,184.62
8,935,9/19/17,9,19,2017,184.98,186.23,184.17,185.68
9,936,9/20/17,9,20,2017,186.1,186.5,183.2,185.51
