# Data Wrangling
Using the Alpha Vantage API to pull raw data and technical indicators for the model.

### Import Packages

In [1]:
import pandas as pd
import requests

In [2]:
api_key = 'ECI18FG0ET71Z3U0'

### Define functions to pull stock data

In [3]:
def get_stock_data(symbol, start_date='2010-01-01', end_date='2018-12-31'):
    
    """
    function to pull adjusted stock prices from the alphavantage API.
    
    symbol is a string representing a stock symbol, e.g. 'AAPL'
    
    start_date is the start date of the time series, defaults to '1/1/2010'
    
    end_date is the end date of the time series, defaults to '12/31/2018'
    
    start_date & end_date must be in string format
    """
    
    url = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=' + symbol + '&outputsize=full&apikey=' + api_key
        
    r = requests.get(url)
    
    df = r.json()
    
    df = pd.DataFrame(df['Time Series (Daily)']).T
    
    df = df[(df.index > start_date) & (df.index < end_date)]
 
    return df

### Create dataset

In [11]:
msft_df = get_stock_data('MSFT')

In [12]:
msft_df.head()

Unnamed: 0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient
2018-12-28,102.09,102.41,99.52,100.39,99.9609,38169312,0.0,1.0
2018-12-27,99.3,101.19,96.4,101.18,100.7475,49498509,0.0,1.0
2018-12-26,95.14,100.69,93.96,100.56,100.1301,51634793,0.0,1.0
2018-12-24,97.68,97.97,93.98,94.13,93.7276,43935192,0.0,1.0
2018-12-21,101.63,103.0,97.46,98.23,97.8101,111242070,0.0,1.0


### Re-order dataframe
flip order of data frames so oldest dates are first.

In [25]:
def reorder(df):
    return df.sort_index(ascending=True, inplace=True)

In [26]:
dataframes = [apple_df, google_df, amzn_df, msft_df, msft_ma7, msft_ma21, 
              msft_macd, msft_bbands]

for df in dataframes:
    reorder(df)

### Load raw data to flat file

In [27]:
msft_df.to_csv('/Users/jessemailhot/Documents/GitHub/springboard/Capstone 2/raw data/msft.csv')