# Acquire Notebook

In [1]:
import pandas as pd
import numpy as np

import os
import requests

import warnings
warnings.filterwarnings("ignore")

1. Using the code from the lesson as a guide, create a dataframe named items that has all of the data for items.

In [2]:
def get_items_data():
    '''
    Create a dataframe named items that has all of the data for items.

    If items_df.csv exists in the working directory it is read (first column
    as the index) and returned without touching the network. Otherwise every
    page of the items endpoint is requested by following the payload's
    'next_page' link until it is None, the pages are stacked into a single
    dataframe with a fresh 0..n-1 index, and the result is written to
    items_df.csv so later calls can use the cache.

    Returns:
        pandas.DataFrame with one row per item record.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''
    cache_file = 'items_df.csv'

    # Check the cache before any request so a cached run works offline.
    if os.path.isfile(cache_file):
        return pd.read_csv(cache_file, index_col = 0)

    base_url = 'https://python.zach.lol'
    endpoint = '/api/v1/items'
    pages = []

    # Follow next_page links rather than counting to max_page: the API itself
    # says when the last page has been reached.
    while endpoint is not None:
        response = requests.get(base_url + endpoint)
        response.raise_for_status()
        payload = response.json()['payload']
        pages.append(pd.DataFrame(payload['items']))
        endpoint = payload['next_page']

    # One concat at the end avoids re-copying the frame on every page.
    items_df = pd.concat(pages, ignore_index = True)
    items_df.to_csv(cache_file)
    return items_df

2. Do the same thing, but for stores.

In [3]:
def get_store_data():
    '''
    Create a dataframe named store that has all of the data for store.

    If stores_df.csv exists in the working directory it is read (first column
    as the index) and returned without touching the network. Otherwise the
    stores endpoint is requested page by page, following the payload's
    'next_page' link until it is None, so single-page and multi-page results
    are handled by the same loop. The pages are stacked into one dataframe
    with a fresh 0..n-1 index and written to stores_df.csv for later calls.

    Returns:
        pandas.DataFrame with one row per store record.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''
    cache_file = 'stores_df.csv'

    # Check the cache before any request so a cached run works offline.
    if os.path.isfile(cache_file):
        return pd.read_csv(cache_file, index_col = 0)

    base_url = 'https://python.zach.lol'
    endpoint = '/api/v1/stores'
    pages = []

    # Stop as soon as next_page is None; building a URL from None would fail.
    while endpoint is not None:
        response = requests.get(base_url + endpoint)
        response.raise_for_status()
        payload = response.json()['payload']
        pages.append(pd.DataFrame(payload['stores']))
        endpoint = payload['next_page']

    store_df = pd.concat(pages, ignore_index = True)
    store_df.to_csv(cache_file)
    return store_df

3. Extract the data for sales. There are a lot of pages of data here, so your code will need to be a little more complex. Your code should continue fetching data from the next page until all of the data is extracted.

In [4]:
def get_sales_data():
    '''
    Create a dataframe that has all of the data for sales.

    If sales_df.csv exists in the working directory it is read (first column
    as the index) and returned without touching the network. Otherwise the
    sales endpoint is requested page by page, following the payload's
    'next_page' link until it is None. The pages are stacked into one
    dataframe with a fresh 0..n-1 index (no leftover per-page 'index' column)
    and written to sales_df.csv for later calls.

    Progress ('max_page' / 'next_page' for each fetched page) and the final
    shape are printed.

    Returns:
        pandas.DataFrame with one row per sale record.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''
    cache_file = 'sales_df.csv'

    if os.path.isfile(cache_file):
        # Return the cached rows themselves, not a partial first-page frame.
        sales_df = pd.read_csv(cache_file, index_col = 0)
    else:
        base_url = 'https://python.zach.lol'
        endpoint = '/api/v1/sales'
        pages = []

        # next_page is JSON null (None) on the last page, not the text "None".
        while endpoint is not None:
            response = requests.get(base_url + endpoint)
            response.raise_for_status()
            payload = response.json()['payload']
            print('max_page: %s' % payload['max_page'])
            print('next_page: %s' % payload['next_page'])

            pages.append(pd.DataFrame(payload['sales']))
            endpoint = payload['next_page']

        # One concat at the end avoids re-copying the frame on every page.
        sales_df = pd.concat(pages, ignore_index = True)
        sales_df.to_csv(cache_file)

    print('full_shape', sales_df.shape)
    return sales_df

4. Save the data in your files to local csv files so that it will be faster to access in the future.

5. Combine the data from your three separate dataframes into one large dataframe.

In [5]:
def get_all_data():
    '''
    Combine items, store, and sales dataframes.

    Each table is read from its local csv cache (items_df.csv, stores_df.csv,
    sales_df.csv; first column as the index) when that file exists; otherwise
    the matching acquire function is called. The shape of each table is
    printed as it is loaded.

    Sales rows are left-joined to items on 'item' and then to stores on
    'store', so every sale row is kept. The merged dataframe is written to
    store_data.csv (without the index) and returned.

    Returns:
        pandas.DataFrame of sales rows with item and store columns attached.
    '''
    if os.path.isfile('items_df.csv'):
        item_list = pd.read_csv('items_df.csv', index_col = 0)
    else:
        item_list = get_items_data()
    print(item_list.shape)

    if os.path.isfile('stores_df.csv'):
        store_list = pd.read_csv('stores_df.csv', index_col = 0)
    else:
        store_list = get_store_data()
    print(store_list.shape)

    # The sales table must come from the sales cache, not the stores cache.
    if os.path.isfile('sales_df.csv'):
        sales_list = pd.read_csv('sales_df.csv', index_col = 0)
    else:
        sales_list = get_sales_data()
    print(sales_list.shape)

    # Rename columns so the join keys match the sales table. rename() returns
    # a copy, so the loaded frames are not mutated.
    store_list = store_list.rename(columns = {'store_id': 'store'})
    # NOTE(review): presumably items expose their key as 'item_id', mirroring
    # 'store_id' on stores - confirm against the API. Only applied when no
    # 'item' column is already present.
    if 'item' not in item_list.columns:
        item_list = item_list.rename(columns = {'item_id': 'item'})
    print('renamed columns')

    # Merge the three dataframes:
    sales_items = pd.merge(sales_list, item_list, how = 'left', on = 'item')
    all_df = pd.merge(sales_items, store_list, how = 'left', on = 'store')

    all_df.to_csv('store_data.csv', index = False)

    return all_df

6. Acquire the Open Power Systems Data for Germany, which has been rapidly expanding its renewable energy production in recent years. The data set includes country-wide totals of electricity consumption, wind power production, and solar power production for 2006-2017. You can get the data here: https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv

In [6]:
def opsd_germany_daily():
    """
    Return the daily Open Power Systems Data for Germany as a df.

    When opsd_germany_daily.csv is not present in the working directory,
    the data is downloaded from GitHub, saved under that name, and the
    downloaded df is returned. When the file is present, it is read back
    with its first column as the index.
    """
    local_copy = 'opsd_germany_daily.csv'

    # No local copy yet: download once, store it, and hand back the download.
    if not os.path.isfile(local_copy):
        source = 'https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv'
        downloaded = pd.read_csv(source)
        downloaded.to_csv(local_copy)
        return downloaded

    return pd.read_csv(local_copy, index_col=0)

7. Make sure all the work that you have done above is reproducible. That is, you should put the code above into separate functions in the acquire.py file and be able to re-run the functions and get the same data.